|
139 | 139 | "metadata": {}, |
140 | 140 | "outputs": [], |
141 | 141 | "source": [ |
142 | | - "os.environ[\"ELASTICSEARCH_URL\"] = os.environ.get(\"ELASTICSEARCH_URL\") or getpass(\"Enter your Elasticsearch URL: \")\n", |
143 | | - "os.environ[\"ELASTICSEARCH_API_KEY\"] = os.environ.get(\"ELASTICSEARCH_API_KEY\") or getpass(\"Enter your Elasticsearch API key: \")\n", |
144 | | - "os.environ[\"NGROK_TOKEN\"] = os.environ.get(\"NGROK_TOKEN\") or getpass(\"Enter your Ngrok Token: \")\n", |
145 | | - "os.environ[\"ELASTICSEARCH_INDEX\"] = os.environ.get(\"ELASTICSEARCH_INDEX\") or getpass(\"Enter your Elasticsearch Index name (default: github_internal): \") or \"github_internal\"\n", |
| 142 | + "os.environ[\"ELASTICSEARCH_URL\"] = os.environ.get(\"ELASTICSEARCH_URL\") or getpass(\n", |
| 143 | + " \"Enter your Elasticsearch URL: \"\n", |
| 144 | + ")\n", |
| 145 | + "os.environ[\"ELASTICSEARCH_API_KEY\"] = os.environ.get(\n", |
| 146 | + " \"ELASTICSEARCH_API_KEY\"\n", |
| 147 | + ") or getpass(\"Enter your Elasticsearch API key: \")\n", |
| 148 | + "os.environ[\"NGROK_TOKEN\"] = os.environ.get(\"NGROK_TOKEN\") or getpass(\n", |
| 149 | + " \"Enter your Ngrok Token: \"\n", |
| 150 | + ")\n", |
| 151 | + "os.environ[\"ELASTICSEARCH_INDEX\"] = (\n", |
| 152 | + " os.environ.get(\"ELASTICSEARCH_INDEX\")\n", |
| 153 | + " or getpass(\"Enter your Elasticsearch Index name (default: github_internal): \")\n", |
| 154 | + " or \"github_internal\"\n", |
| 155 | + ")\n", |
146 | 156 | "\n", |
147 | 157 | "ELASTICSEARCH_URL = os.environ[\"ELASTICSEARCH_URL\"]\n", |
148 | 158 | "ELASTICSEARCH_API_KEY = os.environ[\"ELASTICSEARCH_API_KEY\"]\n", |
|
177 | 187 | }, |
178 | 188 | "outputs": [], |
179 | 189 | "source": [ |
180 | | - "es_client = Elasticsearch(\n", |
181 | | - " ELASTICSEARCH_URL,\n", |
182 | | - " api_key=ELASTICSEARCH_API_KEY\n", |
183 | | - ")\n", |
| 190 | + "es_client = Elasticsearch(ELASTICSEARCH_URL, api_key=ELASTICSEARCH_API_KEY)\n", |
184 | 191 | "\n", |
185 | 192 | "if es_client.ping():\n", |
186 | 193 | " print(\"Elasticsearch connection successful\")\n", |
|
225 | 232 | " \"text\": {\"type\": \"text\"},\n", |
226 | 233 | " \"text_semantic\": {\n", |
227 | 234 | " \"type\": \"semantic_text\",\n", |
228 | | - " \"inference_id\": \".elser-2-elasticsearch\"\n", |
| 235 | + " \"inference_id\": \".elser-2-elasticsearch\",\n", |
229 | 236 | " },\n", |
230 | 237 | " \"url\": {\"type\": \"keyword\"},\n", |
231 | 238 | " \"type\": {\"type\": \"keyword\"},\n", |
|
235 | 242 | " \"created_date\": {\"type\": \"date\", \"format\": \"iso8601\"},\n", |
236 | 243 | " \"resolved_date\": {\"type\": \"date\", \"format\": \"iso8601\"},\n", |
237 | 244 | " \"labels\": {\"type\": \"keyword\"},\n", |
238 | | - " \"related_pr\": {\"type\": \"keyword\"}\n", |
| 245 | + " \"related_pr\": {\"type\": \"keyword\"},\n", |
239 | 246 | " }\n", |
240 | 247 | " }\n", |
241 | | - " }\n", |
| 248 | + " },\n", |
242 | 249 | " )\n", |
243 | 250 | " print(f\"Index '{INDEX_NAME}' created successfully\")\n", |
244 | 251 | "except Exception as e:\n", |
245 | | - " if 'resource_already_exists_exception' in str(e):\n", |
| 252 | + " if \"resource_already_exists_exception\" in str(e):\n", |
246 | 253 | " print(f\"Index '{INDEX_NAME}' already exists\")\n", |
247 | 254 | " else:\n", |
248 | 255 | " print(f\"Error creating index: {e}\")" |
|
629 | 636 | } |
630 | 637 | ], |
631 | 638 | "source": [ |
632 | | - "file_path = 'github_internal_dataset.json'\n", |
| 639 | + "file_path = \"github_internal_dataset.json\"\n", |
633 | 640 | "df = pd.read_json(file_path)\n", |
634 | 641 | "\n", |
635 | | - "documents = df.to_dict('records')\n", |
| 642 | + "documents = df.to_dict(\"records\")\n", |
636 | 643 | "print(f\"Loaded {len(documents)} documents from dataset\")\n", |
637 | 644 | "\n", |
638 | 645 | "df" |
|
663 | 670 | "source": [ |
664 | 671 | "def generate_actions():\n", |
665 | 672 | " for doc in documents:\n", |
666 | | - " doc['text_semantic'] = doc['text']\n", |
667 | | - " yield {\n", |
668 | | - " '_index': INDEX_NAME,\n", |
669 | | - " '_source': doc\n", |
670 | | - " }\n", |
| 673 | + " doc[\"text_semantic\"] = doc[\"text\"]\n", |
| 674 | + " yield {\"_index\": INDEX_NAME, \"_source\": doc}\n", |
| 675 | + "\n", |
671 | 676 | "\n", |
672 | 677 | "try:\n", |
673 | 678 | " success, errors = bulk(es_client, generate_actions())\n", |
|
679 | 684 | " print(\"Waiting 15 seconds for ELSER to process documents...\")\n", |
680 | 685 | " time.sleep(15)\n", |
681 | 686 | "\n", |
682 | | - " count = es_client.count(index=INDEX_NAME)['count']\n", |
| 687 | + " count = es_client.count(index=INDEX_NAME)[\"count\"]\n", |
683 | 688 | " print(f\"Total documents in index: {count}\")\n", |
684 | 689 | "\n", |
685 | 690 | "except Exception as e:\n", |
|
725 | 730 | "Use search to find relevant issues/PRs, then fetch to get complete details.\n", |
726 | 731 | "\"\"\"\n", |
727 | 732 | "\n", |
| 733 | + "\n", |
728 | 734 | "def create_server():\n", |
729 | 735 | " mcp = FastMCP(\n", |
730 | | - " name=\"Elasticsearch GitHub Issues MCP\",\n", |
731 | | - " instructions=server_instructions\n", |
| 736 | + " name=\"Elasticsearch GitHub Issues MCP\", instructions=server_instructions\n", |
732 | 737 | " )\n", |
733 | 738 | "\n", |
734 | 739 | " @mcp.tool()\n", |
|
757 | 762 | " \"query\": {\n", |
758 | 763 | " \"semantic\": {\n", |
759 | 764 | " \"field\": \"text_semantic\",\n", |
760 | | - " \"query\": query\n", |
| 765 | + " \"query\": query,\n", |
761 | 766 | " }\n", |
762 | 767 | " }\n", |
763 | 768 | " }\n", |
|
774 | 779 | " \"assignee^2\",\n", |
775 | 780 | " \"type\",\n", |
776 | 781 | " \"labels\",\n", |
777 | | - " \"priority\"\n", |
| 782 | + " \"priority\",\n", |
778 | 783 | " ],\n", |
779 | 784 | " \"type\": \"best_fields\",\n", |
780 | | - " \"fuzziness\": \"AUTO\"\n", |
| 785 | + " \"fuzziness\": \"AUTO\",\n", |
781 | 786 | " }\n", |
782 | 787 | " }\n", |
783 | 788 | " }\n", |
784 | | - " }\n", |
| 789 | + " },\n", |
785 | 790 | " ],\n", |
786 | 791 | " \"rank_window_size\": 50,\n", |
787 | | - " \"rank_constant\": 60\n", |
| 792 | + " \"rank_constant\": 60,\n", |
788 | 793 | " }\n", |
789 | | - " }\n", |
| 794 | + " },\n", |
790 | 795 | " )\n", |
791 | 796 | "\n", |
792 | 797 | " # Extract and format search results\n", |
793 | 798 | " results = []\n", |
794 | | - " if response and 'hits' in response:\n", |
795 | | - " for hit in response['hits']['hits']:\n", |
796 | | - " source = hit['_source']\n", |
797 | | - " results.append({\n", |
798 | | - " \"id\": source.get('id', hit['_id']),\n", |
799 | | - " \"title\": source.get('title', 'Unknown'),\n", |
800 | | - " \"url\": source.get('url', '')\n", |
801 | | - " })\n", |
| 799 | + " if response and \"hits\" in response:\n", |
| 800 | + " for hit in response[\"hits\"][\"hits\"]:\n", |
| 801 | + " source = hit[\"_source\"]\n", |
| 802 | + " results.append(\n", |
| 803 | + " {\n", |
| 804 | + " \"id\": source.get(\"id\", hit[\"_id\"]),\n", |
| 805 | + " \"title\": source.get(\"title\", \"Unknown\"),\n", |
| 806 | + " \"url\": source.get(\"url\", \"\"),\n", |
| 807 | + " }\n", |
| 808 | + " )\n", |
802 | 809 | "\n", |
803 | 810 | " logger.info(f\"Found {len(results)} results\")\n", |
804 | 811 | " return {\"results\": results}\n", |
|
821 | 828 | " try:\n", |
822 | 829 | " # Query by ID to get full document\n", |
823 | 830 | " response = es_client.search(\n", |
824 | | - " index=INDEX_NAME,\n", |
825 | | - " body={\n", |
826 | | - " \"query\": {\n", |
827 | | - " \"term\": {\n", |
828 | | - " \"id\": id\n", |
829 | | - " }\n", |
830 | | - " },\n", |
831 | | - " \"size\": 1\n", |
832 | | - " }\n", |
| 831 | + " index=INDEX_NAME, body={\"query\": {\"term\": {\"id\": id}}, \"size\": 1}\n", |
833 | 832 | " )\n", |
834 | 833 | "\n", |
835 | | - " if not response or not response['hits']['hits']:\n", |
| 834 | + " if not response or not response[\"hits\"][\"hits\"]:\n", |
836 | 835 | " raise ValueError(f\"Document with id '{id}' not found\")\n", |
837 | 836 | "\n", |
838 | | - " hit = response['hits']['hits'][0]\n", |
839 | | - " source = hit['_source']\n", |
| 837 | + " hit = response[\"hits\"][\"hits\"][0]\n", |
| 838 | + " source = hit[\"_source\"]\n", |
840 | 839 | "\n", |
841 | 840 | " # Return all document fields\n", |
842 | 841 | " result = {\n", |
843 | | - " \"id\": source.get('id', id),\n", |
844 | | - " \"title\": source.get('title', 'Unknown'),\n", |
845 | | - " \"text\": source.get('text', ''),\n", |
846 | | - " \"url\": source.get('url', ''),\n", |
847 | | - " \"type\": source.get('type', ''),\n", |
848 | | - " \"status\": source.get('status', ''),\n", |
849 | | - " \"priority\": source.get('priority', ''),\n", |
850 | | - " \"assignee\": source.get('assignee', ''),\n", |
851 | | - " \"created_date\": source.get('created_date', ''),\n", |
852 | | - " \"resolved_date\": source.get('resolved_date', ''),\n", |
853 | | - " \"labels\": source.get('labels', ''),\n", |
854 | | - " \"related_pr\": source.get('related_pr', '')\n", |
| 842 | + " \"id\": source.get(\"id\", id),\n", |
| 843 | + " \"title\": source.get(\"title\", \"Unknown\"),\n", |
| 844 | + " \"text\": source.get(\"text\", \"\"),\n", |
| 845 | + " \"url\": source.get(\"url\", \"\"),\n", |
| 846 | + " \"type\": source.get(\"type\", \"\"),\n", |
| 847 | + " \"status\": source.get(\"status\", \"\"),\n", |
| 848 | + " \"priority\": source.get(\"priority\", \"\"),\n", |
| 849 | + " \"assignee\": source.get(\"assignee\", \"\"),\n", |
| 850 | + " \"created_date\": source.get(\"created_date\", \"\"),\n", |
| 851 | + " \"resolved_date\": source.get(\"resolved_date\", \"\"),\n", |
| 852 | + " \"labels\": source.get(\"labels\", \"\"),\n", |
| 853 | + " \"related_pr\": source.get(\"related_pr\", \"\"),\n", |
855 | 854 | " }\n", |
856 | 855 | "\n", |
857 | 856 | " logger.info(f\"Fetched: {result['title']}\")\n", |
|
863 | 862 | "\n", |
864 | 863 | " return mcp\n", |
865 | 864 | "\n", |
| 865 | + "\n", |
866 | 866 | "print(\"MCP server defined successfully\")" |
867 | 867 | ] |
868 | 868 | }, |
|
892 | 892 | "ngrok.set_auth_token(NGROK_TOKEN)\n", |
893 | 893 | "\n", |
894 | 894 | "pyngrok_config = PyngrokConfig(region=\"us\")\n", |
895 | | - "public_url = ngrok.connect(\n", |
896 | | - " 8000,\n", |
897 | | - " \"http\",\n", |
898 | | - " pyngrok_config=pyngrok_config,\n", |
899 | | - " bind_tls=True\n", |
900 | | - ")\n", |
| 895 | + "public_url = ngrok.connect(8000, \"http\", pyngrok_config=pyngrok_config, bind_tls=True)\n", |
901 | 896 | "\n", |
902 | | - "print(\"=\"*70)\n", |
| 897 | + "print(\"=\" * 70)\n", |
903 | 898 | "print(\"MCP SERVER IS READY!\")\n", |
904 | | - "print(\"=\"*70)\n", |
| 899 | + "print(\"=\" * 70)\n", |
905 | 900 | "print(f\"\\nPublic URL (use in ChatGPT): {public_url}/sse\")\n", |
906 | 901 | "print(\"\\nIMPORTANT: Copy the URL above (including /sse at the end)\")\n", |
907 | 902 | "print(\"\\nTo connect in ChatGPT:\")\n", |
|
910 | 905 | "print(\"3. Paste the URL above\")\n", |
911 | 906 | "print(\"4. Save and start using!\")\n", |
912 | 907 | "print(\"\\nKeep this notebook running while using the connector\")\n", |
913 | | - "print(\"=\"*70)" |
| 908 | + "print(\"=\" * 70)" |
914 | 909 | ] |
915 | 910 | }, |
916 | 911 | { |
|
1089 | 1084 | "print(\"Server is running. To stop: Runtime > Interrupt execution\")\n", |
1090 | 1085 | "print()\n", |
1091 | 1086 | "\n", |
| 1087 | + "\n", |
1092 | 1088 | "def run_server():\n", |
1093 | 1089 | " server.run(transport=\"sse\", host=\"0.0.0.0\", port=8000)\n", |
1094 | 1090 | "\n", |
| 1091 | + "\n", |
1095 | 1092 | "server_thread = threading.Thread(target=run_server, daemon=True)\n", |
1096 | 1093 | "server_thread.start()\n", |
1097 | 1094 | "\n", |
|
1143 | 1140 | "outputs": [], |
1144 | 1141 | "source": [ |
1145 | 1142 | "try:\n", |
1146 | | - " result = es_client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)\n", |
1147 | | - " if result.get('acknowledged', False):\n", |
| 1143 | + " result = es_client.options(ignore_status=[400, 404]).indices.delete(\n", |
| 1144 | + " index=INDEX_NAME\n", |
| 1145 | + " )\n", |
| 1146 | + " if result.get(\"acknowledged\", False):\n", |
1148 | 1147 | " print(f\"Index '{INDEX_NAME}' deleted successfully\")\n", |
1149 | 1148 | " else:\n", |
1150 | 1149 | " print(f\"Error deleting index: {result}\")\n", |
|
0 commit comments