Fixing formatting issue in notebook (#512)

carlyrichmond · web-flow · commit e13191a94f10 · 2025-12-03T18:53:57.000+02:00
diff --git a/supporting-blog-content/elasticsearch-chatgpt-connector/elasticsearch-mcp-server-for-chatgpt.ipynb b/supporting-blog-content/elasticsearch-chatgpt-connector/elasticsearch-mcp-server-for-chatgpt.ipynb
@@ -139,10 +139,20 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "os.environ[\"ELASTICSEARCH_URL\"] = os.environ.get(\"ELASTICSEARCH_URL\") or getpass(\"Enter your Elasticsearch URL: \")\n",
-    "os.environ[\"ELASTICSEARCH_API_KEY\"] = os.environ.get(\"ELASTICSEARCH_API_KEY\") or getpass(\"Enter your Elasticsearch API key: \")\n",
-    "os.environ[\"NGROK_TOKEN\"] = os.environ.get(\"NGROK_TOKEN\") or getpass(\"Enter your Ngrok Token: \")\n",
-    "os.environ[\"ELASTICSEARCH_INDEX\"] = os.environ.get(\"ELASTICSEARCH_INDEX\") or getpass(\"Enter your Elasticsearch Index name (default: github_internal): \") or \"github_internal\"\n",
+    "os.environ[\"ELASTICSEARCH_URL\"] = os.environ.get(\"ELASTICSEARCH_URL\") or getpass(\n",
+    "    \"Enter your Elasticsearch URL: \"\n",
+    ")\n",
+    "os.environ[\"ELASTICSEARCH_API_KEY\"] = os.environ.get(\n",
+    "    \"ELASTICSEARCH_API_KEY\"\n",
+    ") or getpass(\"Enter your Elasticsearch API key: \")\n",
+    "os.environ[\"NGROK_TOKEN\"] = os.environ.get(\"NGROK_TOKEN\") or getpass(\n",
+    "    \"Enter your Ngrok Token: \"\n",
+    ")\n",
+    "os.environ[\"ELASTICSEARCH_INDEX\"] = (\n",
+    "    os.environ.get(\"ELASTICSEARCH_INDEX\")\n",
+    "    or getpass(\"Enter your Elasticsearch Index name (default: github_internal): \")\n",
+    "    or \"github_internal\"\n",
+    ")\n",
     "\n",
     "ELASTICSEARCH_URL = os.environ[\"ELASTICSEARCH_URL\"]\n",
     "ELASTICSEARCH_API_KEY = os.environ[\"ELASTICSEARCH_API_KEY\"]\n",
@@ -177,10 +187,7 @@
    },
    "outputs": [],
    "source": [
-    "es_client = Elasticsearch(\n",
-    "    ELASTICSEARCH_URL,\n",
-    "    api_key=ELASTICSEARCH_API_KEY\n",
-    ")\n",
+    "es_client = Elasticsearch(ELASTICSEARCH_URL, api_key=ELASTICSEARCH_API_KEY)\n",
     "\n",
     "if es_client.ping():\n",
     "    print(\"Elasticsearch connection successful\")\n",
@@ -225,7 +232,7 @@
     "                    \"text\": {\"type\": \"text\"},\n",
     "                    \"text_semantic\": {\n",
     "                        \"type\": \"semantic_text\",\n",
-    "                        \"inference_id\": \".elser-2-elasticsearch\"\n",
+    "                        \"inference_id\": \".elser-2-elasticsearch\",\n",
     "                    },\n",
     "                    \"url\": {\"type\": \"keyword\"},\n",
     "                    \"type\": {\"type\": \"keyword\"},\n",
@@ -235,14 +242,14 @@
     "                    \"created_date\": {\"type\": \"date\", \"format\": \"iso8601\"},\n",
     "                    \"resolved_date\": {\"type\": \"date\", \"format\": \"iso8601\"},\n",
     "                    \"labels\": {\"type\": \"keyword\"},\n",
-    "                    \"related_pr\": {\"type\": \"keyword\"}\n",
+    "                    \"related_pr\": {\"type\": \"keyword\"},\n",
     "                }\n",
     "            }\n",
-    "        }\n",
+    "        },\n",
     "    )\n",
     "    print(f\"Index '{INDEX_NAME}' created successfully\")\n",
     "except Exception as e:\n",
-    "    if 'resource_already_exists_exception' in str(e):\n",
+    "    if \"resource_already_exists_exception\" in str(e):\n",
     "        print(f\"Index '{INDEX_NAME}' already exists\")\n",
     "    else:\n",
     "        print(f\"Error creating index: {e}\")"
@@ -629,10 +636,10 @@
     }
    ],
    "source": [
-    "file_path = 'github_internal_dataset.json'\n",
+    "file_path = \"github_internal_dataset.json\"\n",
     "df = pd.read_json(file_path)\n",
     "\n",
-    "documents = df.to_dict('records')\n",
+    "documents = df.to_dict(\"records\")\n",
     "print(f\"Loaded {len(documents)} documents from dataset\")\n",
     "\n",
     "df"
@@ -663,11 +670,9 @@
    "source": [
     "def generate_actions():\n",
     "    for doc in documents:\n",
-    "        doc['text_semantic'] = doc['text']\n",
-    "        yield {\n",
-    "            '_index': INDEX_NAME,\n",
-    "            '_source': doc\n",
-    "        }\n",
+    "        doc[\"text_semantic\"] = doc[\"text\"]\n",
+    "        yield {\"_index\": INDEX_NAME, \"_source\": doc}\n",
+    "\n",
     "\n",
     "try:\n",
     "    success, errors = bulk(es_client, generate_actions())\n",
@@ -679,7 +684,7 @@
     "    print(\"Waiting 15 seconds for ELSER to process documents...\")\n",
     "    time.sleep(15)\n",
     "\n",
-    "    count = es_client.count(index=INDEX_NAME)['count']\n",
+    "    count = es_client.count(index=INDEX_NAME)[\"count\"]\n",
     "    print(f\"Total documents in index: {count}\")\n",
     "\n",
     "except Exception as e:\n",
@@ -725,10 +730,10 @@
     "Use search to find relevant issues/PRs, then fetch to get complete details.\n",
     "\"\"\"\n",
     "\n",
+    "\n",
     "def create_server():\n",
     "    mcp = FastMCP(\n",
-    "        name=\"Elasticsearch GitHub Issues MCP\",\n",
-    "        instructions=server_instructions\n",
+    "        name=\"Elasticsearch GitHub Issues MCP\", instructions=server_instructions\n",
     "    )\n",
     "\n",
     "    @mcp.tool()\n",
@@ -757,7 +762,7 @@
     "                                    \"query\": {\n",
     "                                        \"semantic\": {\n",
     "                                            \"field\": \"text_semantic\",\n",
-    "                                            \"query\": query\n",
+    "                                            \"query\": query,\n",
     "                                        }\n",
     "                                    }\n",
     "                                }\n",
@@ -774,31 +779,33 @@
     "                                                \"assignee^2\",\n",
     "                                                \"type\",\n",
     "                                                \"labels\",\n",
-    "                                                \"priority\"\n",
+    "                                                \"priority\",\n",
     "                                            ],\n",
     "                                            \"type\": \"best_fields\",\n",
-    "                                            \"fuzziness\": \"AUTO\"\n",
+    "                                            \"fuzziness\": \"AUTO\",\n",
     "                                        }\n",
     "                                    }\n",
     "                                }\n",
-    "                            }\n",
+    "                            },\n",
     "                        ],\n",
     "                        \"rank_window_size\": 50,\n",
-    "                        \"rank_constant\": 60\n",
+    "                        \"rank_constant\": 60,\n",
     "                    }\n",
-    "                }\n",
+    "                },\n",
     "            )\n",
     "\n",
     "            # Extract and format search results\n",
     "            results = []\n",
-    "            if response and 'hits' in response:\n",
-    "                for hit in response['hits']['hits']:\n",
-    "                    source = hit['_source']\n",
-    "                    results.append({\n",
-    "                        \"id\": source.get('id', hit['_id']),\n",
-    "                        \"title\": source.get('title', 'Unknown'),\n",
-    "                        \"url\": source.get('url', '')\n",
-    "                    })\n",
+    "            if response and \"hits\" in response:\n",
+    "                for hit in response[\"hits\"][\"hits\"]:\n",
+    "                    source = hit[\"_source\"]\n",
+    "                    results.append(\n",
+    "                        {\n",
+    "                            \"id\": source.get(\"id\", hit[\"_id\"]),\n",
+    "                            \"title\": source.get(\"title\", \"Unknown\"),\n",
+    "                            \"url\": source.get(\"url\", \"\"),\n",
+    "                        }\n",
+    "                    )\n",
     "\n",
     "            logger.info(f\"Found {len(results)} results\")\n",
     "            return {\"results\": results}\n",
@@ -821,37 +828,29 @@
     "        try:\n",
     "            # Query by ID to get full document\n",
     "            response = es_client.search(\n",
-    "                index=INDEX_NAME,\n",
-    "                body={\n",
-    "                    \"query\": {\n",
-    "                        \"term\": {\n",
-    "                            \"id\": id\n",
-    "                        }\n",
-    "                    },\n",
-    "                    \"size\": 1\n",
-    "                }\n",
+    "                index=INDEX_NAME, body={\"query\": {\"term\": {\"id\": id}}, \"size\": 1}\n",
     "            )\n",
     "\n",
-    "            if not response or not response['hits']['hits']:\n",
+    "            if not response or not response[\"hits\"][\"hits\"]:\n",
     "                raise ValueError(f\"Document with id '{id}' not found\")\n",
     "\n",
-    "            hit = response['hits']['hits'][0]\n",
-    "            source = hit['_source']\n",
+    "            hit = response[\"hits\"][\"hits\"][0]\n",
+    "            source = hit[\"_source\"]\n",
     "\n",
     "            # Return all document fields\n",
     "            result = {\n",
-    "                \"id\": source.get('id', id),\n",
-    "                \"title\": source.get('title', 'Unknown'),\n",
-    "                \"text\": source.get('text', ''),\n",
-    "                \"url\": source.get('url', ''),\n",
-    "                \"type\": source.get('type', ''),\n",
-    "                \"status\": source.get('status', ''),\n",
-    "                \"priority\": source.get('priority', ''),\n",
-    "                \"assignee\": source.get('assignee', ''),\n",
-    "                \"created_date\": source.get('created_date', ''),\n",
-    "                \"resolved_date\": source.get('resolved_date', ''),\n",
-    "                \"labels\": source.get('labels', ''),\n",
-    "                \"related_pr\": source.get('related_pr', '')\n",
+    "                \"id\": source.get(\"id\", id),\n",
+    "                \"title\": source.get(\"title\", \"Unknown\"),\n",
+    "                \"text\": source.get(\"text\", \"\"),\n",
+    "                \"url\": source.get(\"url\", \"\"),\n",
+    "                \"type\": source.get(\"type\", \"\"),\n",
+    "                \"status\": source.get(\"status\", \"\"),\n",
+    "                \"priority\": source.get(\"priority\", \"\"),\n",
+    "                \"assignee\": source.get(\"assignee\", \"\"),\n",
+    "                \"created_date\": source.get(\"created_date\", \"\"),\n",
+    "                \"resolved_date\": source.get(\"resolved_date\", \"\"),\n",
+    "                \"labels\": source.get(\"labels\", \"\"),\n",
+    "                \"related_pr\": source.get(\"related_pr\", \"\"),\n",
     "            }\n",
     "\n",
     "            logger.info(f\"Fetched: {result['title']}\")\n",
@@ -863,6 +862,7 @@
     "\n",
     "    return mcp\n",
     "\n",
+    "\n",
     "print(\"MCP server defined successfully\")"
    ]
   },
@@ -892,16 +892,11 @@
     "ngrok.set_auth_token(NGROK_TOKEN)\n",
     "\n",
     "pyngrok_config = PyngrokConfig(region=\"us\")\n",
-    "public_url = ngrok.connect(\n",
-    "    8000,\n",
-    "    \"http\",\n",
-    "    pyngrok_config=pyngrok_config,\n",
-    "    bind_tls=True\n",
-    ")\n",
+    "public_url = ngrok.connect(8000, \"http\", pyngrok_config=pyngrok_config, bind_tls=True)\n",
     "\n",
-    "print(\"=\"*70)\n",
+    "print(\"=\" * 70)\n",
     "print(\"MCP SERVER IS READY!\")\n",
-    "print(\"=\"*70)\n",
+    "print(\"=\" * 70)\n",
     "print(f\"\\nPublic URL (use in ChatGPT): {public_url}/sse\")\n",
     "print(\"\\nIMPORTANT: Copy the URL above (including /sse at the end)\")\n",
     "print(\"\\nTo connect in ChatGPT:\")\n",
@@ -910,7 +905,7 @@
     "print(\"3. Paste the URL above\")\n",
     "print(\"4. Save and start using!\")\n",
     "print(\"\\nKeep this notebook running while using the connector\")\n",
-    "print(\"=\"*70)"
+    "print(\"=\" * 70)"
    ]
   },
   {
@@ -1089,9 +1084,11 @@
     "print(\"Server is running. To stop: Runtime > Interrupt execution\")\n",
     "print()\n",
     "\n",
+    "\n",
     "def run_server():\n",
     "    server.run(transport=\"sse\", host=\"0.0.0.0\", port=8000)\n",
     "\n",
+    "\n",
     "server_thread = threading.Thread(target=run_server, daemon=True)\n",
     "server_thread.start()\n",
     "\n",
@@ -1143,8 +1140,10 @@
    "outputs": [],
    "source": [
     "try:\n",
-    "    result = es_client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)\n",
-    "    if result.get('acknowledged', False):\n",
+    "    result = es_client.options(ignore_status=[400, 404]).indices.delete(\n",
+    "        index=INDEX_NAME\n",
+    "    )\n",
+    "    if result.get(\"acknowledged\", False):\n",
     "        print(f\"Index '{INDEX_NAME}' deleted successfully\")\n",
     "    else:\n",
     "        print(f\"Error deleting index: {result}\")\n",