Skip to content

Microsoft Copilot with Elasticsearch supporting blog content #456

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,332 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "386dbd82",
"metadata": {},
"source": [
"# Enhance Microsoft Copilot with Elasticsearch\n",
"\n",
"This notebook execute an API that allows you to search for invoices using Elasticsearch generating a Ngrok tunnel to expose the API to the internet. This notebook is based on the article [Enhance Microsoft Copilot with Elasticsearch](https://www.elastic.co/blog/enhance-microsoft-copilot-with-elasticsearch)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d460f865",
"metadata": {},
"outputs": [],
"source": [
"%pip install fastapi pyngrok uvicorn nest-asyncio elasticsearch==9 -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ac47371",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"from getpass import getpass\n",
"from datetime import datetime\n",
"\n",
"import nest_asyncio\n",
"import uvicorn\n",
"\n",
"from fastapi import FastAPI, Query\n",
"from pyngrok import conf, ngrok\n",
"\n",
"from elasticsearch.helpers import bulk\n",
"from elasticsearch import Elasticsearch"
]
},
{
"cell_type": "markdown",
"id": "64167eee",
"metadata": {},
"source": [
"## Setup Variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa378fdb",
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"ELASTICSEARCH_ENDPOINT\"] = getpass(\"Elastic Endpoint: \")\n",
"os.environ[\"ELASTICSEARCH_API_KEY\"] = getpass(\"Elastic Api Key: \")\n",
"os.environ[\"NGROK_AUTH_TOKEN\"] = getpass(\"Ngrok Auth Token: \")\n",
"\n",
"\n",
"INDEX_NAME = \"invoices\""
]
},
{
"cell_type": "markdown",
"id": "31041b60",
"metadata": {},
"source": [
"## Elasticsearch client"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d8a8201",
"metadata": {},
"outputs": [],
"source": [
"_client = Elasticsearch(\n",
" os.environ[\"ELASTICSEARCH_ENDPOINT\"],\n",
" api_key=os.environ[\"ELASTICSEARCH_API_KEY\"],\n",
")"
]
},
{
"cell_type": "markdown",
"id": "07578680",
"metadata": {},
"source": [
"## Mappings"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c34a804a",
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" _client.indices.create(\n",
" index=INDEX_NAME,\n",
" body={\n",
" \"mappings\": {\n",
" \"properties\": {\n",
" \"id\": {\"type\": \"keyword\"},\n",
" \"file_url\": {\"type\": \"keyword\"},\n",
" \"issue_date\": {\"type\": \"date\"},\n",
" \"description\": {\"type\": \"text\", \"copy_to\": \"semantic_field\"},\n",
" \"services\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"name\": {\n",
" \"type\": \"text\",\n",
" \"copy_to\": \"semantic_field\",\n",
" },\n",
" \"price\": {\"type\": \"float\"},\n",
" },\n",
" },\n",
" \"total_amount\": {\n",
" \"type\": \"float\",\n",
" },\n",
" \"semantic_field\": {\"type\": \"semantic_text\"},\n",
" }\n",
" }\n",
" },\n",
" )\n",
"\n",
" print(\"index created successfully\")\n",
"except Exception as e:\n",
" print(\n",
" f\"Error creating inference endpoint: {e.info['error']['root_cause'][0]['reason'] }\"\n",
" )"
]
},
{
"cell_type": "markdown",
"id": "02a2c25a",
"metadata": {},
"source": [
"## Ingesting documents to Elasticsearch"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "69f388c0",
"metadata": {},
"outputs": [],
"source": [
"with open(\"invoices_data.json\", \"r\", encoding=\"utf-8\") as f:\n",
" invoices = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b96c42fb",
"metadata": {},
"outputs": [],
"source": [
"def build_data():\n",
" for doc in invoices:\n",
" yield {\"_index\": INDEX_NAME, \"_source\": doc}\n",
"\n",
"\n",
"try:\n",
" success, errors = bulk(_client, build_data())\n",
" print(f\"{success} documents indexed successfully\")\n",
"\n",
" if errors:\n",
" print(\"Errors during indexing:\", errors)\n",
"\n",
"except Exception as e:\n",
" print(f\"Error: {str(e)}, please wait some seconds and try again.\")"
]
},
{
"cell_type": "markdown",
"id": "d38c1869",
"metadata": {},
"source": [
"## Building API"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ad221fb",
"metadata": {},
"outputs": [],
"source": [
"app = FastAPI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76106dad",
"metadata": {},
"outputs": [],
"source": [
"@app.get(\"/search/semantic\")\n",
"async def search_semantic(query: str = Query(None)):\n",
" try:\n",
" result = _client.search(\n",
" index=INDEX_NAME,\n",
" query={\n",
" \"semantic\": {\n",
" \"field\": \"semantic_field\",\n",
" \"query\": query,\n",
" }\n",
" },\n",
" )\n",
"\n",
" hits = result[\"hits\"][\"hits\"]\n",
" results = [{\"score\": hit[\"_score\"], **hit[\"_source\"]} for hit in hits]\n",
"\n",
" return results\n",
" except Exception as e:\n",
" return Exception(f\"Error: {str(e)}\")\n",
"\n",
"\n",
"@app.get(\"/search/by-date\")\n",
"async def search_by_date(from_date: str = Query(None), to_date: str = Query(None)):\n",
" try:\n",
" from_dt = datetime.strptime(from_date, \"%m/%d/%Y %I:%M:%S %p\")\n",
" to_dt = datetime.strptime(to_date, \"%m/%d/%Y %I:%M:%S %p\")\n",
"\n",
" formatted_from = from_dt.strftime(\"%d/%m/%Y\")\n",
" formatted_to = to_dt.strftime(\"%d/%m/%Y\")\n",
"\n",
" result = _client.search(\n",
" index=INDEX_NAME,\n",
" query={\n",
" \"range\": {\n",
" \"issue_date\": {\n",
" \"gte\": formatted_from,\n",
" \"lte\": formatted_to,\n",
" \"format\": \"dd/MM/yyyy\",\n",
" }\n",
" }\n",
" },\n",
" )\n",
"\n",
" hits = result[\"hits\"][\"hits\"]\n",
" results = [hit[\"_source\"] for hit in hits]\n",
"\n",
" return results\n",
" except Exception as e:\n",
" return Exception(f\"Error: {str(e)}\")"
]
},
{
"cell_type": "markdown",
"id": "cf1460e9",
"metadata": {},
"source": [
"## Running the API"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "517c85c3",
"metadata": {},
"outputs": [],
"source": [
"conf.get_default().auth_token = os.environ[\"NGROK_AUTH_TOKEN\"]\n",
"ngrok_tunnel = ngrok.connect(8000)\n",
"\n",
"print(\"Public URL:\", ngrok_tunnel.public_url)\n",
"\n",
"nest_asyncio.apply()\n",
"uvicorn.run(app, port=8000)"
]
},
{
"cell_type": "markdown",
"id": "ccffd29a",
"metadata": {},
"source": [
"## Delete the index"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "991ba4e4",
"metadata": {},
"outputs": [],
"source": [
"def print_results(results):\n",
" if results.get(\"acknowledged\", False):\n",
" print(\"DELETED successfully.\")\n",
"\n",
" if \"error\" in results:\n",
" print(f\"ERROR: {results['error']['root_cause'][0]['reason']}\")\n",
"\n",
"\n",
"# Cleanup - Delete Index\n",
"result = _client.indices.delete(index=INDEX_NAME, ignore=[400, 404])\n",
"print_results(result)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading