# %% [markdown]
# # Enhance Microsoft Copilot with Elasticsearch
#
# This notebook runs an API that lets you search invoices stored in
# Elasticsearch, and opens an ngrok tunnel to expose that API to the internet.
# It is based on the article
# [Enhance Microsoft Copilot with Elasticsearch](https://www.elastic.co/blog/enhance-microsoft-copilot-with-elasticsearch).

# %%
# Jupyter magic, kept as a comment so this cell text is also valid plain Python.
# Uncomment the next line when running inside a notebook kernel.
# %pip install fastapi pyngrok uvicorn nest-asyncio elasticsearch==9 -q

# %%
import os
import json
from getpass import getpass
from datetime import datetime

import nest_asyncio
import uvicorn

from fastapi import FastAPI, HTTPException, Query
from pyngrok import conf, ngrok

from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch

# %% [markdown]
# ## Setup Variables

# %%
# Secrets are prompted for interactively so they never end up in the notebook.
os.environ["ELASTICSEARCH_ENDPOINT"] = getpass("Elastic Endpoint: ")
os.environ["ELASTICSEARCH_API_KEY"] = getpass("Elastic Api Key: ")
os.environ["NGROK_AUTH_TOKEN"] = getpass("Ngrok Auth Token: ")


INDEX_NAME = "invoices"

# Local port shared by the ngrok tunnel and the uvicorn server.
API_PORT = 8000

# Timestamp layout accepted by /search/by-date, e.g. "04/20/2025 09:30:00 AM".
API_DATETIME_FORMAT = "%m/%d/%Y %I:%M:%S %p"

# %% [markdown]
# ## Elasticsearch client

# %%
_client = Elasticsearch(
    os.environ["ELASTICSEARCH_ENDPOINT"],
    api_key=os.environ["ELASTICSEARCH_API_KEY"],
)

# %% [markdown]
# ## Mappings

# %%
# `description` and `services.name` are copied into `semantic_field`, the
# `semantic_text` field that the semantic search endpoint queries.
INVOICE_MAPPINGS = {
    "properties": {
        "id": {"type": "keyword"},
        "file_url": {"type": "keyword"},
        "issue_date": {"type": "date"},
        "description": {"type": "text", "copy_to": "semantic_field"},
        "services": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "text",
                    "copy_to": "semantic_field",
                },
                "price": {"type": "float"},
            },
        },
        "total_amount": {
            "type": "float",
        },
        "semantic_field": {"type": "semantic_text"},
    }
}


def _error_reason(error):
    """Return the most specific human-readable reason available for `error`.

    Uses the `error.root_cause[0].reason` entry of the exception's `info`
    payload when it is present and has that shape; otherwise falls back to
    `str(error)`. This keeps the handler from raising a second exception for
    errors that carry no `info` attribute (for example connection failures).
    """
    info = getattr(error, "info", None)
    try:
        return info["error"]["root_cause"][0]["reason"]
    except (KeyError, IndexError, TypeError):
        return str(error)


# Best effort on purpose: re-running the notebook against an existing index
# should report the problem and carry on rather than stop the run.
try:
    _client.indices.create(index=INDEX_NAME, mappings=INVOICE_MAPPINGS)

    print("index created successfully")
except Exception as e:
    print(f"Error creating index: {_error_reason(e)}")

# %% [markdown]
# ## Ingesting documents to Elasticsearch

# %%
with open("invoices_data.json", "r", encoding="utf-8") as f:
    invoices = json.load(f)


# %%
def build_data():
    """Yield one bulk action per invoice loaded from `invoices_data.json`.

    When an invoice has an `id`, it is reused as the document `_id` so that
    re-running this cell overwrites the same documents instead of indexing
    duplicates.
    """
    for doc in invoices:
        action = {"_index": INDEX_NAME, "_source": doc}
        if "id" in doc:
            action["_id"] = doc["id"]
        yield action


try:
    success, errors = bulk(_client, build_data())
    print(f"{success} documents indexed successfully")

    if errors:
        print("Errors during indexing:", errors)

except Exception as e:
    print(f"Error: {str(e)}, please wait some seconds and try again.")

# %% [markdown]
# ## Building API

# %%
app = FastAPI()


# %%
def _parse_api_datetime(value, param_name):
    """Parse a query-string timestamp in `API_DATETIME_FORMAT`.

    Args:
        value: Raw query parameter value; may be None when omitted.
        param_name: Parameter name, used only in the error message.

    Returns:
        The parsed `datetime`.

    Raises:
        HTTPException: 400 when the value is missing or does not match the
            expected format.
    """
    if not value:
        raise HTTPException(
            status_code=400, detail=f"Error: '{param_name}' is required"
        )
    try:
        return datetime.strptime(value, API_DATETIME_FORMAT)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Error: Invalid date format for '{param_name}', "
                "expected MM/dd/yyyy hh:mm:ss AM/PM"
            ),
        ) from None


# NOTE(review): both handlers are `async def` but call the synchronous
# Elasticsearch client, so a slow search blocks the event loop. Acceptable for
# a demo; consider plain `def` handlers or AsyncElasticsearch otherwise.
@app.get("/search/semantic")
async def search_semantic(query: str = Query(None)):
    """Run a semantic query against `semantic_field` and return the hits.

    Args:
        query: Free-text search string (required).

    Returns:
        A list of invoice documents, each with an extra `score` key holding
        the hit's `_score`.

    Raises:
        HTTPException: 400 when `query` is missing or empty, 500 when the
            Elasticsearch request fails.
    """
    # Validate outside the try block so the 400 is not re-wrapped as a 500.
    if not query:
        raise HTTPException(status_code=400, detail="Error: 'query' is required")

    try:
        result = _client.search(
            index=INDEX_NAME,
            query={
                "semantic": {
                    "field": "semantic_field",
                    "query": query,
                }
            },
        )
    except Exception as e:
        # Raise instead of returning the exception object, which FastAPI would
        # otherwise serialize as a successful response.
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")

    hits = result["hits"]["hits"]
    return [{"score": hit["_score"], **hit["_source"]} for hit in hits]


@app.get("/search/by-date")
async def search_by_date(from_date: str = Query(None), to_date: str = Query(None)):
    """Return invoices whose `issue_date` lies within an inclusive date range.

    Args:
        from_date: Range start, formatted as `API_DATETIME_FORMAT` (required).
        to_date: Range end, formatted as `API_DATETIME_FORMAT` (required).

    Returns:
        A list with the `_source` of every matching invoice.

    Raises:
        HTTPException: 400 when a date is missing or malformed, 500 when the
            Elasticsearch request fails.
    """
    from_dt = _parse_api_datetime(from_date, "from_date")
    to_dt = _parse_api_datetime(to_date, "to_date")

    # Only the calendar date is sent on; the time-of-day part is dropped.
    formatted_from = from_dt.strftime("%d/%m/%Y")
    formatted_to = to_dt.strftime("%d/%m/%Y")

    try:
        result = _client.search(
            index=INDEX_NAME,
            query={
                "range": {
                    "issue_date": {
                        "gte": formatted_from,
                        "lte": formatted_to,
                        "format": "dd/MM/yyyy",
                    }
                }
            },
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")

    hits = result["hits"]["hits"]
    return [hit["_source"] for hit in hits]


# %% [markdown]
# ## Running the API

# %%
conf.get_default().auth_token = os.environ["NGROK_AUTH_TOKEN"]
ngrok_tunnel = ngrok.connect(API_PORT)

print("Public URL:", ngrok_tunnel.public_url)

# Lets uvicorn start its event loop inside the notebook's already running loop.
nest_asyncio.apply()
try:
    # Blocks until the server is stopped (e.g. by interrupting the kernel).
    uvicorn.run(app, port=API_PORT)
finally:
    # Close the tunnel so re-running this cell does not pile up open tunnels.
    ngrok.disconnect(ngrok_tunnel.public_url)

# %% [markdown]
# ## Delete the index

# %%
def print_results(results):
    """Print the outcome of an index deletion response.

    Prints a success line when the response is acknowledged, and the first
    root-cause reason when the response carries an `error` entry.
    """
    if results.get("acknowledged", False):
        print("DELETED successfully.")

    if "error" in results:
        print(f"ERROR: {results['error']['root_cause'][0]['reason']}")


# Cleanup - Delete Index
# 400/404 are tolerated so cleanup also succeeds when the index is already
# gone; transport options are set through `.options()` rather than as API
# keyword arguments.
result = _client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)
print_results(result)
"nbformat_minor": 5 +} diff --git a/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/invoices_data.json b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/invoices_data.json new file mode 100644 index 00000000..15dd1da4 --- /dev/null +++ b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/invoices_data.json @@ -0,0 +1,140 @@ +[ + { + "id": "INV-0001", + "file_url": "https://fake-invoices.example.com/fake/INV-0001.pdf", + "issue_date": "2025-04-01", + "description": "Invoice for technical consulting services", + "services": [ + { "name": "Technical consulting", "price": 1500.0 }, + { "name": "Remote support", "price": 300.0 } + ], + "total_amount": 1800.0 + }, + { + "id": "INV-0002", + "file_url": "https://fake-invoices.example.com/fake/INV-0002.pdf", + "issue_date": "2025-04-03", + "description": "Enterprise software license sale", + "services": [{ "name": "ERP software license", "price": 5000.0 }], + "total_amount": 5000.0 + }, + { + "id": "INV-0003", + "file_url": "https://fake-invoices.example.com/fake/INV-0003.pdf", + "issue_date": "2025-04-05", + "description": "UX/UI design services", + "services": [ + { "name": "UX design", "price": 1200.0 }, + { "name": "UI design", "price": 1000.0 } + ], + "total_amount": 2200.0 + }, + { + "id": "INV-0004", + "file_url": "https://fake-invoices.example.com/fake/INV-0004.pdf", + "issue_date": "2025-04-07", + "description": "CRM system implementation", + "services": [ + { "name": "CRM setup", "price": 2500.0 }, + { "name": "User training", "price": 700.0 } + ], + "total_amount": 3200.0 + }, + { + "id": "INV-0005", + "file_url": "https://fake-invoices.example.com/fake/INV-0005.pdf", + "issue_date": "2025-04-09", + "description": "Monthly technical support", + "services": [ + { "name": "Preventive maintenance", "price": 400.0 }, + { "name": "Phone support", "price": 200.0 } + ], + "total_amount": 600.0 + }, + { + "id": "INV-0006", + "file_url": 
"https://fake-invoices.example.com/fake/INV-0006.pdf", + "issue_date": "2025-04-11", + "description": "IT audit services", + "services": [{ "name": "Security audit", "price": 1800.0 }], + "total_amount": 1800.0 + }, + { + "id": "INV-0007", + "file_url": "https://fake-invoices.example.com/fake/INV-0007.pdf", + "issue_date": "2025-04-13", + "description": "Mobile app development", + "services": [ + { "name": "Android development", "price": 3000.0 }, + { "name": "iOS development", "price": 3000.0 } + ], + "total_amount": 6000.0 + }, + { + "id": "INV-0008", + "file_url": "https://fake-invoices.example.com/fake/INV-0008.pdf", + "issue_date": "2025-04-15", + "description": "Network equipment sales", + "services": [ + { "name": "Business router", "price": 800.0 }, + { "name": "Managed switch", "price": 600.0 } + ], + "total_amount": 1400.0 + }, + { + "id": "INV-0009", + "file_url": "https://fake-invoices.example.com/fake/INV-0009.pdf", + "issue_date": "2025-04-17", + "description": "Annual subscription renewal", + "services": [{ "name": "SaaS platform subscription", "price": 1200.0 }], + "total_amount": 1200.0 + }, + { + "id": "INV-0010", + "file_url": "https://fake-invoices.example.com/fake/INV-0010.pdf", + "issue_date": "2025-04-19", + "description": "Specialized technical support hours", + "services": [{ "name": "Specialized support (10h)", "price": 1000.0 }], + "total_amount": 1000.0 + }, + { + "id": "INV-0011", + "file_url": "https://fake-invoices.example.com/fake/INV-0011.pdf", + "issue_date": "2025-04-20", + "description": "Business lunch with client", + "services": [{ "name": "Lunch at La Terraza Bistro", "price": 85.0 }], + "total_amount": 85.0 + }, + { + "id": "INV-0012", + "file_url": "https://fake-invoices.example.com/fake/INV-0012.pdf", + "issue_date": "2025-04-21", + "description": "Hotel accommodation during client visit", + "services": [{ "name": "3-night stay at Hotel Central", "price": 450.0 }], + "total_amount": 450.0 + }, + { + "id": "INV-0013", + 
"file_url": "https://fake-invoices.example.com/fake/INV-0013.pdf", + "issue_date": "2025-04-22", + "description": "Team-building activity", + "services": [{ "name": "Escape room experience for team", "price": 200.0 }], + "total_amount": 200.0 + }, + { + "id": "INV-0014", + "file_url": "https://fake-invoices.example.com/fake/INV-0014.pdf", + "issue_date": "2025-04-23", + "description": "Local transportation for conference", + "services": [{ "name": "Taxi and metro rides", "price": 60.0 }], + "total_amount": 60.0 + }, + { + "id": "INV-0015", + "file_url": "https://fake-invoices.example.com/fake/INV-0015.pdf", + "issue_date": "2025-04-24", + "description": "Client entertainment dinner", + "services": [{ "name": "Dinner at El Gourmet", "price": 130.0 }], + "total_amount": 130.0 + } +] diff --git a/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/openAPI-specification.json b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/openAPI-specification.json new file mode 100644 index 00000000..e6b7d8f9 --- /dev/null +++ b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/openAPI-specification.json @@ -0,0 +1,140 @@ +{ + "swagger": "2.0", + "info": { + "title": "Invoice Search API", + "description": "API for semantic search and date-based search of invoices using Elasticsearch", + "version": "1.0.0" + }, + "host": "your-ngrok-host.ngrok.app", + "basePath": "/", + "schemes": ["https"], + "consumes": ["application/json"], + "produces": ["application/json"], + "parameters": { + "NgrokSkipWarningHeader": { + "name": "ngrok-skip-browser-warning", + "in": "header", + "description": "Set and send an `ngrok-skip-browser-warning` request header with any value", + "required": true, + "type": "string", + "default": "true" + } + }, + "paths": { + "/search/semantic": { + "get": { + "summary": "Semantic invoice search", + "description": "Performs a semantic search on invoices based on the provided query using Elasticsearch's semantic 
capabilities", + "operationId": "searchSemantic", + "produces": ["application/json"], + "parameters": [ + { + "$ref": "#/parameters/NgrokSkipWarningHeader" + }, + { + "name": "query", + "in": "query", + "description": "Search query for semantic matching against invoice data", + "required": true, + "type": "string" + } + ], + "responses": { + "200": { + "description": "Successful operation", + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "score": { + "type": "number", + "description": "Relevance score of the search result from Elasticsearch" + } + }, + "additionalProperties": true, + "description": "Invoice data with relevance score" + } + } + }, + "400": { + "description": "Bad request", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + }, + "500": { + "description": "Internal server error", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/search/by-date": { + "get": { + "summary": "Search invoices by date range", + "description": "Performs a search on invoices based on a specified date range using Elasticsearch", + "operationId": "searchByDate", + "produces": ["application/json"], + "parameters": [ + { + "$ref": "#/parameters/NgrokSkipWarningHeader" + }, + { + "name": "from_date", + "in": "query", + "description": "Start date in format MM/dd/yyyy hh:mm:ss aa (e.g., 01/15/2023 09:30:00 AM)", + "required": true, + "type": "string" + }, + { + "name": "to_date", + "in": "query", + "description": "End date in format MM/dd/yyyy hh:mm:ss aa (e.g., 01/20/2023 05:00:00 PM)", + "required": true, + "type": "string" + } + ], + "responses": { + "200": { + "description": "Successful operation", + "schema": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true, + "description": "Invoice data within the specified date range" + } + } + }, + "400": { + "description": "Bad request", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + }, + "500": { + "description": 
"Internal server error", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + } + }, + "definitions": { + "ErrorResponse": { + "type": "object", + "properties": { + "detail": { + "type": "string", + "description": "Error message providing details about what went wrong", + "example": "Error: Invalid date format" + } + } + } + } +} diff --git a/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/01_semantic_query.json b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/01_semantic_query.json new file mode 100644 index 00000000..4483ff8c --- /dev/null +++ b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/01_semantic_query.json @@ -0,0 +1,8 @@ +{ + "query": { + "semantic": { + "field": "semantic_field", + "query": "food consumption" + } + } +} diff --git a/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/02_semantic_query_response.json b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/02_semantic_query_response.json new file mode 100644 index 00000000..dff8d201 --- /dev/null +++ b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/02_semantic_query_response.json @@ -0,0 +1,158 @@ +[ + { + "score": 1.8814621, + "file_url": "https://fake-invoices.example.com/fake/INV-0015.pdf", + "issue_date": "2025-04-24", + "total_amount": 130.0, + "description": "Client entertainment dinner", + "id": "INV-0015", + "services": [ + { + "price": 130.0, + "name": "Dinner at El Gourmet" + } + ] + }, + { + "score": 0.76836, + "file_url": "https://fake-invoices.example.com/fake/INV-0011.pdf", + "issue_date": "2025-04-20", + "total_amount": 85.0, + "description": "Business lunch with client", + "id": "INV-0011", + "services": [ + { + "price": 85.0, + "name": "Lunch at La Terraza Bistro" + } + ] + }, + { + "score": 0.31618494, + "file_url": "https://fake-invoices.example.com/fake/INV-0005.pdf", + "issue_date": 
"2025-04-09", + "total_amount": 600.0, + "description": "Monthly technical support", + "id": "INV-0005", + "services": [ + { + "price": 400.0, + "name": "Preventive maintenance" + }, + { + "price": 200.0, + "name": "Phone support" + } + ] + }, + { + "score": 0.27274045, + "file_url": "https://fake-invoices.example.com/fake/INV-0009.pdf", + "issue_date": "2025-04-17", + "total_amount": 1200.0, + "description": "Annual subscription renewal", + "id": "INV-0009", + "services": [ + { + "price": 1200.0, + "name": "SaaS platform subscription" + } + ] + }, + { + "score": 0.25688773, + "file_url": "https://fake-invoices.example.com/fake/INV-0013.pdf", + "issue_date": "2025-04-22", + "total_amount": 200.0, + "description": "Team-building activity", + "id": "INV-0013", + "services": [ + { + "price": 200.0, + "name": "Escape room experience for team" + } + ] + }, + { + "score": 0.23697382, + "file_url": "https://fake-invoices.example.com/fake/INV-0012.pdf", + "issue_date": "2025-04-21", + "total_amount": 450.0, + "description": "Hotel accommodation during client visit", + "id": "INV-0012", + "services": [ + { + "price": 450.0, + "name": "3-night stay at Hotel Central" + } + ] + }, + { + "score": 0.16704111, + "file_url": "https://fake-invoices.example.com/fake/INV-0004.pdf", + "issue_date": "2025-04-07", + "total_amount": 3200.0, + "description": "CRM system implementation", + "id": "INV-0004", + "services": [ + { + "price": 2500.0, + "name": "CRM setup" + }, + { + "price": 700.0, + "name": "User training" + } + ] + }, + { + "score": 0.083114706, + "file_url": "https://fake-invoices.example.com/fake/INV-0007.pdf", + "issue_date": "2025-04-13", + "total_amount": 6000.0, + "description": "Mobile app development", + "id": "INV-0007", + "services": [ + { + "price": 3000.0, + "name": "Android development" + }, + { + "price": 3000.0, + "name": "iOS development" + } + ] + }, + { + "score": 0.04272168, + "file_url": "https://fake-invoices.example.com/fake/INV-0008.pdf", + 
"issue_date": "2025-04-15", + "total_amount": 1400.0, + "description": "Network equipment sales", + "id": "INV-0008", + "services": [ + { + "price": 800.0, + "name": "Business router" + }, + { + "price": 600.0, + "name": "Managed switch" + } + ] + }, + { + "score": 0.017029667, + "file_url": "https://fake-invoices.example.com/fake/INV-0014.pdf", + "issue_date": "2025-04-23", + "total_amount": 60.0, + "description": "Local transportation for conference", + "id": "INV-0014", + "services": [ + { + "price": 60.0, + "name": "Taxi and metro rides" + } + ] + } +] diff --git a/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/03_date_range_query.json b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/03_date_range_query.json new file mode 100644 index 00000000..3393a8e2 --- /dev/null +++ b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/03_date_range_query.json @@ -0,0 +1,11 @@ +{ + "query": { + "range": { + "issue_date": { + "gte": "20/04/2025", + "lte": "22/04/2025", + "format": "dd/MM/yyyy" + } + } + } +} diff --git a/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/04_date_range_query_response.json b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/04_date_range_query_response.json new file mode 100644 index 00000000..1986ddcb --- /dev/null +++ b/supporting-blog-content/enhance-microsoft-copilot-with-elasticsearch/outputs/04_date_range_query_response.json @@ -0,0 +1,41 @@ +[ + { + "file_url": "https://fake-invoices.example.com/fake/INV-0011.pdf", + "issue_date": "2025-04-20", + "total_amount": 85.0, + "description": "Business lunch with client", + "id": "INV-0011", + "services": [ + { + "price": 85.0, + "name": "Lunch at La Terraza Bistro" + } + ] + }, + { + "file_url": "https://fake-invoices.example.com/fake/INV-0012.pdf", + "issue_date": "2025-04-21", + "total_amount": 450.0, + "description": "Hotel accommodation during client 
visit", + "id": "INV-0012", + "services": [ + { + "price": 450.0, + "name": "3-night stay at Hotel Central" + } + ] + }, + { + "file_url": "https://fake-invoices.example.com/fake/INV-0013.pdf", + "issue_date": "2025-04-22", + "total_amount": 200.0, + "description": "Team-building activity", + "id": "INV-0013", + "services": [ + { + "price": 200.0, + "name": "Escape room experience for team" + } + ] + } +]