|
1 | 1 | """Provides spaCy NLP over an HTTP API.""" |
2 | 2 |
|
| 3 | +import typing |
| 4 | + |
3 | 5 | import en_core_web_sm |
4 | | -import flask |
5 | | -from sense2vec import Sense2VecComponent |
| 6 | +import fastapi |
| 7 | +import pydantic |
| 8 | +import sense2vec |
| 9 | +import starlette.status |
6 | 10 |
|
app = fastapi.FastAPI()

# spaCy pipeline: the small English model, augmented with sense2vec
# vectors loaded from the local "s2v_old" directory.
nlp = en_core_web_sm.load()
_s2v_component = sense2vec.Sense2VecComponent(nlp.vocab).from_disk("s2v_old")
nlp.add_pipe(_s2v_component)
class SectionsModel(pydantic.BaseModel):
    """Request body carrying a list of text sections to process."""

    sections: typing.List[str]
|
11 | 19 |
|
12 | | -@app.route('/ner', methods=['POST']) |
13 | | -def recognize_named_entities(): |
| 20 | +@app.post('/ner') |
| 21 | +async def recognize_named_entities(request: SectionsModel): |
14 | 22 | response = {'data': []} |
15 | | - sections = flask.request.get_json()['sections'] |
16 | | - for doc in nlp.pipe(sections, disable=['tagger']): |
| 23 | + for doc in nlp.pipe(request.sections, disable=['tagger']): |
17 | 24 | for sent in doc.sents: |
18 | 25 | entities = [build_entity(ent) for ent in sent.ents] |
19 | 26 | data = {'text': sent.text, 'entities': entities} |
@@ -41,11 +48,14 @@ def build_entity(ent): |
41 | 48 | } |
42 | 49 |
|
43 | 50 |
|
44 | | -@app.route('/pos', methods=['POST']) |
45 | | -def tag_parts_of_speech(): |
class TextModel(pydantic.BaseModel):
    """Request body carrying a single text string to process."""

    text: str
| 55 | +@app.post('/pos') |
| 56 | +async def tag_parts_of_speech(request: TextModel): |
46 | 57 | data = [] |
47 | | - doc = nlp(flask.request.get_json()['text']) |
48 | | - for token in [build_token(token) for token in doc]: |
| 58 | + for token in [build_token(token) for token in nlp(request.text)]: |
49 | 59 | text = token['sent'] |
50 | 60 | del token['sent'] |
51 | 61 | if text in [obj['text'] for obj in data]: |
@@ -100,20 +110,18 @@ def build_token(token): |
100 | 110 | } |
101 | 111 |
|
102 | 112 |
|
@app.post('/tokenizer')
async def tokenize(request: TextModel):
    """Split the posted text into tokens.

    The tagger, parser and NER components are disabled because only the
    tokenizer's output is needed for this endpoint.
    """
    tokenized = nlp(request.text, disable=['tagger', 'parser', 'ner'])
    return {'tokens': [tok.text for tok in tokenized]}
108 | 117 |
|
109 | 118 |
|
@app.post('/sentencizer')
async def sentencize(request: TextModel):
    """Split the posted text into sentences.

    The tagger and NER components are disabled; the parser is kept
    because sentence boundaries (``doc.sents``) depend on it.
    """
    parsed = nlp(request.text, disable=['tagger', 'ner'])
    return {'sentences': [sentence.text for sentence in parsed.sents]}
115 | 123 |
|
116 | 124 |
|
@app.get('/health_check', status_code=starlette.status.HTTP_204_NO_CONTENT)
async def check_health():
    """Liveness probe: reply with an empty 204 No Content response.

    An explicit empty ``Response`` is returned instead of falling through
    with ``None``: a 204 response must not carry a body, and letting
    FastAPI JSON-serialize the implicit ``None`` return value can emit a
    ``null`` body with a Content-Length, which violates that requirement.
    """
    return fastapi.Response(
        status_code=starlette.status.HTTP_204_NO_CONTENT)
0 commit comments