|
1 | 1 | """Provides spaCy NLP over an HTTP API.""" |
2 | 2 |
|
| 3 | +import typing |
| 4 | + |
3 | 5 | import en_core_web_sm |
4 | | -import flask |
5 | | -from sense2vec import Sense2VecComponent |
| 6 | +import fastapi |
| 7 | +import pydantic |
| 8 | +import sense2vec |
| 9 | +import starlette.status |
6 | 10 |
|
app = fastapi.FastAPI()

# spaCy pipeline: the small English model, augmented with sense2vec
# vectors loaded from the local "s2v_old" directory.
nlp = en_core_web_sm.load()
_s2v_component = sense2vec.Sense2VecComponent(nlp.vocab).from_disk("s2v_old")
nlp.add_pipe(_s2v_component)
class SectionsModel(pydantic.BaseModel):
    """Request body carrying a list of text sections to process."""

    sections: typing.List[str]
|
11 | 19 |
|
12 | | -@app.route('/ner', methods=['POST']) |
13 | | -def recognize_named_entities(): |
| 20 | +@app.post('/ner') |
| 21 | +async def recognize_named_entities(request: SectionsModel): |
14 | 22 | response = {'data': []} |
15 | | - sections = flask.request.get_json()['sections'] |
16 | | - for doc in nlp.pipe(sections, disable=['tagger']): |
| 23 | + for doc in nlp.pipe(request.sections, disable=['tagger']): |
17 | 24 | for sent in doc.sents: |
18 | 25 | entities = [build_entity(ent) for ent in sent.ents] |
19 | 26 | data = {'text': sent.text, 'entities': entities} |
@@ -41,11 +48,14 @@ def build_entity(ent): |
41 | 48 | } |
42 | 49 |
|
43 | 50 |
|
44 | | -@app.route('/pos', methods=['POST']) |
45 | | -def tag_parts_of_speech(): |
class TextModel(pydantic.BaseModel):
    """Request body carrying a single text string to process."""

    text: str
| 55 | +@app.post('/pos') |
| 56 | +async def tag_parts_of_speech(request: TextModel): |
46 | 57 | data = [] |
47 | | - doc = nlp(flask.request.get_json()['text']) |
48 | | - for token in [build_token(token) for token in doc]: |
| 58 | + for token in [build_token(token) for token in nlp(request.text)]: |
49 | 59 | text = token['sent'] |
50 | 60 | del token['sent'] |
51 | 61 | if text in [obj['text'] for obj in data]: |
@@ -100,20 +110,18 @@ def build_token(token): |
100 | 110 | } |
101 | 111 |
|
102 | 112 |
|
@app.post('/tokenizer')
async def tokenize(request: TextModel):
    """Split the posted text into tokens.

    The tagger, parser and NER components are disabled because only the
    tokenizer's output is needed for this endpoint.
    """
    tokenized = nlp(request.text, disable=['tagger', 'parser', 'ner'])
    return {'tokens': [tok.text for tok in tokenized]}
108 | 117 |
|
109 | 118 |
|
@app.post('/sentencizer')
async def sentencize(request: TextModel):
    """Split the posted text into sentences.

    The tagger and NER components are disabled; the parser is kept
    because sentence boundaries (``doc.sents``) depend on it.
    """
    parsed = nlp(request.text, disable=['tagger', 'ner'])
    return {'sentences': [sentence.text for sentence in parsed.sents]}
115 | 123 |
|
116 | 124 |
|
@app.get('/health_check', status_code=starlette.status.HTTP_204_NO_CONTENT)
async def check_health():
    """Liveness probe: reply with an empty 204 No Content response.

    An explicit empty ``Response`` is returned instead of falling through
    with ``None``: a 204 response must not carry a body, and letting
    FastAPI JSON-serialize the implicit ``None`` return value can emit a
    ``null`` body with a Content-Length, which violates that requirement.
    """
    return fastapi.Response(
        status_code=starlette.status.HTTP_204_NO_CONTENT)
0 commit comments