-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathamService_Nlp.py
45 lines (36 loc) · 1.67 KB
/
amService_Nlp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import spacy
# Load the spaCy pipeline once, at import time; ner_caller reuses this
# module-level object on every call instead of reloading the model.
nlp = spacy.load("en_core_web_md")
# Alternative kept for reference: the small English model could be loaded instead.
# nlp = spacy.load('en_core_web_sm') # Consider using small model
# Entity labels that ner_caller filters out of its results. Going by the
# label names these are all numeric / amount / date style entities.
ner_ban_list = [
    "PERCENT", "MONEY", "QUANTITY",  # amounts and measurements
    "ORDINAL", "CARDINAL",           # plain numbers
    "DATE",
]
###### Call spaCy and return the named entities it finds as keywords. ######
###### Entities whose label appears in the hardcoded ner_ban_list    ######
###### above are ignored.                                            ######
def ner_caller(article_in):
    """Return the distinct named entities found in a piece of text.

    The text is run through the module-level spaCy pipeline ``nlp``. Every
    entity whose label is not listed in ``ner_ban_list`` contributes its
    text to the result.

    Args:
        article_in: The text to analyse; passed unchanged to ``nlp``.

    Returns:
        A list of entity strings in order of first appearance, with
        duplicates removed. The list is empty if nothing was found.
    """
    doc = nlp(article_in)
    kept_texts = (
        ent.text for ent in doc.ents if ent.label_ not in ner_ban_list
    )
    # dict.fromkeys drops duplicates in a single pass while keeping the
    # first-seen order, replacing a linear "not in list" scan per entity.
    return list(dict.fromkeys(kept_texts))
## Testing
# x = ner_caller("""When Sebastian Thrun started working on self-driving cars at Google in 2007,
# few people outside of the company took him seriously. “I can tell you very senior
# CEOs of major American car companies would shake my hand and turn away because I
# wasn’t worth talking to,” said Thrun, now the co-founder and CEO of online higher
# education startup Udacity, in an interview with Recode earlier this week.
# The Mona Lisa and the Statue of David were on display in the MOMA New York.
# COVID-19 is a devastating virus currently ravaging the world.
# A little less than a decade later, dozens of self-driving startups have cropped up
# while automakers around the world clamor, wallet in hand, to secure their place in
# the fast-moving world of fully automated transportation.""")
# print(x)