@@ -123,10 +123,11 @@ def sentiment(self, contents, model='general'):
>>> import os
>>> nlp = BosonNLP(os.environ['BOSON_API_TOKEN'])
- >>> nlp.sentiment('这家味道还不错')
- [[0.8758192096636473, 0.12418079033635264]]
- >>> nlp.sentiment(['这家味道还不错', '菜品太少了而且还不新鲜'])
- [[0.8758192096636473, 0.12418079033635264], [0.33160979027792103, 0.668390209722079]]
+ >>> nlp.sentiment('这家味道还不错', model='food')
+ [[0.9991737012037423, 0.0008262987962577828]]
+ >>> nlp.sentiment(['这家味道还不错', '菜品太少了而且还不新鲜'], model='food')
+ [[0.9991737012037423, 0.0008262987962577828],
+ [9.940036427291687e-08, 0.9999999005996357]]
"""
api_endpoint = '/sentiment/analysis?' + model
r = self._api_request('POST', api_endpoint, data=contents)
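For reference, a minimal client-side sketch of the new doctest usage (assumes the BOSON_API_TOKEN environment variable is set and that the account can use the 'food' sentiment model; the post-processing is illustrative):

    import os
    from bosonnlp import BosonNLP

    nlp = BosonNLP(os.environ['BOSON_API_TOKEN'])
    # The model name is appended to the endpoint, e.g. '/sentiment/analysis?food'.
    # Judging from the doctests, each result is a [positive, negative] probability pair.
    for positive, negative in nlp.sentiment(['这家味道还不错', '菜品太少了而且还不新鲜'], model='food'):
        print('positive' if positive >= negative else 'negative')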
@@ -149,11 +150,11 @@ def convert_time(self, content, basetime=None):
>>> import os
>>> nlp = BosonNLP(os.environ['BOSON_API_TOKEN'])
- >>> nlp.convert_time("2013年二月二十八日下午四点三十分二十九秒")
- {u'timestamp': u'2013-02-28 16:30:29'}
+ >>> _json_dumps(nlp.convert_time("2013年二月二十八日下午四点三十分二十九秒"))
+ '{"timestamp": "2013-02-28 16:30:29", "type": "timestamp"}'
>>> import datetime
- >>> nlp.convert_time("今天晚上8点到明天下午3点", datetime.datetime.today())
- {u'timespan': [u'2014-08-25 20:00:00', u'2014-08-26 15:00:00']}
+ >>> _json_dumps(nlp.convert_time("今天晚上8点到明天下午3点", datetime.datetime(2015, 9, 1)))
+ '{"timespan": ["2015-09-02 20:00:00", "2015-09-03 15:00:00"], "type": "timespan_0"}'

"""
api_endpoint = '/time/analysis'
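The new doctests pipe results through _json_dumps so that dictionary output is deterministic; that helper is defined elsewhere in the module and is not part of this hunk. A plausible sketch of such a helper (an assumption, not the module's actual definition), consistent with the output shown above:

    import json

    def _json_dumps(obj):
        # Sorted keys and no ASCII escaping keep doctest output stable
        # across dict orderings and Python versions.
        return json.dumps(obj, sort_keys=True, ensure_ascii=False)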
@@ -205,8 +206,8 @@ def suggest(self, word, top_k=None):
>>> import os
>>> nlp = BosonNLP(os.environ['BOSON_API_TOKEN'])
- >>> nlp.suggest('python', top_k=1)
- [[0.9999999999999992, 'python/x']]
+ >>> nlp.suggest('北京', top_k=2)
+ [[1.0, '北京/ns'], [0.7493540460397998, '上海/ns']]
"""
api_endpoint = '/suggest/analysis'
params = {}
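Each suggestion is a [score, 'word/pos'] pair, as in the doctest output above; a small post-processing sketch (the helper name is illustrative):

    def split_suggestions(suggestions):
        # Turn [[1.0, '北京/ns'], ...] into [(1.0, '北京', 'ns'), ...].
        result = []
        for score, tagged in suggestions:
            word, pos = tagged.rsplit('/', 1)
            result.append((score, word, pos))
        return result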
@@ -234,7 +235,7 @@ def extract_keywords(self, text, top_k=None, segmented=False):
>>> import os
>>> nlp = BosonNLP(os.environ['BOSON_API_TOKEN'])
>>> nlp.extract_keywords('病毒式媒体网站:让新闻迅速蔓延', top_k=2)
- [[0.4580507649282757, '蔓延'], [0.44467176143180404, '病毒']]
+ [[0.8391345017584958, '病毒式'], [0.3802418301341705, '蔓延']]
"""
api_endpoint = '/keywords/analysis'
params = {}
@@ -260,18 +261,18 @@ def depparser(self, contents):
>>> import os
>>> nlp = BosonNLP(os.environ['BOSON_API_TOKEN'])
>>> nlp.depparser('今天天气好')
- [{'tag': ['NT', 'NN', 'VA'],
+ [{'head': [2, 2, -1],
'role': ['TMP', 'SBJ', 'ROOT'],
- 'head': [2, 2, -1],
+ 'tag': ['NT', 'NN', 'VA'],
'word': ['今天', '天气', '好']}]
>>> nlp.depparser(['今天天气好', '美好的世界'])
- [{'tag': ['NT', 'NN', 'VA'],
+ [{'head': [2, 2, -1],
'role': ['TMP', 'SBJ', 'ROOT'],
- 'head': [2, 2, -1],
+ 'tag': ['NT', 'NN', 'VA'],
'word': ['今天', '天气', '好']},
- {'tag': ['VA', 'DEC', 'NN'],
+ {'head': [1, 2, -1],
'role': ['DEC', 'NMOD', 'ROOT'],
- 'head': [1, 2, -1],
+ 'tag': ['VA', 'DEC', 'NN'],
'word': ['美好', '的', '世界']}]
"""
api_endpoint = '/depparser/analysis'
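In this output, head[i] is the index of token i's governing word and -1 marks the sentence root, so the dependency arcs can be reconstructed directly; a short sketch (the function name is illustrative):

    def arcs(parse):
        # parse is one dict from nlp.depparser(...).
        for word, role, head in zip(parse['word'], parse['role'], parse['head']):
            governor = parse['word'][head] if head >= 0 else 'ROOT'
            yield (word, role, governor)

For '今天天气好' this yields ('今天', 'TMP', '好'), ('天气', 'SBJ', '好') and ('好', 'ROOT', 'ROOT').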
@@ -296,16 +297,17 @@ def ner(self, contents, sensitivity=None):
>>> import os
>>> nlp = BosonNLP(os.environ['BOSON_API_TOKEN'])
- >>> nlp.ner('成都商报记者 姚永忠')
+ >>> nlp.ner('成都商报记者 姚永忠', sensitivity=2)
[{'entity': [[0, 2, 'product_name'], [3, 4, 'person_name']],
'tag': ['ns', 'n', 'n', 'nr'],
'word': ['成都', '商报', '记者', '姚永忠']}]
+
>>> nlp.ner(['成都商报记者 姚永忠', '微软XP操作系统今日正式退休'])
[{'entity': [[0, 2, 'product_name'], [3, 4, 'person_name']],
'tag': ['ns', 'n', 'n', 'nr'],
'word': ['成都', '商报', '记者', '姚永忠']},
{'entity': [[0, 2, 'product_name'], [3, 4, 'time']],
- 'tag': ['nt', 'x', 'nl', 't', 'ad', 'v'],
+ 'tag': ['nt', 'nx', 'nl', 't', 'ad', 'v'],
'word': ['微软', 'XP', '操作系统', '今日', '正式', '退休']}]
"""
api_endpoint = '/ner/analysis'
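The entity triples are [start, end, type] token spans with an exclusive end index, so they map back onto the word list; a small sketch (the function name is illustrative):

    def entity_strings(result):
        # result is one dict from nlp.ner(...).
        return [(''.join(result['word'][start:end]), etype)
                for start, end, etype in result['entity']]

For the first doctest above this gives [('成都商报', 'product_name'), ('姚永忠', 'person_name')].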
@@ -315,31 +317,63 @@ def ner(self, contents, sensitivity=None):
r = self._api_request('POST', api_endpoint, data=contents, params=params)
return r.json()

- def tag(self, contents):
+ def tag(self, contents, space_mode=0, oov_level=3, t2s=0, special_char_conv=0):
"""BosonNLP `word segmentation and POS tagging <http://docs.bosonnlp.com/tag.html>`_ wrapper.

:param contents: Text, or sequence of texts, to be segmented and POS-tagged.
:type contents: string or sequence of string

+ :param space_mode: Whitespace retention option.
+ :type space_mode: int, valid values 0-3
+
+ :param oov_level: New-word enumeration strength.
+ :type oov_level: int, valid values 0-4
+
+ :param t2s: Traditional-to-simplified conversion option (convert, or leave unchanged).
+ :type t2s: int, valid values 0-1
+
+ :param special_char_conv: Special-character conversion option (whether to convert characters such as carriage returns and tabs).
+ :type special_char_conv: int, valid values 0-1
+
:returns: List of results returned by the API.

:raises: :py:exc:`~bosonnlp.HTTPError` if the API request fails.

+ See http://docs.bosonnlp.com/tag.html for detailed descriptions of the parameters and return values.
+
Example:

>>> import os
>>> nlp = BosonNLP(os.environ['BOSON_API_TOKEN'])
- >>> nlp.tag('成都商报记者 姚永忠')
- [{'tag': ['NR', 'NN', 'NN', 'NR'],
- 'word': ['成都', '商报', '记者', '姚永忠']}]
- >>> nlp.tag(['成都商报记者 姚永忠', '微软XP操作系统今日正式退休'])
- [{'tag': ['NR', 'NN', 'NN', 'NR'],
- 'word': ['成都', '商报', '记者', '姚永忠']},
- {'tag': ['NR', 'NN', 'NN', 'NN', 'NT', 'AD', 'VV'],
- 'word': ['微软', 'XP', '操作', '系统', '今日', '正式', '退休']}]
+
+ >>> result = nlp.tag('成都商报记者 姚永忠')
+ >>> _json_dumps(result)
+ '[{"tag": ["ns", "n", "n", "nr"], "word": ["成都", "商报", "记者", "姚永忠"]}]'
+
+ >>> format_tag_result = lambda tagged: ' '.join('%s/%s' % x for x in zip(tagged['word'], tagged['tag']))
+ >>> result = nlp.tag("成都商报记者 姚永忠")
+ >>> format_tag_result(result[0])
+ '成都/ns 商报/n 记者/n 姚永忠/nr'
+
+ >>> result = nlp.tag("成都商报记者 姚永忠", space_mode=2)
+ >>> format_tag_result(result[0])
+ '成都/ns 商报/n 记者/n /w 姚永忠/nr'
+
+ >>> result = nlp.tag(['亚投行意向创始成员国确定为57个', '“流量贵”频被吐槽'], oov_level=0)
+ >>> format_tag_result(result[0])
+ '亚/ns 投/v 行/n 意向/n 创始/vi 成员国/n 确定/v 为/v 57/m 个/q'
+
+ >>> format_tag_result(result[1])
+ '“/wyz 流量/n 贵/a ”/wyy 频/d 被/pbei 吐槽/v'
"""
api_endpoint = '/tag/analysis'
- r = self._api_request('POST', api_endpoint, data=contents)
+ params = {
+     'space_mode': space_mode,
+     'oov_level': oov_level,
+     't2s': t2s,
+     'special_char_conv': special_char_conv,
+ }
+ r = self._api_request('POST', api_endpoint, params=params, data=contents)
return r.json()

def _cluster_push(self, task_id, contents):
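All four new keyword arguments are forwarded as query-string parameters, so they can be combined freely. A hedged usage sketch (the traditional-Chinese input and the effect of t2s=1 follow the parameter descriptions above and are illustrative, not taken from the doctests):

    import os
    from bosonnlp import BosonNLP

    nlp = BosonNLP(os.environ['BOSON_API_TOKEN'])
    # Per the parameter docs, t2s=1 requests traditional-to-simplified conversion
    # and special_char_conv=1 converts characters such as carriage returns and tabs.
    tagged = nlp.tag('微軟XP操作系統今日正式退休', t2s=1, special_char_conv=1)
    words = tagged[0]['word']    # segmented tokens
    pos_tags = tagged[0]['tag']  # one POS tag per token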
@@ -657,6 +691,8 @@ def wait_until_complete(self, timeout=None):
"""
elapsed = 0.0
seconds_to_sleep = 1.0
+ if timeout is not None:
+     seconds_to_sleep = min(seconds_to_sleep, timeout)
i = 0
while True:
time.sleep(seconds_to_sleep)
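The added clamp keeps the very first sleep from overshooting a sub-second timeout. A hedged sketch of the polling pattern this protects (assumed structure, not the library's exact code):

    import time

    def wait_until_complete_sketch(is_complete, timeout=None):
        elapsed = 0.0
        seconds_to_sleep = 1.0
        if timeout is not None:
            seconds_to_sleep = min(seconds_to_sleep, timeout)
        while True:
            time.sleep(seconds_to_sleep)
            elapsed += seconds_to_sleep
            if is_complete():
                return
            if timeout is not None and elapsed >= timeout:
                raise TimeoutError('task did not finish within %s seconds' % timeout)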