|
18 | 18 | "outputs": [],
|
19 | 19 | "source": [
|
20 | 20 | "from text import *\n",
|
21 |
| - "from utils import open_data" |
| 21 | + "from utils import open_data\n", |
| 22 | + "from notebook import psource" |
22 | 23 | ]
|
23 | 24 | },
|
24 | 25 | {
|
|
55 | 56 | },
|
56 | 57 | {
|
57 | 58 | "cell_type": "code",
|
58 |
| - "execution_count": 2, |
59 |
| - "metadata": { |
60 |
| - "collapsed": true |
61 |
| - }, |
62 |
| - "outputs": [], |
63 |
| - "source": [ |
64 |
| - "%psource UnigramWordModel" |
65 |
| - ] |
66 |
| - }, |
67 |
| - { |
68 |
| - "cell_type": "code", |
69 |
| - "execution_count": 3, |
70 |
| - "metadata": { |
71 |
| - "collapsed": true |
72 |
| - }, |
73 |
| - "outputs": [], |
74 |
| - "source": [ |
75 |
| - "%psource NgramWordModel" |
76 |
| - ] |
77 |
| - }, |
78 |
| - { |
79 |
| - "cell_type": "code", |
80 |
| - "execution_count": 4, |
81 |
| - "metadata": { |
82 |
| - "collapsed": true |
83 |
| - }, |
84 |
| - "outputs": [], |
85 |
| - "source": [ |
86 |
| - "%psource UnigramCharModel" |
87 |
| - ] |
88 |
| - }, |
89 |
| - { |
90 |
| - "cell_type": "code", |
91 |
| - "execution_count": 6, |
92 |
| - "metadata": { |
93 |
| - "collapsed": true |
94 |
| - }, |
| 59 | + "execution_count": null, |
| 60 | + "metadata": {}, |
95 | 61 | "outputs": [],
|
96 | 62 | "source": [
|
97 |
| - "%psource NgramCharModel" |
| 63 | + "psource(UnigramWordModel, NgramWordModel, UnigramCharModel, NgramCharModel)" |
98 | 64 | ]
|
99 | 65 | },
|
100 | 66 | {
|
|
117 | 83 | },
|
118 | 84 | {
|
119 | 85 | "cell_type": "code",
|
120 |
| - "execution_count": 8, |
| 86 | + "execution_count": 2, |
121 | 87 | "metadata": {},
|
122 | 88 | "outputs": [
|
123 | 89 | {
|
|
156 | 122 | },
|
157 | 123 | {
|
158 | 124 | "cell_type": "code",
|
159 |
| - "execution_count": 12, |
| 125 | + "execution_count": 3, |
160 | 126 | "metadata": {},
|
161 | 127 | "outputs": [
|
162 | 128 | {
|
163 | 129 | "name": "stdout",
|
164 | 130 | "output_type": "stream",
|
165 | 131 | "text": [
|
166 |
| - "Conditional Probabilities Table: {'myself': 1, 'to': 2, 'at': 2, 'pleased': 1, 'considered': 1, 'will': 1, 'intoxicated': 1, 'glad': 1, 'certain': 2, 'in': 2, 'now': 2, 'sitting': 1, 'unusually': 1, 'approaching': 1, 'by': 1, 'covered': 1, 'standing': 1, 'allowed': 1, 'surprised': 1, 'keenly': 1, 'afraid': 1, 'once': 2, 'crushed': 1, 'not': 4, 'rapt': 1, 'simulating': 1, 'rapidly': 1, 'quite': 1, 'describing': 1, 'wearied': 1} \n", |
| 132 | + "Conditional Probabilities Table: {'now': 2, 'glad': 1, 'keenly': 1, 'considered': 1, 'once': 2, 'not': 4, 'in': 2, 'by': 1, 'simulating': 1, 'intoxicated': 1, 'wearied': 1, 'quite': 1, 'certain': 2, 'sitting': 1, 'to': 2, 'rapidly': 1, 'will': 1, 'describing': 1, 'allowed': 1, 'at': 2, 'afraid': 1, 'covered': 1, 'approaching': 1, 'standing': 1, 'myself': 1, 'surprised': 1, 'unusually': 1, 'rapt': 1, 'pleased': 1, 'crushed': 1} \n", |
167 | 133 | "\n",
|
168 | 134 | "Conditional Probability of 'once' give 'i was': 0.05128205128205128 \n",
|
169 | 135 | "\n",
|
170 |
| - "Next word after 'i was': not\n" |
| 136 | + "Next word after 'i was': wearied\n" |
171 | 137 | ]
|
172 | 138 | }
|
173 | 139 | ],
|
|
198 | 164 | },
|
199 | 165 | {
|
200 | 166 | "cell_type": "code",
|
201 |
| - "execution_count": 3, |
| 167 | + "execution_count": 4, |
202 | 168 | "metadata": {},
|
203 | 169 | "outputs": [
|
204 | 170 | {
|
|
246 | 212 | },
|
247 | 213 | {
|
248 | 214 | "cell_type": "code",
|
249 |
| - "execution_count": 4, |
| 215 | + "execution_count": 5, |
250 | 216 | "metadata": {},
|
251 | 217 | "outputs": [
|
252 | 218 | {
|
253 | 219 | "name": "stdout",
|
254 | 220 | "output_type": "stream",
|
255 | 221 | "text": [
|
256 |
| - "not it of before most regions multitudes the a three\n", |
257 |
| - "the inhabitants of so also refers to the cube with\n", |
258 |
| - "the service of education waxed daily more numerous than the\n" |
| 222 | + "hearing as inside is confined to conduct by the duties\n", |
| 223 | + "all and of voice being in a day of the\n", |
| 224 | + "party they are stirred to mutual warfare and perish by\n" |
259 | 225 | ]
|
260 | 226 | }
|
261 | 227 | ],
|
|
283 | 249 | },
|
284 | 250 | {
|
285 | 251 | "cell_type": "code",
|
286 |
| - "execution_count": 19, |
| 252 | + "execution_count": 6, |
287 | 253 | "metadata": {},
|
288 | 254 | "outputs": [
|
289 | 255 | {
|
290 | 256 | "name": "stdout",
|
291 | 257 | "output_type": "stream",
|
292 | 258 | "text": [
|
293 |
| - "it again stealing away through the ranks of his nephew but he laughed most immoderately\n", |
294 |
| - "exclaiming that he henceforth exchanged them for the artist s pencil how great and glorious\n", |
295 |
| - "compound now for nothing worse but however all that is quite out of the question\n", |
296 |
| - "accordance with precedent and for the sake of secrecy he must condemn him to perpetual\n" |
| 259 | + "leave them at cleveland this christmas now pray do not ask you to relate or\n", |
| 260 | + "meaning and both of us sprang forward in the direction and no sooner had they\n", |
| 261 | + "palmer though very unwilling to go as well from real humanity and good nature as\n", |
| 262 | + "time about what they should do and they agreed he should take orders directly and\n" |
297 | 263 | ]
|
298 | 264 | }
|
299 | 265 | ],
|
300 | 266 | "source": [
|
301 | 267 | "data = open_data(\"EN-text/flatland.txt\").read()\n",
|
302 |
| - "data += open_data(\"EN-text/gutenberg.txt\").read()\n", |
303 | 268 | "data += open_data(\"EN-text/sense.txt\").read()\n",
|
304 | 269 | "\n",
|
305 | 270 | "wordseq = words(data)\n",
|
|
344 | 309 | },
|
345 | 310 | {
|
346 | 311 | "cell_type": "code",
|
347 |
| - "execution_count": 3, |
348 |
| - "metadata": { |
349 |
| - "collapsed": true |
350 |
| - }, |
| 312 | + "execution_count": null, |
| 313 | + "metadata": {}, |
351 | 314 | "outputs": [],
|
352 | 315 | "source": [
|
353 |
| - "%psource viterbi_segment" |
| 316 | + "psource(viterbi_segment)" |
354 | 317 | ]
|
355 | 318 | },
|
356 | 319 | {
|
|
373 | 336 | },
|
374 | 337 | {
|
375 | 338 | "cell_type": "code",
|
376 |
| - "execution_count": 4, |
| 339 | + "execution_count": 3, |
377 | 340 | "metadata": {},
|
378 | 341 | "outputs": [
|
379 | 342 | {
|
|
388 | 351 | "source": [
|
389 | 352 | "flatland = open_data(\"EN-text/flatland.txt\").read()\n",
|
390 | 353 | "wordseq = words(flatland)\n",
|
391 |
| - "P = UnigramTextModel(wordseq)\n", |
| 354 | + "P = UnigramWordModel(wordseq)\n", |
392 | 355 | "text = \"itiseasytoreadwordswithoutspaces\"\n",
|
393 | 356 | "\n",
|
394 | 357 | "s, p = viterbi_segment(text,P)\n",
|
|
447 | 410 | },
|
448 | 411 | "outputs": [],
|
449 | 412 | "source": [
|
450 |
| - "%psource IRSystem" |
| 413 | + "psource(IRSystem)" |
451 | 414 | ]
|
452 | 415 | },
|
453 | 416 | {
|
|
490 | 453 | },
|
491 | 454 | "outputs": [],
|
492 | 455 | "source": [
|
493 |
| - "%psource UnixConsultant" |
| 456 | + "psource(UnixConsultant)" |
494 | 457 | ]
|
495 | 458 | },
|
496 | 459 | {
|
|
504 | 467 | },
|
505 | 468 | {
|
506 | 469 | "cell_type": "code",
|
507 |
| - "execution_count": 9, |
| 470 | + "execution_count": 4, |
508 | 471 | "metadata": {},
|
509 | 472 | "outputs": [
|
510 | 473 | {
|
|
533 | 496 | },
|
534 | 497 | {
|
535 | 498 | "cell_type": "code",
|
536 |
| - "execution_count": 10, |
| 499 | + "execution_count": 5, |
537 | 500 | "metadata": {},
|
538 | 501 | "outputs": [
|
539 | 502 | {
|
|
628 | 591 | },
|
629 | 592 | {
|
630 | 593 | "cell_type": "code",
|
631 |
| - "execution_count": 5, |
| 594 | + "execution_count": 6, |
632 | 595 | "metadata": {},
|
633 | 596 | "outputs": [
|
634 | 597 | {
|
|
656 | 619 | },
|
657 | 620 | {
|
658 | 621 | "cell_type": "code",
|
659 |
| - "execution_count": 6, |
| 622 | + "execution_count": 7, |
660 | 623 | "metadata": {},
|
661 | 624 | "outputs": [
|
662 | 625 | {
|
|
748 | 711 | },
|
749 | 712 | {
|
750 | 713 | "cell_type": "code",
|
751 |
| - "execution_count": 10, |
752 |
| - "metadata": { |
753 |
| - "collapsed": true |
754 |
| - }, |
| 714 | + "execution_count": null, |
| 715 | + "metadata": {}, |
755 | 716 | "outputs": [],
|
756 | 717 | "source": [
|
757 |
| - "%psource PermutationDecoder" |
| 718 | + "psource(PermutationDecoder)" |
758 | 719 | ]
|
759 | 720 | },
|
760 | 721 | {
|
|
811 | 772 | "name": "python",
|
812 | 773 | "nbconvert_exporter": "python",
|
813 | 774 | "pygments_lexer": "ipython3",
|
814 |
| - "version": "3.5.2+" |
| 775 | + "version": "3.5.3" |
815 | 776 | }
|
816 | 777 | },
|
817 | 778 | "nbformat": 4,
|
|
0 commit comments