From d7df30373323394792de78ffb62cc9ca915a5584 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 22 Mar 2013 14:59:13 -0400 Subject: [PATCH 001/404] refs #3: ported over all the docx tests (not xml tests) all of them are failing :( --- pydocx/DocxParser.py | 16 +- pydocx/fixtures/attachment_is_tiff.docx | Bin 0 -> 5304 bytes .../fixtures/bigger_font_size_to_header.docx | Bin 0 -> 4517 bytes pydocx/fixtures/convert_p_to_h.docx | Bin 0 -> 4489 bytes pydocx/fixtures/fake_headings_by_length.docx | Bin 0 -> 3417 bytes pydocx/fixtures/greek_alphabet.docx | Bin 0 -> 3554 bytes pydocx/fixtures/has_image.docx | Bin 0 -> 7911 bytes pydocx/fixtures/has_missing_image.docx | Bin 0 -> 4944 bytes pydocx/fixtures/has_title.docx | Bin 0 -> 4000 bytes pydocx/fixtures/header_footer_problem.docx | Bin 0 -> 86241 bytes pydocx/fixtures/headers.docx | Bin 0 -> 4269 bytes .../headers_with_full_line_styles.docx | Bin 0 -> 4125 bytes pydocx/fixtures/inline_tags.docx | Bin 0 -> 3617 bytes pydocx/fixtures/list_in_table.docx | Bin 0 -> 4379 bytes pydocx/fixtures/list_to_header.docx | Bin 0 -> 4369 bytes pydocx/fixtures/lists_with_styles.docx | Bin 0 -> 4204 bytes pydocx/fixtures/missing_content.docx | Bin 0 -> 81640 bytes pydocx/fixtures/nested_lists.docx | Bin 0 -> 4545 bytes pydocx/fixtures/nested_table_rowspan.docx | Bin 0 -> 3765 bytes pydocx/fixtures/nested_tables.docx | Bin 0 -> 3711 bytes pydocx/fixtures/resized_image.docx | Bin 0 -> 7903 bytes pydocx/fixtures/shift_enter.docx | Bin 0 -> 4523 bytes pydocx/fixtures/simple.docx | Bin 0 -> 4451 bytes pydocx/fixtures/simple_lists.docx | Bin 0 -> 4426 bytes pydocx/fixtures/special_chars.docx | Bin 0 -> 3566 bytes pydocx/fixtures/split_header.docx | Bin 0 -> 4108 bytes pydocx/fixtures/table_col_row_span.docx | Bin 0 -> 4057 bytes pydocx/fixtures/tables_in_lists.docx | Bin 0 -> 4446 bytes pydocx/fixtures/track_changes_on.docx | Bin 0 -> 3593 bytes pydocx/fixtures/upper_alpha_all_bold.docx | Bin 0 -> 4171 bytes pydocx/lxmlparser.py | 20 +- 
pydocx/parsers/Docx2Html.py | 22 +- pydocx/tests/__init__.py | 123 +++ pydocx/tests/test_docx.py | 827 ++++++++++++++++++ run_tests.sh | 3 + 35 files changed, 987 insertions(+), 24 deletions(-) create mode 100644 pydocx/fixtures/attachment_is_tiff.docx create mode 100644 pydocx/fixtures/bigger_font_size_to_header.docx create mode 100644 pydocx/fixtures/convert_p_to_h.docx create mode 100644 pydocx/fixtures/fake_headings_by_length.docx create mode 100644 pydocx/fixtures/greek_alphabet.docx create mode 100644 pydocx/fixtures/has_image.docx create mode 100644 pydocx/fixtures/has_missing_image.docx create mode 100644 pydocx/fixtures/has_title.docx create mode 100644 pydocx/fixtures/header_footer_problem.docx create mode 100644 pydocx/fixtures/headers.docx create mode 100644 pydocx/fixtures/headers_with_full_line_styles.docx create mode 100644 pydocx/fixtures/inline_tags.docx create mode 100644 pydocx/fixtures/list_in_table.docx create mode 100644 pydocx/fixtures/list_to_header.docx create mode 100644 pydocx/fixtures/lists_with_styles.docx create mode 100644 pydocx/fixtures/missing_content.docx create mode 100644 pydocx/fixtures/nested_lists.docx create mode 100644 pydocx/fixtures/nested_table_rowspan.docx create mode 100644 pydocx/fixtures/nested_tables.docx create mode 100644 pydocx/fixtures/resized_image.docx create mode 100644 pydocx/fixtures/shift_enter.docx create mode 100644 pydocx/fixtures/simple.docx create mode 100644 pydocx/fixtures/simple_lists.docx create mode 100644 pydocx/fixtures/special_chars.docx create mode 100644 pydocx/fixtures/split_header.docx create mode 100644 pydocx/fixtures/table_col_row_span.docx create mode 100644 pydocx/fixtures/tables_in_lists.docx create mode 100644 pydocx/fixtures/track_changes_on.docx create mode 100644 pydocx/fixtures/upper_alpha_all_bold.docx create mode 100644 pydocx/tests/__init__.py create mode 100644 pydocx/tests/test_docx.py create mode 100755 run_tests.sh diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py 
index b3006ef0..8e32ea9f 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -66,11 +66,11 @@ def __init__(self, path): self.document_text = f.read('word/document.xml') try: self.numbering_text = f.read('word/numbering.xml') - except zipfile.BadZipfile: + except KeyError: pass try: self.comment_text = f.read('word/comments.xml') - except zipfile.BadZipfile: + except KeyError: pass finally: f.close() @@ -172,7 +172,7 @@ def parse_lists(self, el): if lst_style['val'] == 'bullet': parsed += self.unordered_list(chunk_parsed) else: - parsed += self.ordered_list(chunk_parsed) + parsed += self.ordered_list(chunk_parsed, lst_style['val']) elif chunk[0].has_child_all('br'): parsed += self.page_break() else: @@ -199,7 +199,7 @@ def parse(self, el): for child in el: parsed += self.parse(child) - if el.tag == 'br' and el.attrib['type'] == 'page': + if el.tag == 'br' and el.attrib.get('type') == 'page': #TODO figure out what parsed is getting overwritten return self.page_break() # add it to the list so we don't repeat! 
@@ -216,6 +216,8 @@ def parse(self, el): self.elements.append(el) return self.parse_r(el) elif el.tag == 'p': + if el.parent.tag == 'tc': + return parsed return self.parse_p(el, parsed) elif el.tag == 'ins': return self.insertion(parsed, '', '') @@ -223,14 +225,12 @@ def parse(self, el): return parsed def parse_p(self, el, text): + if text == '': + return '' parsed = text if self.in_list: self.in_list = False parsed = self.list_element(parsed) - elif ( - not el.has_child_all('t') and - 'tbl' not in [i.tag for i in el.parent_list]): - parsed = self.linebreak() elif el.parent not in self.elements: parsed = self.paragraph(parsed) return parsed diff --git a/pydocx/fixtures/attachment_is_tiff.docx b/pydocx/fixtures/attachment_is_tiff.docx new file mode 100644 index 0000000000000000000000000000000000000000..774362cacfd1d98a37fc1b89d1c722da638d58a8 GIT binary patch literal 5304 zcmb7|2RzjOAIHzm-ZIZjA`T%b>(nW`l4NIPowN5&2w9!Pk8H`_WasR?SD{=SGD~FM z|7h|1{rdOoagXmkzITtu_xXK)UZ3~;s43#$(f|N}6M#iuxNM}eLxnIF0KkC@08j&% z0ormfYX=i+2Yq!{8xwn70T-y%Ep02b~X4ghfYRr6R;6(LCB zzp_Ugna?u`#{u)0lV4>{4zz(ZPnOWF$(5a1s=Jo}_9wD%RoCITbMf3%&dlJaNAn?~ z0htNVQTXi~8NP&mr?G;ajf`sJPh)A;raR~<+c zSHV&bPc^vS()Do=OI9{%XRy+kq7*QD&;<5(;3E+u0b*}Zy6hKCyWMN;3?N)_=f9HT zRZwOTu2CCVWZ&Buy|EUGOH#{SQ<0R!3FZU3zrVA^ne;(||N4b3tJVhlcyblPPzRF? 
zl+k)0RaHU@R}}j7K7KS^66uBKm*(?c-zTG{#}71=a*|%+52NNRk*rivdJRO*YcJZ7 zziX>H(Jjx`pSV@Q{v3Sk24Oh+Jgg|4xXz_@1$4Sfn)>#3-m~ltqV?AHq zGa(2pGn2B3f@}Aifel@oPhoLhAGLor7^IQWPvGz}#j_;Jj-CLRwjG7@QTZ0swfdgS zMWZ4k5F2oZQPY|53(0FsC{3;FXJ}uaAFirHte{CsC{C1*wvH*;s>xPY+{fnGA@mUc z24phI@fzfVJh$G6NODc8ikFHrWdBB83=gA}JIo#Lr%3n0;Ii3yL^Z7#k=!tTO7~qP zsEM)pU648S?mZJB0S9wa(?fX-pA4vhG1uSRG&<}Rv7aDK^VA@RAIAfttotoUP~^@ejP^7oIa@clhDY$X(9QHja|AoKaGsOTPp%=2=h+=1Nw+(FOfm7Re%H{-l)^KFB{5{R5`|SdWC@GRP~VJh6yhDi(a2wv#Re7U|WjR+z@$!ID(YBXo)`-GdD)7UraC zBwiV#*rWPv$`~5aPEbq@8R$|knAaRUh1#iYYB#Q`1MPYxZ4)0?@YFj0`#?-CBO?R= zD8DOsH1Pgk%%y8g*bj(Pc%$-r=V~IU$5T<}4S@7|C0~-RwY=7%Tv`3oZMFz~Z@xCC zej7O1rN;4SC-0qf-I;}%nkciDgp&}^numr#qv*Nl-YzP1@yiQUjjaS(e7uil6{}Ie zIvvTXg^u>Ar>#{koDcg@NDJ8QpFD4QN^%rJNX%^omQHs zIeD#Ik8zneYrW$q4wys#{p!lXCEFwSZFrOg9Kw^;8F!E_=pi zDyMKw{3YPg3#)(=LKrt5-JU;t(za#FzORHG>YAMM-V(_f^Cs4**E4Fg1$0e**o6}G zYKL&XnW40?vsK0XQoBc$e?yGc@_Zvq~j>$Va@TvdNT6US?YDm6&9XTDed*tEeQ(P_o7#BV~JBDAW+D$L0` zISOe0DI>k|U4Atln%cT{asB+Vn*}IJ(zSCxF;3jCGQ&5e`{9&N(al*O-lg|+YTlt4 z&T29A_Qq_qb-ef1q-WyQ$$1zaLr$u;qvGeaRrnI~o5^pTlJ{YFu=^IJLFCSv7D~tQ zNT+Q`%&BTejh_RTzs5)(AUyDDt$zIKaFW>K<5ZlF6k>M3S^Dt}R%N`teu1azT|471 zDjEhQ8E8brrc^J6Gppx*ddG5}=nA$DK8#9aVq*JM{&g;EOKMLA{4~gF#;#{!Mg8g1 z3b@M=G=fs~2G>b|O_%G2Zl&PR3J-8`kP5k9OsDsnUz}?IetN0>aZu%)C*v#^sk`tc z3gC3xfYjpBI@s)LcbwQI(y)Fp=c#vT5B&6Xp2_8Hf6?qd8S=@H=F3%>p#jh&T4aw%y6iXKhZp?&dcd)rR-he zrP{`|Ef41&MLTps&y`ibfQl%}4{u8E_y}J=6Z1MqQnFhMhb{sy)=!O!(@DmV90}TN z3R;w9!&|ORyG~iysU=Ayry$(vy9|fScZ;>ZH<%c4Th|-TT_TGsVVBZ`Av2m1r)Ws$45MPF}cSswE_s@e69Yp7U)(pR1hURBjqpS1ZL*g znBaMaZ)sCw%66Hgtyr~{J4zs$FPU3`=Vmy*^|;Yfy$_n3mMpRTu1Y>{Dm6QA-j+t@ zHZ3?LjpT^x*7W%oeJ;t1h>1Wb%zmhJt`(*9TE?~0ljx?wxw|c=M}x~pjA#x-2lHXeX9LAH?r~b*>mu%aE@&Ylp(1=kZ48ZCq3$JR>_z`1@c7G zJ^jo2DO>m=d)#JE;ujs+KG&~AuA^n_CJ0e(Q`{{{gg$4?V8P@;LgKWk+c5r@jY9&? 
zrl}D9`&0X$Ei39}g{E6ja7MJf^;_tufB6F?TQt^i0IB9q8vDX|Ml|+ukDKeo;vBaDZVPp9%b97nggx3C@}4q9;Y8xqb16LNMI^>41((e7<3pBbz{B44$*XX)<2`6*I3 zbD9cyuHc)sN-$fj%~@|0?m}?Picw3Az`} zgX=Wavor{{7b^+Ae6cYE#;&{_#s1M){G%TGzSDdf#OM$$;Y821F{u4V#v#fNFYJMF zH`*k5UQU@!9$&1?%9a!hW}#@zbrmV=X-(7SjK>4`mBeFdpCFYch@B(MQ@w#@?vG?hFfr7T0J4Gi0x#y2+zsh z2^^v-J(X$P7*I3xOIXg6YVdn1y-vPg8(?)@y%_AIc>BHmQd!b4AG zXezc;`aowQj2i&}SXyw|!<%nk2O)Q4YC3G#UR@YaJ16;%Dc~n$^Ug0IU#E&gNYS4{ z@V;B}ZtYAn2F(#{kK``_ck9)ZO3PhquhPW8O$N~s2;b}{~Pvr<8u_-d*U1R zXdCq3pvP<5Q7D)gaJ}j0{EFPe-wxRMm?T)k5ZLMk5GSR;s1yIRhs#V zJ*Z-PvZZ9-B>f(+7~nzD`B#_v3jAlvISSnV4*Z^gFu;QZ^ansZz)_$y`8Nv>{pSx+ p_{UIQ7}Sw<{vnI-7^@b;`cV~sZ5kf_K@^i=KFOGvz@j+#^*@jstD|9?)_%DQ*u%)MuJ_Utno0>&p`z#$HWEiycAkfc^%6li!4j1lP?$i3Zb)2}3%m*F}n_!gH+YwBw`OudWrz zNV*rM^VsVfb|~BMRu?3?PuA_Hgh*v0C5K0xIxSw|)gEbmmjebcRq=~QY4oQ->zgC1kwMMA*DB+Grcd}Ek;Rq$|dApeR89cG2EtB0+Gy{n6tv9*n}owy&& zxh%2S1;4 zr$KX|4L^P3tUh}eYXB#7P9ioW!-~r{!vJqOjPe{KGU40PyQ53RGd3Ka{QB9`LtWjr{=m=Lux17``x%Sv~1YB8~ zdkv{MMsrN})t(KW&`n%Te|hko@$(h4A%Uf%ruJ^zSIrXN?`9v8V^NdcSWw@?VB+^j z)Hr^k_FppDy591J*|~UOq5Chf5_`zn05qEWcQsQ`x)vafG#@^-7lxO(^9PB6)DMLY z_lc5Kt;_b$UPWo}T&KIqthXR#pMmhc6rr%{| zENq-n{7kIJhg08iv506=F8g<4>Sg>RB7gJ9#T#a0=i%sb8w&<1knDPF3cWp(j$x00cF zcck6jdXk{{szza*x}NXTt`*aU)ASFYhZ$4mrk+NTo!adz9MsrH9cFs;`MVL&uNp0G z<~e^Li3!rb!^@-#W?h4^ThfyXOg}Y3$32!)V8qMtj&P&lFFY@>BjnF-X zQ@USKp*@b;-laE7!UcBlC-1GglcM;Fl1N|;J*bf-d(ckRd6O4}y>f@{Q%4xYkGcG( z%a*VCtnV?K#)bE!Fv`IAq(UOB%`Q_n7tAm7UNFZ^4{Y-ZNb1pu8-AHtZInLv653}M z@#=dIn_g?|4RbWNcf{~D3#c;T`ami$Hp4M___2=yA#DC?I#F*;w-cCM7sUQ*U>4w> zhgml6M>hUC73g#KSycX$1IQfqr{Zm)?^ZNl3|vjC$e@( z{V09ZLz?_qGSd0mDKH48ZhDFfJHf+}Z`~y47%O1f7=Hb`h|&IR$Fq&N&OG6g4%iL0 zy65Lg={R0nsGV0kt~s8srCodb7KNV_%b4E4?y4^wrZr-(4lCB1@N3C98ou~#`*Znh zYphPYh6?H2oDhYBuEv|sHgr?sQ(T!*_Xg^F0nD{+SpXhxkWwZ zoSzynWb$Y$jLcMsnCCXu4OgsHh-@5;sF#*Uyk!$6F(IQfaKH1St#o7Pb$@8+gDkxq zRvU#!ekePS4~gtxih_g_V_WuW;QF2O6L#ft8APkL5eNLnMA2$8C=Jf2;4xS4O@%AB z&~|_iPRvn?E-IPUG2}7nh5|$tV#7|Ck^}{Fqq59C)(LV6_Z9|83P$GTZmp2a-VX?? 
z8~G5uDs6Pg%4jM0wW{g@yrTa0vLjL|J}%Qy&@E+3%iKUYSI)Bc*m}&Aaq)W#>Uf;9 zwnexg5UNw~Q4FE9Fs=G%N(SIurNn`Ub7il+-NN*E^ELad%m*f7wauCAaN!ZP99$;e z`cP8(+W7t)wyT9j&kmZ1FQLUP<&6|-i{S$W-~zQf1}Y}!)-Q+o3~qy}-Ucx}w?b`s0V7s_#}ePq4LwFR#?Ta6U(-LTx+MhtEl(B zBeBO*ne74Jdz@6Npm(7RNI0dbczFImty$%e3Z(9|j|!04M_%i_4szfnmN&9`GThdX zX4>+^pe>vCvD1c>wD379#y)(WNdW1mw#hQ~S&u}h)JMU0*-}T(UVAjv%F36lz#R?K zka0|NiikzOu3GSm_k7#Fd_&U)1(z@_LQKM*ygj~)X_;lJKeY_?&zAA@@^`lL#ENyT z3CwjIKy%V$MSGy6t7`GI&@m!KB!MPf;|M=vp8Fc-#C9st;i@l{_H?M&3h?60yvN?; zjz`zm_40&PdNT%Sdua%5C5~~i^i!Z@V14uI1G7*>2^?I;Z6B!W6S%X`z>zxvZVBBJ z<#;05lPNt1cr6nZHWvZRULJ!^gef)DravD7N?$7o)#_lD;?U@~Gjn@7s_S_ZQn7Qv zewa|!7%@2ofuY5w&0MxcZfv#PzU0nYomatYZq{zSl;#(##wd5G%t|M5#iKa68VYp0 zBTh?Yju14f*fqv`-}2@vtS9@9}-AJ!1g_ZB@2n(1-(4ydW;G_F(=O@O* zhQ_?*>MS=3d5~`;8F}r=6GImeGNjjTwvT@R7wxJowGm1`mHMRK(yt5ok`nv^YTFuX zRHh_u*EV*rC1)8=2Qe5gwBUNLgNRyYw+g;!A4wf|0#sfkuIrl1>kT)^pTqXGvIcYHD z#7H^B!`01G!rILZ%g)UAlH~URG=wv#X0@t2&QXT^pA8j#pHHfLIJJD#_Fz2T*A*T@ z&CRuT1Qy>oP72y0>{-VfK^k!(6p>dG`&0Vyac~3O;39+reNKbWmGOpNY(8;}7(QD+VaZo8_ z@YWd2bDM3LcD2a!T$*8*iqOkxhbe4JmBIHd)2qr=3q1X7Vb-c;H(THaGN|q)l7Vr0oTubxu~q*me#JfKgJXU z8L26}K+@epBAUDc-K~Qz`U;UJvUdgE$<_=cacC`P`&9V+oMof;Om0|$Kq9GeJrm5v zi(!r%nC&k_XKnb+5w$3p+=NENNlf9Ele}zY<9(v+p-ko}$KG!s++ON6-Gn4Cytxdz zAA|cuGbY5mY<8-(oxR`-95LfvPom#-bt5dT$eJbgX;%GENX#@6X<1OP*bTQ5z^Pm; z%x}Yw=dR|@j(Vfl)zB1TCPrJLWxn8OVrybKc=-5X1NVGx>nTI-&hr;pD%&@8>lQ5$ zAA^(;?dWR{25q0UE-e@zXR;ps7)4n>FOr|b0DJmJaOpANTBu{}3}zy?GWK_~^Stq6 z7EOAmhX&AuZwAM9nkLn|sgek+;KuOnLV7!EH zo&eJ5-LBH2$mLuCDrstM{?iu+ljGt%hM%^UxN_nj`&~My=ZvHmtx+1sAMC|JOwT49 zab`8{EaQ9d9;Z@Uc!)p*l1XSDo$y#3`nAdW9c@+}NxK{F9LM!H{WwR8=g)&KV1T|! 
zfMqf+9s|xVPkq)~Vm~oh<*k6Da;)Px^%5B1#EGA{{~vy@{gI1t|j31O!77F!YY01QZmI zA}mFU^e#l2d5QaWWqog*oRgDt^56ge=4R&3+!*Q*64Bw2l9J+i<)^6Noe2u;XB%&O zHy;sU+^ExQU?V`1*;AbP&Vq3y~Kctcxrw z5VFGb`hj+Xw+jN{UWYD{sgwa$PN%`oWhn=?Z%AssS9uk2O2iP4Jf;#9c|s$sL|k%(Fl$FnWA1Sm*NwK&Xpl&GaV^Uq=qa>( z!s+K>b7{y0Q1hGtOD|Ipo5j3HOn8cKY1JZ_mTVmr5v$L=5*om%0Ntc%KJAFJI$?2RhEIC#6HsJqS?d{ z=-F9^4~lAUN*#$x>r*V>h0exhr@3TpAVkv(k6$RMN`5eF)}A9d##hvPMdp`UbLugw zQ&m=QD_8v!@RWAnfjKU*4}bt^4ELK><>a%NV6FCXXJEFj`CbhYE?EuS3 zz;9V&{fo6fi^aD1AvYs3{^{-?S zBH0!hVbjlqUg*R48e*$e=r6|CJ~=j=NMjZrXjaec-pQOWAX%`{H6EpOGry!<($oCmXG0Twq{2J(@nj$1$Q#wH%1T=1c!uM*glNcV8&P-rL#T5l2Q#dcS*tDBT0}9vx~( z@Dj-$h&dHz>g-_TJE(M*LjN8SiS4D8=Fkf0^<-zwyY^M8=p)woE#WNCCdc$zpL~^m z3e|hV%H5=%fN4738OP=Q#U0Za1ulLOrxDMz=WV)qk2W43?>*#LlNpT|xGz*co+SZG zd_8qraF?(YZYCu(Us>zgn5oUzq^KlgHRz`|5Q*n#Wjl9F_R6_;kl~=vL)B06sX@;~ z6Fls^r>qcMpI$I^8gV!QRUCbwgC2$M`b z6|+(3T=zaZ=(?;fhDdKRin5%HvUCi2oWdGTu4COk7qQ#IZlCT{H8Ca|Z?ToI{}%Ir z!m^zPT;Fa0U8(k^dtBd6QNELpVm2_>+)+IrkR)Zj3ZCa*uU`be-w9)m@lzw2S6Ve>9RXiL4*awC`_f8W{yLN7l$$vC&GwJ4{ zps(N~-AxBe@cl;3Td9Z(uUU7?FpBqs1~_uzD|T`Vve{mNRIBXfH7iQdcG9x?8N12$9dV#Xyj?gGLS6}Gko)#yef+?^@7F#2+FE%f5PWE{T)^t1&VN#``qXt=t&>T zkgmlpkd?#=zn=x+Pr1I2&k!g$lmeNPnU%7mu(LQX7HPd=NEgXsec3Ldnro5nN@@<8 zn8gT2$!mN4S-?#V3i`7C@TKq^1ZQ19%-DlOEiU4V+8L?~?+*KE7GGopeT|GP&o(Xy zh*?t0@5+4JuN;&>Hc@l}N)p_#E=x)VZ~*82L&YQJLru7`H4a0C%GHgJ?bVy2LnD&e0LmW z5X)WEk9;jhK|t@39*Jl~Z_=}C_J`H1><0yped$63N{fq#h+w+h4wk`fGvUO#Yh-)R z(DJr3@ro^i?}jfmTx$n9Jj~4rbFQxppPfxeGHH@1_$ZbC-POy;VLf z`;6{RE{=Wvj6AP;hzo*P$nPa~yHkCvoi-73IAk#WqXUCgCyfgGF`*w=P*MiM~s47WS7Rq#vUionzg34u1~(=HanKp z9qpjK4z3-OT6I0SjZhT=kQuEuLfkP{RM*BII&OTyhP;3lMcg@8_2kpMRj zA3xb-owoDHgBVF6bfo{jZ`w?w;qV_QUZP=Ev7Q^fylaOtFo(Y^$WmM>J-kc`g_toMyB2MQu_L z;^8e~W%=J{KlNXo&E`+o;&E#IF^KVim5^;(WU_yMq*-4NB@2mf#bA82Ynx*QlAc`{b z2_r(0w&__&qfn0>nt?3)H{MD7t-c1+j&=b4vtq zfH-A@)3)FFq5`+Vd=-XVr=OKiN2z`a4tiN-nuoMle%ps`t4<30e8JoO%DWUi+XKCv zWK1W+=^BrX0?%sdTe;bh#9easvT_;aaKl4Ul_kfotZLc5u!)mvOrbSsNc=#Q)GF%G 
zmt}xM@o9A5L3saBZj_Ry4?q$r?Ua_aZqM1@UVQyw8)w6MBs$1H`OZg@cFJm*EImz`zzLv_0(DGNm!RUzr9eCWAVcBhB!=OAfBaW#w?OWJ)-8rxSG z;N2g;(lvjfLvq5mvLggCo&xLwFTX`Zrs4~YhkTEi6SzfziM0^V4c++!_1R?}p;?3b zuxA?gD)KeVI8EacTtQ|$F2Ek20{?dj;s({<3EuJW{;^p@ldKGooOC$LL4_8o_UTSc zvvTsY#kvHCZh3;91T!UqdUuTV1k_hTr+#)@NkPtYwM*as6GMpMsN_pQ(!O?5fmCOf zudNne(0mk0EWPrsc`CuAP95$ppI=t8TetYm7TinZO(HX?CxJp>bn~3jd4YViw#MJ= zTfT^;wss}NdQ20%B5go2b3aw^Rv`6}VHz~z?<#~{ZDk=gzP+q`KN|n3dNjtYd~Uj9 zgr#^pE@9TUp8RpI+UC8?m#tf|Vbb1k3~hL`LR-8+e=1X}HXdZ(NbdoFUBy95nL0lE z$^QChuNt*7Zu`{Yn@KkVb(Wm_DO8nQPYlFlse{)(fv89ATsIfn^?HzI3T`8!dKxYv z)(Inv@5V)N+^+FYjj~~R#r&3A5SCl;HLPX9ZrwH}fu8n0RzLR5lzx4rC@^v>GzMjw zQtzou%DaLe&GXf8097Ls{{XY#t8;u%295%9lJm|%dUxN2AdJ?7YHNzdmDM(^kOQUs z0_|EA)nKW0TQ9J6mAO3pd=s8o;;1e1DCLdR5?^p7fdEJQpw?0WeRQo?*Zo8T2=NKz zO3T7f6OnK#zhh->>m|#N^Cn9KO{&Jsud$BN1i{J+?)U+;8wq5S7bEU?4z z$DRIRSANBx9izBz{u!Ob|7QvP6@FF+akKVkoFn@${O9ET6?|4>a8mg*;;_HQ-@5W& y;rtbSwu}CGawz2)^k1s8C$YtYr$cVy~Z(iV`;=WF+#@D&@2quvZo@lH6x0Qr9{Ytv7bcupeP3-LbtYCsw_{5oz^M0wXm-C2U1^qRI zhXlJq>}U>OTb3R`_ihvIbCZ)HZDoFtK#hKqhJV;N7_|TNAc%C*DE$RA=afp>>x4p2 zVe7QScH8%*+M0oPZ_4`~Khbdvp-@|r5lE?D&yLX|=4%eU^zU?S+Vfz`>WiLkgm{fo z!a-m6JE!i~>Z^;{hkN&*E($LVr21#;vH*I~~@Jin}1e)>j1^vG-(uMtpn z@m5)RSpEQv8$z<=tm9_eEIcP~Xf-1VQn)-KpovscneV+)fq(ueZl$?pw)#|f07sCn zK$47R?fT@TZy(s~(Y-V5_Zsl0x}zpj^0NGM7i%@Li??nYnP~SpH$$fYTdZKaO70NC z0~+?FO?7$EuleSqLNwt|zLF{FPdRHvECzd=s^*7*1C5QLpQm_~CoPx4vLOjIJkOmq%quk^aklTz=`*upCO=f9w)bm zTG?Aazhynyo0x1O(_#kO@0(;K+$g1?5jLldi;YeLZSR|ql7UBOZeW%r!8S?<+#>5q zY)l3cX?R2fqrY_4AL$18je*_0lbsyW#^v~;C+-_J|5i-K7@FHHc@Fu3ZmK-kV#TMk za|f{72aVp}uT)I7E-3}w6oqu3*{Z$Tvpv&;#1VQ`t2ij-N1(-GFy(R4kF(TspKOyX zF{+inKaEcv7`(nZ7~QEVM9vVmUOpWqz_FOuLq^_y6YYZ!m@IfgV7X@MrYb*ZaqXfi zGI?Ys$T=Q>1fAOD2?a+T>!~3F zp@A(JSJ4^wHUhbE)LT$Vn)d1^l4Z7P$K_Cb3;>r6fnQqnAWBZZwtLoZH_mOT)|Kwj zsD3xi#KNWVjNaAo96AUEZ{2iJ%tKJ^R>NrT!V-^YH*M$zA98|=*pJa|&SMCN-qgm1 z;&2)9>+ZrAfu<~o1{v#S06oy#oyXRwV@aGqz=r*CwOHi*aBeh5F+ba9YWQ9YR3f@z z6cajHxSV%X7$gR?jnzHgboUoFB43bJjyCx9MTup*zM3aZ8IE>a_GWXL>35?)zCCss 
zev^38x`1!^h@|VvoQd@bS)E~{199@VFW}@U8WFse6l$h_t7yC$xoZh)MwMa?9)I)? zT@qvHNcJBMUhv!CgM!0RK0#mI!U>HX(-7Kj@c>ef@)NJ>={O~F$0dv?jqMMx7C;PD ztL6Ivf7GKPlB7BSDDXSz^b1y_bkwLRf6BS#Jb9&O|Qjc3J&_^30h-T zYsM(SIK)I+;WOp%Ex_3Tvag&OUsvt%&V!li1l59r(8G>t0L++Iw);Eh^|LZL1L3x@ zuSm|_jwgSfu4rEfCJvYDxRLu4YChFfre9BQgT8%73T@E=MlZ4B+zoq0**rH*-G%9S z%7$c-lgAVn2+s_rhSHaXWl`nVTL;zLB#MHLL7#YwjYw z`@z?%p@R9oG!HY6LMlL;b`Y7B`~IBVr-vUhKGKbG6z6qoG){u;njo+70cVJ51Rv!orco>D{~mCxp#&R91Jv3 z(@iWy3oOVYFFh1MWU;3_kjR;ZK1EtA8;X}cdGS%S`K@T1Tzw*+P;ad4oE2HS3RmmaXyV4-p8z;KJhZ`r5ubBG)>WH}$=LghU{EQbO5NWvV0D11Q_%y72|ytbpI& zM1|odou1__TvO8rU9EJu1;od`wXmWL&5_-1^Wrxi)}1Ez!-~LRbtsGqMp;k5J+FE) zo;}|z<1>C-5^Yi?>h)ZYnEhO#z-z-N7a^Zj*_O3}eaWiM)!4?fN8~4sV?2&WrTQI| zH_GdNuFPkT_nL#m3CPa6q|%)aw7q^bBvrDUA~1<>M2oEklQd4llrs! zt%*A&cTS{vw6p3AIoLI_i5n|97zo z%z9!^tvzn;@48{2iCbII7w2>}u&POmaCoZf(gjh^7CKToDHSuBMDY z!`R6lPT>JQL2gU|k+1dGxBa=5J5S>E1B|&2%(EsbeiZ8M9J%>eVLM|O|9R0xPe|Ta z6x!aNIg>;66={NFy0a_fi6QmACFWgPRRhZCts9gF!&DAe-Q+S##KV+2@w2**=RgLQ z2C+bAeZeey!@Vhqb^PFU6Qe7#8!j%lg62`|Qft{-&W;pebtK0qG$Blp??uW4r$okH zKcCSdZ>#Pb+#f8}%!{KV(L;4iQH9=%)l|NQbR nT)WWUjO2Ux?zu7#$4of z3&a8K>h1$^K{?;>aP{<+3h;23BK=Z^>!ELhvP7(7qAUC}>u6csMq>cYdYG^@&roP6 z*koml$9U)#Ei?ah|1X)>y7wM}>+`o02N<_xDR0@NdA_B*?cC_#kz1ay6_yZ**qn8N zMO_#@2U4%F)oTWsc;|4(D0&9X0+~%3!fm8_gD=^*jAS$z(U>!cxGB<D3m2+>IwG1kiDM1(<&^x^@)dSAqmKU$~LPD1_5 z)tWxyR(2|T6OwbQMUE zga75sldv`H;+OVW(j)HrIzAWC{6_x;wRx-gZq4{A>M}m0rtQPY(RVduZC@W1B_0%U zj85FX`t^1ZTn?^XehbUFAs}t|#qSZKg?dy(OFO{0bn?CdfjfO~(N!${uu!kphwJ@> zh@FZO6Q|5%NXDuU)%$^)azmlN&zlurPi6&Xk@ zhSr0TX*h)y5arCQEg`~`!4__**f?6eD(!EeE1`M?d#8t2cA-*oJj_@EDeJ4i?hgJ# zsz8n>PAxTwQjB9GxBI$*KIhTb#~o(BT=bus8U`tTfZTW9hxtix?$U6RcR6~ zhw(eX`hrqK9LCp4ST&uHp_sB}406XdDbn1(h+jOg$=Ti2lO10>o zzPk>hf;T=ZXnB4O4)nf)xg#X6Yap{$jPe2w=3Qq)J76MfD^;IU?obY2rrOH-yFxP3Mr**!vvN5?`NG4&7m6_nEeb(BPreo5aV4WOZQx_M!c!8wP z8CIsaYeapXVEmmv+5Xih9Odb2e$C0E&ywukV5VcUQ;=D!)zGxU z{Rhfwvi(*~dNVZpLa&9L*6H0^1IpI~MnvwwhgQu*bY&*=k65 zalfVGwM%0wkAcM6`L%iVFGNlPh|K-}-NHb8;_t=!_cBemtWT-Rcz2z^p}scZ 
zE{^T${UWG_65OGUNMBj&@1;HGR(3e4O5b1{o%^yWaC?LItw-80C=l}twDUt`ma$95R36w69`7iFG?8=1bm=pvxio+h7tV5{YS=q8H_?rwVNK5%s=rYFCBga@BPVq~eG zTxf*YFYLFBkbI74amHwrmk-1lh5jKU4_r609m=A)OQ0`khWSzEfyPE|yi+?Z>MLGY zEaU|1H<+DU7mpdfe7d_E=7aNm+W4VZ-Wfb=56LJwNv$|v>+KAgWbT_!Jd1rWh~8ql z>8;7u_%Y8c&q8^|7Edn3Qz#h4TPud2>Eg8^Gu5t1DA9Ok9}>< zR&<%_r&IvCE4Vx2O-n7fT4s`{ZO9D~1JbHy=Qm#3w7(DYhlF(VWJudIFf77_Aug84 z`-{j)7pdmf@c8$snD0opahYT_X^DM}ih3;58ck31xH$}tNiQ^ek}zs({n z$Jx2&^^CR+fWwj>E>3osz2N#O~Ic`75ytF)X~W2u@8TS=_}aT`YFUDslny;^!Nhe@fm*GAUp91TXmv^5HqHOd7zi8k3Ff!=0ung zQ2=*$zejh8>+ynE)N~aEq}3v!Le36sF<;e{%|=7}>cauyN%Tb^=c(pCpE|QLfB2g^ z7Govb>g{|NF019FI!q7$=-5GYS*FFiHJGDT?a4pnUMxLP&{hazwUTE}@l)iC#p1bw z*6swKgl)uBHO;*p0mv;#yH-4GIAhmOUlj+~Gz1lKh2Buzd`TvRo}Y`!+q)$bQ`1(J zDSBFW$RI!kDJn>}U1s61@l8qb20zlPpTDmtY?E5HlK@D=sT9Wga?{$?0aDT)L`rs2@;3>Y(4(s}|XK4pzf8-nt*)a7t^a%o( zKpgM}g-a@@AS*>?;d{MTEeu_syXJVsYD|^RFn9TCFI2 zXPx)%7Rx7xfiwJ_wCHG5YrZYYMP#^E#LnxByfB-|h;Z$F1>C4wsu6$w*U(H;D%DtQ z`|a;}5~42eUa?zg-tCk#<0#9GbD60;NC^ye)=Jvb^(Lj_f3o$h;ofoIcHvAsg7zk+?O8dhfk$AfsPdmBgF#K-u%$ zR>H}*x31HE*mxnjKeKg``PUmQbz;cs5CP}pMZEHa0D#v2iU`%k6`oG67Q$9ej_$gq zX7-jsUJmv}agC0x2zbYV0?5n>&zRpu^1RkPUrb_+C~fXQh3EJIwH(Sji_N5lk$s0|nY+CWrM;$TbAc3s0tiIKN6$W4kO z@``BL!Q7nHkUXZPz#3RkooLkOHy4xs!Y=JcnMg|ZNsaXB4& z>NKuiQkbRpMrjg$O0{Rj6ocy_E~8c+>DMh@(;wwtIcaF`vG~|1{M#RYOnQl$2=za;^eA=#Si}IA^*CylyU3UrX4o)2fb(i67+>Uv?Hc3wrS z-IQ0c*6BTHjgGyw?vK^hm~HrCL~sd;foZ!(??uQo{|XB2e?W1tw6HZ5wskPIwiFez zwzVPvz|OaBsVd7{Hw6+%16Ixflv2)A(k?VIu5{Pk=w;m*Z#-eT;lV8DNsW5Ss^Cqh z=*NNbWyAzio?cb<UJ+=?JMLQDmCm=bnQ}ZIhJZVzEW{2*LF_7@0_CMQmO7*rR(~}$OW(L_0i}_ znzm=1mRFUr=W7$MjJrNHI!|lOe6sHO)mixF==s-M`n}Z;Y_#*wFbryX9FS=fm}eBy z;t-T$7E*5!*619XYZ>0;8kXl4o^KP``UG3(5%vCYbccORrxmW-`&o&5Y@1_TyI)*| zSA3UiQg>)VX~2te_vF6tq;j9+j>wcMzn9%nsnyXhYhqq~3{S5OdOh$wy*~C$Lukfu zTt-t=W?y*rNOERfO!nZjoT2A$hZAxeV{<>@-i^G+J46k&JF0_xW^^7leecl)t-{_lI9-Nr#p8P)k zd46Kz+wkPi-Y-8#zAO&SY)pMw8kr)FGGxIwO z3qK~8c4wA0zb^lrUD;Y#-k)CGUtZasTiae-+gMuvv9Phbw*G77`_IK6drO-KKYkpp 
z{5)L$xwEmg`+fV@kDa~E-ThxX`}@BRxAqSY_l|cCj*j+Eb`MXE4}R|*pYETW{XRZB zI6Xf(Jw7};KRQ4EeSUIwaX3F`KLQev0L}r|E+_-IdG|^005;t zVw{M;q;qi)f75?{l3yHiSFXTsK&oi3+%dyky{-xXAPIQO?gKzBmBFZ*f_7zPEIP=L zr3s|=%1l*lDgd(Il$iQs@vsx4$Fq(aEOK#;A!i9GPeh2fx2k$A0eW~&-@7FKVw1(Y6*b{nUD&MrriuA>$w%C>33TqUoGA?Kzt z(}=17Rz^(Aj2?MCjvPGfoQmGnr&JBK5#9ZReTo&_e53oagt4Iuz3Tl|<`yrx5idgDF?=wH_;r=7vw zI(>Z4`UsR6T(60d2yb)I6y}`Gjr2DW%_a=VHOIt5n2hnJoxGcnY?#~@Lmfr)G(#G^ znI%9RM@h>w1icQLQS4{y-M288dIMP?j1kLbcVlX#Dj(pv7Z}(~^rVKi8O)pJlHQ+# z-~h!i^`(JDNatHdq^QuCe)^S9weSyIbNM*#X4|`3a?kqk+|HfpZbOYgjVyL1NM!s% zINx*$Ot^{tYJ`_KHUxh5f~isj4yF}Lh11eb@pimbP!h+ZR4XQ5`qErD8-ChD{56Lv|=1eOFbQR zmPi!hrMGI@eXZ06jip-{3%r9WCT;7dL_;WkkSu_<4^1dykzs;p7vixvJgn|9hO$L5 zn91h*V|9j>FQI_2Aes+XU2t8ytn~62U6`_Rm+@Y8GOi7Pr%uzsatfgo3iuX%NwsIa zfu*s6Kx^n(3$jZBk3gRJbt9Cxy7+=e2-W9W5&MBe(66@$7q^@t&DX=w00I^Pn#B;g z;UGu>y*y7-7l08@KpWT;^iJ1?h<}<@aWmsBDR!HE%7j24Y27f)9qQOL9D*!uZi91U znE6)6SV5m|26MR}?m(Vr!!u!QJC2B#kmvN2r7%`NCp9Pt5OW7kY$=z`l&1osG8qlk zeYtQqck*An+o58_?%dPQg&;3&4rX4ARObPYzC6pN0pp8Ujrb%;Pi$P z2*vOOBj$Uw0ZAz38tp6w?Ly*(6mFHr}vYl-z! zHF==CK$lj_4G9pYLe33+X4|n5gvMv!98~5B`$5PXnJ0sxf!|1?{8+-0UQ&|VFtDRm zBMF7(?*+xs6%b-VUR23NV7xNvBn4EWtRFo1O+p$e%Gz}nBN!Z+!cQrKcJ7jqVh3{< z&k?HbLr3JsOhUf156aIsF?ig~;7(BEyz_MnHDx;UI$Nv!yS$1XyV&yai=)aTC2^?7 zMy%w03@a|bnHm&)bM~$UTdQ*j=~hpI2AuI7cn$cH5E+6(_OmGqN}%{-Rx{ohHvHpV5cnjGU2rU94Xr>OSgIU&({=O3rAhqUJ))W{)+*oisY(Dmf#F93BGc z-zW^-RUqlTE^k_N3a;;7*VwF(D+)`%`C7;5v#Pv%f?qnk`7ji^`@9^*a|(|oyhTUG zj&YWWS`*K@w{#{Gp~m=WJei)0{_TeHhedC=h%&xYB;IJqR0IB~X;p}rlKT@gY1X!2 z{S3sk6Ue^^iPgNaNLIrGrD}!?0IdTat{kW~Z^HOH4A}WV16-6;uUW9=Z%MSjufP|2 zUk`EO{iLyCCZo(DGDQnSwig{hYa+_Rm%X9sF(C%A7pyhIJ^`U1BTIFfXBHiKbRdo5 zfUL66>Pv_oQ5588FFPgm(bKAW>LM)16l< zW?jh-dZgiyTZDpv&|QF1N}z)djUk8th9hL@34;gfT}Mj8I|F6aArZmAr0B@dTn1)qVFt=Jn?YKKcUo6!njR$_aO3hj`o2s~F$i|C<-hpZ_~=)POlV#{DJ>}Sd8Y66GzwDWqJ5aR=?59yUTB`?KR@bJ4_jq%QsEw^{+ zxmcW)Nq4CRbGT9UV)}Y{1@HGtD-tnx;j_2gj{t>mavSRt4o|dYP=X>WH&q? 
zmAV_u;=XY+zv?y?!YOeYk)-{?u-2N5;R7>U6-%;m@iUL0==qo%ljfr`p~BBOvaY~x zEwp0Ytt*T;@|6T=ncN$6lg#}k{p3Fy8!h2>*EXAY@K=jx2TjOJvMbvpR=Nhw{9}8* z4@W#uWRI9!+8dLLxXqg__NLy@Xz;{W|DNdjEce(${zv?}zmLtAuY_)IIjLX!b)`$N zNjMHsX*F=b@2n#&C6JnBQhx_1&7Rq95iDlh;mFG|@9!kDS7gbn#45-MKCKI2qgwto z5NtNVt>jng2J3fE{p7@)X;^g(Ir~we9K~%faHSM z-CO=m|8OeCbXEMtw0*sWNH)D+Xcsk4U1H$% zybhM&BJ1~rv7En%eOUka{ksPN7U+CVPW`1=WhwIWpuMSIw|}m?2~_coss$T&^Cn}s zY^&9o3r=yjltMQWws4;WVLw7{h;@a{Y#vv+@B&$l@A>|IQg#fxWOZhz4=sA|JR_^&E4DH((SJ)tFD97lnDH+;SuH04RZW5X-QoyG$?d}Yl_WTmBMVSMDUF* zzR}-9r50sM`lF$3`^eXGP~G*scu;98IJ`QZa$~x}AW%%3FZl~rUVgxkhz+%|zw30jZuvlbLb*R&=T^a}pv>;7xS# z7srDZmTHS{wI*6c(Gq4$(ai4{CPO^xB^f4|h73}L*bgR!5ge5}4}qk;GMf%pKJX{H zoQ@pk%;oXFRWr=Oq*oH4y!uMvIu*4#LAcDw6!(c!7DMf};~(q=P9)R%0iwVC5%QlG1+@=xQuGKtp7DtY}Pip3Wy* zPkmpGb3Li|`ob7)i;C60Fu8F=@F7E%o460flyt#r;!!NevE7Opxe!*eI&RmPdVozn zxkL^2#_IGD=27MmSmKn&=0)1~Cy2eKpK-s=k*+h)6`xt2PCPV_&&DYVh82~e%{q1x ztZ5q)^0dlQeiSl$w)eQY2_?GFxsu3q?&dh>FcDf6cH8BgU_6{$|0J7zU7G2f@G{F* zJ_i@^UCdwc#Q&CM@QWp2hG(tqDvp!N8IIFzborm zJfrAp*EFr_N_%>s!8cm}?OSu_)BK00iGuJ<&cgN*UL~+imn@6j_NwENl0}@Pp*$s8 zPzsyrK(Wqh6W&B&_5v7F!H~M5)?=f*B}l;vPP3MbL!*JP-&zhJ!KErJKu)#IUYsi}jL2czIyCjg#Gc zN-?W({gb(@VB9kf>J(@4skT*y4wOA|S1Trzu~m3OGS+i^&th)cjg(dG;cW5zK&0w21)9)iqI%Z zx|pBS1k}kjYH0Q+sXpQcRP1T@hZ3`*tg-^HFx}wZSf6*Pz&NPdq~< ze{y$_;IP^ftbs#3sZDNk=ls8>a)S#yxKH_lfP6Ao8lSwD;Kz)|0}lC z7uY^fywHIzq>M+p-p-b84=?klK0#fxRRkV-p_0XY;Qji72+eB?Q(?)7a8~a4noYN@ zSjqRJ(K;O&Mq=l!YsA5QLwvSyNjLi0TD0Vas8KW-qiGPNC!6DNzwBiBZJkB*QMO$r zt2xS$`EjLM_ag#K4Vsg~lT}c|kUmD`7nAH3S|O5R-dDCBux5{b)srYbQCG`pG(-}e z16k8!C}flae&^i!ZPC7G)7AG9`tzxvQl@`#rCqFELtLrbP+QXeb_|fLfv3J2kLN~n zvOZljIO4=VejuoQ{m8zn#VrkX$1KL(;_b@E)hzCgbWc^T^Qu#FenBL&>ws787x`t8 zz~84@U7gFh@~ty%5QkVH>XFJ$uVXs%O?El&c=6Q=Ph0 z?*2Bai(CD_I^IA1{}ggAHLSl)<)XCw<^R7ZTK`<>&#mmG@bkAlyl91gt@KYB=$}jc z**7m~{kP>@#P-(`f9`<)>HcRyylk$2+f%y#cK^H8{?qx-VslwD|F(ICKMKr$SI~cY x|4FC+`Q$B(e|Z0=JpI%C&(yf&+~3B^{QqU3mO2Q0c@gyDhrD=EaeuwL{{y!tc;Ns5 literal 0 HcmV?d00001 diff --git 
a/pydocx/fixtures/has_missing_image.docx b/pydocx/fixtures/has_missing_image.docx new file mode 100644 index 0000000000000000000000000000000000000000..996e66719895a34a7230d8030aa723f8f91e15b1 GIT binary patch literal 4944 zcma)=1ys}P8^?!~bSvF(i3&dzy1@AE$KeV=EVs+d@m007`DKql@kh}sX?B!mF~Fk=A# z6aadFk(`^Ww~ec}nNEPajh6|(zl*c34jusWv-C^N4)~I$k3Rtb1M3|o0C4RDa++RqD22N+Fh}8Huv0?Tn+b!X1h$8TlkIvI>xLVk{9NRmvrJ1 zU~wW*k26aq<24Mz^d$0Ju2&u`KWea!+sk#VVYC7n(mBM`xy=GCQ&sLBzu+L+3f*pZ{8FSs?Tfq1oNFoG;Zj(|*<^k)L9urptf- zoYgwZrGT{OduQeMFEEL{)p@MKvmnThP_OY`IREN1V%8dNx0oBgkeJncMib5b1{NK@ zeW+kio&lhoPY>{O^RxzAjVC`szWZxtX_fQKlg zH>wvgv#QU;%kk&+Xqwyv8Vnzi&7GUXL# z=@%Wb35O6>y<0Co{u2$mETv?p-3M=rOT4lg5~HO)*Yw&L1KmL)>q~N1d#by0CKv4cgsIXIRA zSdw2uHw~YGOWr{&j2MVovQsoHYS8u21Tq-D0p7luVaDW_p><|1oQRPs3;g+E$izw^ zfixNKhX;kl{#h?I@j&%gaI5ez_p^OiB$SqDfvnLJ#DezhJg|Y=B`JfgVOtGNE9J%k z?$}jP~Ughb%c6}{Q8a!7@`k7JN_f0#M<@wo14g|R+!*#*= z+$ya1YB{)h*I#XbQ>?p8azS*=i-dN|{R#B@%P(>$Cm$GeRU2l**ssuN>!cZvsqVPk z@GF;rY|E$0+jrc1*Fqmyxxe;^S4G{ZCuI--dMXRla?)L}Z)5j3pR8S!`Q#9LtYS#F zgM01#sL=$6V}lrWXXCIu-6I)22i{oR@o{m;^rxoHzXwapY{LCxUWrWCVYnbOkd2fP zrf~Ao%$mIN`$8*F(rfI^$?hVpYTM`8>D?6xw#f(cIA&t|^24S%SqwpYe)X3k$Jz%w zJC~j_VfEEpYP6F-ZFzgP)OGW5l5|)nVu~uH+O!mN1M?LwAeTEPJE4Y#OxDkA9(?x9 zMK!G&E28Th!Zxw<4Jp%m&8S4qZMrimdM%vZd+l;zty;`kHep0`s@6TjdOIfSQaYwu z`ZUF|1mCc@g?PzHt5NYt;2qW+29hg_Z86?<)yAxa3cOUb-c35GRv{w6QqN3{m%!Vw z^%hR(UuAQH=0xSWHSNMH-Gi1PiM{WKqmATPq9>Pj$1X*yapekSH5wWZ9t7#@3a-v^ zjG6G5@#q}xTP-t0Zf&}${=LJ{&DR1|rfb+O(i;40=a?BgAVu`V2_gIGunRdEz zu`YzTiSL%!a49hIvEd*ZZZeTC?+k=nPH-p$S9p=U@P@o}qsw-z@x@t8tb^^`ofALY zYFeP|@%n-s?Z6x5+hTX(LlSXa&?A1_8gGHI+hy-a5{!2d7e{5Q$afhzGBCUolPsWo zcB?peeD#S%t~bp7a8x(wOuFW#hQ~UVn4fXXJj)P-FMawJf!pPKl>iXF9qf9jT2*cT z2q%~-efQwAd}vlH&cJi?KHr(YOk`_vcO`tc%PWM61)>IRvGL#=Py3=1$2%pE4;&!a zXR@P0StTato=I1MIGlMIT5n57_RePpNM*ekhRAykI&+E>CMhs}xu*GWd_#xEL8DGd zm{=)zY9L-W3dtpTu`)lXUckY(>*D1*-t>C3dlA7N&p5j18**t?V9`yG>7>H7<~ zO0@6xf;{j#R$DP8GGFj-gBB>t4gBvkklgi<2=<|=;wfPn*57~f*yyIUY#|$){!*f% 
zC=oJjcWOs%W7UhdmU~ns+_Z-)O`hGMPL-|*0luOTxpsFG{F0C4LxiMI_pO=r{aOz$ zEJjn^phKU^eG>EvUwIW?!i~~LC@ug%@~y(XyaSwVynZW?IxcQg0;FG>%*gg6iJ%E$ zB04d6VUZK;Q%vs4#Fk^_d|3|A<`7?{dXNRdho<(ey&%6qRZnEupu%Q&R9!OJ+H|!+ zsE`(S+Upo8sR2XGb6{*pxF$K9uZ$(h8z8(R<~@iRd%Y=Fi>$ogPzqm8MW{b)Jw*{V zAlm!Zd}iFaO%|d{2=xT*kO73B z1wIL6X07>Xf<@3LzV5N7#V|)cvjzj?-(NwhU0tso2F{L6_ntt| z8)~)he_$zc!=Ki_DR>xs*Vm8^*Q27+F=U8FrLeMO(jwJF-Kx-L8Rw&e4BiTv(MM_` z{e0yz#Wt}fvqeLoZl{aDiqMjDq>QgDJ?p0#t_L_y?v1w0@`OE=8r_`9{HeSTIaRo{ z6LDOptkO-_^x;knke2~!q0{yH{o2BaGm*znAgehk3VUBWVXI<2i%3 z5;(`%1;q>Hws*Tt5+f*1m9y{0_q2VC-_JHj(JW%_%dS&h3rKLWYbAvi>E$}_G7(vO zOU(m`F&;&ve~`4hITasY;V%PbQ$;py_yjh2Wl&H0C@-u z*9$oMzm@_rnU4VLuSTBq79yVQ!dy(BXNw<@QUXQRg zNMa7kI|Qh{niS<7)Mcyb>E`YQv~YL-T|s6J3KB#JkYdj_Hp^BxIY(b(U%e*bS2iQ( z>DV%@>`8@yt8qemoB887Mo2|H`oJ#@6zFUIcaqFGyo`Q9fh@ljjET3ph zwhQqpqkU8hvE@Ci_-bm%ib}7&;-me#->AGEg*7P?Wkj$I36;uHf$w=o1Gd!g9*?1DF58 zR&bDb;A+QJ_~f}D&2izFb67U!k*U4jM1f{}+mb1_x)a_uiVQ)eu&V6c92e5d7)={} z=5xZq39J<9?nG1V@2ERL&NADY@sTubz%`LXE12Z_y3flD^T}YNViwOHwl3xLXOl5^ zRJd*P4z>{N;4^Y0v}%Ou=O~foh8>L&MPgSMXc(Taxl=tzhD>9NXtxPa%87VbAt`S* zTxwR=c#*1X#sR3_)p`+$%LuZ~31y&_@Tit|QMQNV`!r{B@bc>EVzXq4e92cD zXq(;t6pQOrAp#fj*!G}}i!M?^3xFgQ4Fh>MN2g9Y#yQusT_X-lxbS@PY6g8v^*{Qvst$w_QG zs$Dt8o@`bA42pjLoP>&^k{#_f`ZFN9WS<1;5CHzuwC@7`XH;};JV^x+o}m7xl)u9M zObWhYk8T;2ew-Lv05=j;Q6P*8FJ05mj!nxbg%+hH={X#fDA zMh*bblit=SM+90%`ukdy1cgw58Ez8~IJGwB5oaUugdHJG524S%@Qe=|!vvr_yy|EH zOxFa9&h2aQ-THWH6iGPDHOz{M0e6+d!Cs(|CXg#;R!xTA=7mg?sd~bgL5AfY6X(#l!B++ZW{!4n0T<(3@@d;PFMKgKbH@$r&ML(`b zBH^M=H;OGEJXV(GH1u^Cu_^E^^ZM$lkjj9uayjtNQ)VE1yrQ7{99-?YX{LO=z9Ch- z$U^7V5nRC|V6deRF>>Qo%us&UH1GC(*xwjyTbCwCN+Ja&2q=*I_L-WGCe zR(zi>Iau1`>?N`il&@Z@$t)OHqg+tplOM`dQb<5!SF5-bXRG*jd0oo?ljbfc69c1bZsF>Itnir_ znpT6r;B@yOP>_i6hgn|OSSXO0&-vC4&USFGSma6mR`LkLCk2X7^K|!7iZJ_TYh-S9 z(x>30J1(2A9n_+fX5@LaYAg&|d5pd8pNu*0?)RF5*|;ghT$&JY!Q5dwv&o3sgkBVW zp1S%SN?;wa$zA{MoBk3ZGDh=s+eH|^QBjE$E$Jh8}o+8mp->;Cbi z8bsubRX}|c#jl97u+d-(?SwDVBy}uN0|2a~n13kb_gJr*&;lq*Aic2fk2i)MKt}nN 
z0s#223|)nK%z_xyWZLtGTkBpIcGz`G+;*IWYX!ei7k_lhtxhD&?v+zJHl;nr$0C?< z%1{(~nYfvlX3A{T)TjdS9LNlZ5;b>faw^M}>s}QWb?`>>a?gfmKkYVra~XZBA{Fo+ zal4^&ZOSQw?NMM&Jvm#NaGDoOt@f?@l-qV0R6~iD%GA*Q;vTwukR_!6eO!0%a{0}9 z_^S2G``#nVS0{6?agDN7CJ-dhW7=UAPvj0{Uwn+deV`Jk?aPRwYdHy47kdN`MX32| zQ7BJsYo;kdjw+4@IBIDnIR7&1PCcZ|`PIYD4yK&q&ug9A^s zV`tk&Rb>8hfLD8#R5N=y1`x_r>!WTS78e}>nNo$dxIorQzb$NLXg^ss+Y1;!5`%_w3n>dEbmmj;AN8|HZ;49g}u$+*iD|MLmIL zV^E*SJCjj7XF^xxf5McHXPZmXC(yCQv!FO(W=>q-#9Kb%D$Td91kIy+M!EZ|k|-wC z83yxU-$u)^@YF<$h)|OZsgr5ly<7O*GM9#qvkPa}vsZezuYK7vH^Ksb3GaJxUT3IU znIj2liX8x8{wvkcH+>O~=@hJ7sQDP>O@A z5_t5bz1rK(O8Y$R<#d;aTgQ$#*7@&z&<@z$&5~OMCPvfo2@nqeD&e|9#rqN$tohvu z#dzbBTRid|`oW49^WyIj)!)b$DaDEdchKhsDNgFrmZ62+&iCdjkbzDh8`Hh~-i&VI=q(@Sl@2=lzXS?gIwDtUil=S39s<0OLM!-_dxDc$T%p3zfOfLR!LKfoF&AohrWQ? zK1ipR&R_c-L8BZ@eDhk}8bnz7Y-8v(2mD^TWB>6O=q zS?Y){!A81J#L^S{1e!yJ`~AiNTy|0tiM>n6V1+w~T;C@}ry0ZAgU83HD}d#;#4->x zzAOBS9-#4dX(I1H#=+bYl+ai3-Y5TTu?{jt2Z+*9nV=}5=S@j7d3T117TqE(F3qGh zvZ5oi98i(uO&>vRv$nICtL41tHujhp?r`=xx)G1T7T|ZgGUNK*dzT{z>RFss9WQ|G#g0%ftzvaaYZ9Wvtv@oyZqYu3thOfTi(cR zP+XZ>E|%J*sZ|umELKFfVK|(_|6El=YD!U<^<{BE{kIhxZ)@>n@b&TgO08pOdm?SX?%BXkply}1I_>Z7tj>%SL*YJV2_VNwCOPU^WVq&KPgz)&8ZXc=2i&+nBW zI{^%#kY$M=9!4kl3=c()U?_iX&bytxII_!;7^Wbatkpkn1){+yYPJmG;do#W| z7Wi{2)LQ|qm&~Ookcn20WW!GML6x}5_+6nRsvAS;f!mC<&RjB!N8P~s-0?-uDy*vP zvFNXPt&UzHMczp2!K}YWZ|{NnVbJa-fsMjI9DPJA-Kc7<d;`_j?*!%jKrn{Z>XrS0$MBx-xT`;d$=Z$XZ6VFPiGne7k05Ip}u?O5Uw> zx$9Tv*`!^lrVYX|!VF`vi$I=4Zn)$0xP#uaOj@F@ly)ID067jcKQu5PWK#pzH^V7o zgP?vs;LSLiDhyAa-hefk)g+vDpkGWYO{nMVl8C^ln7qBLE>TK$=uff4{l%3aX0?so ztti%+jr9;ORdT-eo|JE=b6P5v{zM5KWH9rh{(z~Zeq_l%6trmae99Q`ZMpv-qp@^% zB+i^S<~*Kb4Eap3W36LKo8ghW@MS9ETrIl3#F9hJQ^N7=(&6QIJM)(#=U?-<$FK*Z%-tA<20F literal 0 HcmV?d00001 diff --git a/pydocx/fixtures/header_footer_problem.docx b/pydocx/fixtures/header_footer_problem.docx new file mode 100644 index 0000000000000000000000000000000000000000..6bc49a7ab3282cc48ada05326d1a0d39ba8d1511 GIT binary patch literal 86241 
zcmeFYRd5{5(k(h-X0X^I%VLX}nI(&vnVDG@gT>6u%*@QpWP!z&#e7EoHt#>;+=qK# z?l~P%JvH4mT~)aXEPnJM?lJxgEI^>`WvFG4nmHFm2!f$eO+`sI; zC6J@4EM?XQ-3vChwY}W2v5Zu%4#V6Se@U3_zi7{N<4P&FCJ<6s_p8wyfz?WoRP@ac zvui1vo|;%1fKpTyB_)ZBmCGSfb*%G#F>hcuuMX6qu?oDMJo=8M(F(jygiUw$~jbyoua8>W@y8ogcdH)iR+vxjpNC&CC*O zC$r(Xy9$~cIK{{_tvm5-HXpadol)I$j#CwM!w9}1sov&R5MT}Ubeb_=8Y0=~toC&< zz%NO#m*6d*{;Wu63G;90?9Zj}IHE<#${`CL#Lyw|j3mZXKbM)E6xJBOeLE^?8)eBN z-|{`#!U?hp#@)Q@#sQ1uUO9%=d+Vls8Rg5_aM%!JMkb211swzk$l9wLKa-~0O;f@3 z8V&VM_rUY)@muz|`qx^x8|NnxFzap9r2G&Q+L-x8xFTgzt01HH>jqXD#e=6V9B`&) z(v8{4EcG)S1O#r<4Pb04o?&nP)FZIg*1gw-If)g~TXOI9XdpXU$rt~S^(^}RLHfpj zB6#n@kB;u5*2No>7q%$&<1Qz!1pu+@K^&SPk0`GW+cm9u|6cVK+TLu`g4fMzxY6(M zi~E-hN`bdmpLCe3hZEtViEn-#9?P%E6))TLWTCOd_YjX;&vgiW>0KC0{0p$LTWKxv8A;5umc7PlL2P7FCdm~GSFSPIH|4T0aCsWEl=UyH= zCDF%+Jb2-MQ9a&~QXdw=bR_(3Q)m|qzHN>)`K9k%ajnUPCIr%aHee{@;UI0x^VHp; zI>7xf+5Li274s9h^J>~=14cWGY`BM<_%Jq3`w!TtbX^4oXN}Lcx zgC>Q)R+mOwr#C%f2w#+<1l3-Rjs_4YFtMgH+jXh`*v>)DO^zdKTD*JGw-F@;ltK3a z3=L{wr!lfjVPeTr?H3JPWj-5nvt4D8K&{}Ho3nLQ*nv3qwKj><6U#6MmGQs6F`IbT z%~+V)dEtHfshJO3j-D`x;7&Rntt4@tZnsRhrUM7>!ns8W5r@rH>ONunD_UUPj={Cv z`1;#&iv1m_hJNpe?WAw)5JVaAR?*$}T1VPi+VWKR?`BQI`y(56uMq#L=K!Dw`XwlU zAAXs1VuA<&7)t>Mpa3=fkDmXlhASz;0Km)%I^^DqlPeqm1pFr$0PyE@rll5_C$i|3 z74m=tJ5g5FAC>RymRr_(YA3Yzb!KCW^*bV|T&S6lIbdP7%bP)sG+ON%QvHhza_e}P#aNSk+y1?vaog*a*3(<(i~F2aQ}6I^t1*L4hb3>9 z-dkRuho>!X@3S>awlp^H`CZ|Ay|=&%e9@Wut)p*!HHirY__>k0wd1C3aCV4TOjVcxSE zpTc3o18r9_;%-PiflG6n4EbPFEIx96mHZalc0_c95ZKB5GUyGb$w_fFB$%wGF*4pU zP&s0SuX<2AgCH39aom0_V~_Z-#^f`L{1`4Tu=7&^Y6S_ zh5Ys`%Mh6TBn1quhx&*nQq}l{9NYVnV>#%!wUcTbC5M?vO-1fGnxZuOrgS8a(y#E2 zoJ|9lHJlsqWy`%Zi8#5Wa^Gp**vXsS+phoL&<$1L4vwE0;@L$_)1t8%J|RX@RRDt4Qh z@!y?foj$I&&tKc0`7^T^m-Pm;YiVRtA)9c+5k(~Z$S=VAhjG3gnK(~RJCyS&nvM20 z*Ja?4xrB&uhLb3I-9UT$4S=Y}#&Cp>snvzR_;O|uB#n11UmgA?c;c<2dnArszj(Dr z`@&Gb9G1?vZZ4iXZeBY%HQR{fz3(A>!l{hq1+R5d`J(G~PG}n!msGpuhG(Vm>DHwm zP4l|jqcgv$Jh~V)<$@8L0~Z*7?NCr2757&FAljig#_xH`kfe zygb?m?jLGRo$NrheS$20x9xrMwDVkp!Fw8UcRmLjdfY5QoSyTonK2-w#qE%FOG-s6 
z8Ce>J&7D6+Kfe08rZf*Z>7q2*>8 zf;7t%yQi;-^WUWr zEI6q=K3rwsH*nwoUU>LzQBuj@YGoJP*8hX4|6D>NEO?1XOeb}Ex-jF@nP~fkE%#Xy zx*kgd9t3iRh4;&+qDj%kYzGbwcmEOg+W87k=gDD&-k)_;ne6GjQ*x5Q#51`a+WQFZ zMl-cP8!>({5Z=0T%$NxL)UNZLj9LlrKK3Up!N;SWWGr?)1;MeYDm$JP_E;=aS^okP zjg3Uk!+!Z$^6>GqGNqTfK-$F^H*-!=Q}t0vD6xf>*Dld_SYej1aY6bewpt8Ct-H#X zi3~Qq6WPOtn?Pxx(C_j-CZ4W9$jyEhpYz$Ma|;*7x~sjOrWx2HQu5w7^7CJMrCuY% zguKKn#n_N9LRN|Kh;VP;d)jS!chF!Y4goOn0XQ-w8$ACg zW!PpoQxG89E0dYRIpj1h13U72r@8WXc06sp;(DEjQ}auP-Zgr8*QiRTk9Z*#XW|*S zPI?R)+pAo3f?@lX%<-Hxgx6B11yt9X{iEh>NR8W=AG67G8@cKu3liN^n;omVls6mzfjwX=~dq1M+$)pQOKmMbh4iEE=W<~wJY9&K-w z*>$}_AbuA2W8;+UI-Yv2Uy^tJ1naNYjCBpqihgtf0V!^q?N9PVynG35;ew~E7T*0P zCm9pDn$O-2rw@}*Ls!jC`oDgsX}2u4XD@4?IJ+c{p(YufK5=b}#n!V~ds`6ShgD5I zUlD0v1SM@MDa2tk9;d3;Dk;Ht7}k|z85v;}e6pxbEYnafFzs{D-_mcL-Y0#W^WtP@ z>nPaPkufi{2n-d>ofjL)E#dNuBP?WpD#*Fd~bBb4WV4v^Lu@Gj=tK(a2m1@*S0 zz>A?C&Lgw>889i0w?AFSh>UM*Z*ugivl|hYz`U0-1amM{ATilvN4wN$md+{cyl~^` z%xpiRw6H|BG~gI9z^zTZj~Lsv0zUjO(XY}PGnHg_*ZiyTTbj_qWbcGThV(Z~Hr#ZC z$;Pu@&NjvLq#cD{6vRAE-iPe{lKQEzU;L7~C0SfU{W_7;zeUAqf^mm`Lb^xy3=5G} zN_Ev|rtLhe6Z57d?SgE}pBZa0WXK_N5)W;ZtTaw@RCT3)CUvQ!3D3zi)W8_|Rej7) z%9Y>|Yu76oA#%~E%&W-Yd%TM z6Yg>t!u!H9Kq7lujg!75mm4pJr|=E>!V!n^0mmNV2fQ^5%3SNq%)IR%auQ7sC_Ra% z{}qHrhc-_1Nu65^PmB&I(b5wT3{pho(9%$FWAknZlqDiNfFcv0lXYm{xBeMjV|iDp zM&p1l|6h&&n8OIZyadEJw(?eq(HImPC_+-;Ihz=gau|ycZhjbaaUM9OKX6nk40c<> z|34i*n1wl0O3L2HxQexH$+=`0LwM46#lmot;~xa56{k|`a)F~l>A&vl^8mFbthC=x z4Nuyb#)-hwZ1q_lC>`Q=ef#})Wn!}80%w5yBl9rW;z=3#eZCRz@+?~DgdEvoj$@)aq&O@lAVnm_ddK$0 z7Zw#NL})=M_vi;vG#l`DKpYhL0#k5q_-wgT#516D+~<==^f3G*{QgL&mEi9au_UnV zPQ|g6?e_#e2m?Znf43OQ|71k{nCo6&aY8{xf#`3u`3viJRR>}GSEZHjpZbIL+h_aLcQhOKHI2N%Q*@P@$@5@27z%HM^Uq6W40*YcZ8y{&Y z8ESF~(7H5(f!AgL7{=!FU_Cb`WKOuAW;j5@`)mBfNPiGkii3y<=G)~P@6sH9Z~rxc z?Uylnb)tJY_~P|LK{Q*m5Fto2Yl9O1}jiPQ6K0PsBpF#KJ&VbOcJtBOY&@{J9~zb>uzN% z`d~G=U(-e~+9?oR&whZY&jcHDZn4`Pt-f$M6Vo&r_T0i25idtb?@!OrOgOxmTH=n! 
z2vnhFKlvDlD^bN`l!MzcexVT%?N&GZKv&~$vebo=d_MMAK_6Kw&6RvtfK*k9ANkZ; zxIW6U#To@M&{=NY#Ql0Tgx~^mSmBfRv$p5!i~^68fb$iO>KyLSL6iBNXgEzHu1F%U zV{DoDCr*8vV>qb2v^Wvu6HjuQl25OLjTk;jou55^+}3a*8YCs{UnYJG#r;5|8-CGi zOtN&PE=9=UE{pPtn&;TP$v_fn{@^($-P-Z?J=e=kgQIuHJM2voeW|Hu4GgKEok#_b zCGzJdM2&R^hG1?l=ws>)JrnCX2+BAEgQa7`{0I2g)(Dpy=yobC1MFY5-`5FMZ$0LSuPka zWj#74;t~cN9o*ensLuo4uM|67^G8~NZxU51KPr~vwX3RGre&dDew)p7&7W*g+~acB zQEX`IS$Ah4KYzzwr}6WtS!+Xkyvf{*_pS=d(N_P-Yh3K|G7mW*w>V{ioXl#JL3tp{ zQo+ArX2_b+KEONM;s^F611~uZyE$2P$mNYdHW`&seyk7?v3+WhGfntPi{x&i=8*H_ z{lu%@fQp0(6lN9E7xJ_omDaX8&BSJEgRR5N{x_Paw5??uKa22px5v*-{HQc@@;9ZW zlB+4rRyhsq@f8%F(M{X#`eL%KOvINQ2TxiP&!pz`o zv?GFP3*zg^^fM)hmCoChPCknxIFsR9M*&)9^4OnxL>pip!*|1XOR$c69{rlca-=8@ zF&BQ^dmejF77!f2x%FvL$YVoVhVQxweslkRX#mB4AfQizMi=kb-1E>ydLVdsRribX z{GKcc9!9E9`}bm*ESnsJGDP~a4Y96wy|9vpIHzc=+mn2b|G-du&-*MOUKmAv@I;H&&oDbA#ec7i(M5cx-znpGN@+-pUW z7DGFSBN1tp5Br%;!pPE@I%Z>Ggxn2|R*8A!p`=D+T=_PMyidrtvAl0Mk0u?@WvtRx ztorf`Wvr}D)7rAXX(yOfmNUL(y)p-NbN=!cDw5;XjHbI4+LPJn)q>rark^i8$2w5M zA__SWUy-)_5rg|}=J38hQ|QCchsd>lj%r0>u^TT%yS_0kMK3{@`spl1gp%^O=8iiy z*kvKdX~Rp%QHsyaC_hrBnfj^VFman`VJR_NFq>#4z3g)84e(!Gj3Ml(oMWFfYV;Rp za&BQ4wYIu5zq1!wD}o+#HQ%~lKAK%>l-8U+;S``(_xiFXR_j`>?aJqE7>eJdU+d#M zR@AGSwia!cvV#;-C6?2)h8w51=-M^)_qw%>}B_;aI!){FyM#P z(zkCwiE4jbG`$yZup+nQe(>NmE&XcD!&YVLS%6#6ahfU;YJp>+8E1OqERnJ=o;|P; z^fZKf_lR{iGIe)tQe@BUx}~ChhFsz(9lrIf5Tf{y1Y*{~#$*&uG8Wjx-|r{EDsxSi z9RyVh&UO+y@c46~gnfC;)X2q0^Mjlj)~NOo^@vC z^z_SaYEOM>b@7`A>JAh{$|G;LhZ_Ujhf?Q(%o^@(k#M**e9?$TskNv`zELz;P#cGa zF3jbg6#Jv2=4Ue!7LkAlI`szX8D|#y5_A zR`g&;6wR&y(Cm9>-nUz~SLu31PM*N6sOA_VBwdCnkXqNZU%#}lMfwdDKp%lYIn?_r~-ao>gBiPi3d-d3x{_dL_p3&-A_5C3?;Ylq#j?tV z4aR;Rd|&7KecPE{rAg!J)t}d93Ld_+)AVzPe_Tg5yAp3>(sq`R_x*GnSHDnN@gdFM zw<8K)5_upWCal0wnUeX}`cYLnWEfS*MMd_76r?{fWY=1)FHI483?Z%G$q<3{O`q<4 zpbq;30fY<_UU-$ibm1Cj3x39~QGZo5r@6(iN2t2G8ArRkg(xQpRAaC`+m0%NPEk0l|(h)LpSP*9x}5<%V*G7PyY7$p)A|HUCTᛞ{;kr5nSyg0 zNfLehDeRJ7o>r%11ZHG7HW?HJmubY(ytcK|mygRZHl0nMdi2FJQ}}Y~T3zrUlv7GH 
zR_fEnDI+3YR#x)5y?s@gf`UxRvPk<~jJI(jT2sQhw!J8$4!jA18ZuJA<+-12XC{je zn3Q@i1zmi#zvHixvIEngPHk&S=$=w%`q37llRYCf(HTk8OmAiW%FB#LlpEGg5-rb-6zX9$ zTF??O^$tpvc}QA@Gmv}~6|;2At|Im-?H^%A_6RUKPV*E!oIIx(4JNtv&dejT;F}i3 zg@kz)77%G}AwIA7k&ZYgXd3xiDlLE`n%#{Ea zYgZ3<-%9{z$j<^+4CKyYscqQ)=H5~2;L?^}pyN*&SglQ@;!6!ChYwlGScav}!izvD zGM5sX>0@69IWCkpD1}&ttO$&Aih%(%Frc7>YzEEYAEu<}{d(hT>_><=0h(QkUrcwE zO90&oN}#_O7Hp|?TBq)AAA8I{1@z%FLFBZMinC38MUiozlB<}$VmPo#2WHz6mJ9Cu z6%JG3K^hw|C8s#T*{G$(#kd2bvC%&v=fIzk^Swv$U~ClzjMSkp6ya?l)MX3h-M}$YA^csX{n?n{fWe!w-=S#gxFGCFB-%%5 zGfX5u0l#>VK+tSpkU@^F3)wHiNH+ya8&dr)0e@odp$0HaeR%3`c-*lk!j zdh#}4OkXCe8VnpC|3I@f_)=DioKIB4(SJUBTW#*g3CIK}KVB*XpAx5BhFaZpwi;7% zaIzagZa^^dE|DC0G()!BpI9dlbR3X}oovFt0x`m-?$yijKFea`oUv(Y;Iav9ncYG} z%Jv4aE&m^^G3bF^15^kHF|zf6A^c*FK0r~WL|^wnI4?z9&l6&*Cf1ltjy_r_8+Zd4 zNnwC#f@p$JLK?lVn%uy=14Wrj33;H;l$x?VVVa-<{Wq~3y8}f4Y&?}ppo3(J3B5Z_ z4JcjpJ04FBB;v0#rA0a_bt4e)lrgPyg+_6JO^57`sg&57gwG`5B^=v;Igu zw^YrL+1GeR$ar>w5pCDtv$VCOUMFP6h*pdxM*Kc6U;rNm0@O(9?^RGtaNu^Pm;j|SK8OjR2~>VY z8mS^gBoGJ<=F8z5^%Ve-14i%YuykM%#=4mhf0qt$&XlH@OOe};jA#xrBHdEt4FBUNo~&q zop)1!)T_*zNT+N$h{S8%ES=HRnL8$;!Vq~LEIw?l4QAnP(57ejAb3c|?c^l74kmGD zsM3yI*ZtBnVd=)|_d_k_Y4^Hlu}*<2tdGP#cx`@r?o+znb-iBa81QcfH7bu+%G9?6Tv>5x7R*)|70C)Nuh%|QVG&E5k0kr$ z1?F}$p%KGn2K~GWJH#9A0oOpNE~A!MFOPllaY=+Z2q9(4eZi<;99sMPL>dFvnrv{wpt(M3YjNV- zHEdRl_(c35oJ-o*ASi~VpFg!nqY)@DXp1qp6U7i!vhyP{@SB+NGWfI1KnYwy*}a&( zGpwrT8C>0MGfjv&aibw?CkC!9T%mgG`*t+A{;~zH_W>>^9N2dtY!cjS*}7Lv~T z6TWNL5W3G*Y`(P;diDP;gULo0+TY(+ci;bsVkSg7b~z{Q8ae^m;Pv0cz7^ zuyabG_4cbe9;?mO7U&YDPS(RA*j4P}J~&Ne^uUBUtBR2GL1D>p$y~yO&Qy!w$n#A; z+S1(ose9Vz@*xcE!5>jz*G5X~9`oMpE#Z4F6F7CD2~(RbJY`>JO_-g-e&do4h*!3L zkn~-t9J4fO4P01nBCHm-5oph8m`)oGH1q|>3YESWfOFOz0O_HG)vai2GRIO*tMU1sC1jJ7Fj2 zs0hNBTp?pDXj?@grn_Td-*-G76h&DLTSqhpN?L8}u#Swc1VA<}i(5fnw}3@ey-!`Q z=BUfbg9Sc%xE=uP6OWbJS&?&3SzvHg&?v`YDIvt(DyAI$dH|cgvM z6XpzIu>i^SAO03<&3XTH?3#x`Xiy_Ym*n3VK+oJ$v~hRAgZM)`7ex*Z4M{i8q#~?u zI+|djdL=##c3G-mvO2KjJP3Pn$*Y0iu&~+)e=r3<_(r_~+ZAgC@lm~>K%c}wNRmY1 
zL`*-WFhcH)oZ_0n6E0Rz{1F?2N|x(uF*+uk8u?^_Ig1-;g9{69Q7cL z+)HH5m`nc|xnm(S@nZUFcU4Q}kgrxSU>qS~`q(JOZi%sm$)T_!<4;|?9Wd=3Gxo)y z#Jb^H&?IX+uXdF$+qgI!D}aUGjQ1YQbHDg97XK%ERcAsaG8dVVg)Lg zKVkT@d;)B1$o4!+^2ak>&X43$#dr8In#aBH%vI@4_o?&vW)5SuV~JMNEMnZjPMlu30V%edue}i+qud7~ zPYXZmao8vPfq&sq$6E@C=*Ek9QI)cS3JsoRw``Q#u-aesgMA~95PQd3g9>Y%=$~T&e@=7=zWKKWNkTbo;7^l5$X&rwwIT(V;LbhA`z9J*6|)t6G)$E!)|Ut<_67%OSQvK69wZ zMX8I9f+d>x9ZZJl!G+pcPzeRW8-V#-%U(pp%n}c2O%%!~#+Lx|#+*%LgMva{4-Zdt zI0S=4pTj7cfbDx%RD1=MO`sh0k>0yW>Zn`6OE; z{a`o_-5|0}cvc(IE~hoM8SuM@TV*sFhLJfY&L($(A}aCv1#?KrhMo+h3E`WCm!w^J zy{|7K8U?4DTQjd&!cA&6^LZ7buPYl}Im-;#I}jQuCo!du+)F|1JGf6XdUc(O99xMk zZJQK9iHciRV}0- zndg;&?SS3#&nFq7%j-SB6O?8jCJe-;Gm@ycRA5@hizmz*ipMf)=VDbZvz25b2%uun zro)lUAg6(0hE(PpFb-4=SEkDjC?;1%(Tg#m5D!Wz*6sItQ)zecYhqPLWL5yTv;)^O z^jPPmZTRRZYVaAipIE90cKUL9l<;~@M_3-Oc;PUL6iqXOZAgK=enEMZjH!fWE&d8F z{c4w~;XA+C;zurf!uIe(RY^CvLy_bF&|j>aDkUz3YP&&Ru8p6*{JOhM``Y7pWZ%qZ z#(XL;Nv0Ad&KPE%5tpsmI&Xb>Z`-9-mJrVRuqw`KF6t4uApVD?50`1^DcC{&ou#uX zwh8}*rDIekH9Q56GwYR&)8>+1*F(58s|DHWGTmpfCyYC^s#L=9Q+J7)?!N16UdB>l zvDNUYvh}BoF@FrVwrAd}=KGwiT9y0xL9JBH#(n21V>+PqaBQ^lFdyZ&ykyD&D30Gk zq+<-%=S8;L1jHap+*xn1()LT4;yz{r7X_NiW9%*}c|p%`g1Rt-3x1u`M1iSDVS$f=wH zIE(l@KjXi#C3yk)c}eBN8Lv;X&2@p-^94P}P^;|B(IkYi=N<3DUH9JFPbmXt-TX3; z`4oWQ?v(Q6rxxbU&Rql!;jdYNmPpk<=M3`r?BtrE1BV`XlTPpyp)?|bQmp_5uvB0; z5&BA@5P2qfp(y{LXHyFTT01z}Wf(2C>rO#qN%gTy-`)OrZ*?J<3-Qe#Qcl>1Q31Fl z_;cl={Ae%CBSf~AOf8@?1Iy;-Bi#rJgjXj!>mtq2C&rMY5XX^Ufb^^<$}9d(&&xBU zP3!aT^n9;M5X1mm#JIJIK zW2IKRQ+N-`SWn9yg#k84F>R|nN~*rkTWR6D4{k96!ZD+gs!DCW?WY(G%<0T4cgj!h z70#ENR}sqL3efq=)26i73!AapuyX|rTzrtL&FaePd}ijw1pHR5@#!%Xk1XFv7wH9B zc8%m{SmrtP%NM3Cif;OLL2LX2Z&RUyXGU5vafYbhCh;{O`I|VyqgqbHuYcY27e#yg zq|x%|hz5uAsMtoBhtfDoZjg6!c+7H#vhd5j#a^@u#rVF+mdD)7V~qy9Ay zYPvFZP@3sR1*cI}I{H=~t)jHZA)`g4qPo5BPVT118LE*E(|!j}q)yPDOyo57*42+& zzp{C-S9W3~bXR^V*_%UV)5p6Vf`@?HbdJEu68wUgds2RSz}p6S@7qqIM>V)$t(X`V z!Xdo1opN~6H@ZgPB0IqonjksaCmZuO&a3rcg4}&)4_-dYDmJQVN9h>4a%0t@9-c!g 
zsIvm=pc*&so8i}$U9}oG=bFf)EU7La5dXb5N`YRK!{NP4w12qjpIKtik$*t z1V4BYcZ*2wk~-ZtpCix^dd_t=1!jmW!GtO*?t61)l3iU<7@;F#;C$P+_D-&ERH$rIPCXXW#6%uD!(<(u>MnLL+sr_W4Bs)T{4b2| zEnF1so;H)|pp2T6Y-$s|Ws@Q{qQM|hXI1e(BIm@mKtL5-Na~YIriAyIlvXGGj7q@X zi$q;T3Y!nZ;r4Y-V6x6F>6)z`F!!%NOf^;F-Z&ZmWFEMWk0`W|cZO)GN4?j}9${?o zj#2vU4hZo7T)WV~tkgKPfr3o>7k9h~AA(KLU{O)4)x1ueo+hy_8ijX^qW=w}4+yWj zjhM19dX@vK@f&xnL2_ggoAa+O2Q^3=!eeK*&c4gq_R85usl4c`j7aL@gLqv2GBEk zFaP14WXO-1_t7D@e|SU^aT&5F7pJoWt)Soz;^E;-*i)_07OUNO_F9^f2OYR3&Pmxw zSvWuct|K9W;G&I{k>?orwVxTdOV1%{Q~To&{4h_#R1`C&shL0zN3QkF)3tJaT7S#4 zI>-=8@U%~%MHnjmRT29*!?SF{pj;d5U;DilSqeqeQ2X1Wi8W)H2XUE3lkpa0O!9=v z!JP)v7#t~5C?xqGT&1zCLocG`vDB$51uv4>XmcR*ygrp@&CmNR_?DB!98}3NNyyk$ zXzdQ-ndzZRAc|DjjdKXL-1%aHu~QYg@`G`PvKxrQba59+prg1a&MaMJb*OA}vs5HK zK0sgZ0xS&kSORVgrr3L3gYDGU^I#}l#m%uRQxu@ASX_?!oRQgEUfp3nj1UzPF^I~h z)1AxB(hD&9e)_^sy}8Rwu#Ln4(S@5#q&q@=@Hk|<`uZ(0c;OB5pB|(E5F;6QgzP{d zM&1975z{{~Dh1;6Uj7rKxDlB?I&|SXv1h?OFI*Qn_1waQYyoPNKB(rXIV`RbXSBz= zc6gbE#{l5;r@kCSumnubPjgeTDw&~h-wrc>B~ zDFmCA$Mxe@;0vP#7|;cGWRV70VPr;s#leWDJr zJ{J%DgwH6K@aPw^H*c5=w2_e4LQk*lgu!r@vR0{Gx+YEeZKZFS;>&KjFJY~jJ@Pio zM-7($Xn~S|-lLL6Z`SgO87~lBORmDoy1a0l9c#m7moI$>1$UV+D2!g=J_XyD7)Aw5 zL@Vnpnmh2+drp1ch^YF5MzaW6sY0{&gJck$wgK1LRvulw2?RTgPX!mNpm=RMcR9y! 
zomHBO4<JHS9XM-vJ z<)`Q}y!pX|oDtY5k6tJQ-+-X8xrOVpYOBxBjm2#6%$2urRY;jTtp9}53*>JyF2j#T_JVlaHg><8Jy%Kk8Ik$hy-BD-WM=3q7B}pTZ-ZcQ{vEw~#P)de4jc}f z+&G3L^>aBpJH5b4%3x>uTX{#+YEt2+(!=n6?bBTx_E{6Vmt$G>{pYAF>B}gY908Wy z{65KiJOQM%w3pX2G5K9v&kEvAm<$EQAr`sDwOVYcEIQZ8b~L5yQ#tq0jQlovyieHn zf~8_5k(EfJY&0ZAwLJYsB>R0&+~Q%bR&C_>X&O9(qV#Z|K$-IV&~eRHFfJ~nP)rkD zAMdaAs+|bmrkAH1ct1`qkggM@$Bi*Xiuw{>y5qfx`xcl`cB=6H#};2z-8$S>;G2*b z2tP0!{L5+iYq#RRtl9n9toZHw7;G;cvhbb%8?v#@Tq|-+p|y}L)eTsXVX8MvkzbFT}c~U~V_;XEM!t zHAT#rh-w7v#JiInmhCe4#4r{1!CGeFdDO@EBT}K_S?BwgoH}+c<3Kh(#AU?$y2>L| zdm|1!XPpK2h;g{OHSPUIf?#cIhC|LPr0$MoxJOJo#t{R%P=CEqyZf&fj4e>xRq|@Q zZh+GrgZwXT|K|c_A5hcZ%YSJ5M#5JcptcWf#oQw%Zmp4`#nYH`R7gU75uB0W^-W@n z?73AlVJ+Aq_-rOn#(+uG5B8WohcVTW{EBrRtFno6(>{Jpe?oS22Ro8mw41bDG#2%Sc47x zTAP%zmkVOQq|Ubu-ll{%xh4mL*~p3dj*9ll9vrT~LIUU{#9aNDRoTYcsw55cENXVH zF!0rMhn#Li9lSL^mR<0yJr+LR!p~K%-D+YrScf(A;D!Cfna8cGhiq?M@x4&XUM`{+ z1;LUO$YaolncSiJ0!R#x_OAhV^By157A0P|LM{e+bw~tnYr4D#6a@Nyalq#}ia*JX zN;bxwGsH3HwEZ-1Xgv~QuuQCKnOc$}Lmz0gXHp#xAX@R(?8DROv!Akz)`(&*3Xia2 zH@j_Z%-v8Q)u)M%0WNY|Wpr7RwmmfySCC}t7n$9AV>(|>N(lx5jG(3mIaaoX?wH9z z_pu76(uN={q+f|u=NguR;`Gq2=i0SJNItWqb%dYurpt_%Z{%=Gb2dN$hH9|ax}Q8{ zR;PX^A%BVQSl+&w`|)BpLZuAHM>g_#GQVtC1kPpi{;d@M$9G3@UeJc3M^Q-rC*+3@ zM^OLn=KpUk<_~55|G;8^`Cko;%ERI1ONbH3#?PQINH9~(}8t#eg*+LV7(du$b|&f8UCeC4*=vfzh4(% z;HLwC2zLWO+3JCHdH|TW@^z{dk{lxP)b2~~{r z2?cHR9E}VK$rV&&-n0_Ow(2-?`X*_)Y|IubI_(a{m|8`|g_5h}Pj zI2u_wP!oz<8_)s(s%q+le|6rV4gj>9006OcR>qEhh6D}<0~EnucmHwzAHD-G0a+K6 z6dWI770L{H1(pRa5Pk^Z4T&CE8zmgI32hex4wK=73YHsoGEM{TEZzmdr;loc!9=yh zt0ZrqNXUdf8bqiqPTfmeOdrl@%dE)C%8ttM%r(zb$rmnQEF>&KEP5_J zE?FuaD61>asrX*$Rb^4FRwG)=Qb$^k+5l)cYg}y_ZflE!`?k4U* z?s@G!?OW|18fX~I8;TkB9jP5+3!QD4tH9i(WKe;a=jAVzP-&nAzi_hvWdR~El4&#g|Z zf7YHYjuAU*5$&&Rc z8$Aa+=O*_cZ#jRoprf#+sIWM#B&IaD%%|L?!lu%sO1E0QMzL15PO4tKL8MW*NvK)4 zMWj`%O`=_en^xb&CzR&7iIL?ev}8-Qph=d$s$G2d#%4Kf90mj^|JIPjAk_ zFHnCGUQ+*NyAr&Xxly|{xHG@Eez1A8d@_7iei3^8^7i2!e^3apWe8KqL1;)A23UDG zBY1lRS44LtcVu@IH&iDyTXa(lZA|$O!dR@>6gW({+<1KWyaXH{nFy(gNQkjXkVwHv 
zA3mLv{rJ30K2Fg?*+^AJokbHz8~nwS&X(SgL5)$GiJzH;g^HDk4U-*?1Hf_1dB*jF zdyQv~cZ{!}zeAu&uvVyCxKJcVG+iu7JYFJNGD<33I!q>1HbgGuYp8q(Fh@B=DMUF` zB}_G3EmHlvMvP{hR-$$?kQXxavh=eJat!l~@{J2kicE{mO3X_wN-fK*%B;(5%4|#Q zitP&=@*K0BGMrOf5?o{4BHV*L{5?IroV{&)%zX9y)cwB(h<)P;WDcSVCJOlw3Lgd< zb{BpYu^YJ@HSxVKx+SJ6wjeG&J~kmV(KpF8**3*I)gVnfT`fZ?Q$9;BTQ)~FS1wOJ zU#UQ?P^ZYa*t*2E)W0mMJiVfodaLHD4yK;4fvxdtlVx*oOMYu_+fD~aCsCJR zw^2`UZ)x9T|K%XY5c{z9i2rEe*y#A#B=Qu)wBn58Z2Vm9{M5q9667+$3d^e0n%=tO zM(}3xR?&9dPUr6b;^D62qH5kifv*XQ7$~BmCUj5eeo&6*Ir}xj#|1|u{{5|7$)1Rn6 z5B`??-}nE?M=RCmS8KQ-qF$sm0S$r|+H-;N(7Y(#-d>t~z(k*18bq z;OAoImAPilk6*Ca&1<2{BG96j?w1$$EHN$REkk-7T^_w6Xyr1`>7Kt=z4W@adT>pH zx5kI%i&&erF5YjO|JwD717-w{3H%WBF!=I@!H|}XrcJU?Mi^#u(U$bB@!KM{hlKm= zShCY4!Xe^s5Pf1#M@&s@`ChT@IY7o2B%~(BCT-pCf50Po&cR6se;#_3 zayxY-t>AKq@W7X37AdTWjIINXW$lofM6lANnmm!Wdn5)Z368j{XC6{(h z)jTKOcm7?$pwKGfi{X+q=?Q5 zwtT95X?<9At9rENLhW$f$@;;DfyQG^$C?LPPPCqCJKKJ_<5uURt~cG^d&l?9=wEcq zXCUnO-oeZh$RX}Y)2ZImSI>MHnR3?i+>Y}{E&vy`m-;T>A02mf`L*5GOK&J|4&Hiq zXWrfLd!_fwADn+Q_Hon``O|05*T0~=y#9LC8{qA=_r4!^AK!hB`eOYy{`-L+RX@M} z3jU4y)AiT(006e@Ni*&&Zr?~Kt^#|!k%bdsb7VN22D=;c2zLy7Bz-gP4_2`4Fo45N zSa=lw2blk>9b3jkJf4P4q&J?qh22A&)vyVBkkTym#uCU%v=G}%>P&lqn@0S+6@-%z z;S2P@W8%G^w(ObI{^3r{e39?zB+N2CvhEya7uQXE6+>Y=qM|X^m?5dn*Z?|x^Dwrb zwtD_}+a>{cRFzM=j*;P1LqZ!Zlix=@Z5a~SxzU_!~?b~oX9l5-{)Eb36+PI!~4|%$}1bPXz%{)E17S*r! 
z*)#>cK|9T5KH83}`?LUIt9su2fM`B*cpw7NKESRFMZE7i%zcL3-+T@_5qYO}D0w0( zrINbw2hZ!HN3(cnYwOvIla0c01F(q?xc z2Q{VPcF0W>vT~WT26>93e)EPt6T9jvmIlH!^q!?oz$BVlC_b2VoOxstW@p(2(k~3_ zKpshndA^ZD(qfr&m=q*1=GAFxE!}<;L@`h?J#2CZ#k~Sc`c59lx3uG#lr8Ge(S<&m z_?K{tYJxJ1rq6u(@#&4tkyXf))KoaYC0woK9m#6LkaO}Bke8zk;19i0dJ5z zOBjoX2>%Af;VFVCGo1+v*0U!^NK;zU&s`-hs2|tXLfC74shfcBFs-4E;-kxl3V-7# zsk`GB0*@5y0_ngfS)kKGd^cMruCe*Vo>oJO?gU)))E$ zLgSl#2Z0RT^?*A-nI>t*KKwXl+oP4lr6b~zI>P!B8(L)ejNYx9alo0jVag;Rr{PWk z3|LpqjvE5(EeQcpz+}V0>9s&UbH~GI!iDQy!#D9yFGM#F;%A;Zto8@Wj?E%R15TaD z{88M!=Cg4jxX${W>o4NEE2dBP#C>MGy%QohjFz3u;~~*o8xlB;=;?}+?0M)JL@Bck z9g(MD5YP>KA#@sM?RpMg{H23x&SK7++TW}rta~zm@qy`qkEb`$ z8*_4LCK@^>k~&O<`a!7(>V&C3=?Rps>pQsiO2P3p?6opf4W3C6OQmj%nS$Sd3+*|t zH#?5{l+(Q@nL3m8cb%Ftz*;bM7HtKcboB)%(&T*XA`7nfs9wsrqA3+$qGu~7W1rI2 zN?&H}qHYqej>b@m1S4y2Q0DWGIYv{L(f*E_*$wq4`)&2pYLN9SW0M&sYNlN^Y{opK z8np*AlPG58&8Qag9odhyUSx*!;}i>J7ma&`$6nV}(I;SLwN9w~NFS&-343YDRYuHY zs*l+|Lql1we-<@`oT{z!Rg)dmp;O$*Qkw5&18aEbWN#C5Tz^F6Y?jJee5r<> zIFy=#Y(pQ-Au}6K?sg>g)Moj#kq+wKJH(SX)XI^6lpj&a?Lzqul)K@pq#ZShb61px z@<2EXwxi(bIlK(ik4P}*CR*rygyWCFPR8;U;`)b!6%R?j+cITKh|dgT#B#zf_D^9H zVHbQAe=Gj_VKf(lS43dh-|+Y~2zDa=(WLVnCDH!$W%)f$P3wH=GS)!(6OoMZo%KMF zOsB)#c`3AgX+(~OnzB>I_N3@mn^`Q1dXhhT2l@Fai|m<<)BH%XQG8Bk7WN4Jm|c7@ z?+tVum%{m*ieq15@7>YB%4GiIb(cB9JZoRaT1dWk5-s!6Q<{dvB+XAPO6Z`hW{`N7 zWY)6RoEzfLDO|R%P#6A|d4fOawSl>u-)kSjbSLwNQl-l3ipECK7t4N4z5r{Cq?34& zI(FF@PMmtlp>~#9u{?Y;(@*+sRV_m#-C%c%v6~!oB2((o`nut?2xz#grVH$ArqZn3 zQH!E9jQzyOJv5W$Q$BNh1EX1Uc~vmONqNeSNmr8R4GJX3kHH)4L~dObs?&UP>ul;< zF1Y?Jn9C+t#~;KnZ(8KrH!%*HHhbQnTU0;nBIypKx5pn!3P%w2S;8|XuB%q?6Z**1 zXzr4ZrjkGG_086UEnLJP>(hzn7#Db#zOdNROVp{FCq#`uBLN5Qj5$H5nfgMU|fA2M8*Ovt}~> z0wGQD8^5XGmMj=}m2^Zr0G!)Q5OxE%miq}E@R0G@f^h^zC(ZDRHDGbkWi!W!m#UrV zh4?&$3vFfo2k9Z|@j3f%@q(D+a>yVZMJE$=7nmV;E&QO z55*-HeFCWW$y{wSe$4?k{nIk7i+6?8VD#up%YvL=M1#6Tu z%JZ-U*$mCx+#qp+a&i1(!7EwyrsKTV($HmNc*`XYeBU$bv%HCHDa8OLYOBjmJB)BWjgH4Lc}7MrlYr9@YE(#sH{UVY4k zw^{zOD-~TKdEXkEO%sJRI>j~d@7L|HO&cy%T9z*4e3s>p%U~@a?5=gI`-6C_T3K}$ 
z(aM-^zKdvtF^n#Vr$=|_WXPC3Kh&kjr@;#qUs3EuD-|+y2?!*?qg(@o$$^dG}a;K&h+Rtb3QiBsW#B|&A13Bc}!<#Gw$1q2dt08rJv+KtRL3P$B0 zI-Ay0@rE`J>Z|vm{FDAz(@j|vtyP-HoWN1J7kSshHL@y7I0z&xwkfBYT;VghwK9OO zppI1da7mDu@@#fbx|8}BD>mx6;yE)gaI-9zp?5nYy~b>^<)PpJ$hx`ujH+37(;|}( zP(B$&QkOD_?!2h~aFlwNpgwAgVk3V=K%caom*)nN&f*;gIq(I5T_vyDt71gvZT_YA zrR+5P&^#dhEy zJ9)qWHXSbw(^ea{rma-1*X`N8Sw3I0Vf|MLpvEltBt9hy1FhiA1zas#st&i8N#~kf zn^5Gf2FKcG;8WV2*2k$`%B$weT@Pd@Djuz`mn_l0S}-EsBk};P<8pz;=8h`offR|! zG}v85x~gw&D*>l#zBc|$J))%4b?#EhATT zwp1a4F&FE9A=c&}t!5&!4g^|`BCdqLGu9$=eP5M7McKK$*Mm{LAP|2A0MeK`TJc8C zmgWJV2a{eu0dUOwRK>z+_P3jl;=tkghFToaH(z%bcg&@}d;?&s4Dkv9kosHuFnyG* zXu3;_K%cLhM|qnUZ#9tL>~}LE$$r}(=!ZxnK0mduNrNtU-F%9zipU+dsqD5-g6XX9 zP1Cr~(DQ2dvj54oR-R)WNy1cAF;{QjU;dSm=p)w7WO%t8)XrzvGOgTA07y037Ou!) zr8gdynoy`3lh`kJV&zidoWwWA_xwZK9+Zdj{CzfQ_VPSkHfh##Y=5dm_SkKzel>MkkOIdw*AJBu1Cz>MX9srcwZq@>J{kDjl=9 zVVk)EDXS(JXXbcY#+Oe>TxPhUk!}mp*{dVG2UYJ?%UzsR7Ov8!CfihG%e7_}b8-EZ zx?9M^>Ty+7*)Plpi*Ld`!>)?@t-rKlqsCjMqUd~W%H@BGeF6X}ZnRK(UNMI2%i4*^ zsa54ot=ZLP*Sbduje6Ve+O7ArfmX9OU*)Y^>9SDS%GKMH?fv3^I&lOshp@2k5_}ju zyAuuXIBaOifxnC?sW&6yHaS$6A|9_aRZ5V#(=w~}qa18%t4#$AGJ&V~*4|IJ?@41yhaMsV^45RBV*Y{P-QM%xZF}5v;Te@1*0mV_R zVT#h!^9_?^UeUGH9+Hv}vH60yam9d9BJQ5P%J_)!zZM4CRA&FEMTGm+9M}KVn7u-l^d9|?UYs*5* za4Ng*QN{IWSk)xsypSSOxBl;nCPSQ(JU!pAjcKc$GXDY~&XvCR9j9@%ovWLbMQ@sw z^{Z2s zcyi*5-m7p-cvFWEe$(Hs#U7Eg=xZYkQET_A@dnCP5263Csr2(&pb(-wjm2eU79VF} zYZA`&T*Yn+FKI8suJpUnG=hy;^tJvER%@SC{}T5M1Y*p#smOC=>Iq2EDKuqs=GFl+ z>1TqxyNBc+9@@5xsPXG<f`&9RoA7 z5};j5#=P*Qtt;p|{aP9_X@A_kY9r{6CmpI$QsQlDyG;d+$RuP)(hyu^m${(7nEyT@ zw^PcC2zPIB<}URctdHm1b3ahCh>MyuQoV(uv#BJTS}=S?;|_^AVN}k`T+zpo@e+s~ zbrN}aV6(S)m*1nhL*htxLG>I_*kr#dSISeH$_7BhKf_)X=OK}U@AN^L0lg#I4GHD# zFI2z74>lDlJ8XxmDU>ztXRFpqZcXM}7gCqm6vU=(pHbA9AlnD`RYEf(dsmpoBn-4~ zG;qR6jRX2}|BW@@buSiAwi2Y{CqJv)Lrt|Qz@~c7IJKOBY(IXY{$=LT9%1#zgpX|o zYi9UB<1))Ve_Bm~aqZ%a%3IRS4m&LM)c^I127o?NP~iy1&2K0bqGI4T>MP=+6@yMf 
z&gU1Sk0TkdF3bkh?1Nu1$5GbMt5`VtpzCbhMa;DK2N68dyBonsJL2gWMJs*@t#2u0_S=`LT<@5pmr%b?#?6Gyc$vg~s2BKSPc`YVj&=g3mTYC-C9dHLrrMA~IE;Gr@2Z+v8bc+3sP73y88a$6t0#gzW7B zmCuFzXob>7po8_x1}b@S~>rLF?d;$}c)~!0lOD(+H!QI6d3-n=RJ%IR1c#ZToR-0utZfe-k=DSTjI1LX;2T;# z64$_Gnm>e#h$++q1SUdV_?~bGxhp=6um?3Z$b%q9U7mT5q(*b@FC^#)J!ifVR^q=k z-@%{2N2%}O5x@v#34SY3R?viB354x)!}|bk0dMg{z{?3vTm+czlJJr2s?*WH1LobP zSb)m#R%HRPwBw{Jz*;IRp8~9*;NzwMVPwwwX8@b@a7GBekyLv78Q>|Co;2c4h({Z& zIH)jK@e&u#yFx_cLO26?Nw}S?yL;_%N103gf8wg?52mjIAk<^G(r_CMf}y3@>)My~ zYq304o*a+OkuN29V-HJQa)Ys0VRo!D*1-GZ?~Z-Qjhq&S%V5OZbi|TtYX)y(7g?9q zzQK%E^vHH$8p^qNXH31iGG{GjSm7EIi1{IP@Jqq26{k)`VFCJ?>z^>6I$j<>h2b>g zYi?pT)%i+8FmtSnfF&5WilXepn9%a&d(fCHHEUfvMy||sypB0eKX`pQhJ0fGz$?tc zzNyt8(Wlz4Nn+6Q#z(kqXkAS~mKr^55l7!de>2`#=Y?_AB{{}nR?x>@JBsnY=rRz3 zzHs_kwGX;*U`R|r`*nT8lF)%IXEVQ{GwP+$`_R&A$=XhIn|}9{=ja+*=fz{}Sg4?< zh4~6Lv2q-v5{4CG=^XeZj4w?JZ_J!PJ%K<*X{alZ5o-gf%TT^kuF{vH5f?(3AApnH zXoeivWWGs1f;%V3rp4p1=!MimT;kDxD7CoQ-6Dz`?x3$fWgbrE@S4g54xD?)sARf! zxzkhWDpNIW6)k}8P4%HVB0o}+C^L^dBv+EdcX7!cB#F;9vb$}Wup4D5ao{YL&J{yC z8fXzhmkJN+GTu$@S;`vDc|-#_pY4!-k~G4EMyg1$3}^2`QaFA8pit)@>8pxX6 z=1u7_?h!U)87%A&N(#3WhD&VL9m#gEq55iW=wC&dy$l-;L)P{A#8 zTLUP*_0i=G7KF6OJP&FnX6nDCxezXCckGNOB&i^)QG`H6g#A-OIA#24FY36y zzbysi6P;PQdn9f1U1mA4vQ7ZGOL%7;NX;T-nv6S6;@=p4tR9c=)D+uu@wMdBr}&iG z;f>8_$z_Arb>5_iUL@lsaaVgb#Gk-v0;P-+Cf2s>h{Ox6SG}b86pe>{B;KE_9I%u0 zKuC4_MGUBe%27asN~!O8wXpTz?_4|Br$ZCjBKYw34i*?;S~ZRpjbu%%Wc@{}`n$y# zoL6m}Fa|rUoW+mBVyR=eG;ADr68jBy&%yaD8n$5DIc5=7?zx<~75j6d7c(Dl>EnnJ zY1eD!2>hsO`9Hk%6bSh-2TrCJ|7E=-4JEH&3P|2tKQQpb&Xq?PQN)f3y^P63kKQ8T zW`VitF@FV5F6-b1aLP%i*u|_zMX#8jnVAQC7#7C-t#j!b`ivD;dIs&;gjCu!VtLOF zL6(YaJAi+r^YF4HRZKs&|@bBFMl|G1KFE! 
zr}4NBS$4d9rCWgDTa>;~}*SX|t5;Z6A3jpc&vh$&051QCcUVU?8fsaZWuFz zT?BO0b*tlezM5=B0Vj=BC~ao7BHF}0%ypSF1wZLOW3l`Rv`-rfxo}#LyOne{%E)A0tiN^OwsIZwwA({}DMYs${nb?t`u} zjsX0cwaTb!o$|G;$a0MST-;{-16w8xF2^3}R(=Q?VCO2wE{bQ*7T*S) zqSpe{YM$a%Ypyaz=GgFvzEd1q1A&qS=PDD9EaC|(hNGWy%!bII{cKQq@}em$p12Cs zLw5y$>RAd^zglrw+S^q`>lK4q6CmFN@CJRlmKRiWHX6yu3_>#_-k7v zrNPZG_*6>JkAw~t&(KQ3QhhPHv>2-OLl+;qsR}|%BNxk+=(~P>(i+U_c^72<7?~}0 z$Ol0BQVWAj6NDNclgRk|@=T(!s8Qof_;YBXGK4^jK*$V)ZGN_F8)4PF0_j}BkSz*G z13=~VW--T`f5h;WMFcQ9fbpmBmUvc2b?60c^xvBeH` zHWh4Iti8*9qo=DrVSj07EBp%*lsBcV2foX0iYYsEk|5!ewOyhOLi${iD1rCVrtAQa zR$WnU`N+-EPpHVk!ZZy1#QalA+wsW<3T4r%3rJ)BASCO zQ(-m!=(@|nl<#YE$B@()8ku?56vSHOex8(4MGVguy{p{fdrok@yn4A_M?2DJe8IW5H?lm8O$t4wS;#71g;(BUS)5!|Zy675$`->4 zJ?q<4KZwKCnTlPdS1f_j(Iei*&ElHab2^dmNob~ezu@?)Fr|ZVrqgugN7i}}h|&ar z1ikgkD#*mNs!{!}(rj~uHZpz4P@^)(7V5?*IH9hpJ~?vLbVayKzjaDgXrkAM_$RTZ3#hEANX8fg=~%7Q(cIB8<=VtMQvT`XF7?(Pu5!2pwn&l zY6U>lyFDKXFA(YNzww`QQ<^>SYYs^2xIjz9*Q(t>S>QZ#HITn_W(6J)O}=L$;yr9C z&!(i^&lobqn6@u;*PLffi>X%*c-7)5*CIsL&6L#v=S^zz=#r1dWXc=|mkK*_tWB-7 zsZHIJ1!?e#w%NS&98%+Ej%j~?jfTB0BD6A^H9vq`QO%Srxn@XUZE}b-bWrsm5a|N| zqTK6BSH!_%TC=2AbNm}?LQvXg>yFaLUlOk{DfcdrTRzQ%^OJ-ftSuT?p98~2Z>J6Jx*i=+! 
zx>XDtZ8>0e%O0xFGko1Yt*WU!W2eHLqO~2MXE13eFR|)=$rd>D>zb(_Z3<;mj-4}_ zHo^Lvzt;t2x7B^BA|`#QT4J&6q?vlmK>;)Mzm3zD4CvBis~tY*7SLwe)L{UGDjfC& z|1OZ9Xe|v$P3p&$Dq>2z2xZH|G_Ci_hP}5pxkHY+PHgxN$s1Qx9|fa>K*Xgsm3<}* zNh_!t96-)UP3g-++=waej6mEBlebWjj^2A4EXV{`+lFFf>Uf7*Z*;g#Ew(A}X(#-R z0_kxLa6NTL??zx+Omv3>P8Wu04#pLD$JFn@!CW!50$kO2X>|kuu_@cTj4D3)oIa}n zGf+y^q`LIXpk&1?Yr8@whov;x4%YIHtQ$+Z=UP*6Em)5l>aqsPyJioYwyF=m-u7mJ6laWhY63(4~YNR)B>AwIYH7w3U>6p zR5hi1>r7MZh?(3nMV1+sT)#|u$Qw}&k|O5ssx%1QCvGsEBL)1ckY7(sH2V~U_9YvB zr~K+TR_+xuyZNwoR#;XYM6=(UU6r7^JRfekFASSVt$0PswW;|42zh%@UAL)VQ}2T+ zVd|ugKudCrXVZvjT^PCcn(>@>hqYd}eSVY0SC}>NR7E&R@~?Wj5L#S1=YlJ2SLufi z0KTwnMDGcI38`Z&MKnUoOO7CMFhe2*wFj=<;Enc0NM@@s?uh4)ru*V z+L{WPOm)(RLno7eQ?W36@{Gby@QI`?@tfdZiHN`-h(W^O%t6E`!Qp;RiBKGP<}A2Y z(ATUg?dRpI8_F(nc2j0Rp0c0?jnJ=*fqkB^k97BdKk!Cso0AC6B)`6Ux)`EUpI%!c zP(2}>tb`tVC%;Hm?RfG= zakIIz!CEqGz$tElpKFmsSlKjXb6yg}U$$=V8t7*6wDnt{4+JNsH$zFxUALnPU$wD@ zjunq@Os;2^IM?{fd%-@I5CXXLkU^0PE@NnS#~y%)ln{R|beVkFv_FtO#)Vr&g$1?~ zB%T!Edz|YG#k98Zvf&bC!xB6he7dSS$5{H?)EZLNy{hy?NOO|)82fDy}nto-!DkaupVkVbytPy@k%6=PGO#NJ@Wz4_o zjGJ20dh7{qI`~ZWXWW9)^U{Sl|FVk!1eXB0ll>M)gMN+i#hr)S`ze6^2)k)X_>YK? zD=bVb8rR>5DMcfz&S9izwD=FEAI-(ZVLqWdvMyr-(BJmBVc8hJbv&FC20V2$uoFYP z3_@L^#`M{t$5OUfH>1~)w~6rR5>gT754wxklc~oz6Fs6GFi?VQtq}8uu*`8c_7a|W zu@FfS%<0KMcJt<1%8}nVK0-%S7%K;DMU^w|WWdo==-{YHXeiCyw+4Ne>NW+AVUvEH z|AYXmUUa=fSQQTD)rdFJ&3q#=R3t-XA}a**k0zrQbD!;=ixRRQ_}oUlWZ@hJ&@k$! 
zb4_ri`BG;S{EBg5#Ycp*u7n3haXPgeZ-Q>?)g5%-J%1MU@5zCEj5l;}R^1L;_5XaQfleZvO zQ{Ion!`x19YLA4)9$#Weg~7V-vxi|Vtpl)A@TCpjhZn)?t5FgA5X&te*I*EvwMCQO zBkCyaLs;6JVn)+1>bjC%oqz%c-(`RFyG!c^A$n`t@nJfNA`z%kAEnF!0u|d!=Hs484IE|= zZo~Z{ZS32IkEji(9tO15X!%iKf@v3d5ssrzF5ZaSrfEMQz|Bz-wwkbC<%?IwV!unb zO!$fmCE54#fOG9PtEU4V&1+?H9H}mi6pb5e)fSav+f32>uVcmf%B@qdFkSJAd)O2u zVL~g`K)l^M3xFR#Rz<*F?mZ%Xja%A&j3~qkoBRuhuwk_k`?uS&cBNZ{SUa=N3Iyh< zGGzi0>p|>nJIGHgYB2e8HN|g4?wt8xC*U*dbm^M>VWzBXU!ss9hET%Z(eFcz9(cwE z7=E0m?Z3RIHHrHG5m&L5vjd?Ky0NYxUf_N(smRd0tBh1+WrCHSiCP*qnbv}mEo-Fx zM13B6i$=oav_x>2gqOxC?6HJHf^kd%{xbF&BMzUFJ3=hC0l zb3pr8F?BYeZ}wtGvBdf-%wgsxUMnM){v6Xm_oOxF^w1VktM{FwZlavrlt+#b+4WRkPF~zS%3;=%jTPh`X5Eq| z@*uNg>@w0Bd}-rECRw*o*UOls-p`TIsuf(6fQpdb%sN9U5huqvk!3>PkUykv{Msd6 zq}P1+F+Ye;@Cgn6j2G6a+E?@vlZ4$*n`UrCc2jO>)manBqbk(iT+(m(l@JIiQnqPv zKgma|9y3Z@j^9`>V1S#KYr|-N>c_GtQ6*Kq$gz|Jvotf79AkJ8J47PtK5uwWJf}@v ze3&SbhK^|`u<a+X<n zeBJ*N7fXJP(Gi^Rb1h26iz0g&M`qiKL#>d6l;l9_MIm5TdW#^tv@Ke|8!dYlc!fIv z`Rr!kQJ^GSGzvy-H=mKK;mf6$q#NM?<(haZ{7%_pp(A3);aB_(h!as4xYfv*z_}bT za>Q*S*Z+U@O4<#q(DY80h~FT&D47JDA-@pb0y0Y9@Sgzl4^QANz*ne=JY|#xE09AaEu4jaa2ShLF3rRzQlQc*0SKe9bv@}=lOv>inL2L zCA-f5#kxbz1c4|UY|25hMpz;`Cw$E}5Qq5}xDQLda&L3irG|56vzm5cSrtsodQT>c zQ9GZ@yuf&5o0vD% zr6Sjh3@^cX+l)wT%N~3X<eoz{ zsc*P4=~RIh1S0&lDTZivheEKEf4?Oezl2*`|EVaDU0&UH(8|1Cxi=z!k!IfMx0mjt zzvdc1pChpC?IDnCRq4IPTZJTMNO@;b9csVkPVuE&lq#^~bCOs-RO-6DP+C_O>I0T! zKvp>al|?~T*v4>?E$iQC=z^_bH0z8oGV-|k8068>!49SH$}^nNM! zM7(h}Nv(NQ*g)VFSQt43Uabaf^*DSD1U>!pBOGhVlQmlAkM_z@J<)m zVV^ti6T@-#wlN$JfMk*78MH{cqvi;88)A-Csl9=qM?7&0WD? 
z(kf>=(I#7$K*SXQNQ%{=d3R`A)oAV(_zp!myCW>jfO36Be|xnmse4xstzdt$qQREg}J$XT{|^I!fi$?4V2yaEY% z)?t1i`;<-XvgI7kX&S1e6t%L)5)Bo|E*XDhv`VIyU)$#<6l-HQQ~3nV^wmk+7s~sy zf_NmhEmHy9w<(Awz4<+5gEFsSC3LY2UK5_NS!}JWjcXQqnx}5|<~vmcub#@qXrQyk z@}{tI%FAF?461*_LuC6W|en=vqLs(14?1b&#J$bEp_zP zwU?f-jRwx9ylR7>o3RtENGPWWWO@Vrm^#lOhUG`E(h*_*YK5q#o!wQZYt4R7ougar{KcZ%6wi59hK+0y~dXM`Gf8uJ#P-P2o zhOJ+=tO!GC{lOuk{mfBp#{xIQF=q3j&AN03em7mcn@$gAD~{1#dx+$|w7-s_ii6aR zHZ{+trdu~l22r&piRf&;%rHUl`e3KFoFBA%l8Vf|7`#?~hx6GZSr*DocHAOgPWxt4 zwu^45)Myu>!YaP2s`5Sbwk^*GcWQpi=Ikn0-jy5-dL&;Xrg*HErigAkM#^T;e*LF* zRr;CDs0kGeV^7|*^2g=plD})xwM%wIDudKPK@6E(RlV$!WLOS%jFblbe>KkXyqH< z+v~E>Df`w{Wuy0P^E3~j^VYT+N6`|u4nqsNd?MJe9%J}VSzG%^f|3c1CB*vlq1ugv z4{>L#frQMhr%e*VthHc+5&z!Js81)%p4eKB!$1B{?QZE~1r{3{z>Ej!$u*nk^>Oi) z0koQ}Wfc_a`?bD$AywnXEBB+Vn)pHclhp99!n|+(Bbr#er@lrImTq5N!js00wWM=B zwk|VTIZnQR%GK-^H@t2M`@WsK#z4CCpVBtxD4!NP)H}&r4-ZwHmdf|`n(M@aTOJy| ziU_`!bazEaH>`Gzz-$+-hLb-0r*fNK7@ibeuj7=*9OhMR)STT*F}tcNw^SPrDvNzp zx=6)Cw@eLM#I{4Ipya9lsr60D>K#QF>Q+@I941+hnGWqeY9bh3ZQbC|zq+6~X+d{Tp*G@j+v_56Fr;}*F@9xSgI~$& zS(1hq#qA(a)ygu|e@Z`S+uWLQysx2*nH1bLzRY$$SStk*7A$FmLi$#+>u^waXXm;> zNEc{Y7wJs$Y)iOa1Lb=5w=$QbvE z@4(=H3fg~(;gFHqZAUwm*x$CD+8P0Av`}sa$JUloVpnRac2FXm-&pG?yfJ)ZJbv1L zDz@(jKRY9`Yaw?yv9UFpvo9j6!NOh~ytyWym9o-eb!U-hFRx5u632Ydzr@Gi(5>dy<=!J*YW!NAJa%0Qvr>@>@2uKU;><;{4;e`;p$WnEgv z#*Puqyu^m)QRUl+qxG{D!NI{*m*lN0+bkFvVm8?v#SI?I)-~Zf|5YV_dZCu}8CyGI zD)fn6&07sCBJ%2B<$1x|t5)h_S6(omQEi`XF>$$j$98Hz;jjOzZnTu(^Q|YQg0B?@ z)b*9Q7PZNhkcg7iglcFW*pT}Q#wc^$I{~hP5Y{6RI%vPsSR?{kbFHl?5q9@@aq(^V zj@k(DW_X)yS1A|ahc7F;gy_vV37v|Jjd=z0LtgdIhP$ISO+N=8K-P>tEFcjV95WWV z60)jWiskqL$p{#Pj{xSErT{Oqg=LL^YR?VG6ClzrAKHz3H?1GG8MpY#+`N~Z$iC46 z4vS}fTQryPUL0I3r9-gw;Hfmnk37IRB7xiw) z50i{k?kl(|3KiBCrSL~Fdy5}(MVZUMDQte!xl$w3vbLgZ2V>6EsbyCvmWv)a45PaH zRbF8E63f{9^J=T$aN$8kEP8&?bE#*>qmvukwDFSMehX$oglfIYo0cPmg*OYnAJE&nkD=jmu%IX^0AgkmAolgU<57w zTt`;-P+BLDbGRQK)B zqb)mg0P#4F*@i(LE!br^hJ0H1iaQpSU0jBUN4+iincjj%m*OHXVVuj}dOyHifVw#Z 
z;f_KBPQ8FVhlaE!!uin0dLv>rjL3eC7=$l|Z$Tb{pE+EI{EI+F+(7A(^VWPpM<98V z^Dqra%gGF=C-Atr7Wx#Rmd}TY0WVf5d@Jrb^dtNWt|N_%sKLp0o<|D5P!mpa~W$*bn=u$`<_iR}Pl+J#TvJ;lcS{?odzKEgr%7=sL z40|DBge*M16r8LZs828TQeRV@EB&arOUo|nm3}Ubgh<8f4()=L32MV#VSYU3svcM# z7iqs3euoSUtSaGFVe2x%hs`6(v!zQ6Kd9NIZ*}Xyhsw^Yst;ly4RYxATxgEu;wmpF zRh(!y2zMce9V;yU-WpYFEIHY5L-7-gukodDO1D{rCDbwxV^nevWQLBl?HuH^y5F-F za#FFx4i1Bo9sA!Gi;k_Ybty^d9;es~p43VwUj_FzxD}6<%Bxo;yOohF_S-fHFk15N#KV{ok}PZl^OyM5 z!-CyLB#obh+ljx@T7cd!cu?Vhj^PIi`p_|49FBxO#6Fb=Lj$bU3EAjg#>vn>=+AWC zvcH%)^oiqovEBq)OFSxFJ;m?|m7*~4EvOWk8y12p6f1KPD6udko`E{gcMAYAsiUP9l9Lh=Bx2YPnNj$Xl zHOf=8Xj}vuLkMabK{_<(^hwAmHACF*h`*I}=mF&T3VHTzq;L71ee;kAP3NY$NQ3I} zQXDc@ws)Kx>JDLk(?Nu}dr|o%gt<+|O+c_4d(pEH#F`h`K?qBwG>(OMYuda~ikxRa zFLgkim8FjTg-RieufERg$+@Ze!2t5Qn7`@Rf?`BJt+L2IGl%-2SQh)5nhsvN;W>3t zDcQY&`lT$|7Q^CUORGLH#+GKN>}cQ1;u&vgQy^>M9n?^$eMSj|4Sg7EN0|ug-Y|pQ z4~H-MO38(vv_(>4l(Y3cEf;-DF-k?Df6#AGWEcmSntT)Eezb(_hY62aNovPHf@4Ww zjC@f%=`)6Bi;13M8I>0)UnvLVM#?mD8;wWaLUiHvd#<6mAx8dD*&xpPXvQtmHv<66D9uc-D64jdd%SH894U-)s_YV%!Aw*%XF4w(g~7UWhEV1L5IYHV{lI;eBYMsb4s#4TbuE^01kIm=WunpfwiX_+ zt%RS;Ya^{<{pLClo6uj`SfYRaB-V67`~IJdtAwQRbM(uENxo0$KKQe9W-x3uuYdcc z^x%@%UMvv%A!|D7J1d3pBF~=jmEO01HNAz_7(SoYOTF#8pXNf1ajB;bQIq~vioG12 zB$gS2QaaM!CZgFEZ+O16xt{^e0u@)3j2f4Fm)w6$|ZtY&6@D9k{j9Yv;+o~ zbwKTdL@>@OT5^!|6xqha8k)Og^0raxd{MGb3MEnadG=pQBbWW3n$OO*jL^R@uUEW4 z+-7XkpUu8aJFRU?bfVJK)3$|Cn92z6r{o^R%Gtf-2i(K|Dp?QPzA=GLVxnq4AQI@e ztAetlX?xAQ1U_}5QM7fGGG71Hn?erME|{H1-pT!Mj?KHSd5}FqxTe~j%PzgA7@O~R zWKjC9;KW|4#Gxo6^tFgm+^{NA_`IZPdcNdtiTD5Rg}J~BQ?pBs5Oyl3gJVlWWXDT` zk9bH7Ws76qi&`O5LNkP(kQbhp1rR85dZ6eF#8w*xY!_Fia!1_2L*$)ECoo1@fQ(8{ z6UQONu^WZC$QPT2`43QGp8NUpQ65{lh*_cth~#82vknHWZMN}{ZE19%?t}}NV=ciQQRzEKs%Y%EVQHE-E);c zpE||1lSoHFu7q)pQ8rDh;ms!h7dx=yWkbRpI2Q?=KU_RU6v7Kjn=Dw!>D;r9w~n2* zaT!O;%3Zmb-O4&Njmhb!^Z&gUYe+Un`3g^<4e7qDSjenX zx=)+W{J{wL_g+ML<;?5=Xh_A^ocr0%2DdzT(nB3Q|IbdQW@;gLeWS|1sBLkI@=@^t z`y_Q;@uL5fllfzD7UZt+YRQhQn(}90uOz&7dFkw(KB}tHKkKs;$IFHnuTt!QxY+~B 
zGKlZLiXbsb;MtH+0}ZavTC5YopCrChUqWo$QLo&G(5(l_vk-sWU&%(0&h~0~7SjBm z0#$4QV#;{>U;vbvs*T3ACPu4RIKqzkiaEF(f2M3DF4etU3c)4W*Gic<;lB!h)VPec zqx60GQYt*7MH51)Nzf`0ur+k}c#}_TA!p#Ebtb;JRTKFSRsV z7tNiUu||EEJ)GdIkg>SoGi6Rpn%^mj9pi)hD)CZgy8VbKjcm)o0{^=T^gJ0A{7oyD zoIBdCvWUmVtK~O^QQJ+@SpG*pm^g%=?Cvab<2|>3D4a?e`=6SnpQXDDKCX4ubR5O1 zTvP?|vGV`J*;_|N)qR2AgQSR{qKG1bQX*j?EeIlr3L@Rjba!_!!!R&I4BaUuNGS+5 zwvR0aDt4e?q9VNG7oX={>;3Ef{qAMfV!{2)KKq<|V()X#KK3Q9yF&^KiW;0&=F5sw z%+KYa3RpU?6#${uJjJ?RJ=h$NWmNPvyia#5ZK&HC{kEA2|tPLB1Y#h3LeyTVFt>!N$#l@C9C?hH*r^!;ZRlNSN`_dL6cf?-ldZEsA=%W`C*twv2)u zpqI@j^Df3B8!QyN_8Zr<<&JsYsw~O*V86NCBUjt_ewixbzIvn*%JuzACA8ixUjXWC zb}03T-C8d!j@);zMoU@inP2g~u*dFQSzaO0c%bxL?ooAdVHh|4FXhTe9Aqq-dZ5#(&s7Y)?syEcf!?yonZZt zXMKeb^e&5TP3SqNjfY>uzF3%bc)@G**LCiJsr>ns8aUNnGq&o6ObqQnZcVs$%nf-j zyW&hKPLAg^oE%!0($Ln(AE(43ze&KUFlcVx%xMPzN>m=~zf|y1Psu>S+MYGy3n6};fXs)!6>W<%mu#D9 zmWvb?<&Ey569z9ETZF1HUUBY!lx;stKAW(-J0SObi1%UR9IkJEtBDN0ZK|m>`>91^ z!&ceuCF>hvByRwKqR0PHi~Bwo$0V#gQlWeq;@1&V7~!jE2`bRp_PlWRp1x5yQ&Bkb~_#<#gY|L#1U_+uk%TDn(j!*5wziUlLiz z%hCe?lwJQ)S3=+jpx@C~$jx9v{Y}(8Xm0KkOd33&rjLD$%ubKMA3!tq!wGfRkS%S* zX55{{Bjhc(-b?Y2518=&XxJ+(s1AYX#gTFrJFnP z56KaFFNt{4iwl{c3-sh(IpiTdzUC?H5LL043#Z3Jk|+aHZ3*Bep<#Zg?dRoGoK!89;f`0m5B4GHYd07jl=lW=}HeRHl#P zTC|xBQT)OV^m3u}6f}9@_-uzD^=JR9(ysLBBY7e#;COp5b|v_JGcG;>daB-Q zcNW~G#>?&;98lJ@5P@DMOd9II%QBMNGzeVKPzjuffqWDWkz!%Lu?A#c#IJbOY6sMZ z-CuYE0v0XkB@l5%7=0oRe>@IK!Vn(& z5y=U}=NnxqnxtHE+=ZG~IV?|FsXyI2$MERu{nz-7nS;K^Kl zUjYHhsoWqZ^l==u49EiZmi~75^&Hzq8|2pP)A_?Fx-^6XNB2t}Bm6Pn#r){iIH$}| zpC-J6$irp~?R()tdXp5 z=Qz$Vdzpo!P)S{7P zF5V58gyPTEf#|n|;~HN`)x4OVNJ#2Y%i4bEh3-DN1#GrsnsE-Xsl^L&4M}bI6k&im zTMP0wMz>clwi-dND9+NjNP5NFaTraXOK+_TVwi$VbCxo~AyM=~)na-Oc$P7R$O!ji zMxw&JmNIW+3@ibx7Ob_}cMb@z))7X%j;yNOMzcX1WAnJqrt? 
zcMz8EaAnvNO)YjYE)Xl#dRgscOuH%hHd$W&gc3wqB^{#lQsmTqRBI{#_?%isC58fN zX4DB!G_9Pre|;H!0WDmO!L*}Cwf2*ia6x4_QW4iG%Y|&jxkmm;F63Y{+9@_1>w~eB zLG}xe9I89JcHK*AIy+KrALBafSPPJ#lVNy|6FW@&bg5A=L#*wQw6VGb^?H5DI=yW8ZNg2NN* zI6?J<_0@-6=>%S-(2Rs{D)0w%Gq1BLH7vo|^i_FP0wbW9r{tG`Phq|J0nqD7{rpAn zi~FW{pOCH29KJbv&zcND%AbCgy`QLBb;|2U@Dz=_-Kd3JHup6ef!WTj#0({#=E86q z`&M#e@gCbxa2W(wlk+?j0r;oaqaLlw;*1h+%YmFsQWhtH<3YAVFXO0_Pb8dRk5MA_ zy0Y(5o^RJ=f2JNVap9Cu?f+0r_evmp727Iz8|x8!J$ogqk*$t0WKmdS@ef$(tW$w( z7Le8EL}oEr`KvFop0fU092FkS!(v8`C6kePjit>D5zQfR>?@QUm5z+>43Y8Rau?e z)-Wt3k5-M)e@XtQm&&4N&<~Y?vSaAN5(r}lJ*vnNZcTS8cpmFScg(vG;6!&-=x(j2 z+sUn0VQ3?=e=YhKv|sg~rMvGWQtC!aZAH>%XzQ9rp<~qN^`6nQ)EhOX{;Q}@s#b65pq?wQH14N- z%I%w{@~`LUr747;vdc53NRv_u*c?=uC5GBWnn(uVD|YRWY(zYDBuZ>i0fy(LrKsh9 zDC*AqCD{&es?aX$J7SikFL{9i122icpie}^h^?@TeC3&WIB$o?nGF0|!&C_a*ZPNI z^yY2JN+JXZ)<}Yg`b1;#9^#pdTbb!3bT~s)NnY)9S7b;&>JTXWKru5^&t#EH=2NJ! z7xy!M@fT%wF)i_*L?@Zs)87imnI2)yLPutRkE5WO8DW1(KxBq5J1VSVYRyyf&6)d! zmw5)FFkuvKMhFv}PWviQ2uwr2@<;icon@-EXL|N=d~@FLG7|qaPdHEIMu|LfhPdIv zy|OOcEWh zg54jeFn|TQJIl@CU#&*vu1K{-o_PhTsJDO0%U^iA@Hb3CxmI9?SO6=|gCIfiyX6m1 z!}|tvX&5cHp&Sm@#)_On#GYT|C^yFLo|h#wFW7^7K>m=IiLZy=kdG2nODS^?64&n2 zlYJ%La&yf7Ny1y{XWt?XELtNwPh!kdf<1YM=|-ek`3!wO9*(JO!flX2$?gdxP ztb<&UrK@C?3(}pCOmRNU%idW2spDE>J7WG4r=enZznH z&f=Z;L?%pkR16aS@117@c&c651%8An*`vHk&|tQQA~jl=)s%a74@?r219n+1_LUV` zRAhF^&gq`ZG!+HT%i?zC+$q0GAj*8pG(qlCW%0*o4aqm<<$$;1yu#JaIhoP|n1z$* zP5uhqUeOYf*Iz0&C%u7BFv`AK7YDo~U0FR6C6|CIHwExBzn6zPdx~C^q1S&9xhX4j z3xxinzrHVOZcTkE8fjQ%onDaKQvLy06>47k3M}!BE?El=-aKA33p>4rQ+yKct^T;E z8b0(#7V96%OtlY`hN!Bz3>!~&Dq|sh4jwH@KoNk#pr#~VDJE0PLg>n9>cgGy3pg~>O*ivBX$MTc=YwclH1P5h^!*SrA|s6^8IfoGms(o#NPZW} zE~>~qldz&NI!73MEZQgJW)kAk*V_(g4enP;+=A=*#w#s{=2~+ z_B3E;Z53jpt4nnVccu`$9*2RCU zMjfZ-GEsRHcg)750*iNCrl>N+6#)PRRbm+wHsFTCOe%IQQ%lx5L zRPsj`am6-@kM*OnhZOiSWyMzVS5+jU&GfXt)P}}f7ChOtu7ed4#;G1<+3kK-Y0c7b z*;4io^NV$PDVq6dnW9X{RQUlEum80xVaFRp!cR$~we><$m{rw?;Q4Mr#Ueq7^S#m* z{tIhV$p-%IWr|XDzRF&fpZkwWukVvFlH@fvvO7W_SH6;V?RGC;n>FApEE&u)vGypo 
z%G$fEzPL%OvgLCh{6|^V8x$@{3a_!w{~B6Wxh=2K?{is_BF1@V@fZ0Kt2ad{^1)@? zqBf~&79iK=UkW-|S6%)rDYCk*tSPj(qNwD%-@VeB;z;Lh#Vd=hS7l2F23B|FrW<>vbezsO?J&?L1zD|8wE2Phb>iFWKq~KR zUNlzadH9p_DVzJ`pa&5n-3K5G0u`Mrpst>%_C+wD!;6+mc>0>j7G3x;ovGG^a8>y# zR+5nORPx*2MacINq^@Cx zDfUNJ@}cf@!o3Kq&egD6YUm8u=1W&cqpJ02BRg%7ma7aiYI+ay9%UMykK2L)q0Vtosp_?Z$39p8iH0NO|;*HxTSWNPQbh}k~7yM zEI}V|5acuzlF)}f3cv0@g58Z;Vt);1jeex7LEMO0K5U+D1-;(34=8~PN@U<+q`mMp z^akpZDhSjyOmch^vJ?Am_XTtyKFsbldWw*+5P=61V8;Vf3J5C4QMx%XxL6y=Abu9$ zz(+{+=xOK;a(o;I{tsoT-$f*v(qU(S9Hr{&yum)CIt-AK_OTP1`%}KM4V1gm;jHt# z55SYGY?KnB#)8C#!i-r#yRN~9ShgE|;m?^9Iz-G7rts+HgorGSCiCQ#;)exYsSTMu zTu6qVNQn#ur3z&++abAvM&DO3H~tk{F>Doowa#*E4A;9K6L+HENW-l}MczO@I>kZW z!`YHnlhccE0IFx-iuMG%NKJiLL!L>hHoSl^#huzGu`dK(eJZy?Ra*V!gvj!WJZRF_ zl4I;Gsc7XGd`J3N!Imf}Xtm;m4+cDt+p!@TyfWvaHXRouSkfztVYTSjYsRNG(iLTi zv$aF4q!eP64h)(0s;oG&4QOASv-1wAxr(_n2sl|`W`oQ)U!0_Ej92GhKH`FWpA=A2j6RUM zMXrx&On<`!W50n&&~CgpWFq1Xfddcn9wgQwN!ANUv8Wl1t+a)hWu2$s_K=op8sZvE znhQXMAVL^0^l{`II7gOF*{o55FF4L^S8UkcG zNbkeeF&(J|@DZjh$Q21;ZVnwqePDX*Sc6%~^t8Bve#%^;9!Cvh+O;josLTu~I|3>a zWo5x35aBV3Bg|az6le&)E?9B!E)vcMdG13gcq0~@QChq(b!%!Hhud-|-B`gWWdqG} zXCywLpE)sPeaLWjM}{VhDqS7&4Phkd@W@BHi!;|(Ari!93w~06aMcezOnq3^QzA}( zUxF3y171?jlFYyb1p-w~h|k*|;tZRWTf5I8mgVHE-=tdI^l-s#nhWRcA&Zpv4HLze z(`IYmWb!jctD1zf<79QbEmm2Wp}4p zv42{4Tb$^1#`@+ULMQNH{qEGW;F{{X;GfXF6}@inV0+7ct~&sc6bcro!c%h2*4L3% zB%Us$k~&h337yFC>3Do2WjXLy$|U6`1Qe`AErs>DX3>C%;kB3O9>|-2S^#Wv9T`sn zXBWg0!l5#O8qo?4!DSJ@A#IXBkS?Ka2dyX9V&E>PDM0MV+6|O@xPyNvl4h+lb|p3> ze*w-6hvYrS>*MiQFn*czBnA0@GvGAi>NBXXwC{>+ z%u4zJZZy`6F2$_Et)(AL?8jTt-|X`wtYsKGZztSlIIsCkATiYc$YR!1!I9R2GPx@% zR^Y+WK@<3Ws9P8w-z=dUo6pPL8-c6jt=vw>2XWihKyd;tYQANw41ss#MCO_!KFYjU z(L|`^{zu4L70rpd7n#o7ZrObWvR0RUYE;h{Zgr8Riu$Ch} z6nG;hQH%0y;y$1qZ;{=e&FIj=RokwkZxxuCxTChqseh?($W*IRrUTp2jAfjJhu43CD-lUG zu-JMeqcSL<2PG+2-xi6QD|DEL+~-99_|i=(;r#Hcx4#F1XLvUXShM?sA`OKm}m=SJR5k~U9Y&`x4;KNI<+x7@?PAaW=-G{THL!ujP}NZG^b zc8sDZIK<^|C_$XCc}j7MP$%8TZzfu2i3oh6QX6@tC;T(Ch 
z$YD8$bWZqJmO>lPEfDh_5cKjb@O=b^f}SoXe8@c+#vsCTY<+x)@^Vawf! z%&fm72RRg{vQc<0_|0V(@r&>kCFHbM1f3$o(7Oa-!RMXTL_j{!{ytGVZ`SZDft1ah zPhswF?C+*E++(<$dI^q!`%;sa7J)xlnG+g@KUem9=Qe^{DbyZL*j6kx%))nPyUbHr z_L&6R!U@<>eXYz{Sim2Oa#M0p z=mczIxC`pQJ;?V02WUwWj(-)N8hnf|K^l1G@ad?W4LkXd(K(Cn3IkE9(XXn!qQ!}P zZB!WDlvjtYMNIH~F%J?++|SsbL65lu_(dM~xSjZ)Hb=PM38KXnyjS>-^C`?cz+Fyy zP5sF^OIE`BIb_Pl1U_dgr6~x&SxjB)VZqU+a%}85Td6IJft>5qgn3G|mmSH@qWor? zuxnwbSdZC-@mZ_}HgZ3RMPmoLH?bIO2kVC{8GFg%P}VH#^*kj6v$VwrD9+4#nKxkz zn5CH(ai^GxqR4&OOc$Zj?Kjh2xX}6_(^4?Hcrla8ubrm^os7?lrDO`@vRn%qz{t+& zi(AYHmWAwn$gs^ea04=or1n-{>CdF@dT$w9GYkJx>lvmcw@I()zl!}Jhv|LFq*w|a zRgk(jknW$?<$9Cur0BHb(q1a|>RqDSWd41l`2BQNZ6^s&Pp)o-Y@!=hy2gB?4VHBV z-l2&~T3v}WSn(4pYnq74xRt){0AAo*|LCbla(S+G3I}EBy>&h;Mup(AT>tMTfJ7+zF zzu4B3#X#;hlSyrmPqd&}-;k4k^kR+C+q3T?uA|+hXHgcZ&$9;6so}wre=st?EXiVQ z?KVeoGfrscEU~~{)*^{3@pkjMLA#JOL3o4mlYAf^N*NRXAccns#D=7)U3W7R$h57s zqHp9^YaeC$lP9$3A_Zl5o}#{y>}6^q$>M0HU&^XX7&ATeut>&)?m8&E#6)a$7OEnE ztSuKxnQ2;5!MT5DrK#&Pe+vB&M>CBCzmgM08wH}!MM7WxwC^ziosZe_iT|B%z81;< z!S~RT^F6r_=4DZMqAxNj!dnoL%9+6_r8oP{C07rNp=V{?cAm=_jH%-9Nxm^2Mae1M`+^H+(tdgB7TgDFZyYaRfGUg}l>*R+%92_P`ClHg z_M*R+=%-6FKt%ySRCIS?4S46?rv(;}X^-B#w=mwujyxp1#b|TEN_akCQ|=J5cU~6y zK#?KbH@&71ijYO?=U+lz-s_zgg!<*-q*#hsxG_@B#auL6p?HW90B|xie%m~S$}8N0 zdzN-RzYyORWtXQ-$Oz1q=Ma~<7w5(in`~d?oFpw=F_t?@Yy;FtVHCf43VAobl$M*e zGEa{-9*L22>85-B$z4d_<-SaIjSjLU$hOewE6Q^8>F$8jl0%HYH3hjfFOC0B`2iP(m%oR@`YRUF8&N%@+4TT&en zp0iv0Jm5(7uy}==mXw%D+;A_utXL%(kyfd#7G7un&mX1o9(>H5F4a#-%3%~whab(p zRg@I4L~35x>57$f7F^%pCO%tWvwTK;LAF`VMySp{KcB*k)P|_I!?DZi%o0}wP1aZ> zXY6dLic8Jj@~KiY9c=!p(lA4^=vVbX#%h(Vmyh`C#Y#U?cR9@_rlCfbJ{6Q#g#aGj z39q1mBe&crn}ZCSe=fTXmFOL)_zC%~iu3aedwE_~x?Rm{C?Q6#svp)Lw5lQ>KIJ`K zR)So+g;lx~wc7ljl9Q+cJw_QErKQ?gNh8(FQ)%VZ+L)GTSj9M&x*uG20q5yWD7lXN zx@Bqc9(=ZWZqXS2w4SVF6+S{0*=9NI#a~Ll5=owndRiVviQYF>>OpDqdRFXBdB3?^ z*-W)D4^lc%x9G`=)TkR(t5UDC%I2xm+zNYkWE7^%h@G;JP(01v<3(3~X0P5HQ5ekn zX>L_e!Uj3I!xm}i+V;6b3WUs7r@6Dn$*+V<-75tVy*wmiyCncI?7~Lek;BF9g_e8IDpfIob{sF*kk`7YC2wyY$gEQlC;zB7EPtO_ 
zsoG2UpOqbB(b*Q89=y6EG(Oumy=_(Eq3vZY#>o{n_RY^yEmq{Uq@;q?cQ^N@#j19v zzQU+Vjjg()(T71PjG$v}<7pZ5ro zs=c)l<-Y&%A#2ocpQy$r^iii*^${3^_2b$>%zGo>x7DR;)^ThDwc|!HG_&aB{4) z$gQ&mm2b)|HnJ}Jmi=54T7>2`tJcl3|D}4YQ^I1pYG>1~C#Wl&ft!+DSVAx+Z9i-b zS_$-z+5m5dSov&3royh-yhGkcJkmOfuSK3cvN7RmdP)r@xdhmndn?Tu{F1pZqaC&c zdJMc6u_1CTbSKJj=XTf(M$5(+z6_hFl}6Z(^XUwZ?L?_n9ZEQY-kTGYtU5W4ekaWu zdliDrXvHrM{{@;Q?DF=43=ku&4?;3Xx>_p>Nr_^1~YPQ{}*Bg(^UY2rev8!aaJ z22}}Op2ni7h26?{OM`g@gPLe(tdzh`~suA~$`;9V| zc$P~6j;BbuI-%#&mvTCG5Hl`vqAfqA^Epd3%ZWTTqBS&(C3DSE=YsUC*I(E72;;)Kr47ToXyW$!lnFIC!n-cFEg_=)zv{Khg4h z9_eCSks>DjVHnl%n&tfw9ytGEspJc?79|{TK*E*N14L7OUCDx6K zEq5X5#io~dq`M@9D^=STlBNpQdX%L|@*-6sqv7)Rn%1QKJX-U~14V6wVnvu!b5$le z5?+6lNQpsLPp2J@Ppl{o(MbGRde&n@GOI+>VtwN7!nK-_A{;_KR0kPGSf&(Cs2LH zbK!AF33v;^0Y-$TrLqyea7OSEK6wP21nj3AOqvB>p*-9#g&v}&xwODuQm?K32`#5SRn6>&(}C4Z2`mmnk(|7W zlfZRIrE{#&v+4cp@x<4_r|i0YO7JZ<$@w)@oxN`@42)-+0^TwStXoxSak*JExm}{K zq>!^PxnHb6y-8b`$xC>du~Wp|djYgvSnIqC94(kO{Qztc+y{&@~OVh?>sJ(_652c^C4+CFhJvGhA*p?*)TxK@0B^Bk-1C=SN zg#ejl-&rwDl*5z&=Is|=3&C*@38^q#M2r%JS)PN&xty;yT8ZE+qM(9gWUrk6US zyQ05KZ(}aUCo@3Usia)^Al@WZ8vB&smysTDk_ZADZv8|8Lz7n}lCHp1?Y7D*Vp{wZ zAxh28szrXtFraTi`+y*@T8tD@9rFTv9X90u60eOow$&B?6_sp!1AhVa@DD{iUnGE5 z!_6h$@STVe+ET=ER0LFjvPPea{($CTH~1T4F5vK6im~;0x^WuT8lU%vqK+#yz?y{O z%#)ByL?(p?1Cjh8?(p}dA5lX{IJtJW5$Y7hXY&YJOzB&B5%r!T{X;P^1yvd6So$Im zsDb54-Udlz`GcmRKUi*&=ir$vdp|eC1Z%_Q)yN9g+LfV5E$08M096DzTH4!8g>Zky z<4j$WHK;)p3H%NT6G|iQKqmy3cd_8X0=rF_@K^j{qc?CY|HL0zs#tJ2si}%(f{b)v z?mePE&^ZSPTnC!S9*m!U=0P3hRz{J~71@8s8wy}@P~B&~V`@ST2>&bH zwvwMdnsKMBEbJbLTC&pTBzUwa%yB)$Sed-y1!TYM>bxu&o%|}glVd{(jbDiEqdZO8 zkUC4{ry7Q;(YB=D@-C-+1SQ&e($7NTmW(k&Ajy9y;==4u!qb#v>?OqF^qrV8QYf%A z+{N~0=Y5;QQl1!Qa@$C;qHItg+OrSl?Qglr||V3v8UCM94KHZCGf;C3(BL0N*m^oL^fNN%I^a9s=-rU`!nWs9vQv?aa9 zWfMgQ4&bSzeLFye1EgQJGQx2(WQjWQJ?U?GiSv*!dOJM?Rm?C#E8a)qk5aC{q6__C1#o&kCq4loRy6OoK+5C>?uSruIp1v9 zD4U%31^`Tm#A==rDj}9tM<@rNtcq@!F07*LVBA0O>f*6|jfisP4foy1q{4Qa7087J z8TyTAx%jUifOD6V6`eqc=0wLSLFYKJiPh2XIgAw2o|ByZbQ2eKt_={g{yNVTG_CWB 
zUk7^pM-~OgzQpWJ9>;^37t@k}x0n+dJ<%&!YeBE}EMg@?J~^XV7hy-&gV=od1)ay7 zH}H}_6hn)3jJ_C>fODfq!fZ3H(3S9eQLE?=k-&hh3D+Po1(T&2*D>t0*Vv<~pi~Qgqb_hpoC* zMTQ3o)GCQ+}_$RW(I%%YFcw@Vwn;AacFLShGjaGV-kV{&iY57x@mcMwWI4$A4@FPHM`xNOy#gN*6{m*9 zy%ss9R|HK9M}cR(nuRUkqYg6RICS5t1DTtl0}Jd$aM=Iec@_@`705xaK&=8V$dR~3 z{5LRQkOjXQ{$s~9pNvd*(B@a8R1<-MVpP?FV1XgVc|L_$G|wCL7+B5I!K{k;#vQ@B z?%ℑ=Fb^anX1Sdjwa8zi3>>bH;ZqNZ{!bugp^z2FI85B;zQ@lq`?YXTPPa-^Z980wl+e_P6atH?VS9pVB|FRA=Z!hq5BrHhX(m>)3jp z+RSOzq}>tb8`cZsZOqTC*9)>)$63?!vQXccZkdbHQ<(0eb5Ylri$y#3+A%H)^F6SP zQsE*yTLwdLYvp&wMxp6~LMD;(R~8Dwu+FtiyUW;=gN@qF0LVarhv+@op6*}ibZPR& zI{HDW=gM4KcUIhjN(Pj3ZeAAhH$ALqUmBF2tn`h%Nw+T;4)mbi$zSiTq-EzV+Zaa+ zRLreBNG0UtEXbu-aR%ophTBTEzGx$V7#jWxGGNs6%L1}<6Gu=*;C zWANYAVi_G-48s1{LdhU;MR2Kjig?PWLhMKi*#^w4B%QZV&#WQ+HqaA?k@8g*M=7qt-+$X2Y_9G1!`)4e*RwS zn(BWpEe#EIO>Hf0?LQx#g^L#I=;-QbYwIr7)m@~gy0jPS8!Xn-|MUBwFZqN2p;SM5 zI@&sap7=kz=1u^X>Hxyj+|||A0~Rb*Q(vk!cLrdlI=or)&-v}DquT#oY75jgG__Po z(p{u_pl69H`ReKmREgEp&`>?iQ2igEu~c)}x-C9hh6hu$*JGA%<;bgbEPOjpt_Zz8 zX}K*G%U!5zw9hSTj^b8;f41wbC z1R{w{q4M|wp(s-KhuHnh$jy>F(+6>pyzz)af&4&yAeFaPh{? zTet7ry*GOQ>G-qfFH|R$zIi+K>GPMb-@Z@(`1waKHGulOTK{SG|IuryO0NZ~GSJZe zqnFwOlIo+rR6}#!7OiDI2enf$hU>R-be8+dt2<9FwAdCpxgr&NUDwFc>8aJ!AJzV+ z+5bPqxc^t0{ioRftJi&ip1PW<@YI(AyaB(z)JPWUF0=%=S$?ZpAW?f*#nnm44cHZ( zWH@9VJ+omN(>`4piY{EO$-nzqfhovdluRp2rjgKEwYJxW9aoXs}LZ^>vZ|-Ll>yCyIwp7IA+%mBBYJ%Ns`8+(UgL>xiF>?;Ebk=EF z6o#{|ZI6$6{IGLJdf`4L>V#p=sbN36SN#XJ6{aWyOg8n|_B=oOBI8qJz`&Uwc(fb; zu>99jE^|;m@MGX`4VTz#$zGnG_fLpUvW_|Lyv1?Gy3bE{uXr*%88SS*p@Z%Hq|{rh zYKkR`3mMD{JL>EnoY%Pi_{(P3(6b?1bPu&$@>sIcqS@RO9MT+HQy-XkARQFhYU^{W z>Af;+tf}KNOHet91Mvt3;`>HfQyx#gpGo4ExQMNy-xD5%Q+oST0foD_oEwX>z%~4? 
zdM?E!HC*4Dcc#O{u9kiABGz~+#rO8yyHoL!w6biH6=_{+#a zAB_;T9d7LwO&gjBCez(*UgK|xwu`$F_EN3Ahq!?w|3nqcj<9;?0IChum!3YF14xF9 z-?_Mti5sAc8{&g@@bAF$S|RHax(;=kq^knHUxBRNmT&4;Uih(Q=rKjVZh5}R?e_!s zz8r0E};iK9bQH* zxqBw|)a!pv*0csWg&Nl>X%uNK(!zpQCQtH#^zp)OPJCDz6FF{`Z~ImqfM&VhCsXB( ztGg24cYwwo6#I8uxdiQq==glEM51%Dqk{FMu5$8V&SKWdZ9{qxFXyRXPRF!lBT0Wq7@l-K4bl{&f>^Ti&ohCtqI$F8fZ-4g{gJA@BY(vk44k^+yzuz zBIVxlP!G7j&9u+<&y_QVjkCJ@o_0*z8VKk~Eo9i~MC-|0iTbyNa?IMUPP>PD!kE)D zeJOow`?q&*{%R8(k$U{z{f|B0wiST=F^x8@9jL-r*SkZXJacueYYkc;*>VEGw2 z-^H5)=njmFEWD|w=Kwi(Yq=LOG1wz#e zPYu1Eh`G-MO`m?|ea38b-?L3(PnW9c%CO#>U&{T%`?~wrzkKKyJUyvc0JwMQ=8DtI z(Z`bqhu6#mO>5(xWJm5;kvgT<*>?6ytAXA}y>MijK*79*(KNOZdX7_8^AR2?si@eEQ_0pe?idgF2zw+4nKMP@rTY#=p3M(O8VU2Q9Fz>yI(qDc0|@wbNcBo#BxZ< zm4SkRqkWVNSvsnGOTVLY*V2(T`qMdp*6e1)u5q!p`PoL6hvb*?TlVjAC)EA8lS{$( z2fq`8AZW)nWLxi#+`#S*e~lI9pLU40pME+rlXuH|;^S|}faBxw8~3@L*%*A|JL^kP zr2*Y;y64#(;Qp&;*Mu`}gAeh z;d%fqz}kY(F7G>kX}$G6q8xGi`YRiFI`)S0K*`wkJJ!Odmc#>tB8v~g=Pt~GFSi*N zCMk&3xje#<^-lp4tE#`JlMa78YjB@*VtCOYVX`t{?S?6XQ;1ZAZ^Dv|`y3u(jZM>K zHtT@4R>y=nEp}z8pIoBYPi(kH0Q$|LQ4&mgW zOKbwb+S~YGNjL2C^5Vm;+uuGXSm_bmd;@nd9eSzW2|Q;$h+hUQO^o|^?90RH+VN{M zMgyu$od~FXa53|fi74}8f0sWKj@cdMZ&2D(>|T&dFEf>y59_@ z?S>~{Q=li`Cv1OhBzu2DO`Rzr<~^I(>XBgm;Bs8HN2GznptIs?{5H>2mq;5oi*}gm zrAM+Jd>kK{F*@+1qIfb%^@pycET_s_zGNBeaqh(V>w#8-EKuahNXi#xnMD19oD&H>Ut-naOsH(AMTdopz(XK3x~$UCtKM;V>ayLXP<=PYrM+RFs~ zX!PmhjBoiV@ZWjh18HMDKUWBPj2;*Gy1hJcp>E5Ox6iQ6LMM4JkmFcVGu!+L+$ zJJZcmN#FP?s-Jaozt@JD_0us^u!GYMlXst7pm|(+w29n5+#g6U1a0Z^lzz4!2tQU+ z7jrW0n#Fj|T6&6#Qys+aH>A6ZF&TAsc;TStWK#I*a|6zO%WKnHoI)=)1W2~qzZ`Q_ zY}XtDri}!lOcQ6KRFpOAWT@9BdSK`DzHyW!Y2lFbp6~Cxc!q<8d4pG6{nG=M4q^u{ z^_$%O7OK2W|ZaI#+3s9*V{u=6uwxF>Y%M#QaA&LwybZ|&v?gY927%`A)i zhMjKvbzt_P*=ld-N3qx9;c1e#3}5`^z+&)yiKmOcqIJWXmP5nJHIY;8-y+7Yp7qzH zh-Q{vnFD|;M{~X@>34vEJQeAMqv|-E47b_1=U0 z(qNz03lFuqGq3inrYn!zSbeBx^>rXR5Iw!`CFs)Ty{$jmew|&}Ht~(g9H`jz%ZYiN zWx{lwwo$#7OFT7yAyo2aumiVFC(*Z05k9?f$IE}TF7W%V?*P*Dt#nvQ#x55gtn3+v zcHF=8Y}5PVIe;eTcliN{=Yi%okM+E^K<@Oe9u}N=R&7Mt$sgKMG&YPD_grF 
zLd@ExuKn@aw0*#P+IZW_S)IPgfymp)`*VQtsP|K-S|};%-KNrY;AfJtcLuk)N3$=1 zDlN7*Y+dfvxp({W(54rQwwu&j)dss`8**C2d=NT{bqM*4sb+g^Bll&e2n(cdu$G% z#M~b;|1I*|H?`|&$IKG#agxl-kUS7KDa!OTm|8{f8123j@}0OC0@FKxvVfzT5y%*< zO~SEG9#^&GJ*esAVb%E6XvSxaGI7)N$JV#hTc0(4d`Lau>~!Ltu)IB5ZS-uq>fzxo zujAS?hIZqPrz^!{OQ*C2dFNtZ{La4I0Sc{qy`R*_lFl7Z`gSBp`z)*bWx+3} znT7v2xoQ=(c4W5I-k~4Bcb>B<+?QVHAp+)~-?}H;FMh$bYXa8ErL1JLtUG3tA=w)A z=*d2@)3t@ntH+Qnu&0-@RjU~md9)O_zliu`dLqUL@=6sO{ZwO5;IB|}?7MGtRULao z51y(#w`ug+Iaaq3XZ!&;fb;CgO_PH?{-Xf?#;&5wAcbGVQQv(Cw=0O%#?S5LG&!tLF77h)F>Inh=+bmvX;sH#75^nOy~y~1lf z^TOkvSx)mz$k4PvHD1X%{XJI1_k>txpc=wfysbM~hT0f3h^@-AAIP+nsy+8opK_UK z{uQA528XXn_bq2{P_K$>X1wD@5l$^3$-{Tob6JnCU_5C)8~1Iruv^fXwy3P)^X$UU zlPkwNMD{FHv}e*}-kA!WyI$+2BR{>`bHkyV;&sxJpJdiKdOqvL-mrsvuDO|`Yx z_Wd%SQJcCe@_usP+jQC@WD+HQPY03e$y`?_+b^SOw}24G^eqB*`={t7JJsbkI+lhR z__n^<%vL$#+(~CNr+il_9!i2tXerqf(YtiZ{d|?3MjOvZkcyU`578ROaL&?(XI9kA0iKDnn2s}^ z*IQ>@z-C7pPaN$|KYG_Q#U|4MPOy77-X813zp&<$V1c>O)|Fe2Y+1L{%!+xdvMcAT zDQ#BAGek1JDsezmY#Z9^jqrFf{F>;# z=+xtByFTu$aWI?@5Sr56hEXfCj&+T&Z->i#@xsZEqTF*+myuYbY zbo$qxLwj9ws#CuTpaNb#x`9`y9u8B>edMFCUVBZ(WP%{pdGCr7gBmQ$}w@Kle028cZMLRBv$ z)kk-K3jaulaUS)F%Zhh%fJt2CNtE#n?zd>hs-A3jYG~aQu@Uu52bs}*{k`R2$p%6B zjp9`n8iI!xx81aivj3gG?V@3}0_rdUdyvrmwO8c(?Z`!#e-v`U;oDH@yYH1Yv!iB{ z=;70kW!{G!hbJ5EKKU*kzrFpVK|r++qwOrhG`qXT>OgmMB+yYnY5=$ao<|X;48P?} zRn9CknAI_xg!xReGVzod%9P{V6j0mf0nyQ^Bg%_F;EwJ& zz>~(|1z*ji-je|+tx#>NW>1_ge1<^RLpTZgsPwQJ%ODDG|r zTHM=WEf%0S#oeK}yGshiDNww4ic-!X>h|otuGW*Q zWi$EW&rt1Jw^HGW34rZceWk0vNx-UQ=q!?S@x=fkDv3zA;I=C>oCX$z%xus?ZN8<4 zyoQ|6bitx$$z%O1vUhxpF+L<$XQU_MH9P5bgv{qtA^?9G1^`qU!u62fNp!D0*GUBD z0;}f3;*~XLO%--q(sUVK8;JeTOU<-Zm=QnvC^zmeb_47N5}AZ71F7#Z!3>dIHH3X? 
zQHb|Y23q*10R*8xx7Utlp5tnAjT0j_P9dewhq%zNA>{9LAVhL0`YzB{Gl=dcRg))oiMJ{S8^t-9Jk+AE8JG8``O73_+R~VkKL0*C@$QDYuWp+=;0Ojj9BfA&M zBZ5}7f#Tv;E3ug`2@-Lua-1vI9#MkLFRpDd-QS`>_yOj8aMt{OV8SLu>3k&-oL31p z@$9ANfU?D!&$o*bv{|!+9g!M2Wxd4v7?d!56L^aX%6qEHnn1zxU?Iz;-cjjnwAaw; z+av`(+Oj4;G0tmn4&0P!aot~eP!dV(tzPx*eMG5A1vG3Ualn|ZDMSn}Gs9}}X`pdz zHh=Gb_qN}O`Q?q2w3W*m7g8$;`wZ;S@8t1Yg)u!Oce37dL-7~G+U22q>TL}R+)l9% z=@%E6VWhZG=r^oxJ9zs}Ia!y%X0Ypr0x=|6ML&yzNvkcFZ@mbT6VVRA%hq+I9%!kR zCiUr~_`S!1WjwNb6}(>l>!G6ylmjH{#0(i?s0H-pQufiD$6Y0&`Gsv=_W!a=f4$LX zA#k~BtYq+oPOfV5`{Wbi6hn3`*dIg-m8a)e`dXvgu1`mO!>QxZyEHXxK>1svbg{** zq4CBpBU9M#2B_W;pdsp>loo-*1)&O(hF|$Z<>lccmGZV__Nc7L+&zSz=%hV#_-T^) z_Hqddy>um<7*c=E;T*H~c2MAHW%;t-5bW#kM-=P)P(!(_WWSK3seYl6(&Jetl`~kW zfG>U~ULm&+XD`arY&IYQHyVOU1ffpmqp%Wv`U_vLZk$yNIPIJ{Wv`5_N` z5N{ekz>4=N%xT0R%bUMc8*;^6M@*VQ47=hsIN^PFLVi=QPj%2wMS18Htws()v4j~r zPYEiqapHFAb_UgROWsyFF-7>z^s_M7k8l+g2D4aOos!KQ&CY(Gj@~O4Nj1t~6La?E za-?l*@R<#6%{99xU4^Z|L+@08J#TcNn+*1~ zyHPM;v}^;&%#S77>W--)sC1hF&YNBhxin6B55??JF?%=n;fPa`x=4f51)Jtj+b@*Q-zARG{ZNLChZQ6W7YmmOo&3ms1&Ncy#C8a#DnwT~#n^M8czX>Z*Fp)!eQ_M4Mi?Su$idL#C(tz_XC0BIQ?I_L#M#PfGnw5~QfcT#A`tHHc}@x;ZC3Wx~9r zXzwO%t9puuCO-Lw>B5!#C48LKS!ee$s8(G!pv^Ddy zhA;qo7U-Ga4O@O*YB-1qn#CCJxG)M)9zN6LGw{eG$_G(RAkAepoTd7nSPzUDAM%zp z7b>lyHN?Wr+!{8ndK(-!LHnF@jMhJ)P1()Z>(Hzo__WKm8Z~6nluCuR+msNp|LIS^ z`VV~8UQrEJ(H3>wd>`h;P0QNMrvP7EsflYB#Sq_nVn~Y>ztj8RV#86cY2h1vC~#R1 zmi$nFkPvJ*k3%2_qS*C{HRHF7pC#shrahwAEYNyr-zgjfmmd&P2gyOM`{L}_padNS z2SjTv3DknL2bC1MXe`>p-J}U_TsB>-NVagiNtj3;LL-su?=PSL(Fkd8o34`2bz28oBMR%f_I3lQIEE~RO=>lwS zLlC>Ez!o#4V$xF&#C4G%0`{mihzmXFijE+t@HH;l-aGm}!N~y!W(I$EbFS%yuDvmi){6-=|!gaB~b$w6gTrS+d zN!o!nyZ*0|9W> zBg9Nnypk)SGU5|O+y!sJ%8%3h<^A06eW=R&uIa)TZ>njk6ytvs?5b_q^JKRz3kK6( z)*fk1xTSb#NdLMuWl0FJkV83p?1#K9%%Z3!#S_TQMCNsjb% zPg$yO-YY<08KsAf!lsPV&gb4=Doa+!fF#O6v=Og3oQ4VHgg)ggg7WXlOAxqEDiO$V zLk&w)q?k2CdB6;Jcj0PGJg`qBcQM=jhvPyL_ZaJlRU}^ywnnJQX!jMJ?J>hjoe|uq zBxHTFCZak{jAiS=axg0#@RP(Vo5hSPJEfi~&WmxN)`t?ok|0en%MTHz*+ 
zr4qs9`MuR0r^id%2J3G}mC{Q+j?OySTowU3zNBl5Lktkzd^lUi00W#oG6vj*>-TQ! z0PYR4Nw5pj`;l$HhFH~2ri?z7{@&ij)2VBYZIQFp%(t42z)kXnFu)4ikP4SCm@ryG zI7o8ZoKj?t#%qRI1USrW*B#68Y+2(=OK({^Zt^l((L*K>)S8dqDT3(VBo5&_Hk7Ca z({_c&k`74Fr7Z-u`RAO5S~#)03*@(%^x!m`cU!V$;AiYnxxBY#_i3Eknih3*{NwE6 zr8j6^erZv{d%f;lu6J=?w_K8OLWFMl{n}-Iu5DVZ77ibISoC*AwAbZT@AX_jG zS6`$U2}7 zoq@Jf>}Z1*>Kcn+T{GV7wwyOz{}%op8dKzkJHr2q|9;3$v(VLWzk7F%Eh?RlT$qY& zw4!Pi8L~lljvJhhw*%KeHBJUT<8Cf_ZuN zO|z4Jr&*lgo3450kWG=%P^qQ~rHmDOa`y&~lslfbvjjK`@=@VtGOD*Q zw0DOVMic3Tjj+0nwYj)!Unr$HDp9eP$r@}Ny8iMF{L{E)P$}ZI&8@kU(wWuK6yS9U z&=aP+62M?;sg4FXy25#dLxHx0OZ#Bp;5u%fVMam_7gQQ(t* zI4$y;a7!yp)?YZH3$)!lb;l*ZzMz`@C4&GkJSTjCOvY#3La@gn4CWo$}a5h>Mk&RQP z&#P>Oe*{5rh`5*7dk$rLRkW|4u9`Oe9?kQNa&QUo7|XCNznFypBt1Oy{uCt#WWl3g zNM$5!%})&$`w-p9aj(+^NvJG>XM@&L7bRFA-t#lXxJf- z!AOy3!R+&!u1|SD`6zxyU|_VsvAE~^w)REuxa>ln#wbrR&C&Gl>5RW9^na(byv;xn zTFbLd-qtK-!+#ESsGoVbb9SCsu#a%TRx>#5Y+bzsNX3!PrI_VEqBN=96Yp9aRScIXM=u}WND*VOyHnANF5=Vmk{ahq@R*f)aJ_7!DO$SAOizecFPRy@G0BqO3C zl4QKX>_7=Iy_bQ=#{=viQQ|k)7TW>fTvy%3xd!U@q(e?)STU-@k0^di#!fot#%8CU zNJE9gqdIC$e z2Z|^92Dnq&@F#MM-5a#=@(m%b@aKzR)t$6KGVpoux(Z%ELWH-yg+~#PPSf~1;N540 z;4Q5wX4IF|*EmGUb;gHCuH+$6-f!pyrJnExdS8eGdMH9%gLvSwP&Al4{QR%LcT}H9 zT4S1}-;4J4CJj%Q8U;KL5@<3CPXA>>+O1^8vv(YL6|b@0q6f?*wJ0zW$zh4(zxauq zmxls+veG#R&tWCDnEh-rOVNciH_#z}VpLZh_!$r^hE(Jg&C1gD^6N8BydhJtuA1MK z$f{Es8}LC;cOa%`N%rguDKe&?QV=p6%poDl$Zu}8B+rs0{rw{ZLuwPkj~1c}?GJbem2kL_fI2xXKb zLpRk)4{4Rk%wvKjNlc0qR&!){ zH1R6yj83KvC*xIU^anb20ONJFPYJPR8lg?W`S2EO!E~$h&JX}0WTj?NSlHjw)l%`7 z-|)HH3Bg9PKyI(QO+v&Jfb@@v7OXP^<|08g&kJk{VskGxQe!PDAdgl2qIAssid_$^(jW4B6C&HSTn;OScL^u7TSI0>FZ-L4W2&oo_^O<=G~E;P{J=>%OWWy8 zp1!OdosTFmX9z(to)r|F|{Uk&Y*n^A$Gy!|~R%j8}iI7bx z2wUvE@D{$S=Dd?27rv-kCUeX5>G%3fmdip@bH-Nr+KCo%0aadmp8X(ww&%K@JRHNN+$n&b(x8C(c^{2_ zVB2iT2#gewq7kT2pC(WxKpo1^3kOH_E>4LZ-HOQc6k?J0HqLyz2e;~R^XHIb_2xch zEu8A8J}>9(0Dy8yMxi>^oN2~dLD^;}Z7{$~ASmCAF6j9kEwIN2=x_&!Gi97cn?4&u; z5Yx+IjQ{K#G0el9p!W4*SNkTSDXdJ2R*t_k+uY%eUJmpA83)&0V|=xdeCqlY_3@s( 
z-0tQ5iBeU3F&a%!;M>s$lQE#QIPs8fl(lRoU9Rh-+=KETw`Ku5DdIHfI1R z=g)=VZHu7q$b47Carv1?K18=GRHW&Y4e1L&@D|Vm#)k3)(sRFaET^>_G~IKwC)xd= zv7SG&f?19O-$Yoq7-p3Q&2V7 zNubo`R%2-lQ)Etd9PGAq>(WUlq$(bp7|(I`I<+l4m_FQsG$3kf^>te+M`t^-I&Qt? zr}FWUHTg4-56K;B`tCI0XyDCH&H%6xJg~>G@<-H`w*A7LPFp%g>0(WzXmrU0Dgl1)&z~yi zrJ92=w&qi<{^%}d9i6u>=TLnvC)Y|YDk>Jp@^$73G%^vp2CxHN&chLuaO98zUF#v| z4^ou#t2TfRg?sbGP&VDVl$0H~7LK{&mAX;k`uaW>E(g~|VlsU)(?-H~%&o{U|Ktlf zxwK!atU3nj=Os7sW`?wo=%9dyNCXLDw(|*OKcd|e^eCexkM`2Dl;5BaZ_Gt77v*{`9Q*;h{vhi7LCP#GXPwK){MY1LyYJs5iP1HveR~3gL9VC-A zGI2DV-6DEdy1K4n>A&-LCzRTXs`#YtbB-lzIexyycPcr>M-+UiLrO={A1Tu}C9W?V zcgc4-lz4)I#&nx>?HkH2YH#czm|*#j+yEPf)w;nvTsU}(AztK1DQIlWmE=g+lr;Fo zsN<0RE+z4#l~ry|jM1*fMYGFit}~Xle|!4RiS>S^Gt8%`hK!Amc2Sx{98dRJ95F( zf7sB5CCKmb9#JCf?~LzNpyKrsrn%b_qVL;TD*GdO7GaM*+ktx_NAEQ1|cVHJhf<;YY)B?MiNE>fetp)xuO0k;zL8$THf3e zHLom}1lJeIh%K8KwG4(YpEJc$*t^BHOE49+tFe@A<{v~57*u5mWY+9w>3JRaiKHR3 zAp&p>B)aCh;62;Qt^|h&{@HtmM-*(q!E9ICH??8AmHnlbwZ)fI8P?H_5MR2`mGAS& z@g=1lB@kZ%IXC{@qQqOjOF5doLQ5x>#_4uGtI&{Xe-CA`PEDmPs^8BpN4l4UU*GuN zlkvfHAVn*{KEkJdwmpVB)9~^k8hP06`e)(m^cB;1XE$CK`VY8rv3peCpF~O6zd<1p zHBx(e#DLlbD%m7LF#Dyb&YiWhZ6oX8P{by&!q8wWAWM6JU)6=%T0bohAUv6%{h`e` zihAuEgQ`HyS-yy#YnGzn;xbEN0FO!Q63Cm2y`ShUEw&7klWlOfC|D0GK zUfcfaC)`k!Ry3YgbJm%qt|b|~V&REiEA|WjCkcffx|Q;$oZ1`-S@CN@v z@pOH5_5{5rPg<7SO7SQ7Yl({OG z-`~;~-YErnLG$|nE3}}g@bViy>CBtg@o*m0v9e zPj>!PJ`3C;>^CF1*M>&sgKq9|8KQ#T78h4g8V*!W9(o@>v2L_yN<~Ar`(2-{6>=Cx z@Q!#eWJQ7)n%;G%5;B#6Ae^?Hvz`$&ajvRf5nsrnnJ5}VHBk+4)=ddNX0nibtx$aT z{{C5%26U>MAu3(s*enw$zkh6p_=W_C6n&r`6A?WMM=KymebG)nQ%%>E{aVs!bsVlfpye( zW*FTd2#3`e1Wv~)t2-MP$Ht(hwrdCiD8~0<_=|yi+n;_P@VyKhCN2+W1$;zxT9N^8 z&M$gX$6g%;TyPG>R5g^$i%AH){loA}#m`Ke^2a;wMrpQ@h9x)a%lF7t`z%~`^m;cN zY)|%mJ-4c3I=N>h=pD22VSw=Z&4^Bdne6w$GA3$)yR^Adg>MabDyC1n8IZ_O4S~^A z1X)7zqdO_?B^R&wFyMSc>SK^7@Q)-=BB8^}!(SU~Y^(c?%Bw$>(n&|`?eXwPh<1kS zQdW822C>=81$41k2l`(p|5>cb6VhmPpAN~mxBy0Zp3?$}4zB|$(uvKMm4k88lb6|a zvtDAMr(!aqO78g5H*DwE+YX&m_>m$R*IUS4F9-PM2Rz36=SW#&p{>AqiT#4^hEvn4 
zue(ys!w{M8q0|{~w=qK>V#LsRhEBDhfB`)Q!YfzIr9%;yOeEvOANi>toM+&5U`gr9 zG(6;Wsp9UjCwJ5MkW2ox)8Qjr!~RYasp#LEmBFj~43GWvz?)&GVgT@J+?Z$^mx}bo z-jU)JcI93#<`Ppw^p_>EZAP2AlE@O(ik9^`%aH7b64Usq4(g)gNw4kEinYoMHM0)3 z>%2^8KI|jZHu4e0X)^+dC-yRrK7DrJ)~B@c_`=Lkg=a9^(t77-@{8nxvvkMkZQ9h( zNEi3UjskqcnRE5L8Lo1Bms9D?Z;Coa;lf-kQo~bDSJhLi`Ivqg;Kjksm69Rk>;^IX zEoEe^yz}WVNY}^h=2@4`J6(%?tSB{HQlqX8|tkoz5c2+v8cQV?38VpF@W-~otR7y%q-KoW?XvAkJl$fz2@*$AmqbBvoUyIN*mi-|tFup5)HEoi zxN6ZOAL6K+Vp%v5|Cing{=E1k*copHG2{%-)aY@zO5MmtHB?;s-mWbmNeYj@s0NhR2BvXFFE-V zfb}Mu$RU$meiQ(o+oe7p=WgrSj}mXka)-uqceF8^8e&^K2WV7tl8yaK;}36`yFd6r zH<#pLwsmlxJ9ED)uT5;nEE}ZBVIOQ@)0{3zch9_duPEHth5@~R z16843>La`*7@1tSZ|1GN4b&9|7KCJVZ(@3wZDS9r+NCV4y~#+;4Bm-B1)zhMW$j7T z+#u5AZ^X%OsH@{8-Ue~aJ9jzop{|-4;C4|2xxy{W;A{hw6kud_Tf2mu3)S^xs>1X? zC9*t>NK4|_TGun0wSup1KX8F}H$Qt#(Do|YgR;f7dOh$x73RW8^omyUX&Ui} z?NpV#e61>E71~z9l;;Qnpd+S8@5P{9y?hbpV6|uoa=0tF3xl_?q$H|dp=;|qlxYO! z?;^;mzY~d_ye>1!%GFSX>q&En4xck*LnB`$_(FD)|qHy z8kzFXk-c;tNAwJLl%ZV>ORfh{k=?}+9SoMP=1j5pYvI_m(-8LD($T{##k399<(jd+ z_JAFwU*9}uRiALSqTF0)VEQ!eeROXSFoC86bcML&zz^5V0ru-dd_{Tn3 zz_1`b3(%Mv(Z;er10+?|$Hdem(`Y}}7uU}cjf1EuP3p-BptBS%9(yo_$jOOhd~ywNQV?3hbhp)>F9v$QB<@q^t9){JmU6CPpkMma_H`_Iv><{8P0#1Eev&7-OlVJ>?7$t*%dmQKvA+t7W+4pTJZ_$w?}pbAMtXo1TrP+ zH6)+BEnbrxRAn#tC=vMZB}nIIkRluk9aCTAHPUE@RL3zx7Lu4_3r}b~deAp*B~ehQ zX+4F6UumzY4h;RyP6)_iuNwDIYRIrdDD3q#9-3SOvq{A?QjTTG2faey{vWl5_PDIB zjD}8zeAogDsntG5NpGm!N=GJSQl~{hAxD1V>*`^}=VfQ@Z2fP4U-9}mJDnJ)xvmP4 zbrCN4GJ3gMviZM@9hnbd=D{x(-B;sMl@>G7iwOJlP4_qy z&0WT2Ka;n|x^WP}u?_~E>|1n=Ok!WDh^2z4hBs&uOVxg>r~DD}*b>L6j!6#aw*Bx; zk<>P#2LsbP{)^VKHR`hu3$TxB-HDMqofkxXnmsbB<(c_S_3qx-)wXe84Qm?ft6W2t zTb-obYMlIH-Lzj;Uei``89fa~Gk2h0Jicn}2#}gWsX71jLpS~PgdDJPW5hmP66T)g z)3i__XdD!S7|j{4R4tixvi_xCfMLZz>j`gIaj^7U?xFoMAiNKWpR{ReI|AVkZ|Utr z9=IxzPY(%yi=N&wC#Zb3a&9|^JN&ioR0&Sgz34z46d7j6UZ7k@bt%IW+d#FsT|kI} zZ>2wtUTUv2xD>^Hw&lmnzJLc2=_#fb-5G0nK7p%f_FKE5gHQ{P`c>ie(am*r9zb@8 zFz*=veh3HmZDRrqr6_zMZbLyL( zEaj&Ud9ieVCOB*|ed*qc^f!Q>J=rb9@eB5tBK9MSi76n@@hw<-a^^89VytawA%Dr4 
z>HkKt*J1C2=HRJikp4hh9O+P3ha!$QKcTWb)J2Bw)isqW8G)iS*^KA?$1gaiXPQS# z-Wj)H$`ZNUbE&*8Vq(j(iEFgS3(3?y#6$Qv)dgaDzbT&yh@)#&7{(dr3}+zO z`wA7wW2e(gqTBkKh981f{!?kVM;mT!ti}oPQlOIW_^4peu$PX4d;og0N4=JFy>skG zZ5&xFz4ml_=vzLfCb*Pvue)Kyv){~|mVhsqLY6d?9G66s>mzED3FD z)fi3o2hjq!MB@C!D;er#xj1yQJLjXX3e^*x@r;e%Mxw&Ej)%yTt{N$}RF+c8UR&JB z+?a%m8hEuCqyIUrZZYypJ|V`syouj8841?&=y(hoF-e`W<*Bb*!Lio6#&6^h;_+y& zc}K4`h~Y3gjBlT1g?%UUJ4&i!GKWCX{eu#d-}q7MC)}q(j$c4WOl1X9;aYYz$9b$I zqo2LlHAY$JPR89>oa3WdO_nV!v>2$I#J}jDUUU;wAAs+T)xW%IQ?Vn+e_Ked0;(9- zt+JAlI$gBTU6jtvG+K|gQOfPBRHqI?P6olFSO~{Jms&;DHI2AzvBAeA9as2v>Ta7GizuCT#RI$F7{mti=_@mPK z$J$m7i^&aa6m;^&#y-5xqg+d1$_iK5-+0)3Luo72CG8jO?qqeW;tj55G2P%Jzs{c! zRN<#wp|FpHi_NG^xz)~zxa96XrR`-NIzJBf%n?LWG{;5QV{taW7kYg9cJ9%6(tw7w z65L$^vyVoPcu$rNAKGvL!;U~0aKT~-cZVY!N{J1=StvC%tJuUqXwA&&RW{_ z=ethA2J)UL3>CayjDp)_XR$B5Uwquq<5Qm1_Y^4_%-U3@G#Yjqn7Nf0LBuVbc$N+B zgHy|yg!?cwq9 z1CkhHBG9M--@sm;3O91jiV?`M$ZM)FZNhzFeMm+TH zfk!~A9j%PiUFgVwdAfCS{i}xw(T2HwkEPjFcMM+q24K(Tilp>^qR=}qXb&+Mc(Bol zA+Lj?D61+{C2bz|zcW@#T}ow9WR{L_WSS!6@A@~!>S^ueg4>J(YpK_pE-I6WuOwr9;$Yg#q+2<=X(&vg*>G#Z4;6ujSjavizF-On4%-=)cVemSurtdrN$e-FDSe=cknm!{Af=}0L6Y9Ht%yT|) zoKXR)28TbV*(w>fcsPmbyU&e~wV?tdr)|1g08}5v))_H+~8|plbsiAQIz+?4(rWZZEVoQ z(Es|(heR3GW;*EyHG$MJ5#HGKNRp+}E2oucyjSFJL7q%qpa<1o1UbA^BzUUEi3ycs`^+yu26S~EEYIIynV*^4(JjNbmZSycJ5+(8Y}g(_43qsF4$Ga zJ`C)nPbYlJvg|M2Y9l>U5VGN+KQnki_A>1`MMil0N7kY;mYln)D)tZqBjQ+-ftsU~ zbB^1c?ypXn%#Us?g?Uo+-VKfEcE)$l#IVqVXy`2!jV5Gfcb-l0l-awWf}@UW=wO(L zW1P_wQb9Q7#oW9I;nLo@Hig;Z>kdxAUD=6XM?knq(9UlMTo zL|xxYC6!ls=K~sc^)ePtw3di2`c3a3HvZb15cO~I(1_`yzYS3&`)e+mNYxW+u6QR}Yq?6MO<0n~ zC-(5iWj}&~;boD%ulT=Aws~bDwkeHLM4*F>;@cv$J3B4Q$(ZayhMM7PPOWt=4XU1i zbt<(W!zH8DpI1KHBAK-_HA@ z`tjY-SDvKf@IgQ`yD?BShp2fp)y-}3w~U~+cYb^!YSr%3XS973v*OjzEyV`o=5Kbt z_NVOE9q#fZkJ(&#+Ad9LGsdPF*+ZiVX+uoUFxHh!3srH=^qEMT@y^l^fmYYsJT1hLjW<9<;l@-*3FV>m>&}Q2bP5 z7E6!TJRqKWmQ+7hExNnHX5Hs|Bqf{YwivJC36yaxFHK~N(bh|US^g48&w5wLzns&9 z!Ea5=+N_-xdM%(%uuatdqvvxAji>GS>{nsS&cSz=$X9u(N%s{O%vq%iIqM`2?(mM`}>!DKx9AiKPbh&kBxf?3_qJAa|Ssh 
z*#Q*_JBp>ZrZ9L54>6!ve7$nW_7E)`m8%6%`%x^P3H zk^{4=Dn*7`HCAxKOuc%mUInj*sc0tal|*l@dwCw@G(QuI$bf`AZjAZezMws){!DII zY(*5_k2c`VGWUXou#O9;AQtytr(zmMvp^tIRaI>;?U@cJ`W^gv9A1 z3*+&+Mise8>#`$mID*!}F-PhXD)(vTYZ)ebQS`uQ%Lc=z<$k$QxaN}kO~Tj5!vumZ zpTv7FGMX)=fSI~ua8jdg(=3epZRqNxfMYdz$!|%3uQF!OM|u)i2XT)=V9oYaQ26ST zLjBpcw}qZ#fP+M@)MJT zji2A%xLeSbzsb%Hbv?0?YN1P%96GaJ>6bWr?d6jXbitjSTRy#M^^B)ZTD@py(mhzb zh_7OonOM=wh@#{DLat_x4|o? zoAIu$t_KVwo#%ijJRb%+nhG{T!G@^|`MbJkb&^y`H81Qx71o_!bnC>tdGL7msc+%j z`V&-)diqcD541dQ%)pm3EU2*vSH{iewj;QW+|;XbaxaGVR5wG{yeJ2=lM#>eXapfop8 zeLwPs9{%vn40d5B`@NQ}#L9Cv zt5U^#sg)^t|Hz{Sxq6fYr9aXNmefZ~c7nT=S_X$}{FxWc79;)Zs=^_)4c~At%RT{T z4LQjS9esXbp$j?i7@W-rMfYhP^pATF=@t~Qr&WzvG`-4^G<8MjxHFUur0QN8gXX{Y zF`cr8^j=lUwO72xN6Mwp_bu& z;p%c?2jwYoqeF{5Z`9y-MvV>dW=(md6HReY?|N~uK%@5aE_YM=)X1*A1#sz|^lDxm zK>Jxw+B!8c>x+TCgHc#6){#%EXJzhEr^T%G1}K7s5`jVdI>Os?MUnG;W}S+-%-TXB zrn|~=_6x;B;Y`=8yAqXN`sbuvPG@2Xo`)FGHroM{OUr(H4~db&^Y;J{YzIR=cQ(Ud zH~5J|6@x{cridV0Fky@Y4x6oqhD9}i`nkq9 z{~eZQeMUzl)Wb+4FT@i6bzc(V!KxF(&rE?Y=h$RUO#yf^nBiPAIYd}3RnC_9pfy+C zRqv6j<&bMkdv)73U?pG6gsT5tXPsg|a+?%ZBAEOf@d#YsQMv8*<#LCt3 zZx_P%Z*Jw^90~dpq(+SVhyS-r&TkdhPmEOE(7WVd%--CK=#?0$>@w$Hs|4$3aT zce;??@#h!Yd{O77B5jY!mG%oFcRq%4`Pp%iKedQPWllg9r_tiw8u1q8oXZf57=vE zvERZd)VR1$USZ!Liq-+W6=z?UwFG`^3$lMs2w;#$1MHvAQ;@~QcjR&ugtEOpY1=Ij z2RjvWLw88a3uJ^|#uVi)oUr1hb16}cS$(6cNPV3sV&L1rbmVQ~>V7@oEUTjvVLy zBMOYu*MMqFGv(P^u0u)An^OSxtCcE?4?r7tTOeEkQ{CYjlAM} z%Q1Z=`^}IaB@`HD8A=kfUiV=;tu@No(YH-@IWZ~k1>q78hobhVAO1BK#6 z`E)phxHW2~`(4E7XuP2PDl=p1=tGJzM$Yot`n5WNu2QCaZbjP)@l|6Lz&k|nTQI-Q zBJHqA*Awr6qU8H7l|Ilj`VpQdh4;kY*L$VGBzx!YW8$1Hqd#=xM%p?Wr3Feg#fRKg zKRo?sFbKRvE&PvQ!2g$P`4|CW%w-Nx!dWMr`RBZ{k{4{oxHOlGt#4QrgTzdb9LhQ0`tKDU8F(wweE)}Gokb~opsPlGXIw4#}anP%Gn*eXEa!g}JNBVr!`H@aRA^2x4K zLqhEz9eBg}s6`?2$UJiUM|tAE)J*Rur3k`a5=DT^znmR@WF7pX$;-$k?2`Rjl62L6 zMe0rGhtI8n*^gsd>wD(|Xj(pGh8kl^WK`VvyzeXZ{kW$JW;umkY4rCKctSa)T|RKx z>@A^Ki53bQ1lW>V(-;_DNNyi+_kJ%^1tm?GFLkRCGuVus;Z`1VE*2IEoEqRaAv}Ut 
zi*YkmYS`;(hI#4Hu35igUq3bMd6arU3NZh8?>4MilqpD@zCim+!v9}~LarXx{~8D( z5%J$kPI8YblCMObE405$3(WXo`LV20;p3aoaC6dVBZYRJ3YEhCij!0K)dr_%(xx|v zhh;>{;k=%Kj1iC?#E~T3wWV4XZ~R0pWhW=&qo#6d`;$Qz z#$V!^sLhT(*9rcZ|It6pYzT% zR&+V8ZFE;vwG(LjlWCMjertDEa9wgwk{z&-er!v}YKn9>^6x{>Os~!tv1maVi zo$gx@czjV}LD0v#ILy#dizT8)nlw0KkYhZqm88O3$Lvr0wIdqO{^1-}G`VB9aIYCxV1 zy$^G{H{HDaGo_1M`^wFDuF2ODQsLkPcsbs<`!Osz8AXdf6gi9 z18J~t=+QTsp*s1bZT3pMO>}o%17fNTuHd7%f>&w(P-au zjZ5SaHKf`8U$pT5(%o8pk#Y=2_F3l<2+R?aLZ61S55}#A7S+WOrtjz$KaMD{^70fv zb|mFBisQS|Uv>`74H5Fo>$w$8^cez zxNw#|)c@ArS%*c{bqybo?(PN!Y3c47=~B8|x(B2|QW}&Fk(BO`9;5}NQ&PIS{6=n{ zTj241ukZW+J6sII%&gyDd+&AjK4*1-50K1al<;{k$R0IHu8Jbn!G;^G3-O^{y~ z0&+ix$p;G8RbFMybYPO#(CP>YF*368j6N2sW9Q;15ufjBX^|Cr%J!6oP3SXi8TkiH zOMJEP@`|A#kIsm9;G5LHoE~rMc_|ja@2LIHR`dT<)0C*z=N&BQZ+885(O1*tlufMP z<;cudm%db*c*Q9{V@a#U2o=1w)m6IwYx6PE0e_-{ z_qb`kLqK4HKXL1vY2$2dXyO3g?EW=TZs|26RXN9{@E}Xqhg($$eb)zVghl6!Ib7^bL&R9u9vO0E(H+Y>lhRM7ytg9N` z2M%TpE1I_J$WrFis?)4j!;agncjxt<2%UJmY`At3NN3{O+^RlJ+IAQ6yx7zeY?}F$ z{=!uQm8{WkSAEweZQ)?atNC+X(_BAk#i-M%K}G+V-M&|BNsfnGqL6k;&bfDbwa@@( z;kHwAtri2`<@l?w_F1~e9LdvjDosuKO^aVf4HIl#k}o~&d;3>!yq@(I2nF#u#Lqu= zjo_@vRjZzR*X)*RCgb*|-~F_9Vh44;n)R9!e{pF+CQZWMO_vcZ-D9%gYKlL-sfE8J zVYy6?^Za$Pt?8r<+-^h{Xo;&nVy&hSfuU*9EvScT#QSiMx^Ugl^D0S{KkrzGX%0@k z@zuffIOwG(349-j20dqe)+SxS>{2i7#mlOf=|5@&a<97FYu>k4{aDAGv;S<5E&Jy4 z=Hlw;H;3jk9*=8ng1-8f%*K@<0)5WRsT1mkMJ?nQt3LXB@Y=GB6+8Tm*c({AH zXA)<-UJWU&-j8VPiN~4KAyp~n$!uJ^Ju~29NB8@4w>f)%R%ytMf3dxexpry1^1oa- zyO}o)vhBe`i}X0SIY|dT%LdisZ?ZPb&z~5UyiWPDevzpvBSe>ecCnIk#c~yB#$>}w z;u|Iz6}RiN=ii4{zd;vaUI*+xNUyjS?2U}nC5rh%sJy>6gHb1<&q9rcVliPX?I&@d zL50a2^P{bxC5Gf>_clHFJ8sds6Y)_k()w`=EgmnE!&l)TsxM&_M~w%45h96*8H72q zn;8pW8X?6DZ_$^gAXL^8#VeDf#;tG3t%iM^`A9`;er5`z{yhh2=3rpy_y&@WELyHW z=H@2a)0}FU#{y7;YI+nkjF~d(RG3IQ*os<@q}E!PQq`UFqiI(2OqW0J)9y$KKBMZC z6vUT`r6nY`#*=0I(&bNz9r={&pshc_Vj<9Bmv7Aa>{x^EVe(vWZ^5h z@dfTI3gnuX5w5_qN$u_GHH<18JI-<5iJI{&xF^#-qqqVN`PRY~71h&Or%z`<7`$>} zJMm+crbCzAUB_SS*}_OAmdNEKIkk=kOlZ)1ZT!7B-(6cHyBn8dI5Y7YpeHWt0Gtur 
zG6y7fwU2s$mD3qDOIcW`ji!TGw##o9?b-Zy8X$!ErMfc;%g0<5dQDzD8^k8L__6zw zF9uSN=S-6HqE}Ad?ecZG#;)skQ#scP&|>xJ=H`aYqXkm(H*hTWRb05Q$WMPvUSo@5 zgC0RP(ltv&54z5o(^02g!{jVNtA#fjupEd$jLMd>C$t*h1ZFLO4DsN;e)%xhrqxb< zM!NEZ7AVNjVxI$I4TfYlL0*K1pB*Hj?a?AY!3GVy4;H1h715J*XHVe5P0Xm)7u6gg zC${4}5Z>PZnCvfj?JZa?Yy=Ml@k;cxMhq1?nAlD#))xMyNS$MqRg{`sh z;~9vtc_d*3L!%?dN6@sdKD;fX20+e)0Js#+DSJ!2x^?A!aF2JaT&F+G4Oh0){aw`6KoEScog}NgZUo$%RmDuNL?Ip$RPRzGr7I?NF?ePmHGwoRug6Y+NeB;IzD;Hf7 z1@OYV8q&@dbU|;-t`Y{RGs>`|uB2$oFR}aUQnXd+aywmj$%W`}fvOC6QB%6JFA~1V zesw^isVi9MK@e06HvcG)$wppAX{n_+Oa=W|6N~qeGW|(@ zkuQ}~Qm2q;CmCj?)ItjZ2ipx*a0HJ$Ip+<|(|t)L3IztLi4(nmQDreWGm|pR=jF5} z3-Zu`E7PK9@ILB(d&lkO@erc~^ljoJ^Ov0J3_=-O*#-KkUvnh~yAacP?LfGvMI5M^ z%rflqLQxYjodyhM2w^ycHY?x;4L~XytnI?{ps+2nhj-V^vVYc6%R1JUzq~S_HJg?= z!7WElWJJ#Qo*L}xZ(Md&#wG9D$JbfgVkQPs7m4C-H^@r*M%7=&Lfds9)*mP;JJ#Ee z6<);fLQm{RfMH81EbqC_cAXEb@jEOent*;j@=udVNan&WKzn5}%r7vnnT=nrjRlFd zik%=*h>xcY5=7~w0YHR)T15z{kdrvnBb`~)PWa@LqAn6orlFuXwtv06qB89lLrT#x zlr)$arNdOkVHY5eMRkNB2~W%gH9duC>fP) zUv{e1K&kF@9UGIEj_=$o6#53lpEqQsoY7gi*k-J*vBEDo((g-1KlhOn(k#NZ`ygq8 z(!8o04uM_xnM?9ju@q8eDV@J#?scW+E97k30}}v`l|cE_@`u-nhwon-+9)8k_+h)P zM-y>`*&e43+AozohkP%CX|?=B&(iuxu4yxLq1dkqQe$`l;#0=uOcOrllEF*y{ZrE6 zP}&)}1^S&vj@(P*>_8IehTSld*E`Uuk@`~RU5McCLK0%93}w!?hqu!ZErl!Ffdwy1 zRJI6kVSYNaAolZnVXnPQurrD`%pJhVI8>Wwo{r=jVKn`%caqyjS@Rmk9&dp>5Pr7> z&jo`?lYzOSypE-{HWwwp-wA#qc(xWo2?|HDxCRMV8A>W3NkG=Dgr&FlKuBV9ZYfN! 
z<{JU~AtRoz_Dy4&*v7VXiDn2UV&&Ls1})E7xw=V1*2T_-I`DbB_G}d;z=}i42s^Nh zjMIt)2UW~mWV9qfw2QGx$vLknQYo?YaCS95i|p&2+TJ; zO5JTlXM4NZ7^PL#8JLRKHdZ@;^z2)$>vG`toV-?U5^;aPV7MsRai)s9cb0f&v%v`Q zBD$zI1~H8AN}vcY%n$hJ>UYl!qllzVVRE(gPom`8*3_8VOYJt05hsl0=o6Kk?d3=} zjz9=xqc1~n@VSU$2I)Czp*c1#0`nG50tU#rB6hHfshxsrtQqCH$+?)ar#+t{q*m40 zn-(>r^G&4Oxv1{3GH<14xL#4%z81as-(Vi ze5TdgP4NHhJJ+gj>TB(7@pA?{Xv~AH1jdQ7v6{15w^EFf(ZV9#mY#$v|;PbrG z)9h=J{rmXJcD1!gRzR}O)YZTRg6K!F8>{i^*Ze9ayqGEU!lX^GRWRW>ENjY|`POXQyiM^*;{^Y5L7o1f@k(=xV&DLB>j~Yg!4F1_ z)XiMW&@=PqzOh&hT#+*~kA`6@#;UZmbsRgr6nTTJI&UK`{3F@% zX!-NmHTzqUXC+&yeKz9yake&Glo^Np@c{$6;149xBczT7w=m4l{PXbuc|45o10OLe zarX?Ft=_;7o1@pZw*$P1n}vu`KHNYL9Ee2wAV!W*LC%X4u@$0=e8Gr{xa3Z)!*P{s z)-GIvFJQyil7!aLQ|YCGB$0~x=Xy5~JozB|un7T=fvDGUc1GVO6fUA2^Kv-_s<`mQ zi1l9w)7YVlINQxbQ$d;9_>^*w`HLb)+;aOMbF)wxK%)^YMHN3AOVWf|~ z`YxF_a7NUW-YOW@KrNtDQ$}dbLD%@B;#MhL+q3Owrx%GuB%8BLBAIK2ilLrw^+Ux~or78= z)ojl?$!>G6J-6eBR~|o1CM_It^?`+;%s3L{wrV8`suP{{w?tQih&unA)9RjfCj6|L zOW&2yN;LHt1VD|Ic*_8A->2H|*51M?=SAU!?=Z703o_;apx2KufhRw1yW!A55Lv|K z?l`6}g*V}eTs2G~0E+u@k56JbAxa17ujvoNAudljWcUN(5+0_U&dCY<9V>w%EK{ z&(s{+hcH4~DHOX`<8Ij4CUtFC%DpoaCMPM32v__9n4HoN#8AoQo|+<-SdE6R*7?V8?FI_O@^e^HxI$8_WmXJk1kOc*qIkG zH9?T@=2s8XdC`+!brOYAm$!|BTqm+_izH?W%kKokLu=`MF2=X5Q$P_rM#cOyDJYVDMlY_E<2E-@T0Qy8|^W)!=Rb$#@av%Hh;Q$KbWeVd|TPw*@8QC?EDWq6M?u z&_(sI{F3Kby3z$%J+oy{c>aA>s*mJQP`YzbD33nsU3qcpsXr9(smJu*4px^;B3Va+ z0yRq;5e(*|Xt~zDU}H&c4ya6w;?L6N;tHA{9V_tkSla03!#vZ?F$#lO4kZ`-5d&bU z$;Psf8?KjldJjST{&L$Rn}u+}7?9dGEU6uIPB9UJ`X?-@pW8|pdw*h2?4K=;Z#%)7 zmm{M_^`?(ImdYv)OQP$XcZB+4iIb}2Kzhg$!@Mlzym44Vy_ zQ}JWmwrxJrdOCD)a2%<&=!vI6+UvB4mX>yrCM`w)TEFgSPM#G6?sxz(vX!E`RAsNE zqAyGIuFt>p28A`c$%^mz`mi^5J-I2T9mAuc0oGy{l6%vQQwF*sB^LQX37VeLIFtp8 z?MuE%j+YLLU07c~qs9U#Lbx@BkJs5|3n*p_bTMd}v0?*Jr;0HdZE-sCe`7^+k1e(tV_M6j`3xZ1G>@{LntBh(n-{SeJw#74+741R(MenK$F`vGHb zjNt8A^?-T6;rDPB;l%>8A^6WCniCbZ(g8`{Q9iIF`2*N6yN*#Xry8ZY?ggQvqk`fI z-})Of@}RIy%7=LH&v^DQ%XeeO3*Bz4%U~B)MY3UQOOabMl%yi 
z_?gH%q$UK~^o(XErq}0mo!HmTPkEIEMZ9%tBjO^@?YWz*a)4OI^ZdgHyk;e{uM)eg z$^bnagU``^Y~ff@m_I4SD~ng43~@X`oXb?$Wkp_Cmh4t>VW{Ts5jF@C`|#cH!*ma(>8kR4BEOBOvo-So6G!gdY2luXvANTH$$BT#NheV~T#)cV zb`cl2dC%vd$vL^qtJ%ES=r&28^SrLz_H9I<-GbogPWEYwhzM&$Q0ESaMc`nNafbfn z)s+k}f(FF$xuCFxQvA1s+3p6CY@vfE`ek*k4wmuYGOgT8h1Cw8KrEo6a&0}f)jn=c zxkHtItGN9n-Uo!Lh|&vI1u4ibrd8j-!7==ozPQI?bI=+8mQ)|>$|vo;EZ1V_=)W>T z2QGW!NZ+>rTscF>cHM(c&{^*ij}L74E^77BM&#_X3CQAWPp zx7@Hlip$S!lV3-3L5B6CB&Zbr;(hanDFCYz+>heXO*G!cOk)d$$R{j!@Fb)3g9??Z z47l}Nd@R)yHsIBNwJAJS1shl$yDkjj+E8wTKf3QM&!P^K0kKLaX7qWgZd+JDJ>ypO znT(yuHczF4;}KsfguGMa9xW8(mP{&XRbiv=FU!ygDp5jCrYT4uWsnGhBaSkX+#auR zMY@!EEK8%as`4ushP0IMc*TXO7|Ja7Mf|hf*RgIq?}Pp`WZ)WYVXL*ZSnuyoq1VytVv8o5lP!R3mr< zJY9I3KGzT7iWtRW9-vDdED$o4o3WNX8igMmL-xgG;$SW0upXT*(LbpMzWOTjtwYV* z-*VNWG!3u}V>FmQ#`;r(j)R{~Gm|%iq&|DS!wx|g87Z2?E~~D`*7oIs#7(^n$?3Jy zymbX2KdtVxd6s0up07rs>LrOg-bTbFWa>WZRqL7uxA0UT=fyN6aJm#hIWa@>~fCC(5imMIvYt>2FF z`{-Lp4#<`*)5|Q50p=zXD3$;r`UBEcZT0;bVz{qg2$sNqexo)b5IeKAv>2z4P?Tbc z6K7>WrvD}PCVYHcn2uxDkGU}*$Cup| zlFCjDSW|f3vt;qv9sSBf*LSqrdXEZ@p!&zfGBuSKp3&@AWm`|wm;SIpW`b)^CTefY zNr4(NGGo7Ds8E{~ABTCxm|1pibu|;$+=Fi{P{nH36^2oH>Jt(=t4Mm~cQ7knu7Z!2 z?G7Sv9^ZJWbEqY=9yg^{Q=gYTMuM6~wc=)-w3>8slw+fCl6z1TFg@O{s=M|jMRhEO zHox)m6sqHF+p)&bQ^57Ou(+=9W93TmbmY$I_pAZq zxA;p$vyovgBh@@-_3o6`QwY}e+>Wjv!@rpMzj9ohYfv`bBcUnf0w;s*n=4?3QGzMU zKK{&XPgZv-_Z|<&;^m$v`NzYa@=rQ&z(pq-QjRXTZ2GfR?6V%cc$^EL04=p+M!L>J zq0AHdFo!i7ShoEXoPtilvz!%uUk0G_CERf5LsjcTujySB|&AMZ!pY89W=~-9{DzUSo?3G(* ziXie!)T7t|g#>mpl@8g@;}PJ_Bm>*eD%WLR?zu}=T;$8CN%IltKkH3(B*c}~tx1XA z;H;pwJ&sFW0!Bd2bHAphc5JBjboMK((aezPL-f`JiqkWw;G*VfiVa_EJ|;uxoa&l1 z3>9)Svib!ZY5YZwHOOE=%FIPQVT}WBt z=Wp6;n^yUlh>e6jW3a3g-4z!y3w0gJrDxjp4F}`t)9ef+rwl>?CLwQk^VLM`_>=GM znLjLj4+`jhG?m1rIArY_17Ipml@1mxWz!F2b4U0ek~nTXcCX+_}*P7fqf5FUbu%^H016J1-{(W}MEFrQxxkxIt4> zPKa@b&4}+^qC=8=x8q1Y5g$lL$vYMo4ri32i9R{^@9#VU@jn{Slq{m0V{%OTNBmb~h(ucuFCt|j71DY6j$nPyZh}Piu7&+O zsDtv{U^=j<14Z=9DeHNH5yV&c9!vPZ(SQ&sKcA8E0ge*5Q&FPCcTp$`)koO{<0^Fm 
zm>Oe-!$%}c;9NdE{jjr=UIOz1)gRi=-vD9C!Co1mY&7Uu=&a4FxJbFL91BtSB|M-{ zAIE4+tx0eR+JBnVQaxH)J3X%cbO37s11D>T99sb^=tw*H6c ziei1iZJlaf!bdWlS-isZO94FzbZ?Q0#GkjP)#VH!lBT#@w^AnKg8W*E{E9N0_8%F{ zu`xxSHD_0;0Q6oL%cnsj>56Q=6-S?sC1%SfsZK$mEg^ocvW_b*vKO^iG$V$MG;rb5 zT&pOhlu!{T>P^T1f*06bSvZd!-;8L~yCo@0U54woO zL|7BAGTN%@ZA4#NtxfNd8TOV<$cr&MQ=4Ipl3U?v|6Snmn|Tz-ZtG&!KH z+ss)v!b&xG54lYM^UcN5@zTwm-_hg8pG8E#SZZ!2?m_LLXpqV#fxq!jVB|+yGtzo- z+GYBSxNiUv1y^aa@Cbvmc+U64?W;A!YTNwHXih6)+L<8=n|FW;Pphn5(4FVC1ihY8pr@}D@JTaWyQg8pfaS~+0B0Psvf z{6`A>ED7%S`o%xe;OH@DYp@c(O8=Kc`smkG3J<~j#$N<^g1Wz-0s^8p5gP1ciGRxi zgCQz?{lM&7Pm;R~HB}ghN0y|Y6`kFXARwXFz+=Ph!!HH@gH;Ri7b$XRxmE{ounv5l?MKg&T6`(jO^FzJ{0JXrkKto`TURcAUN2R8ze7Ds}5PcU!5{zgOx%R3k7|9vLMg5KL|P zFRB+fX~u28{?h@sj#anRg8!Xr0j5?U+xwP*sq0|#yx#$Gf_Lg_;_#;fs{T&B+X3qO z6-4#mW}^g?!99EDkQV+;{l}W`n9P3}vmIEymmkVJkHL^$aHhKZ>hX!(Q}2(}f!zTA zX3PI~>bZ;kMZMdq|EnG&1o$+%Jt!ayQb*)RtHHk9;9uO|um1DLTKqTYP9~yfOqO;g zW)H3HVIsai7?;c6jE8CY{z3dv<6#PF|we(LU6vTpUDm&OGj5yfVXNN{iMUJIC4NZxE8u=y!Ck;6$=!bc9cA#a z{QJ(_cg3s7AC`aHt`7nCouuyq(kLDRZezdyO1bYDe3!Bd9!UR$`~GUo+b(&i#(l5b zy8!SO@B_fD8h@ocbUyvV!Fv3AD7fc}`cR99n=gOlRXG1Ml>ECD^HBKV^1vVA9iiXC z4;Bmli}2u{`wwAS_&4F+9r#1}`*%ryq#+DC>4oZUbDosFwh|;2>5Nc?lccr6(f|Sq^l%gO~q)Kxjp=FTJ zL5dWqF$53<1VKvRPv&zz8NYu#Z>_i9TkD?p?%j8vefKfCdW;%KK}SbNk(8aJNpU0? 
zNzYc^_O3pX66AYnlK$0WnkOEu5?L>OnRFG4!uD%+Dk9hB z^p*Iiwrk5pa?)Of&jcI{q3x=0!K#8pugTi&^e~y{Inu);&RAFjRn4C6zM?|lnK!}_ zXB{kGnU_~7Nw69O*bP|c1r&MhILoD;@V7t^gI*||=v})mceziqEMVPK>2_^hs%Sn; z=kB4CoJVjMs@-+KaxSJP7c<7SnPYU7mTsY+QfQ2{UNtHT3Zs8T>>^yA!-0atx^EUb|@qrtq;@GetJhps}6dR z|J5L^!pS7j-Rl*1$p{z{pq;j@lRIQpd%+- zP0wi*H?)L});k5>R}q&uothK%ymy+`_deDwCcC8yNkzNh8?5Yzs>ia7dt&QaMt4Oj zpI*-$=)AQrHv@yaqJ__J5O=5cDanXHm=*%Cr1i4?7Lot*$lcEkZtv~n?nnlsC#}*w zTng}r#G|iM=>UE?2D_Lc=9`h5l-!!0fYhQf&z-;dLDCKi)p{f+A;QlU<&dzA?z_eC*R{EQ4F)M7%B; zyx94D5emnn4tCzYP_=+%AJBa1u#vATSY{({=c-AYVmu`UML7NMWZ~4`1V2jcnzRYz zGtzFZ?g6perpyA7kgHo~&1%?61>JTjETybFXzf&N1`Af%Pju+9>Vw1ty4U*r8>eqK z4{oS9PGyMd=j$4O2k^&7U7Np=0@@7-Hg!!bU^;Wb0h$1`h|=k6kTld!>RSHuu@rAN zzpvnRU#Q@KDMV?6(Lh$D=6ttcvT7nkQ+Dq7C-9AEuH+ZF@iC66ge6QXU8y#N!- z>{bJJi~%Jf5O#&;*J5U$Lu)r~AIdt0So#=#&?!cl$OxHXv^+6);iAF7y=%qKMRXrE1k!~ z3}+`|*GdWaiEFbH$Thcl{Di@55cljvZ`ZccB9U#MZ-#*C2)}4HAS^3Egq~EO4p5{u zPeRs6ScTn(QoeKMs^gD?cmow?RZPwpatdA0ot3FIeV8I10)u9E%NNr1M&3(oHID&` zzkb$_9j7T9j%T@er;sKRm1&n^J5sc@5p48+Q=Or^TGpodeKxp)u}KKsJG9;-Zr20| zJw$JnAL=(ulPNX6X^8`p)FJq{lsfr0rF?t?T>*-KBQ?9q=#`zPPY{&7e*|H?^~%ObiEf z)U{mWmYuAd6TGC?6kXGR7wn*kuS^NtjjgwAT5aGEJYhb}qI^@uP|HXa5ng+}rt91I zh0_ReqwF(FC+v0hI0gXcd%(cA54oEP%`AJ2up85@aPC#@kBH~W zX{{$4>w2WC%3I7jcZ1k!oOEHZXuON~#-kd$v+?%8%9=EY@W4jFOGPVQ&dsM=4qa+z zy9495Qwa~d+*#*0ydw4ILJM$t*E{a#8=p#2+G%6XJ$c#jJ=HF4K#PLuj>f`U|_4ZhU}qY~aB;1Zku#ml+O;*jYjBOdE(*ykPa!cKL`L`99&K?~2e@&SkC6 zV+~djNBDH-L=JnrTcXDW7y0Q``2dsW;7F5BphUmeQUTp(dCA+k`QsIDR^qlkXb6qNwX_EnTM$#k0l7jYZIz;s;|Q!W zxAXdf`wqk|QQl1Ll$N+cRIVHI5-;LWGc!DmI<|&AgP_-m&{eS{P#Zfn-Z%{Jm#xbY(3iiz`XSl^JKBO1U?$qdPggfVRkIO!?ki zZH*o<%2V^Nc6A54gA|!9vnppEQ0K%7u7K*~$0Mvk@3K8Kq;ujTcrlVVAfxS~vMa}- z1JBHZgrE|BsTL}Xar*dJ6jHCnF`wu~6b5i#I?3R>rqt)X-wx*3I#V{K^w80lqG11~ zzn+6P0e_7NlfZ=kGmc33j>GS`1v-QM_QA}BMM&pPo(c*<4w}vWQIP4AlkqDI@-!VW za%GTb)9+QZx&(zjRka=h2LeX9KB!82_`Y#4&~{fx+Im%_dOc+-S_`;v^NFlW=wFts zs_j%8FA3Pg--1h`76t4Q$<~vcsnv}DPkSE=GJ()3Jq8#lK*VZDH1<}?TTcx-kwwbK 
zLYqdt*c!#Shlij1uI_D>z@h;B3?g&zj<4OpRmRJKwKcCsHMCbOFukuqV$D~|t6}oX zHeN8xN>i00wg!sZ2_rU`-IP{&f@n|$l^9?1z!gFs>*HFlw(tZ)IL$s>8U5*f1GPjd z$g!ff@tpY(L-Zd}iw3t5>|wFi3AY05+j?3OGMHEd*u^2ap~1KzTC*L!&R+EKAfB~Y z>ew*-z%Ao-BpqX=oRK}g{9fbdYHB=wJSo4^OCXRwYO|xH=+Q!+2(#?TE5e6$JCoxQ z0?^rY0$*-?n*YU}w>*zn#Hv-sj}7)xAgAXN_IR>U8w$sqo&>ZtHJQnjR6vTKz2(s!i`-lOGmdncza}1S zGsxEYXPhSe>VF%uU-OTea&j~BGjvI+@>l-9v?jlP>8Kz4=S<9w{rJ*9Ed8(fM-P8; zIsXh0&5!(}9`e`pqe4h_2tOm3_DA|pzwm4FQIR2Q<15D31v6RF3B2_b+U~u`@Resqd`s@JCiLWM0BhXjde&WLx?Pi z>@%SVlM>VL&zvo%^Pir%uJ4-fy1wW8-uHg*=YHjc?nv9DV=(P`_*2z4&AE*7L6L5oHj_U*+{|t!BTm3jR2srIivp=KF+G?Iu z+|!Quv?2H^U~sS4v>-WIySvgyI|Mx33iged(v%l=8IfEEbfG- zQ@O)~230v8Q-u4lODUovWXD%A`%r|VH~H#a)kA|DR*L6eYS$t*?Nn_sP1zDKVb(a@OxCn8*wC*VOacX`jCz%a;V zSAP$=hyeeZo1KB($AHhb&oa6O74uc%Skqd=Do-h{&=+i~87!nf6S3-yN(9N*+{XyK z-JagoNF&dW*eSb;f!Y_$1bX=*h3psQlVS>-h2aGjbhE!6KFU*s{(3ihVyTktET{BZ zVP$nh(JONn?bh>vMi%H`_aw>KzcljG@Zx zi9!ku+jDcCLz;R|s=OGgnzJ1GqvlfXHGp&Pk;=~MC=S`S8!s@CXf-d^vRui2 zL=GCiP+whYU8Mg8Nap(N$)AGm12ljPMg|~tE4HKJOQeoxeeP|Y^53G1ciE_@DSag9 zexYCz@l(_U|3d8#GPwu2g$H;9hEbvWgIG6Tus#6;^|zz-bFmkp+Ism{M0J`hwT_jK z0F*g0BzCu%vMSA=>fbt@skRO7O}Z_J2%mQr)Y)6$hFO#x-?K?26atTa^s= z*?d!+gSz@-#g6cicW1uQQW4QVyo|t6_RI5AME>ScV0eJ52h1nXiweeCE;jHwnEM8W zN5q+2V8)OHTGmnnBJ>In++~EW)5aLRYFbxP-dRaMc7YJvtNyy@770<8W?O4oy}=H_ zwc{1|(~)?K!W_FnxI*M?wXi7ONI$|LDPx2;$}-Wqx0~2Jr$j2ILmq9MgN39FJ5~6` z)|+f+ajHp4_gxNxvKnw3)_3<FRh(8g#8cD%pM;w9gq z*S1Nzv`{q}1>$Yil#|?}?DvLF6y(m2uR7qttragK+pv(aqbRVzP>`1R!ekC> z>!orBEDKg5_hs!|g)v`gFjsLXDc?ts{f7V`$rWHHrW7+l%nG$Zs0!6G}5H z$nm1r`G+4Ixzir&*Z%48`b)n7lufefOrl@=u*~XTcSA}u_3P)!vCt-wK*&krs2YBA zD@~;;9O_=9e$#7E1Yia~1Fe}l?l+cr2F|W_Xg#9EzDa+)5~K!WNAOQ?Mm!2kt{#)e zeLayCb|S~cP^Na1J<-EZM%R^(>jl~t^>jgy*eP~oMvNz#Y(_YNdHa?zus|z~F-DlJ zHhsQoRcMmNW%9Pr($M%*&i20rg!8XbI9LSFE={FPf`L)%HhR%sAc-35VHA&y z?^wLG;SdKnEz_1L{_v#x%4@iw#^cIH9jQyst0_yccm4GR_m^zMGwGX5c^%O!x&~ae zd6GA3*@#iwK14$<0OS^ge}D~!Pdg(&qxl)(V|%|D#USl{>-Mi<)=Ozh4i+jFzuGOD zFoHZGftAMKqbgjd{k8e8DVvStZn@ak4%_fmNfi5hP`pPe&$c*?YgYhn-BdU8? 
z$@J);cn`Pu0JihJnP;`k?SMJ?;5{ z^3(022`}pd+JOy-BUY}U%Pg}bT9^F=9lbL*p21qSl;cLw0wvGacVORss8-z5pya zwvZ|N>xQau<1$vtf?0SY+x`KX7R>56vK=aXiSIPC>b+5+f>hK9(cRFgORYDlwFTVO zIGkWUtNw_s#+r4}crdQyR0KJ*p`NKW1%Q5KZ+kDo{vp@*rCrA!p%NN4&Pi8%fuQD_*# z-y`(*76b_}n*sy(I-EIot{P3IT1wGXs*9j211;wP4jF!K^AkBQ#j~8XG~~g?FSu;A zMtcd!jwJ#zLpJMItbDt$~>hYHd_6$>Ktz9sfqe(2S<&Br`NEWADq!_2Yp*O_Lz+(11bBPui^@4^DF3&@@RsLSl`^;E=D zD|NxHjt)Mq#C-GI?i`4icTblf^;-p3Ut^8_o=TP0P^f?q*;_PpGMxo2@XLjWcc!?V zxhp%EvLz(gxdV6u7ToDuXH(p5u)NsE(IyeI$PM#kMTJ|j06jbL0_6Dn=Tj5}_X#QR ze6jE@wY3mp)K+1p`Z)ch4~KuxO?}5}*&_fwY?xa36u+gDB*5~VZ%VL^IZaP(4(2?Rqr^vO43R@FuE%7&@abbxAB7yS zA*?T#nVsURHnRHha=+#AqeO&{fNt%2;TH)z0g=At{a-K91zQP?m2Es0HOyil+^x5I z=yc``M*1yx7LT1hW5AmLqmdG|vLc?H`@DDJv7ZM(9OK1G+c5mQ#SodJ7~cX^j=amGPUa zIu`$`jX^G3(yI>BanfOwHK`?G#njfDHzvgeZG-T5HqeF6I}8%xmDMkaMlOy5?01a& z2C+<6g*H+cQerMfezDk0V`i&YH1{ApxYqHZk%2&%%qZ&%5s73;*y^pWKrNSwv!CWL z65DUunVysrv7Fyr5-v&4iO||<7D9of8#N~BM+RxqW*5*Tp~Ci0%k;40#0`tdvH-c_3r`b$NYmPm}>sNhap8%e$W4pQQ*fn9duRyT!}sXpKtny zh5j-BU_+&PukT^S_-Fn>C-!6dK^dfW-S3eL_%r=`5B@Rvpu|v>@_UG|{#!SH%syB} t|6Dm8?YZ=55_WNiEJTdOJs|zS+letGFi*M zL|MmPjWveG^3Qbs^K&}?>G`hfyT0$bzUTek=YH?^e(o2hOG(8>1_T1hiXKF&k{t_r z;`22xdsh@#l5{Ui)Yqj{Wr$qeKcT)d;VK^8(X-93hUDJ@gr|L7>FWW=IF%hvs?xoJ z#pJX%_;0SA8gT15$hn*m8>@;baZ>e!^uK~QqdzHwd94S*uS`_#59y>?ij_=;=U!*I zbT7J9?`yHFlxNW+J_iGMyOIrmbzy?%L>>NNh;&Z2)bNONhebU_EkS!v9vZ;@Odwpy z0a0LCQLP|(;t+h3b1w~&dHJvy7*V#CDO9Xlo{*iG_m(u`K~kt-C+fn71nW$ z<0tUv7nIlrOeCy+QT#MY84lpy{~G2hJI|YRJPUgnSh>**?<-9V`ujTv@-I);Fbg zA3361Rhnm^7=gHXRaNB)o3zva4?b?*-?4rzfE%w=%E3 zO~eHKOVm#Nh1#EFvP0VXxY@gVlc4*PSP8v4lv1F!)u0%NUy>U&^O!66>e8FoQYKJ! 
z^;n+(YvNKJ64~r5VYJpaD<2X()vi}Rd9z&wVy~;(k|^H{apZJ`upkJ9k8Ya!#coq) ziuni_Kpl+z)E+=mSa`!b0@Ny`3!@+VYU&_T#FCf|moueBjP+5%14tV08+vuz0#9(2 zR3p6n2T}U7j&knWO*a|l!y-E7ry(Y}IyAB2;BIliTYJK=9IjADrzdhk^bPa*UK#m| zZG0c*TKx{wBAxeNfFFi^mzl-M$;7kiR7{@Al0Ha|k20^pekDu@lb~Am9|z44fsp+r zRLp-tbsVI8qfhRiAPhlMCL96o{KD^ITB0YSs42HA`7Ts5!2~|C7sT>)dRABP26g8z zv6O-fXgAhQ!C5}^R+wu5JP_tdiLFXP%pC3!vKl(UxYylT+H;gLVWKXl1+~`Z&Jk+d zS+4t+y$J`3b7YSu5A_ISrRJn%J9TDmL78OSeQY)@y*L<7f+Pj)fmhX*$Jo zk)>aH6}El9yICXEb5EOFgdJ7^wx4fzjtW`lt>N5EziG<#<>LHVz^W(6e5)b-l1z%( zTR++Hku=b-^81#mg}4oPM*&kJLwnP6?C8f5f~(aHM2n&7kN}_njBV%{#0) z^sdw&$h5|ax2*??19-;Cwd=(5?x2+H zI0OR6Ztrf?wCoSoAib+B%dt{30F>d)S@lXz_1NC0Q1RzbGRXu+_|VK;a`W3ckq!*) zoSk9T%QwciWnlZITji2IPoU>iHZA@Id zYTx?Iz#v8U0R@RkC3y3X^h6d#Q2$D*nEqlC%G=-79!1)0tyc{VK0!diQx(yBhyi1+ z@R(aAm!oyA%F~Y4vtHwR8t=AiskXayzK|B?s6m%B5~cVuV|9hFqC&uat}^PUR7ln= zboCgx!MrpMEKU`zZ>nDrJge4QJKg1|Jw|ii$||_NNOMV^4wAdisdcrBZWSjIF9pt- zL))Bn{9XqspUXl_NSDX4j-;Iei3S#9uQ!Eordm4tj?u)W`-s7f|A46ucOcKs*Nv~| z&(;-9$boO;DNRHlMV*alt>xwkazXMB`4T=wIERJ^3W&?_6dGBE}1aOvVR|WNplnirPtGBmXmINm<5!jaM28C_z4eZYC%(RU8Buk6 zbL^W_Mxm9*L4t^KbP?<1AO@X8wsVM8{=&b+#lmy7woG&uh)tao{*0ea;;yYA`w^q)%)i4L4eqNdZ_C21?U2%aZ;TgB4d(Q2ZgPG77C`0RdH^&MBjSIRcO!NMO z;BQ>MaE$d%DCf2Z?C#ulNLc1-;u$oT_l#Q!wNh|cttw#TwE_soia4Mqan>%Rf6$M} zSv2jhI13_jmMDBMFQf+weBHx?MB2<=Df-(G5cPCpliD*E*C@EaXSkwY<%GJIbMyEm zuM_)tebJ%D+1WSkH%qMclO;g8XG&YEL^Nm|I}}ejuP?dpYS<;nnJY7^NkF4>-55Xf zI7T)z+B~3&tK~>r)ai26TvKE(ZX5c>*08{aIbFP(87iKZ3b6ARdsiKvLM|~Hco2#e zH>N*~HJ8o~S|4*mZEy}Ve1Nti(JKvhvDyhtb_|(0z(6&pd4rOZ2fX?l${OEDR(;D*Zb&z6G6|c2L&nw zl*}Xi#ux)(BSX>bRQA^4DP6A4-zLj9sJqnWG&g!Ui~7ka%%OfM9(txs6_biNj z9aKV=ck84SBRxSDeM)S1AfkKE5StHmn%>06B43j$ow19TyC~^DhXyUUOXnuRV=gTR z2g=huY3>W9VWljfZVf3lUZ0*`xQjxoYON`bhqv4D<5l&Irtliq;w?XCWACLdK3^|> zDUC6qy`b*{mzfFGx^Ko(X=}S^XQiT4CqDEZm^ts1;IFIeYjDzxEj-Y8C)>t@2R|Zj zpk$g<_A-@Ho}0;bz48i1mJ2@f&a1Ti#>n0e#S8A2DdHgld-zLmS&86Us1uDD(OXtj;mpJfxcY=pVK;b zi`i6x>A_{i-{YcdepAM)sl@Q?n3-jvmR+g1HXllxUYTH#bfg~B2}hq{7>b($a3uWh 
zWIr}+r~N*ic2!G4<4JI!+c3cVfaokTX#lwOaH_Zv{QzIp^>|C|%20NFs2Fg9)vv}pv{97PlzH>kAMbXYm6%_oVp%ge
  • YCN#ZU!1smDX zdEmG&CY=ZV9>-1xKPMiyh9uj36fDGF{nJGMoPXRekqqQfXcKkhNB+Oq$)86$-Y-cT z@F-Bkf`5#3+-?1weq6dpP4y@mi4FKi`caGhIeGb*ERfXgC_I73q~O0=_jC4f-v52( qaH5X>Lr8v3FFCfZq|I>@6?Ff1tH5-rX-FRdiPt`Hl^7XFZ~p;Ey6u+$ literal 0 HcmV?d00001 diff --git a/pydocx/fixtures/list_in_table.docx b/pydocx/fixtures/list_in_table.docx new file mode 100644 index 0000000000000000000000000000000000000000..d1a8738808cc9dab276df085a821a8ff80cba8bb GIT binary patch literal 4379 zcmaJ^2UJtp7Nrw3$?s@OtoqhH_8>&i3#E1s~0PrFSBSCm)f&zPQ2m_loiqex$^h>gd%obZK51H!d1`Iy1GXv52elgKP%dX?zV)_ZFO zqSqYrQ@JeEHQQxPxXbfm9Ve>xQv9wz%e*!;Y|~*-M^Jfq=SV`_i}^J#lF#B{u3<^J zq!1It)2#1Nwr7FkzKv)irN;xSWAEpZls#XyM3p~+UU_cmN$OTrC-UbQ+;%^;7IpCH zs&BXJdpH}>orM`?`y&&oN(xx&#pfBt?pKZg4-fjUh|ptaxH~wR30pYWyWBT6u`?I) zfZG+vHrZn?(zfl%6Pq|ZJ73sAj;eJjye_&-n6@Q#dp5e2ORLE@j6t|KuZr#S-o%~^ z>Ud^APuzr;p?+4K?frQ#4*fadaKAL83+`ze1XF=joJ^T9-=9AoSuCWGr@OMAQCQ@W z*$*WH)hd%#kP#kaxpGP0Szs0B4jG}oX2pAXxhK3h;bTj{R(<1QnV~0w*jb!9jN@AQ z-t@FZubfiLOG~1cH3^0ve5WJRlWfw~%SBRN9OlR?i1zB%-<>5n#FtZhMdp@RdF(K( zT2_>;lL;QBJ*MBWV2zCFBrRvSJbXj*0xOmvu+G$qTBz*Mv*#QL(HL;Ba#a%2W z9=}D6{Re9QC6k$hsVm&v-USEUe~A^_McyJp3*LJSPH9jx04Y6n<58&Aym>KqfK;3& zj{jhfB%u)c4xD@=UTRO|eRwK+2UGECrAds^HQM_MpBik=v%frcA_O;fP)i~%WYoNp zKp{C6SX?Gk1#=)S{BI&EKumm5RdI*Vu_RWZo_g@pwymdQ>Ll|<_cVto?es3)Dy(bv z8>zMS3Xl;LqEE~WebzHY>JrcakH}~)M-h-Nx%fz1*4K70j3GLzn@0|SWly!T`Z{4p zqSFQ@b`8AD=Z_C2kMMDbfT@-|JFxp@`Yj@V^T^&6Zes3aZEuMKqamdml`6s*wC78X z95gpY_>1(#j7AL#6$<6jOygey5j?TGgl_aLiF&VoiD?8>ROIc8n%fjAQp_-tU+I+5 z6zU8%e_R9biGE!#aJQ=KXv(2vN^^=Kt~BsIRW|xGg#6TedttxABIF?5sn-)h#IUOM zWi!X_6Clh--IkkKP4)a5ob4e48F1>k7A7J|LW+qX%{2%?%bU*!+?GJ_d7g`lH$0IY zoK)Ls@b9u#aV>iE5((TUoCsKTBx~R)NC3cVyBdZUEkN6^`LwHvdn67VCl9fRA7AjC zDqhlYTi-ngi$HdzFp0u>uKNWU>)xhm%9~%}zN&|x>fPezmC&UWG4v+ATq||pjef6r zP~A}%E2KI6fga|fYtWF6fxay9dOun`Jk2_BD9KHV7(Rb5m87Sl(?*p|4a8R0KP%#x zgPk_!K|b~lE$;UCMa58(I6Ba6;a;kAljfL>YTeU7ao3y&CX2AU25{>E0jtb3YX;mP z#JW#ymL-yBL<%KxNArOZd)2rqY-47_z5R=1xm8$u^oP*8*n(LblW1_#pU?Qpw}L@2DAJ7~-?d;HT2DJ^G>ufG;AGK5 
zc#O7KsH}L_hmgq3s~N>V#(5t?>~;c%UT*ps%Q5SGQABA)w7-P0nQ2dePSSK!AeYw; zXyQ{xoNiFONX)eRek$$*SA?D7!%qlsrf@3YK60E@E^)%JFO=uIs2ejEhh< zZo-Ia^PFCr**=j5bB>f^f(ca0%DKC$NaQG0Ja;}tqj>8kp9*ypge*J?n^k_y3s!mK zCwZrD{F><-9Rb$Qy!q{nL+4@O@Qh)jgX(-s4vq74`elq9XfVwH&EBC(?y}DH1+Qt) zXc(k#I6o{082@e;n0MPCy-t3}DQi1Tib>y0i-lxu%}cjo_X31I-KHna5-63> zBN39l7pFcV0%Ct>tNL(s3BJdC3JDJ#H)^T<#P>2X_k%t^k}(5D|B6-O$$TKv_GT2= zmCV_q$ChGS}dJCq-X9J{NRKxyJ8G-TJWVDrZ0Yc0CD}r=C*s zS)-J1PM?Lk-m)^06VnP&QtDr7Go7`|FLd?i_w*=${ z8)Zl~FTt}@ud;ZM*muc7jW@$6G~#P`v29RXY!Thcb{}hcZ3EMc6)_j|c%-wEMl9LC zi)A4WP*ObAsGNP;$kt5rqGFz;Kd8FVv;2b>!wM5cz?^C5_)Xxz6ppGsYL@JFSgMW! zen(ZBA5?XA@w78{#))L5wvqa{2(8~#Ug*)oejT=iaDV6}BF!PS%Rc_9PVlSj2HkNwgN4ds$AdnqXQFDXSrg1Hsa(S|Fo+?*Q@Bn))1r0Gwt zvoe%{>0!y4LWKhR#R}mbO}W;Ip)KArQKp^bR|6oC;OT~ojn3Lhn`Ffk*lm;bap<+7*T@8Ygx-zBU^jFU*N7bLvmjNCmgsw`;ABs=j z&!5|^qfiApYjsEN_T-DhiNg`e3tTxC3)SRY%E^H45e)k9g#v4Cm&?s^Gc z zg%kk+M`9{>-oY%*I0XZ|C;T-9%%ZuT*9^Pl8&Zkg=&W%Qowjy2KIm&}%N*~BoNQ-Y z5^L8)J0B}hZ!kNM9%xWnM4WpHrQuAX`o`+?xx>eBuP+%kQy!T3QMT51Mgpj4Dgo|- zqglbqvTuiG&oz2eI*zsaR8h_#*SuUu}EX&KG~N3*w9_Neet9yDuhG#P(7F>S~^|*j;F)-kKB7lcr6-!$U~Mfnc5>V z95=&9w#YtQ$eI4}8pRV4TH@)3CWY6wb|IR)E1J^o zr4x!yHg7+xI58dXsR;}=%*{2odlx=9P7tKc;wfq^;|G&ibx5<@Y%SUEgUw>a^yTOj z1jR$M;M6OeR>5znO_GQrD_PKscRQ_af0bq~XdC>4vHlAq=3>ESdcYNQGO3y8<*ssM zBEH~=&q=`S6&;Gx2>t7szFVVk=Wnb-bgKo_VPWBsjBN;Yq2t!{SfI+gyJakAwo=?G~|r0 zh^p31f0dV&Laekx_C}d~pu?h}%tYpokcpixR9*&x+Xe|i9Z~ZQV!hAgh$^f~Y-UnQ zIUm-$PG<+kL~%2uAgIRPEwQwL>~3vCBa!DZ>Z@YWrpq#GHQ#4BrsHC4i@2QL^R%m^ z)QyIs?h07s9PS?T#tCgVP-sit>4=4Btil*bDpDl6+xE~hyF|~riBqhPd8@R z7;})&BDToyT}p7}!rMNT#cZaqD#z9*G3qA7M<&t@^Zj*1LWu&FrB#iW3--Iqh zbgfwYA|J)TJk49Xn`6*a3|uUN;H<|!bAzP%`*%Bg8c4i3zD5#7`l)&CYHXnZ6lJ1N z^Mw-s#?=auQqj0tgR&TK7S$cj$#xVc}$ z5zKIRV>hth;T&4Ct$?;>15A3I-HaJXPoi1zb2kFHMmCoXV7Cm^}qJ- z*Zi{<0B3!E#vQDv{F(nRM(Ec!o$crTxe|TCzuxo@1MzGA*+Ujr%|GK4@n898d%$1Q z&*~uVB>owJSY`ZkBR|jOUz5*j3{EP41}FC4_*+;063$<<&sNbtSAKdn`-dw1ntqlS dxKjHW22}sI@Sv*1B)Car*bCP|cTnR-{{j9`F`obc literal 0 HcmV?d00001 diff --git 
a/pydocx/fixtures/list_to_header.docx b/pydocx/fixtures/list_to_header.docx new file mode 100644 index 0000000000000000000000000000000000000000..f9b3946ec2615c76f63684b8f5138e26e51423a2 GIT binary patch literal 4369 zcmaJ^1z3~q`=v)nNJx$b34sX+N+=*8?db0An!rdAM5F`+B?SqkyJ4ijfRAnvl#-I} z82mQBkk9}B^WJssdUtK-eV^y-+~>IuSP>J891Ry27j5#xO)0c1MS#3EcC&PF=i^1a z7se_pVoF{M*+Nh}+?;XX4u$s}GD|r#9{_?Azi$ro0R(IdFJ~q3>%d{@oej{vZMq@H zzKe8~l<;uLwme%&SI}S`$j)n8oR7g|n6FOze)Nb!q5)UlY;cAtsr;+ZcBSvRg123B zl9{X?t96Q*F_(Xgah<6;N(kUjPrE%jW(U`=$EZA$I~Nx6zEQ>+%x-1$!JxSOE-!@& z)MC&i6PoLKVka1X&CAdR;q(6PwZ3h2!AJd)MbI7HyE;|X@f_LuvYwZ=g3i7@4V?~y zM)Tpl8Euoa`)OcB0Pe~FI?E(-zv38ZXyE^3gcSJ;PiHp^J}YM@h=!?|gC(z*qeEd# zlT#ZbNypKBY%}L{>O44pR2?LbUvL#OW#^vk{Ht~*^(McUWPF7mt7tzT%^ZnFA?Aj4 zh0IvV8s;Ct3Wtv!z&~y*dwcA&%je}Xn6Y?WRf~|lBOQxEE+4Rja_rly(y7b-x4rUXX_m( z%EL<4fEMNwt7+dF==pNR4FWf z-3}W~(W6GfK~*bZyH;7AKw~e~siic_R;C9_pd(d=D19L~U4puF+*f(O)h;adw6n3K zD9@SmOG@10i%8I5u3zUFk7K?%o2iT^1(b%KRSY@j^OpBF95ayYXI7GD!Bwg+Fg z4Ak4U2I2wO=@MK_T3c@70~&XU@v~TKR?87q?wezxjgzzPYGSNkf)@I7D1@z9!&)9U zPe4Gl&L!76_p0VYw8^LDTOAr+x0-No5(ZsT(pxP=N(G2Hb31v-yysE=qhnn}MUcKm z-4icCdy2Hf7w!a_>suS)Da)zIjyj2+Lj>FBt5?G_hTcejd-!hu^KkA*vEvBkn|n-v z!Qh@BU(v+tgeq|@?(z|9moBZ{da7|@9#rkP4NUf&{suGObDlm|q^&vbhoADH6kPY> ze#tYWdZEm}bHeL?_w?0nhH3WLEr7^p`n9Px)E^`fwU2Phrv&p_;j-?b=iQpd5RZTW zh4&6vG^>WCs7ckzVru%`H?JFri#OrX)8z;$bzN)sypEs4nrBkhJ&=a*-VMdT)VYzs zEiVAc!V&NH7kgs+UXD8_xY;9`U6b0bQi(7(x7muBYOp(plwJCzMm!r!MHATgy?_R$ zpe@y*jA!2ap$>&?Y#jSFu6;J>D++eieSwiU^Aca>UK#<2=sEEv<8kzOYS0v?p`hM% zy9f#?msi}N*XGH0Ma)gctqW}WBzXll?@Rcz$DT1J>7E#Cx5v_MGI&%##QuU)qfek)HIJM8J^E~h6<}k%@XLzFMmQQibJuV^HAGd^MNom@3 z<0I%(?QGL&2_i=*>E09h$b+JmV508{S^7AiqSB7%Qq_oRKJmP`pvD2VnXZO>>~}sS zMrpA|E(E!2-LyS{m0R%IjFJ#BJpi14m~#-lJ*31VXPelOXPLS zKPR4Go$hEY7h%2I92J>TX4GI*n|>7()zk{57o#nCoHwEhl(`Xc-#e}3!tomX3`aD+qlh1wM5c_ zGkY!5Y^9>e?r1pBI1EHj^tg?*#^UWppk(>%H@y6SyAq=lP8IouEzR9edDkF#+5P?) 
zF(tLd%sM)qJCjvA--1Po0%)5w#I#2>Wo_=RfIV@S__fkS0XYsLW*7(3-)KQ>_@Y@<|eAS!X6kYhp2RN!3(`nosi zt%2?A;AD#>C zr*$2o!zu=keP|PhrW4R=-Adz@i=&(l$n<^I79p>zIT4FASkE4DH=pB zLjpAS^iD{$wWO8^T9p z*{3+@k+ityz3JQI(L3IGrna!H%o=}0){2q&%ABhd;%R$&hMKwJTxV@5Nz0hB8 z%*N(i`^p?>^XeKY#hjavZ-&vM! zLpZax#L8fPYQuKBaP8wKJ>nR5iTu+Y-!lV^yC;~~jlDQGX{kioHmi_qEatT>&>Sj61X6TdFS zBNUq9NVG<86Vgm%_694Wk}7doq1#4wTjWM=$H+c;!#DCa#@wycK<>o101GHrPkC@W zI?sgfMc_QQ7Qtn>9)FtO&ZMLJF3srm^;~!QB(*Maj@mCav#1_jZA*NWiCjJbvQPhe z!bSP|KN~c6cKb2lI>z*644D}1bQoZ>sH?WRO?2%AX%;nR9J_mMz0tMMA)j+FmP9C*Z#y%{4OTP$M` zXz%rPhIj=Pg-hU+YC3D+jcXI+cs78Ra%S&uD5^y>%3y7SdPg4#%dvH9zpQKKNmPpBk%+=#j);{+3)a zB)R$zk2lv4%g(II|EXv){lm9h^}Baf<%bCg=?%lSzI;yZX8Ga@{X5@HUG(O+g z$4uK!<67@5bNG~Ca5FRy$}eY9ysUC<4~l+lhJ9`(0$Uua$Ks9Wv@WS?ylF6kA66T> zs-j~<6%b((tqrwo?`?@rBqU>^;*rVl_gxqP=$$BZ_rY*{fZGvR5dq5HhpIbKxCEtw zV9TZAr;Y0sSW8P&vDw|OOx}2*`&|XOAuE||L;}R}te4d%GgG`wYI8fw3>mN9cs)3& z0fvxqSBOqw4)>u&eVLCw1HNn6TfuZ=MkEsJyK#Zp;&Dk{Uoh#PdbQs1I@>Bc6L3}A zLxc}DB9#lk&0A!byoDs%1Pi5PbPRH|pZWD_jzi_wf7g{X`z!EjltGQ>KZ_LksQ(-K ze#KuM{854DXUQR5z_d@@J7D|BZi)kYzl{f0#6~iLhK7fHp@M%S5$eHq)$ literal 0 HcmV?d00001 diff --git a/pydocx/fixtures/lists_with_styles.docx b/pydocx/fixtures/lists_with_styles.docx new file mode 100644 index 0000000000000000000000000000000000000000..c1c7ecf8d3d9d0e7e61a11293773c5f15a59ed53 GIT binary patch literal 4204 zcmaJ^2{=^y8@7)nI~U2mMz#!DlE^OGP}yY|>oE2#AvI;+N4CaTqHFBgx5%Uh8ObhN zgCv9^+47(1F1g+R?K{si&p9*aJLmU%=l$OA{Y(wXDcDG;si{f$kwTgz$AXSteG$}z32p=6*{dI4;Q;dP6-P4~v@ND_`7hc+ zK5v|Q?S=c6Z~P!WUIT-6*YK4cY?1W{exoMCcYR2v<=TamVS{X23G@uS(2+$yIi~ya zYAI08_xD@@7ZcEn^I*Z+N2$Kk4LezpXY)~VBcmQzn^v;=eZ7NoilM(f7KV$u*q7K= z)hbD|8;3X#UM~tM_1*OVW}FDNb2|(xP&$FzFbC=mXjF!5S}9pJG-ikw+vwalatFe0 z_O-o$4%*}6`wKA>+*>G9Ljd*rSEND{g#D_Kk&u}FCn79_J8r=IoMl{K-T@YlU?@a7 z*b7>b+To4iXL`PKff5YM=R#v?lUf4MXMxM)4>px`@X6f*<{ja&tTGjk8n~x+rgv16 z4&M%0DT0Mr+wdmbJzSwY*7GuPkq;dBZagp}o4rlX%Z^IW|EV-74E)Pb% zHl@*M(FN4ekna@+3aIEUo|F-c9%qnq6P8%U#Z@2%x}rAQo-EbahWJqWD>B6L$kp!5 
z&AGf%)9Na7r6_Ac*uD&(OF-WDc(7J0oAq$N_<||&8U(lD5L%mHfmT? zUUU_uJ;8Lyvh8v*A*B~k%c?WfZ&AH$IV`fY|D?OuxwS*)AR>Q{j)3wB(p9G{@RKxK5QF`jKlt!7?cM@Z~jCo=bxB zEOG|S&UwC>GigG#;BXlpm(k;+r9N2HY`Xc zw1)E(KTu-XuGnY@XtYM&G$*V#uG5en08@W+F$X$9(ub!-&L43`iuLp`CY!9+F(heW z<7!W7#WPXB+1tv;r2$nk11NbWy}MSio;W>*2xgi*MHH`zXHY&&%grvo@w#7l?^F9y z7y5!TU*O>8IeN!XuRh9dgJ>|o#1@e_N0|z`_WER@>}7#&c5?1^v50hoIn8kOS9213 zpbr=;{qXW_P_c|MwU-W`i8xXfKN&MuqBbS3CT0k-U>jRYtGp?&zEt1p=8KDz+JhQ3 zppDL{!<)s!vp%8~=rffq1*Xf5tJYl?HZED#fMh<(y_vWwZ7!*}-;hx;`H^9*{|Fwh z(=~FS^~}UruC+hfcDGR0pR%s_4%|)nZGn5tR~>{PUJ?IkhFvZR$!xUDkN4}1{;K|L zoJ)@G`!#rVUN@^LLf;J0f}eP;(e?~r{~;k$iIK}jB_?yP(?)q7f)jdvp8;QSa}H7& zY`AdV9=gnL|Bih9DVIzjuT;Yp6>r<#7+)O)c7jEA&1JIfJhYL|s(viGeM0PoXtyEo zi7v8fEtRIN89ou5enlm(k+rc+bmJYD%RF*{Om&ljLuvHl%jdbpo53^2Q=f7=b91v2 z&(UI_=Y6U^dQ~U2DJSfv6ghXj{4m1vIJT`hX*XpM1Fsn=Q*~wy@fZ9oU=sRedBmJI z;(1H@Zb~||YvcQkI?BJfAZc4~y8A+GDnm&tK2@FFT0a|LMI|$Xfku!MX{Z)_Yebho z!G7jn$r<6tKjiFqN%mjy@_Zx9^zDg*8KzdGdGXR~lPK=Nk z#ciepJmM}nb=5EQx7N8XFgbI>gQxvj~Zb3x^!;m>CE<(_Y6R)3jfOB_U4 ztxySQ&t<^NN-wiGysEB}hX6BWIqq%2#AoSx$Ybt%9}Ki8?be1IuJ*6^-b%l2h6>Vh z#)b;%j0>SN1mN+;38b#8x^jCg>lkygJdfysIr*o6z7dZ_FX|blqh0jq%67#Sk+HH# zsNzup`uqT_!Xu&`y%rSad1Z(2?OM>01%&Iaz`Nngacp5xYNeKQUT6$o^l;MF3Z|=K zVy3Fl`GUhUr%n^FU}OzxU4W@y$A($)XlFtP2kv)kC2j?wZfEMNUCKQh=m@9V-c(Nc zdn3J0Db@xY*JOJ|m`S%+++e_jaD>))0gaE;;#5=6^d66|W6}U=kV)4K%wd4L(aQn> zMB;}ga2UgIy*gw6zJ3Tkt&Z&8tSpW>d4(Q@%3gFJvLq_9dU}bJgrtn>S1e)tizWU6 zAyA0__l{@b1)KcN5$0W0`uxOm3KlUmQSfo$Nj@Ka2JlFY6v{onBO*wzMZ=M9?UR5p zZmBiQK5aibT>3T#@L5KJ*?070^{q2lHh3Cm4LM5~XJoUCjx!_K zX`dyL#JCv9=m#@SP?{q{z$2hRW22cUV8y;)@eD%0-i(H&`BO7>>spY^%P2#|N;;~M z7CoV>7n-8tQq|nXZ0DkcIwkPG5XQd0|o5b<+Hqx_a1g8ZUZp0V(yRXeVN2m%iaNy1rJr; z3Mx`Ic}}5Al6UJ6MMfE+fCfF%=4xch`t(Q@LSPPI+v;_ukcZPF1j0W7d>Xj4AYD_`vISuag0M zpCX~2GkeLP*G*?b<+sx3!&_`^@7OT>rR&Hj$?QykbBIJ=Ej)u%YW(K6D7@rVx}$jO zv#9XR2`~RoCr6l9O8t3rKs{>W&9iPZ#5!8WWG5F9mQP2hrT;dMM78~UgHABN@7~cX z)y@RNPw4rF?DRWQ)zUipebT^I*_-aWY{P)ONP`q1??WiUPNlHN)1Xgt*U 
z^_W|Fnk%PnGpP`GkhOua!&X*s8{$^Y@yP~w%N0HaCf65FrQn4(qY~c#Zs7;oL%m8O z6OpUC-dPgkuCdAI888{vw1;QQ8>G1%d+n`UHr8&KbtKSkFhgi;$7J(G-{&_<9+hhw zS6G;;k~~DoV<;q6C|6&NpSH?mOfD0_%WL*Mc=Z4wsp7i$Aa(zC$^h+=`3W3mg7TYi zVeN!9g@Vt}cJ+M{8#QQpIxQ;GDX>}o+joz-q{r{UO91xhm*BDzz_qzVaODJJ>0lA! z1M#=}ZmAIc#u!;9_=Sg*Xo8RVz$Gyam_Ky#+GiJkqOsRu7yCl z`=6#{)3XY2N@*8H+?*c%#zD_}8GAS7w$&bU<3dR6sy_li{b%}* z9{Y3hagiZv<&W^DJti!FD(BDH$E)a{D>othm;aKbpVN=?f>>%lqWr}FEj&|0N`k2% PA)z50#F?6lfq3;lV`$dt literal 0 HcmV?d00001 diff --git a/pydocx/fixtures/missing_content.docx b/pydocx/fixtures/missing_content.docx new file mode 100644 index 0000000000000000000000000000000000000000..21bed9643db6c9c2a4b45e2002a21aa1602e7c6b GIT binary patch literal 81640 zcmeFYRd6LulO=e?%*@QpQi)5<%*@Qp%#0;wC^1VVW@ct)W-dvq@9*x}?&&t0mwg)B zxRzm_8DSY2dECQKxZjkQ0s%z@fCC@_000rdc^qT;3kU$Xf(8Ik0NX%XLUy*!CbrIc z${zM6PCE4NHr9j%>p+xw0O0Tb|9|sf05i7@ zUdW96tGzkNm(&MQ0%jE*8hO#1bbdJ4K4@vzi-B3?BhpN?!jSCQ1vcnyL2Le35vNqW zr`uMDZa>Z%c*zc5Hqcy7vNePHTSu-e>?V*s zm4gx9O(O-b{}i4IMn$ks2Jnyqy`#0ji1?Xu0IiiF5t{zFnNnw*1;+m=03uy$j&RU0 zcI|5P1m&Ezys)hcv2*uJ(+=?CM*jBpFUVGwz`k8qcsQ0!D;Rmt;kH?4BKM8?C$`-^ znifa==2cE$Fh1HX>^VV_#uvfM@A#{kJFji-s9W2j!~PgAAr^QyQyib8kz$IOZMVf? 
zYYbBJFCCG6E08@NKD|QhNIEI7q4Q&HS#cs%ESwvj*)zw2 z?i!KtpP|9>?<4a(dXZFlhlc|9NHJ*qr`|8-jAG7Xu8~Ji{1y*<5iKmYS*+XQoNsB5 z6yGoBzuU;q#ztMQZyWLc_Mo_cQ-GVDqcOw38H%x;k&Df@_x#6g{=3l}0=|9ccklnp zOJnM|Y(FES$g}ugp#xrrjnq5&d0Rxq7J1TUTV{{&mDmfanQk+4i;TH3X9MV9Vx4sv;hc-#uGN(5SjQ7(MKL*7d)k} zQ7%$RUspDf1yeh{#x!8qa?v|zBe=9K$<3feIhmRRprbFRuZEb&gj!szXDLH2unrU4 zDa(+SJN;0S`}UhNp01SZYaE~CMOb-GHnj+~O5Tu$h>1W{WwmblN_UgfZKSs}m#Yx&=uZUt3Pn(Q!6H6Gu{Y(mzI#R=HQL!-`S08{C(+Ebz9Pp6$KvJju z#cxk87jjNOkOU>BB1T4{gj&Y^2#&VqolqUSxHVI<_wIM-N&B92PPWa~*V<)7sr87I9^O>sdzmM(Lb!Pa3wwN9KmO1RPmDR*BM-52^C(~I2cz<#-X{V=fVk$@$rC&bH@xeICBT%;SL=A9bQQk&nIPu$ssy}DD z!^XO7UJ9ZR%Flms>OiEQvovQixW3&Jtc?af&A6T@Ma&p|4T>J`}>@vRi-6B#jR(e(0$2pJM5u$2<_f zQKY@!`DtIT3Xx35hB4r0mzg1ue5~TUT}-?PlKOGP6GV02xRdk8Jh*$xcZlv^t{*)< zYB5Cw=F-_Fs$P!|hduczKt!B~!qVH2aXclm+g9ez>-#opIyxUpne$U={N%_JxqHE^ z_EW{dr|l0bnpc09mMktv;Y&DNs&!B4R0ED^>~P7(Z6cLM%IzO(fLHdXZ>dhl z0c?iy+O@CQIk>SU(RKacRz#yz$z`-RhATC!@8p4?HfB7LL%y3=w(q%+#yvN(ElsqU z?VkG;L0|*p+Stxdru$a-LSxq~gHGfJa|_Cu6jB8uD^kK~l5GimiPWsquzP36CKHHrk2~_$0v}rIH~~ zWGz?O-w3D+@3%s-g?f)%W-U~@jgQ!mKd15N!Te2w*g0IQ^NmV;ZwV;e2xPf8w@y6@ zJtl6xIuSbOMGIfoGK`4K6j&Cm>@HG)Jq>B2_QJz96-^+Ji9T`3tuNg1oj~R$uZ8Sn zm--@7rR|WVG`SKDX+Nf(sNb_uXTgufgpe*NfSVV^7*LvHMwjJ7mhdoMvPctaepsV3 zgefgc5OZQ!1I;K;kFuqSTs55ELdDS?(gq_B7P2La|Imk-My>d8AyN&BunT4wW9&oN zDrMVpu7fihez0n0uM5Ru;e};)4g?UgK&L;5Q5vJcSQVH%OeB_poZ~u(>N$h_rArY+ zR*MnZ+RO(v|B;9xwZ?*NBw|9-k~$Au9(8r3UM!PDQP{0h$n);MUb-V zrimN+b%ILhiW3g{%g3nU448%gE@DF2&Nmk|r5&l8mM3x<2arIfHQ3Y67~^{CiVJEt z+ow8!xlN*P?Z;Jyemrg_1xWP)C{3`H=pYk6Wcji*RI{2pu>GuyuBUAzk0e(I`pN*f<2nMSla6|Lind z&=2!LAdX;lO&#d#_Dn*C`vq>EBk+n!V5|i37j_+881O|TS>KjVRZ?YPmC%Hc`Q*C*?6CZ1dG_bF5>Pn_gB4TCY+Q;uY5P_ zNmIO^f2nQG8e-Cxn`6trRgUa$j+$Gq<^4mb7jSnpwgHtQj~|VKY0i$YAv{nj(WLs~ z-D#862rCr1l}u;jWlQL~Y%x_qbWAl#sn_A-<+9c6sEI>s8L{5;*$@_@y?p8MzE`c( zzK#w}m&o|G{PnpR%YN|Fz$<;7&+HJHtKcolCHulibvV)kDakEVh%e~ATh&(!uFq-j z0BJ@BwE9^3MQ0$}j+jFsF?(?xeUJtCI_1_tD2`+p54KKe5--bkJwV7*bTyUMlg;>+ 
zS@us{m6crm%y1(OP4afWCGLYPb71__a>%v;sxYNw+7GzyX2e3{_opttr>=+gRAh8| znOID%Qy694SP`7@lV6Z$KIrXC( zcnQ9ReKY(+}AENqIVE?tMZuFz%38ijvL9CG-Iz+n5)xpFr zE#QueGEvg!+;4FDajOLOZf<^e?EjSM@D1jm{NN~^sT|~JcwpkvpnBAn7Lr%4 z1qxmO#-lPeVt_4F2Sq4LvCUw1hZQ2oNn?UU)lo1}_C+-C{|gjyfM_nb+U|Tgr#qkA z1i!RmL^rBILamXQhfabQLQEth6WMAt%|bUWAr#T7?e5WDs#0zdJt%Ot#q8Q_u~CNa z{u-M%FM9dt+f4SAj7%aiqFE9e2Uk6$9nlPEthCs%o)H?vJvjmLj%>P~Sy3B!Ci z6`N?@Ea^Xly#uW~Nr+U94P!txCr&FqO^#&n?jH1lT9`;#m5bjH(hB_Ip)=ASe#(^4 z5T&9OPWLzy9eE?FH~-tdpZj@F0Fl4QCDuB2$N}+p2}81z3L@Ilt)T!hA$_03ihnhB z{OJcVZarWK&DIq3hb)!BVU=P|&WR`=+LZiav{4y_-Ih*nM{CC)fRtbSBN5H^L>7oK zg@Y#nnAO7G+h-&Tv1^SQhU-Q*^aL(!$dtyy#AT%fSBt7J12KM5yrQ{DmI`O_TwwV1 ziHyLNQAQm1h=&q(4X=toNQqGD+4aH@>NxM-G`(J4qkp-Xz337gq0)x2AcU&|7zRAD z_O1WSygB}=Em~e!Aet)^wlXD+$o&h;^9Ty!pa{}eE*JF3fQEfBUsGjzlYr+Ue1|cA zN0GW&=x-A@aL%!~KBjyyP#o@hpQ@(?K^wMr=}`LZ67-g05bjM<&_H-%eV|bN<;Q)} zSU>UtiylEYWckTw!2qiPdS8&4LgM8K+lU_S6ZjMZl$V5%d#D!Y4;9N@V^)4+6bxA2 zkcsf=z7vj`a36_J`||`jdt;OvVNq#Qmy$~<_l9^t9@pxvbmQz zmGgipcSK~8>g^;u?}FgT=Z$X>d)dW%2QynR6YjMx!I8&B609(p3dwLE|GKo*9C&Oua~We( zl#N#{EK2-S%0h44H!$j|z$NWChCHWXX&5qa%GQj&Faz(jFK_D8lLUsBo)14K3;^4g zqonnlPz723xp!>$&;qjLmgo^2-YlK{oZtEK#Ut=5mSz-Bn|c9V>#wGkHJWA(T{&2O z*xk+JHH2jfiR|H9)3vzSWv#PpAcTR<`RD2Uawh!F8s}->mjZ$JY4hK8oKunt5_Ap1Cz6DS&{BBtpgR@KgB;2 z386i4fZT38vH~0&c0|C-dyD4OU0j_N(49_i{wSCfNmh4(Qc&s$-DGj|X+A^fOV(5x zu^QMDcZjsby0SiVMq2yoler~YzTlGc_<^>V(*6m?9yu3Bd@4n5K-yIH=eDUG+A%l0 z9u2x6(WZ|A@p;Nuj2~<2t9y#u9Y<7Jzas>Hb&$Mz)wL`)f&ot9Xn+iZT|=Z7J_+ND zR)?w#ze8CyjFj>NZ3pr|_as92smKnQJ0oY`8FbY828{`&t!`SRDk{ZVfT>swcZjZ< z*A8(HQB_WLy;mPYAzqD(Zw$Mu!iW8)iHqvzG-`i`9cCW#DgE0~kzBFyr6D70;Bm0d zYA#FrX>*0#E814mt0dB-VW{BpqIbhap6pTT-i9lWQ(S2{5B`-Mcm@R6+Cl0<)kVMx z`C$;8+Rm$}&~;?MCOKlfcXzzeXf0A}NWav$Tv%;%c1+O%wf2)rq9vF3LDKBn*TBI~ z<~2jCIC*^I{)QgN!D@fV_|D|2rAx(qZ_~q7o3r80li;unUF{lgW7_hum~BwgaMJ+_b4FqFV6QI)l}OzI0*fnZ zBwN80_7*H$c}rD#FyO2;K!@j^7@Pirla_7A3c&TnxtadTRI8JpSwuV0p5#u zvwOFxMeUGO6!nvUEL{b;Gv;s|?)O)~mgF{fyNO$936iQ1^0U}Hn$DF~7s!c!* 
z@xmR`1hsDuPzQ&@8{Ca719=i4cBG6uRSiP1C&Ll%7FS`wj03(fE44`1@;1F#2ARIf zC={`^9Gil67U-Yq}#_cL2N~m>B?$`CzMq5C&`A&@@g0mB|gCQy{tiHm2H^u(N0Nq_MThGf@ly2lrX_%9p-V8~i0C`Nv8-^g$) zFMCDqgBL-kQkaZ>5t8^Yvg>`RCqXxg!%94QwZeJ zv&kc}jJQZGcxSE+ERnY(~ z)+1JD861OvlAcG#n-K6$3K*olbj`Xs5zf&fab7^%%<|jsg-p%3X%Md_bJe>EU0cxx z`sE~slCeBFVZAol23 z0j;Xkc`0@;Ndjlak7PizzbJqcQuSH%X?L#LAi6=M{gO&-1Nyi*cd8ZRYkhZd$gv;s zlvD+JPcc}JoJ3Jp-1IP{kc^kLLha|KA=9SKlh9NQqJ{sXkk;=uYRpm=Xb@AFjFO7( z%yG^w{uEO96rTyc;+5!cBVsaH{P43X!f8NvUS9tC=Uu!0R@9p2!D5*w4ry;u_$Z>r zuoO#a=GNTNbmEMfg>Cs=k{M%yV+2yv-r`uZH7Sq2u}Pcwvl`^DxaN|r@CWjhG7@SA zg-fE?pg9eVR=;_9No>LaIR~GGHGM)6)gn3jm6Uh0$RoBgjRZMt*g{flSB$M{qHTA^ z9Fi^KmhwL#VktFLHY?itOIT5!-gjT1-|^9}uVj^V$4o}V4>Y)G8(`8pypkvBi})W1 zHdyWagd_oMBqXAhGTAlPlE66F%=vPJ77S+7TW7z~r59;b{8jHwS>_lahY#72@x zU7QOXuR?U+pT^eOy`0?E6@E)*L=q1_<4q?YIWa)`AjGkPRNo%x$;pYy$MEoRp0B-D zL2G`hZa3*KnLRd_YyL7gpN}lKo-0%D_^Qwqt9?-^%Ie8MDN&Tqm|;C6Q`WoC3EdS3 zGE;=18jVd#mK?>(2O|jqPJo)Sg$2>~5I_V^$gdXeX+ySVdG`J?z~Yhn*&oy?84tCc zD;u`&P93rEC6Wwwz z=%6nr6!`hB?T_qWn#{j`zfU`Q3|qTAVv~HiTt`?<7G4;Y`-m?R789JaoNbc$A+XX- z2V?2kYX0y!*zH2-<*o=Sr^ti7j;wIE(9qeVLrU5U#4i^V5xN5zk3K(;CYK~ua;4N> zW8wCzBe-h)%x*pdwJQxVq1B+^-~<0zo$vr4R}NxKvsZ=6Q|utph2@x~UVgaY8X<%y z*o4Jhe2;N29TiQs&0NhXIXq!qXD<^+}RE*?rpaM*W-RUzGO8&T8-UBcb&*D(GUf$58#n^5{!9=W4D z+155U65So|&=S5Iv;bbU+4AqpiXhhK5_&26qH!kvz)kI#c4di%$P86RYIHEjc%IQq z{wt)0R(Z+DKnh?*;SFgpTBBby?Ub&O*_R0AxCkqtQQiO2)&D(TtFGUG)Arp${GAK< zPU!yUT$H(qfw74r)Bj3E1;>p;_x+Qh3H(AdwVQ84OenDxwx@Xn2{q0T&4iFZeSZeu zpoFA9*1h7AZueV@;H4zr&-O1V4@gMT#VQ5Ue%rT%p*3-arnnaGk%oHObezqx=+hRp zU?r;i=^)XQ=CtCFb0ChcbO_Qq2g|29aTuKek<76$uq<2`R=V%3U6R`F>un56>hcHeVXOT5Ro;fPt&JCm!--C~Ei#6^(G=S!30U0<(9 z1);>5gi;X2DHH$L${y)J*xdH>>b@z>H_ek}_KQVOGE(eT9S+k+DKxpYcR$A z$`t~}Sf&uK!-+b@6I zlx=&es-#1W(Dls?bB@fLb$O;udi!CW+SHG5w8uPs;5TstN9SXgJ`2o6Rnj*X6_jc_ zzkLjs`53bsC@EvWkqp^8$cd_Sjb>~U>n;`$6%=Z6vzz6xV+`+>rXK0fKHd~;AA7J_ z>?Ogb4pdG(;iuv5tR;9eR4(v1AU5jdHJeDvIUY$l+dXYRf`0%TBAi@3U>zFB&jP8E 
zplw77!!;=y;T&l6&1;F6>yc=^(BlMBKAQYoH_M-xs0zV}@jN`K$JLo1NenL*1k&%V z-)GaBEEH`xtlO#HZ;D%=+)riGJBzwi2R-`EM`PM%EolK*xWiBJ|Yu@~Pi}jw${GO(<+7CR5S6+M3 ze$DRsoetE&bxnMy<77?j^Lvv!#uAuRS$~|}{6k<3@9SK=Dk<^7!WrYC+sNLE3 za{TI(_}IwLgf}=$Ub19D`L%vXd%784xLfddPnhh}0eD@E$eP)qa_moyfQPSg8jP9Wn zgR!6^HKwiZq}pbhOyu{cqrtQ|prY92da&k7Ba*;kum1TIdz}Z!l&k2Cu3`aTnB;Lp zjpkpHO(VfMFg4uD39wfyzpw(;-=h-4ZL0z79oZ6;?&Z!J2_9#_q+@S6|%e$^iLwNx3;&S`6-8c4;M zF~P_*r}1sWtws=|BLowCpi3r|mN>W8XW69k%9vKf|~w{jBOc$p5J%2?x8D0)d)Ow2SwDgie`LMiu#!Ov^@#JOC%Nt&BBm0h+;~+ z^S29M1^G`&dlOq@TRUeHr+++nWs;0-5F=v9bBZU#0WXtrLNg_z3Ym`D2baHwm_oh(y97r_kWtW%hNv0rAGv@ zjt<)^i?`=ZP^(751Cm))DM*Nw6SeH+-?~Tx>XWsjbIba43%q_$1aL^2^HVC)SUv#O zRObFflzd9|qgO!9hgkL&bM;agFXa|F=fEYPikg}n>~-uP%Acp z_DlVxnu|+8`V4DcZaD5am%uI{P7GnKQ49~Kw|ykG0o6HT8wZrjAk74ywl|(u5qYSe zD@ZGAagU2k;Q_Jg4kv4xDLE>Ut?4V<<4oxJ*0H@4hp<^K3(dT_L>yC3bsRn4%+-Zwa(l>{S;t#1>`#N1k}q z?^zIYHHm%lj5iuMtx9rnukr}h#re*utU^`wrCkB#&X&ypx@a)^tbX$@xv2rpAH}?U z`f+JAQ*}2=PL2g>3oIpiaY3)s{aQOWC%TzWziUKq=<4mp|G@~0^uJ&v64d>tYEVhC z;DoPjtj0lTSe{;TY2nlDs19LsaD4K})qh3TQ8jNhlGuQA(q6X0X8{Z1ve-6s(u2<# z>!V?`(6AKNh$6}Bm(Tx54gZg9nEC%YHjEHnVC)P50PvIkH#YpQ$zEvwCpKKh%c_gb z9U)&jvW70MNisU24Gn7^5zCb#98hps)a~0PrdWTt5}Q!*z)f8d25OtYdBfUP16U( z7acv?FIpHbN_;q&IwletCseFtdsAM6L$$0o^jaZa%*EkIh+<)jb7|Uu_Vyuk#?|oV-iPi~f zMq^Bvx0>V@utPT7EAW5}CSZ+={^$w9+qkZ$AiovXtS8_VV!rJo|EXb7Fu1Q*A@p{z zs;7;_mrymDsh2A3Ov`8at?)l@?xcwaSz6i=m~+%Sn(&h2=d&4La8e(*JU5Od3mE=c zH=wXPaWic$XkOQi(Ge+DJaNi|*$X5z6C)^S9tm31fDs8F^0^bla-rnfOyyX#RKfZ_VZ@_7 zD~6T2pe!-@G!^M0WD}%cfoY+-dvP1Q@;U=jc(@KegJ-&xi?G zj|rtCz0;&}F{UpQC3+{wd+ z5i?WRQhEPrTE6H>lh6PPV!C!QU-#FISm239%L*U;gJBIpyHB++`zryYppk#!^EYdXOzu38TePh+sk{9a@6h?f( zM2~^7GPNrBIPn^996UbI%O){9JRxk`;pINGXjWyfkr%|fZt6iSz>Gx=FEdsQvcr)DbHLebsxL8= zmKMh;rf5txS1r?ztvU$(BzhhI=@RWOzCj8|RU3Z`pW&tU(#flH_&LSH ziv~}hrYZ*HFadeZQ;v{u8VlrrnVa~XTAmf$NTgixVvoKpn4GlPYw4TSS?H(SbL6FbPPN1gU@@WH z#I&_FFN>9gVXq|q)+q2Qr`|{LmNOM!;CwAu`(1p)Fc_!Oa)cNIxKqPW;rTNjsSnGk;7a^&FwarK8e9>AdZf?!;QMaVfFh?Aa@gR#9uR 
zn1B80{PLlAOOC289Yb8P~wC%{=Lu^`La*-FdW3+;`$VaBOu@2aYJ2TBrnvB%p%s(1xY33WAskV z6pNiV3`I@TR7jr%u&aP{MhNmspfN`YAynX=kH&9p_?4eDniS8P4izUlm!jaXCIfZC> zFm0M8$@V%zBoFy{2zu8Tc2wUR=unB7jyww6Q=8YfMbip2cpDl^ z-%n}C4p9!`7I^}+Wm{$Gc&Tfk#>?P3l*~6R_OAh3;!_6R0yW1BoC!8dxcXkPeB(x5 z06Td8(bC%)JGZLNqW#BS!P(rz#)RQt^}m9$7aDSQ z>+GoCF3OG2+0KZ<6&u1Oa-}YlP6lVdHVoe)B6UT+KwwV72WM^HAE>|0APjBACd@!9 z0DYzSoptqxHQr23t(aospt^1%`$}XLA1U(d{`(g`L0QG})i5j`G7**Bn(l|=q!8(? zhd@&8$l@S2E3&mJ?w zJP8!DkX~>Imza2^RTE%5!=)#Gxt_StgRVQoe#Ktl^@{Y|mPQ8`s2a1VG|k%P#%ep? z)i!HE`&w)FNPA_*s%M-k8Z9s%5PeJh<@3k3PEWyUK<#x`Cc zYs*q4o3?kv=UCQ6m2By}wMo5t0k|;%j?+uinx&D`YA;UwAjk1d%#>+M``HfM$e!Bq z%?C}@VrUdcUij>RK71A4+O^pN%t%_prQ{2C&$1!hgm(9FoJMj-jEE>e%W^AN0lIRW`k}3?X&`n_ zo$kRY(Pw#Ns5o?K!h#?rc55q)%k+#XiwZCC6Lrq%uYWxB<(1;u zip#IJKq)@e6M7LmX&M>kA`*%W)2L<(Sl@KjQ`k>e>jp0f7tV?~&nVnB@e$|=>RVhqj?7ZZ{yZ=xy)tWU=jX zHQ+54C*iCxh`cOFYro{nv!QY453a|U3gzQ$V70ZmLV;iQ&WF(w9-GnkWLA>OnGN2B zUv+}HHJsbEK_4z_TD9C0FOVSbUt^ASMt#&AJHNa~b&{ve=o%Z=piR`C;hL$~=^Q(1 zh47Vzz%9o{;4CtXq27gO!UVr}$M6ykhLR-i6K8EfFktLf@jGWEd%J@7RryMMLi1%rbMt^=#;hTONB!a<-+i056}K5uQ% z4Z&cX5@YT98}rS1PQU-Q%}O4QY=!b)DZ5GFJIFR{LT*VVe+S=~+ACsi9|Cr5ecl?7 z@&KLgD7t?V{!w3)l|?HkW>d4MZR@+G99J?i6PX5%zhy!W14ix zV%O|AxIy~6pDm>6J*!U^Pdr3fOI?(mbwU{(n07X+^6$6|yR^ zfV-`lY@g{?Wp%X}$%gwqBSn!|;pl&&LM$Mu|46fcfpIN-*-U9*lUMEb zls;L6gNA&Fiv-5=Qc97($r7nfKhaAingmM=rwpW&p?L|zL*blZ|0#cVMG!$eE6Ff6 z>DiGT%6-oek>%RX!dATKWQX`Xof;-*p*&nNn4dX#Vi2xeQMvSN;Mw*Xk35Oc z$Xpz008UG34pdEjs-f!RYSGlHJ7U$fJ?aavDydnsOGD4TRO^Pwh7Tn3XnE#w!{QeaDUxf%@ki8(g*bD&tbD;l+BeO9vwlH9@urV+*VWPLR zH!=HK`Pu;>N{UK|0)V~?EfDQD+u8F$ln@fqlUI}zm5>(sZU8{~+Y1W+6aoGwLH3T$ zjzTgTQql&7gb*_T2mm$!3_t~7Gca{kKfx=4}6uob@fE3Nrs>{FZA0fCBJunfX7-OaMUP zpMS3ZWENxu0EzSffH@n#WhMZKuk*jl%mABfUQEW zL{F$@Vn`@tXW(pNOh~DuCMQot==wd-|EI;-#Ql3t0DzF4y@#WPnYl9|Gb1A-q2Tu) zx=jd`Je-_OY@BEbC2Wo8zjLV?nuPy0c#|do&|wAuBr@8VI{)jDKtZ6sU%|h%{;&Ff zIRC!%O@@F;LGi%WAS@tPq1ju+u$yq^aIf*n z2s8-8i0X;gNWREO$b~6PDMP7BsfTG!=pgAS8N?Yam?D@~4ER8HfD=Q?YA#bM;s+grzuRN-qP4D#nXQAJr+tt^v}3YU 
zmUE#?xoe$Ui+iU>zvqb8r1zB1r0;~^xc_LtaNuB2UvPIwduU5oLwI#WS!7XEPIOvK zLTqT9SG;|KaiV&Xbh1DSODg#YtU5Ki z#JbsfhGDp zn3geD04qnUlWXPEQig_|Nw*L$4`s2Jckw=YLB-v_D}!8^1KazP=UZ z1f&#<7CacD8}b+$6b2m@7Y-jD7Xb?q0|^xw5d{Vn6!kCKJ^B&G3g!gX6809(F77Vg zHvT%n65%Y-IPm~U7wI3e2J&i(a>^2_0_r@P9NKKU4Ehv?1jd+eZ3txvVhv#PWB23m zT((B8QNB%~ zM{!tbN_kObS#?!yU42VqM{{56So=)pQukKxQUAr@uMxl)#01FX%k;0=oB4~yqvgHT zjrEnyx$TMFq5ZDImgAb!lJlfXziX>ojeD_2rf0lYh_|PYov*Q(HS3h?BWRDh)RgE`KbWe^<%}sC2 z9L?U&y)S?-A}`@DQ?GEW3a=@w>uy+WdTfPmC+}qK7VQ1ruR5qdY(8o`?mX!^?LS*M zKfHLn0=-7QA-tu%MT32v`YeiAadC zNDxRtNngpX$oDB$C?}|TshepkXmjb3=))Mi8SR;jnKf8sSOwYG*=ab4IMKOaegJ+v zabNQ6^RDyF^N$M*2zLH#5vmui6e$tS7t0d=C6O!{FBK~tB@-zdAr~$mt`MOZt`x2u zt`e>qp%$qgr4geUr{RHS=aTK3?v~=7;1TT^<`wAe?c?fe=V#$>5TF^T5F{SV7s3`w6Gjw{ z5dj+s9Qhn|8NC;?5<3~!AK#i#lUSUTm7JIok?Q};J2-q*=gqLKwC&QJhTX2c;r+>j*~7V`nd6C*fz!6L%Ja;NuuHou^=s`Lty|SQ>3iM> z>PO_qr>CXohL@OE!#9SvhxfX_rXNTj4WBZfyI&6fvj0_(7%(VsWe7jWRH%IDBA8s* zB)C9$djvg1X(SG0A`~c;8`LGVcJy3~Aj|+PA8cnFQ(P51L3~;Q3_<|m8PPm(CrJru zJeeoCF@-E87Zn*bBJ~H&8SOIN0DT=p4r4Tv2eTQA3ac<1GdnQ{5+@MnE!W7!oiNUy}DyC(;`-GqQtnt@2e0`HCsudf=&I zqpGK-pf04rrb(`asSTz5rgN^lp*Nx5X;5jHX%u1XZenVxZ1&Tf(E{HR#`4AL(0b0M z%eLGu)jrU{+ELX>z?sSg+2!5!z-`jK*(1j@)XUae)rZfQ%n#1*!GArVFYtFzOt52! 
zS}1oIVK`X$S;TZ?LsVL{PmE!#SR7qEa{NoeR^mWXd2&LEd#e5~u{4Hs%nYE6v&@C8 z?(FiM)ZCyvhkU&PxkCOT#$w_Uv{J~@kKgxYXXSep>y?XDGu7iY!?go-ef7N!y^VcM zgUur?lYi!0*W32nFFRhkK)aE82zwd(1o{;ROb0!O;)Y8`x<;4AZYN+Tsi(xIjb{93 zbLYC}cNRgGD3+yHoK`c}2G*}PvA0CGop$nfr}u#mWDe7hj!)Q6L(f(&XfFM)mT#DD zdqE8d>|{`m0vZ2$UJ0RQjbVWZ!qpYjC1U$Jk0eY9V)uSJ@lUyiS| z&%R%;uaTjk-=}Y)uz|my9~gmgz~}$N(_M!}wY7l)9}^TYP%IS1zybk9L8L4cR0L5# zLBODUV7m5ncXu)~3`2J#A{MsT-QCAJ#+~mT?|t^)zh?!o_j=#8-UTq{XB)PhiFiB- zn?!FoeG9vbHmyD!n@VYt`e6xVC0dB>A$4TDz)d24-UPx)i14`v;4$&u44W;@qYd=*1syP{$-*O;N{P1qnheZv`SAI*2pXxw?)lyAN08HT$1 z1oRBesS^$870PKfThJ*|qo@pBB{Cx?VkYpPrin2y?ykrd%pbPzY!-HadFtyp)Yclv z-J7V*mB>L4)B)3g{Sr!|%MwmTy;SXo-$bX#IuFp$=fyiC+A+HY%vqtBLC*Vs$Vgkq zfjj$<+NNy-7Gz@$vkHxTUm3=Kh}vO_g*Bl1v>mCA6TwPu_+@mL3_5cL`W7$xb1>3x z5PaJe>CwCX*dgSSwldpJ4LgjwV7fd#8nv4{ z<>PV$>mKRGFGSYx<)eEM8AETai3riaL+(_>?`{EPJCf4sv_BU$q5gL0O%$?nvAYI& zlB0g}hCUU$;wqK~!qoSirH;WQnVTv8nAM!wWD;gu`32HX3~PTrNr`zL#vy63%$ZCI z5*YF76t#x#JPe{3shDmyxt-!wfhBz-k78XQ9U(0*!w^3cY5OJ-twg8Lb;LHp{psJy zFw&?O|58eX-!3PRVSGr}WRi+I%QTJn4|^(;PJ}QM!F!1_=)*~W2-j)o^*)4GRFON4 zgrZuX?K=6o)W^2|9tkbQvLzw<+-}!Gkj=B#^L@C7BjX zq%%%EJxQ|JW?ooD?5wD6A4~Y4C+H&xd(|e|Tl^!1TZs$aD0!AR5)TpnS(kvP2qsK* zCn#9Yo*X7kXg+lADsgVzsMcn}Zre-U7<{{VC3P4ds~;@-g&(Kx+B*+;q*xtH2R_My z-R9xDSQ{Su5t)5A&)Ny94nm6nf369s`HWv%bC2>IxLjFR6aWZKZ}y}DnY!yicYtzD z@{~RJQOwpy%ZQ7H#6z`&wa3>t%kY^!n>3?<)2(MH$hp~hU*E`SL#6P_d+cbcmdh(!p4N!h`8aWnl>p&I^fb*v27wa(Vo(y1oV0z>C(i`awxp_1*4ILLlJwt^C zLa7Mqn2A5=iImRkTe;3k!LgO>RWejHo=Fi)rJjtbf?vQ4+H+n{P6G8Qr)yUVbt>!6 zYBi;wHFx4P+EO~{>I+Vc+5PB67Tn-%U&OegDHC6!=P1WxpVC%IUuJKohKqe;F_dD# z(5f4hIsBupvDC%1Kf@Mwecg#ZCG(*jWc$Jhx4=YAw5!Gqn1@u8HZ?1mVo~1Q*-XA8 z`@YJD%#ePZV5MxQaj)>$t2--t1W2NC~FPRc1|Ft zYik44WEXYB1W&S*7I4|fIx~2pr;$0TFS>F%J*{J!;2HIEQ#5)bWvF)akuT)iHrNg- zd5k49U=hin4{@m>kETAobe{#jl+^t%qwe$v>jAp!u?K>M^s9iP`(xAW&9#(LyhCy6=k5j5$=L5DEOgVUMA{$ z4488hE%ZCgS%blj$MWXk`p$$X9+G~wX2}*3pBYDp<%FN?AHto4?eG=+P5A2v(Od{# z5shVk#p72Z*h%wzGJPKSB%(r9}!h#U=p(xJNaPV8|Wx5h4UvJ$G*hg 
zy|tc|#r)UjE^~-^*1487k9_Y0TIOS*G!BYMnjcz}&_!uyka(A5w({4U8{*GtTy}s^ z7xk8ToIl{Rp1FkI;~dKLBJ&5+rAm86Lxboa>po3^0Bee&lXx*YcKHZSf_mYBHkMkk zBx(aQQ2KR64MQYd?{tf?gB*7}OX}V7y8e_1sK2YG3!JMb(rnyetD-ED{lvsQFqP%6 zpSq=<(WJS&B81_lJn6)wE6KA51d?M%;q^`;&&~?fDZZs;I`u6VT=y2tWs~iDQ!&h& zR{552Myh$k@*8xk>bp}6-G%h_*h5Lt5TY(yc>4Ht)lz<9FPR$4UD)1O`kTGB$(A~b z$*!Ned6+TVzHvF4o~!C`nog6G&U8mqTtFgia|}7iosx^%Imj)aT_HQFY`;mnt{cuRin%@G7sQbzmnnh00g(MAvPKjvRrqYx?lquR$`2c*kmxvTU4k*L}H z#A;C>DDAdtCk@pnFqd1KTsu7J?cCNb>Q=9tH8UZ_?Jek*PA2wbAfEMex47GD>w z+q*`H7=0bL0d`t5JWla%9CC&E5)k!Aj*6eLj{To1&SOw@#d> zT(Eb6;FZiCevJ288nJi;Z;8ZZR4w}?0n>6upVZK8DpWtH@#GgM)Rlvn(^6f<#axN# zm0|ZDi2%?N!*}rtG&g{lfF1MnatYpXwUCGdfU54*gfUkt7?r>2Y+8548`^AWfWe>g z@1e(L%CJw37^Srl|g(3b*RFhOM=AdbJ*R7 z+|)l=@jIU@o->1kH^}lB2G7&dYs^MR9tsYCY#Z#SRZX&+R++q?^2sEU&M1fI&WrjE z?o@9V)a~4;2;(me>Xo+f@;xEaX}p6V7rp>+s^r;yDu!f!mY;?I$~NP7&BLbBDycMBbp)TrRM{(q84>pC&Pz2fC_AR}C$#rQk!F zFAYD^4=X9P9otnhNcFt6DDfTB)wve&IFTzTlsg|-s;%oFA<8+!%~gmH%*DE&h*bqg z>`X-V{$T47#FeObrW#~kz$^Vzl+%p&1~AHTnvuU00BOwaEqD`WV^crSjX6{|25`;) zRK>z+_O)4#;J{G@#u^+kpg?yQcXURbem&r*Dd80XAobU_GxTA$qVX;*8hyTYHsx*p zUYn8pW}l}SNevlVCFITjM0|GxY45-Ryt! 
zY?bF&hm$cCRZQP4`}AKJN&aH(REE!tRP7vwBh$(a2SBRf)+j|T>rlf%sTqZ;Hj4xE z##Sy8&P;k^de1+w`GG!yx5htQvzxbkM!4geDMucbGvH9`TY3#|nK$Y$Y6Zw2)ss}* zT&9&P|CWR>?U!ENjL``sC4S%38i{Gf5_LA$>QHF_NO`hlZIzB$Qoq?!ft1+^rm4Ap z*3tSgNsEnFG}6uMbk6E%zX8>I)sh+RDl1p%P~#mcrukYEi@BigO6@IVl6_QFRn7|w z!Wxix&$zv!ZqrY#*rf4OsVKSthnn#}#Xb&z6gQeF-LDvD>dM=Q$ca_@#+Dqr#iRC7 zVuRsbwQkcrZLrPa$5;93md%)_Y~dOl%JKKb|8)E?VkTi;?4`()VxYZh+RBy#03U^58B`VD}1XO9VJR{Yi;XX=vDv^EX-cE+tn25aJdhZIU zZeT1916AhHg-gGfx6vO>x@j3rcjSgyEQj(r`dqdP5Ot-9n@a>O?}giBy!OpRXS$^NFpo zdrL|~#g+@=hNb-`iMVU>3ezLT|9TkgP+5J$RuS%J=W^49lIhJhJvH4>SFE`gORo~D zokB5|5S4vtoAJJ)XmYG+EYq=J&zj{>-hJ_P|KNr?^mbwKm*#oaGwJNwM-|s&VO8Ty zvqOu`U4}nP8;uD{^5g>JW~O6{pZNy>ajx{fZ$E{r>F{k*7Qbmy*7>GWYvb)dVo|n| zs?<=xk>~wq>21SFrEYSap^drCp3wiwR?R>$JSAyL&s8`ks0TIfg*_V6c(4ARdS4ltxi1GeHFVosf zyDacV;}AA({+GJnSgmt*-Amjv5Qs6|p<>RFsmCG3C()D*S)2OFq#ud$u5OZ7R7C4` zq9(AXfltJHxzvh@-<+S;E+zhUC_jgaI*aB`hoqkqW$*rsfngP9(SNxN!jaATO4ZLkW4~`BoD$xPFZvNO8D;+^E#xw=qRsdckZIV zfx5k%dtUpi=W|ixhU^ul93$*u8x+dS%Ghk`iN?bC{CGi1xao=Rv|OwUsDh{XQ3Fe4|5)X;CxuL-OE zrhBnqyp13oJ^oqcE^4|%0S?u3+O7FGWXrMRbuY7ybPMet6F;^ZZCO$M4U4V!*3haG zO{*4UR^F0saM^0DqyBGPGywFGf{H>gZhl3n5EcEuP+t%iZ5VViat^-)eGJKfbz;_| zrl)?v97EY6u43WnRFCPniSQd~1F5s}UAdmdNb-aGeVAtbErbNg?|xrRT6sZeC~5qdjplhvoB z5kB1*o5+V>*SuPH6_KS{J{1f%v%Q}smG3y?asg3(`q<0%v5?)}ApI=J_ZBE^2s%)g zQ~VsJu|3%P41UBsXWeS}BmG6UcDOUk`jlTj>e{mNZRLpb6WT(`znlorZGnh-Pg4It zQ`^24`@w=6p6`u;Ss

    M*z4Mtb*r#ERlXdL&6O`A z_+bx9X5@unvBI2qcdU{3X^j{5Avb1HBrcN?cheP1uBjfliJfm-RPzQiT+uDtj;Ysk z@$Q&9b!F}<%o&A8TrlRl)Fm(tyGoos5rqZlr>}p)d}@Dr>?DTMgs;Ad39k*1hGJ&g z<^u~co)yJ82Qd-)CA-j=Y&C0j8%D0oa=nf@Mo+yy8ACq4um2ThUhhQvNA$_IYmzv$ zyx|dUGg?<&n5{;iv5I4FqQ9DMtoFfp=#pI%FiYtpuN}dxxj3Ug6n){;GrK>!sDDsQ zKnHey#gfp$&1bW|qBHBHv3t-`yJS@dy4A2_!gF*rt>fZRc05$j-OPLi8(TSwQ3=Be zv2+f6943G!g*Rl4p&mydcWS6hkZ<54+Guex@*~OExh}eG?OXrFq?e(;1;fxAz>SEqa z?pex8&Ur*Vxq$6*=mcqq35`*a;u-FKMWiVDzVZ2_*Ob;FZ(5!9W?K@qKviWtM~Rhd zIC$~_Nk5!Kl8HPI_LH3X7o+=#Lhgl?wL~oE!?;Vt2NcukFEo%Xr`3eu$;Uggb@xcx zrn^i%v9eYGxl4Fw>rc-nWSLD{PvGAezx$5HcW6qSx%e9Lsgrz4&6%*Kv*hxD>pDMD zbPtm8lDNGs2eO91X#}MW6UNpwZ;inVZC8Dy_%w~Ta}0hBS=sL->4uPM_lX!#7nQ4k z2$fRb^J-vg!QZ$}uuliZvPJMSTiRJ*gn7j%RxFY=wvzP+t?KI%V{kq-3Bow+8Ray7 z0v1ah!KGmnz~k6&u)9*{uxQxA&F7fKSo!iL%uU!IV||zfz>HpwD2aBxdZr+ds+RxD zTT6kEA9LVjddVNwOVVJ3qg&!EcVlMCsQlFkhlTBzvW5;XO_X#MDuluyLa&J_N-I*auMcd(%6dj~DWOF74ioQu=LqXsPIn7P z^qqCOeiD8(bD}-106`a7{gal`l1$ek)znDCZEqXps$$XT46>Qwxz*3SRzj^m?)%;ZDOb9kTqGew&&G ziRFeWPCy=_b7WyqR!+V61uP-qx$q`@RhV9|1u7)A zF)|KXZpR!(*`$jw@!79M*D+ZMiv*W2>d;&KNX&x;?fli)JtKTLL%2J2oth}>L@h#@ zO8(5|%9Nz<$QH?b((3Fn!aGFO?l}GfVnFCzUIO9m0#Du=;`$L&*u_A5ZI?QM7of>e z6ml|HMbai#3!+u*&s?1~Rq%uUBOc2iL;JM8hzqB!^AdAoXpNvBtVp1;W>{4sXE?Sf zh0@VXu;ii00&f&$3d%AA1Rgv>TnleC=WWPoj)i^OYX+x~;|F@m3}8P5Iy#A2k(z2YgZ9>yz;}kes-R6EhdQS7y6GMOV4tQ?^{Ew? 
zr9GX+v>q|2B@yyf0IxS3((=|-pN&Owc2pe*{>iE_XU^+kg^Qt}CRzo6G>x&}MI&TJ!sb9nwvDi2cA=wKc+e3AWB?$2okh&?;~zGD zVG#k04q*H)x~1MsZ%Ey%*hj0{c1C)f`e3zQe31&DJykNB7VKD6!T^x&k@Y}2wGI_xUZB0pePf`j zK4E`qrz_SJCMs`ATlRmG-4s){>LlxgPgZq`)(h#gNTNjEONVj-Kw5Q$-ujW7Z5UIL zjfH6#hOq@Fl~I~c`-^0;s+6tE#SIGKszXAxV)Cpwp@Baa1froGYI?<{hCt3!y{Tq0 z=Cfv@&Anio;-(q7?;q)XqbRCJyu&bY)fC}MZTT#3;bgw!01?f_k*TnmzIWc`VDx)h zy)Y#8g$8E+H3hK-xsNBMR1u>JMDHp$2AmUI*V|`)6*Ti6fI!qb03`m<>|8cL!PK>t z2SIsuLwSGZJL_>s!Jh1j70`eU27@zfl+SzZH<;b6$S?_`P#*uv$^|XlHR}C|%Igw$SkCf{xE~z>wN8MIv#!^l@#w7$m!p`~xk$|+PdbFUV ze4q6;k9N4h^n!D5cZ@!gO^P_Fna9$vz$jG=$%-hM$Zec*0?TnajN*vR1qrpsYqk(QstRoI zGH;8I`OzV`ah~y1{9J9Jo*jNp6{hQ0ep7y0_1(=$ae(DGLq~D+kpFKjvbBJ4x@tz_ z6!5V5Ld}Om%MHC%=(yY3ag}A^8s!!9+2vjGBdRBEW93g-juUz0hX9BM?R`=fk9g4W zPg!-|jh0R2D^ix!pDrJYVN`1%)L@})8Z>IrZtDtY>-c1w6U>pTq)c!q&)zYpm5BBZ zXXKT_EYysTFuYH2$8Mi1&7=e20>DJ!8laBU=BV zd*nWATtL0D-=_voxfU(5ZJ_uDoj0q=!wWx}QYbTBW>h$l;~i?5Lxp#Z7i7RITBq|4 z<&qjUaLoJqsx|D@(GiugtT{p43OiG>@R~7^748yaY^NGPAkqf_M7h^_NRa@KYsr>g z&0SMZ6RY+??DvG-+rC>_g1tcpOqcnC3y&G{`OjU-4I--Jh$pGvp{N~q4e9W4%{R42 za{j4%r>fbv&K|DF-_~z=ExQ%8&g3JrE$qT7wh7C9Gw|M3Z z*5w<&?3+~8sL$M{u%v091WhxVwc{7sbU$TtUHWuQ)Q=8@awyl1DUIQvCFa zKUEQv|EXGNwQi%CyDjU2rW$^kCN1pOWypM8KIrDsraII?0E8+!69E2IC_mm(7L=ac zhbvRWm30!zmqcn>?v*62hXS8+!`~>B9@7BV)3^47 z0h8il+g)(FNK8`*uFx;8ZYvJvfvFMTszyug(E!Au9DmEGk`vGA(+V;DWmHZ2jP5Cv z?6}3PSIFeZw8jzSk$y3?BT4r>s;lRcNn_k>N@9^i&2y-f!Ph)T5y;UJ4v_Y{Yc%V5 z+{l)GW<}(#`g@G?esR^e8Fr6*RTr54WAK%Wi0uyL=}_wi7fT-(?&(vAd1+re=LtD+ zqnd~LUm|zaz2&|3J7~YeA2G+>X6Ct!d2D$={MVu8I@FBgBrT+HYwt@{W7^k_48_*C z@y!!tS&=Dqi=_wr5O$ChF=uxz4;s|=s$)0a(t}Czc8XV#rP}jXZumTPu%pT zgW744*|iYOK0kIsVMr(jDd+f=ID3atq^>iMzq-^E|57_py5A6VaUipw=Is7H0 zmaz!Y0M(ZsM&e+`BnoO5T)W;69e|KbS7W>o&mT`Jr($!?_2Xpjc0Qf!q(vYHjB<)B`I%GW6O&bLr zPyR*4!ko!diax=|k~Z!Qhkqd=g1;jM2m@0G5W@tQ`?;k;aq#K0;2J@1ld7zbSD>yh zzsT7^nF4vrf)+MFzcBjuEQfuhdj zqU^Y2m{e2#MEI=$4Rzw@CM<-xaDD{2!#Y{Nr*L7V^oTnJ#WK6=i5n$NmX3N`=@}zV zaSQxhizLFzCn=lqlObzlt9P%2ZV*peyAk?8aC~wTl*HVAJGSUmD{Js*$>@fZI%cVR 
zb%4AF>~9SvfXfaT6?x!thIU8%euzj3S;K`cmM@<48`8_TaI3he@R;ZEC&l<~_gZ5K zt#!2QOsTSdA)X9ARb|gLmHjfe#MMH^8Bl?rA!JSSq&&zxM#0T9MIPq{k8dtscH(i( zs}iriPtrf7OFGs9o#0)KKXYD{5o<7UubQ2 zp^;VRFj6#H{2SAU=He1CpU~~um$5&c4s2H zc(bf}S%;t zG0m&^h;Y}H^1z5Z)l*~);;Ni~I1!m3A#D#w4hf(7d!gI~1eawfHkEm{2KJ%ha(f+o zc}=Y8B|NjTmsbq?cm59}$@<1CO^M&rsJ6J%*Kae`tFI zv$g3=lVP_T*KnBdCDpIsc(}51+`$3F5>spR6NE~izw#&Is5*B1Mx-z0{m@>R=c(|v z7+CzVg~oIktm{7e46M1OA9fPHsNV13e0ZH56}<=_y1Ndfe6yVvgfKg)z>PeS z^u69nN+kto(us{kF#QJ6hj6K!O6VZ$NsA}s<3B~6AZ*5KSNuyL;csAd{fr|B+i~KARvd;wHywRSmVYTfu|%)pT8pu z(No=qkJL#NNkEnQ2xS@&tk_aI2lq_6KP4X5BI@6~8K)OCExU%R6HFbu8TXqM*1H)W zT@z$K188lr@*}_)^LFxl9LJDS5{BEXY1=Qr%~TRLnXzBw3zo%WzezWa`GSieIrs2@ zb8R>6lL7ChRWdn_RGUGH#f`LSi_5UB=GcALv0_8zrU_V>u4L&wY?_iVrUh#x-tL(O zz>giRBH%9f9G1SuEowVT6k>&qYl;T3ku}l#wm7nOWgCT9CyW151m>wSZ443XP3&k* zoN|wJ35ttr;a-+(7$*`aJR$jfBZ< zj^;24FHIBJBMAotqnHBxW$ZOZ0zNlyh#rKWwYQTN13ZdYOVt1!i+)hg0c|72)aihs z$%nm@B{p1Po?(XbS{Qls=a_c-a#~YvH*Fr(zULe@oN^*OpCY5UE{dbHke`m+N4|lV zG>&0ymqhFP8D~VnTot`o;EQI`LU=(rb<_Y(e!@M@nnF6>4`L>6iRUD1;Ov3t!Mj@)xT|3=@~& z!|DVKaMKcPB<*+INcK3Yq^buwl9FhVX2p}^j1S@mNkrY}_3w%2wCM{D5=GL85p4uE zep%go#_S%Zri*54?`IWJ*EbC!4v^o~zRheWJ*?`B-%OfneYze;JW>(h^@q4X@^gfa z;D(=NRVrQ-JIgpSM=1`qLK0e<3#k)@f?0=}1vzD{u>#(3`Lp0F+RcH6H|~=o zx&Z^AihrbQ8Ij_C(M$R~(xBi3%@zEGca}OS!-G4OvSG(MwuCHR+sDF^J?8vm-63az zK$P_k<9LT*jnr{Z9?-rk#PV_vV^9Ua8Tuxtz5P4_ok^9Z8P6gX;o z2qed@^lsCwA`&xH-%(tP+NZfwaw!j`3NHPeES3+Jd2A_?)|N;3gC&`erS5-ZJ0VLQ zb2#6T^=~kC!d5bxbS4-Xc}#s2_9FMOatb^x>8DHyf42FKgn&T$y%aA;ym2>6t{@z> zPh=1PlDilvxTEx!S`01?IbJ2fx#rH3zro&5ijpF+7dEdKXJK#n9TGWVpS$l7!*R}z zIUEguWHI_oS`6J)bC|jrF;hvQB;>@(ZjecdfTWD{Xp>NMgp{}PuHX-8g}aj|+%Y5& zaVY?jVs&WV9oi-}n!6FcRiS5hXO~OAvXb{+6yuq*H~ti&7;9D*2qw@kPCG61phr4p z!;!a1H>+K<)=3zO@;z#iyKKXTKm4DPQ@%~SLJ4`=L4Gj%q(g0Y zZ_y_wX)k93l+#NnZ9SXNG9s9?eP?fwecIMe1c}OZ!-6V^8U1SJQCZHsQ~Uf z6hw2V={;q=GQWNqbb$*s=t;ma`n@-l^u7?2F{^; zYSuwFV8_~!P);$({090leYQ~yD~Mg8Bf|b&|6KhU&RRlJ$>EP(Lo^TJ7aij|4uF^) z)vnkBn6k=b%%383ML(u9jcLfm79OH6iMHouk 
zcMcI9XxYhjE%Y=VWi}nypgY9C@1U!9(CHy;#ZlU8Z;?EJ_Qy3skxC77sM!uR*|tG4 zfT}S|L}v?R#xa7|sU2E9f8CC8Dl+$C$SV0A&S&owSp+x5b)$R(uMkF=TR;eeoyB89CTBM!N3*t5MeHbp&Kr#p>$i`C>hx zvMS-=)QkTD1D`mfCD|+Md-_ zIp{r`ms|SL`Kww?!)S?TyRjLq9}70F#Tfr8TT3rVP&%felvsCYuqKS~A>p(wn2@#U zlvzTUwhC-C;op0j42KBQ$F}Hk_{V?Mj^u^+TQNcI0I%=@O_qOm2r>Z%2ihn(%DJZZv6>mjc9ro|>3$1UKu zUd?Xy#OoHa?>l*EjHF9{m9{BY`Lx8P&Q0EOaIor>RKB~%QY#+V_|W)8Lob=(Z%4>XKd{TV9mZOh5$g2v|oZU^ac&I8j+Ks8ok^q%1M)A-yOM@1% zoe(N0dE#HSwoz%{T702)MP=ealI^JZ!0sbvg7MWxo}t=sEI_2?YCm~G)u%*qC!O** z+41fL91pnm__l((nH7EZ!lvZ8UF(Xp(U)6a7mGt6O(RP1%M$7XOMRzF>R*(!fk0Ku z%29unVZc$`nt80ZzMPpH(mA?(IQl~i1riw|X@Ej{m$7ScP%n4)+5t!>Xj0`1xZ|wI z|82{7ECkh-Y3p%(*^|B7n~}ewuQo42ehs13k3zXE(^P*%1-lnj-$jiE87-GFEB`9- z(Km#fnLxKU{#(+swmkgCXlvsoU??Q3wi=KuQ`s2+&%M^}4-Ad)s`v&B{8iAtOAMFH z^e!jb$)vv4E!381NQ0GfGbFyIj1s?0Q?->6?f%AAOW}>+oA%--{Z;Y3-}yP2F`e_c zXObFPVmW)Fv+J$w1tA-%3s`B(tTrzedHRydWF~RM2g6Hz++XG28z;M*8PRb_azCl2 z87CeeeYoztkR1|X=L!0kwNwTRou+44eYsvEx9XeljK6AX&t+XkW?1`>W_D72)3EYw z^pU#hija_ys!Q^gWvy0>3^AQ-*~twV$<{UEJN{Fpzk8t8wV9jR<0=eEolTpJOQZ8^ zVfy@#Emg~O@yjk)POG*|x0<=!-6K1+pYYfJQ#YDR@ddWy6T#Ptf@*uqJ&If9N=S66 zFToDY2OINV!5HNpyT`zF5W-p{LI>@08;L|ftFN^dC&BI>D=E1R-&zw5-T-fvZ7<^@ z0`cYLmk>RH8*mG*%*ubsiRm3KPFSL#<;?Z-8pG%i#jxWs-MeKM4ZsD5)?w3Vy&$(unJ)%#%;GWZ7d!_SEUUn73{3`#u zWd%RFkZgE{npr$k^Yn;!iB`FK$E?!l(suzN;Qf+m6K<4w(CW@_&6(Hf-Z?qRYwD%jbSfha7zS#$o7r8uYZUCgzJc@i;^olzYl~YoV*o%5w`r}YD8eN8qxrA{qf9v-Ea{=n^U^F zB?-=lJ~o&TzAz&DHDUn10KO4<0Dk&lE%FZn8GQp~K+azI4>}sj8=sG0jhouObqz2%HW%D&!HdT|KQp)$cSp3Y} zuD|Y3nUDIK>Rj1J#a&uXd5`pSSqwxfUVUIYv|LaV^ z`x4R7wKX$J)4E0}c7w;Y(8*W9J@uX?!)0>&iWJXslGS!Dvr zk2{M9qCAx?fDl{&m%hS(FIrAQ;4c>U7XQSzm+stOf^P%g*aYD3ly6-&nt+ESj0q)n zLfIWVfbR0f%EsBkaz;5+Dw4-Y5j%i2X}f0zVNaN1r52 zKwfH}gPVvwYw5>+$F_=P*n3zN!Es^-ODQ~$y^SR%Be37F>$vct`Vunbv$RW%h;umi#b~BMQdK_*W z{z^+BdY|Ayg$p{4A1vra$8m8u68Zr9WIhZHuzVA9&^?S35x>!&>Ab~%Ff-|6NA+O+ z2(;$Cs6*-r#!sj;g^_PXrO7<85LA&^nTJ4$g`s;HsPlZ!h$!?pE@W{IdJ@-hr2=LH zA+-sD)L5b&QL@_P&FezSb;B42(xRs2>X26ykUi&6ZqnXxJ1Sf}xac)%xoG~VXf%ef 
zu5k$IQm->4BPUc3a=#(|RMw*Vk)tc*Ik%Ak`a64OBN3X8@L5Qs>hU5RGEcU9lqc#A zVNPQz!qPQge+gk}m2ndh?1monGz78wMb0{ewNjeELcBF^2$Ldb8_|ng5NBoSBY&dO z2&3)SnccZJRo@vvekb!69a~s}=%ZB@J7?umKa|MgUsDf(7p;FzonJ=ws;B-_9_xr< z@vuczpBN*{GF48rZ{>R#Z)p=CtKjX_2&i*rDTNJv81F^+&;LTngP(9j zQeu?5?L93IeM>P+MWMgbZ%||y7nquS6XSKHlpKhOid#l%!$3mfNnnh8{$A2&49^i0 zJ;gFAFH*ix_RCF_N#s@3q<`W?FnBWJ5RkWgcs|Z;%$9e<5 z5&&7MNY~_8sf74LilyL*{^Hc~VgevseUOF!!1v#2!&A8M;4SzqoZX(y_+<7S$5(GY z05U%$%`vW$oFs11(a6;VsM=oU_!=orXAIzjWZpYLz*+Ifpc4R8)ZrNq6bO?3`&OD` zh}>#t$wER;We4dS{+Zbu`~dH5U}d-gQf=an1Yn)&MoD$H8@w1(|<~17JKH#a<2%z z^B3aY@(~4BiiUWjitY0M6s94Tq z-XRnQ>%|#FSqeeyWYqQj_gSv!nOix`5$N<)SjJ&Aeq2Zqe_>;ZYYN7(CKKBB{a{=rBuAa2UnYzTctZEbpPe~{;n?%~Z(K@mE{W~K08PvELb+kch@_$OPo1>G&Gb7kz9U1Q+ zCRQ+zw}vr|_dYp|Hq4FMQb@hR{@{Oxx{SSZMl{vV8uOo$o7nHP1O}D0U+s@XFwQEP zbCL8kSy)mv%}X+V^DuReD8)aGk|g{*{ST#q%l@n8uyd?K^naMwD_$TR_wF0c=G>;8 z(zYhKQEBQ)nvCdgyDhv# zK6R`~v}u?!+VI7XLJro>ot{tL#{Ij-=3Uo3$QdGBQ|-uOm)%p0EC@V2AbnSOd^c6% zQXC!eT0|+SUlAjGUfMXhKytU#?|*G!F7P7N?9#)8ZOX~u__9#hv9gfE-V$T^g828M z7RZE%OyP3Ki{+OE5GZnTu;?F%V{a62JY1Q|3vmMvk#`{7z!+&Ea_6B8aRO2tA12I0 zz6d|Ve}IZyzK=f#RXTaB&=0lvKgBt(7$SrL0$CCMRw+s1i$@&fiKgPc<4}Si;AQx7 zUL(-I>@N2qP&w(6qYB3MSApbB3@dI>x{uyo(j=ZsJCV^ObfVtfb(KGdI>Av%q@y60 z!8k`L;ghO))5(8h2ligspl~a0h6K((Q!+vn%8SexFPO*a*tMFsnw=50n4@LoEnC2D zVI7#nAInNLR zfU?rHvAC9`SQQIL*g8is6PLS&DO-k1_tHxtxMb%VDHA9BPvMW47Spztz1J_I!ZVvS zp_J-Gtr9{05G9fQAg8YJkWM0x^x7}kNS@~0CB8?z_@4r<8@Kb)%W`zF+*z3`)d$&U z65SOt7B_0D%#BG4JSlNveDGQ!Uc@})JS55>J94nV|DHkvPeuiQ)ygI3jMO=IFz5_zoSMpX9M(XI6FddcT$HJEyU&DQUC+knZ zNi*N-U%>Z_OjCbH{(Xx=>~(Xo2XbE6G1%!TFzYAGot=RdBFwR{69zeE(C30K4|8$m zGu?hn*GRka2e$UV*??@W`AmM0gR4p=uS^+ZIZvYQXf+iQKZSwyM~GY>o^};6f94I% zS7PtTH;VQ6fB#eEk87r~897UB5EgiUt@#l%c}Kbt!AK7C)aB3>KIxj7^qiTO)Gc)4 zsD-i?!o0sqRGldf%YIsURCs!Sk$ItD(~g;jYTn(@D_RBjt&hKYJ8o77e^muVp1A$GVXV{`D$=}`G_QE25=x*mJC%dH_EB_60^!JC zwbG8Qn33IN?J>^UztZ$mcVT;*{($CGC`z+T{b|K3Wsg!g^P=J-@8l@Em_>8}aIVvFcYfTXR!MHjriD#o@&*H!*FDewyl`G^c!6P}SN*npM?POWy3A2)1~{JF 
zH$7oRs}7v+SXXpC?w^*8#i~tP8ek=k>u_sVmIf>=tBxsEO>C}tUh)(KQuo1^{Z(hX zo-o(2$ldqiHNrGFV)b8p+RF4yMeUzfx;89)VH3W+lbT=dzh54;vDkqXticPuwQw!n_Xm< z+rVd+=`}WPW48hTMM@9$U&{ZatGF+AedjvS<-q;zfb=JxWi3n7m+hQwlnLc#rS;Cj z(|WJ!n*_=^USals)aD+hY%X?Xhj;d+K=)&-l}8ZeHXBJ{JEt0oGsn#8>vl*FEZbNY zDSit86pa2yE$#kN7#VALqD=8BaDQ85eyFFs$uCc9=ZpHG+%B_{+PIu+%iL=t#Y&H8 zfzQ8G+OO`ciubW*C-lpG1CO+JmeM@)j~16$?0j3lq!?k=UXxp}aamXmH$w*iP#pi4 zx*iCF1NWbNjo6lkuf2nO2+7WVhE9g%Qgtz-h|H8wTmXuG7>2LG1a5C3G-B^B9U^VV zc3q7De?*7$L_l9-Ks9i97nYb+js)QZY(W2A&@=Np1n=VaQxMHjN)FCk!y zSv?n5r+CTjPizL#vO~(bX_-<+h8~0~>BB#OAuZNP|Ek_*~t zXr6HKxvs?D4f@CKrtGazlx_irR{TouOA9H5VjCerMf$OqU_0~Yy@ufJxglF`!>`J} zXfHyY6r4Xhk?4A2@v)`J%+~WI!Ks-?4+|fI__dEQSHQf=o|t(kyiDE89QLm0_tpT| za6!!CHq=Ui*C`n(J#?S7 zDrFSAMw3N1z|v27VfG*=o8Dr9XqUn$93F$>ci{=x0<gtmwpqC#g_)54p3h|4&j5-hQRW0B!VGGN~%{UYVFF{!O&E4Jm4C~`mRrArpY znN_{v6(xlgrgDgWi`m-*#499wi%khzGWLpI5<0{d#CD>)2#``jd?0)u2qe)3GtNV# zUjpk5zN9z&YbvIU)vR+z190H{9Yu@rs=1`}TX>l~pHM?E%Wh1rB%G1n50Dd+Gaa4R z5Jxk9nhp{RGP(f28KJDEMiiD*VOMB@Gb|ktzQa`)8{p39!yA!cELJixm7QeCb z*lrr0TP`pm;u`Y203D25EOHf-e<5W}P6gipDB>!3C240d?mTbEt@s|^64;ePQ{2yp zW(PLU6y>unm7n-$oMjy*C|8|wd*EDoJ@)`|F^9!@gMy=XamvvHapyQttlA+%P87~% z*J%zNzuV{%7l{Y{8TBY9D>B%_gnKd|JDr%pj%B-$tWhi2s-)Agr&%N9(1W{K56CZe zsk6RN0*oBlMU<_7D28)6khO+ok-d}ol(ms%$gF3nBK4VM=19zAW(xDHFN+Cew%C)H z3})`yE6g$GUyGy6g>g(o&oXDw({D008G*ui_zQ-$&^Y=&W4+*@kBqT{U%2x)!;`;s ztrH`XuN=i#%DZ$wJzM@!YEDm;RF&O;7};@1cbk*upGn_IWR&>}j?+m%Z(2yXD$DYH8Lo%Qa}~ zkn~@R{v~y$_KT#8`lLFPCZ*O_K%qn`xfByAqN0l~du36%1?P5{P~nQ7YfLHUvwtt7 zu$WF|wv|hqs7cKT>MV6b!!Tro@}kx?VvcgV%GhfStip`MeXv#g_K3H@$7~7W%}8(kOOj&b%0CokZ|<^8 z8yH1ko$&)cN7NO+M5Y0+ioT*whenDlFiSjT={Z<;o6&SSZoPhzn2v4!L(#i(wr9lS zz4`0Jegs{@YLO4&eCplw6e22wE-WXl^>`rEC!MtM6?`O{=&PnPNkt1Oly4RF&}Vo{ z(vLIDai4`}7`sy53C0;N!Hoi2hPQ_;zmXBT^(vph2w8DbP{Ghxpk&+94-2kw^@PEK zaO|uA%0HL>@r%VTj@LM0rk0 zAZ}xh*AY19vrOzB%1O=kv_`ULvW=I&p z5o|?yfm;|`yDW7}F-uy!YdM;AAmi!+C8QOUCafXAoE5|H1Mj#uyQC|CI*$;dGpeLlmHhvU3Day3-1YW|w zzAs94Rc#UqpP3##_v>xKWZM;N*YCr)}Gx*N&fC=a8?CpHU0Sxj^(=(2I1w 
zY#GHEwxhU@k{EZrkVGj7q$wgOPxgMuV^fXo?&P{s1B`y;f~ecoaI(|1!+$B|z7_T- zXnGNbeKKx+!7er-;8MOm+h%WCu03noE=B%{^~9)C{*CoeO`7$aRk}bC1;xFhQOK`C ztY~*^j$&InKEOF|v9QH`JZGa&({7FIq@dj>OI9j0P-~D9xR!sZ0pLxO{J>zwP<#E0V89g6vdDjoMJ=X0FQuML1D(pzuB9P=Q7PH|A+x zYyMS#Y))dHj{AC9YR-MztL1RM-YC15EUL#eiSQW>K z@{`zJE2ACxOf|>tCI2wK zT9y{07@t?jO9Tw1A3*-*U%L`|s!k~Q96wxLD1lzFXQ{<8RxD$Cq&K_%robISLBDT$XyI_xd{Dm!8Ewjfc~zk*ZHB2m5t z$hP{If=t#_mOhUUtE?$$2r4WqDEhJgVR2Pqn8VIO!-AU@vkFN5>lH^8jgm6Jk}NSx zYaurhPxR0dM4?|hX-Pr8mB+JFR=Xy)ol8Au)7K&aS*(+{><9H|#kAIdl-}2z2#nJ6 z@F(XJw{^=vk3)w#0>F!WT1biMJ)Ms@V%#W9VwXl2XF9-}H1`y(si zXh#bEVW>s>TEZHi-!1jrOz>8);o0t;%>!t#U)|;&-*_bmb^1J@aoKCfK$=;!cGO zwX(SipXw%a?iQEg`YFyC8)U5yhihz6vy4-rZB>1ZiAPl`1*@+VYBxLS;WAQRd!4Zk*8vt%{#$y#sbi>L{Lox}_$hZ-kqJK4QU$ zX$UyB8+8(P%WDX80J&`IO{^vAskRzn3wq_CX^I8pR?8uv7{)J>rVS#t3f@3&Bd;oh zK;1+q#55q2)g@Ue^GxHLTUly71lUge4$o;8Y!< z0ucmC5=bL=fwTGbp0A-!yz84q&^5fZS}QS;9QPh{^y$14b$8?BIeodPL>pNrdwX(K zRu|j`sG4~J(xgd=6uZDi77gk}x~mrg0V6B8zam9*(3sM#kVTAKMP@#C|Qm4rSm*rBt{n zzK8l9(W2ZDPeE-XCInBQD@nF{<}jB@Pt1jwS7b}oLh^fxSF0SvVVIOzf`JSh$s_0n zhAkxzHpJKr+Km7+wgn9%KQdhQtV0_zT+MEy#u&>~qbb1*>y~Ay<>}rfCqM8Fq$eevMWU1sMHtZ` z;8n#O(IhP|kFRVAF*&;e9iVeEOXqp`ima@ScFNUFPZr&yIpI)J=u3?>Dw29%wfy^hyqC zi59*|-Pq`dZwEf9J&<%Et*Wxd{}<$7S(np$=)sa-8v?-MeEuS3cuMw#+8W}jxO4dw zVq0Rbz@8M7g2UC5R|01eC&_ofAb$-?F|>Pk1{DY&Tz{43g1Gai2f!rMkZ|O*%)BUk z2t>+PAy~k`*bKsVgjK>v;#K55zm247G|cfF8HgELzlr=1d*lyARIhfx7-9l*7hz4X z2<{7JND)+K?8& ziNKi9BnAr48@jn4R_ISew>zd5G;81p$j+2|AcYWkrCstgWg73ycX!Y(PinJeKR^W5Q+ z$R#;e(H~K^GJBsVXveH{`$+T!DcUF-6`L8cK$V_{vp7s9xx zKiUE5Szzt64HcBXX6G%`-8>T`XXGv!_GO|Hy{N01q~vKxKC=u+h9&`w0oaH^Cua}b{*Np|j3StB1S$}D(Vf6G6cu_3R0x*QOW=Ot6ZGeZx&1Q? 
z4^-ARC&nrCWdjH133T5diUP`aqC7wiq^nR(F)E~&)FiA`8i{%g=NR^jriFLjZ%0EB zHrpv^j|mS~U7-6A_&DaNv=kZsWA>R~Isulo*~5b{o;hX{MVQSDUg<<&Wc-~u z$RJpyjly-rZ7aEgTY@VqA|=1ZYZd4RJ-`d{zU-|e0CIs_9}zTj=JdbeiJ6Rr6lVX% z{BBss>BVN%im`O;*Q%W4P~4I7te{}r#ggB9cjBFjAzNYiorMzp3|vR1;{uhjHJuSV z$yhFINf<)j5gI2$61)XHsgDD20(6?y9%}&*;GsY7cMR1zQ%o8NMU*aXC?6sOaQtT0vv`8UgmRSiAM z9A)LlWH9Skh{GTzmF4T)z@)QmET1r?tYu4sm~+fG3zQ&@sVNE|J1`!m-+?Y-6sMa- zpJl`e!wzLK90dxe-;Aw-#g<1H=KSHMOBobi^#a9jr+<+zCz0vbWEv1}dS+I4^isON zH1OaP`sPeMCm`KGven`n?YYER=N)}Vdj4N(Bi*>@9`QA8rf@&_7_D0oA4R4i^O6qw z(!6qx@4iE`m$zGRsITM)b*|E^(*M>dUJs2~-A=^O5-OX)b~OF+-I3p^{Uz37 z=G_D;wD6gQB~?dZrX!}tr~mB%82e;DV}a;L*|LPSDc~%0Qe9}Tl$)w~U?B4x$jRO$ z69YCfxh)NY7;5-S3nBmW1JJJ0sabDQ8_-*%$3PXy$1@AT%+NKG8tA?Q4jE5iFL!ok z&=Chrq!KH{GYv?_cf{l$y_h4k*31X+TPSDA1*BQhi;R9$Qi#9!AGCCThIlEadZ(?Z z5i2lp5SwAIX%I!_IO~PnpkB_Hz`sTA7k?xiO&k&ZB!&d@Mf$|4eGk%ONz@(H!tbQl z>z}53ktQ@~LOFSGfug(>A7rQ_NTLYF{=_xuP)177F`<+J*>^;6l>y)3AW%jCSzjuU zFp@PS{EPp-m8NV=|0UQDKbda8pGk-lZs7}qmIyp~)1JM28V|kwGw%n_bUlLiljou# zAM>Xt2`VyXA3O02{@LDE@L*kE$52{oxPHmyg;Gb ziXX-tPn|0K5$_r4Qjn0?amYV^HhIQXJMR%tbIW)h9aLsutKfr%l$O*|@c;6Vc@XuZ zNH;~23M%jhA|pETtJ3x!9LqBUPrG#Gyn}MLwB;aRO$OWY3}LwdyX*l(*MclmK!HBY zGo>mY0+&YU=3YfyJLsO{hn#V-moG;z-V!F`psyILl0QN70az&tw{w9)=Hze3K2N@t zn~!S=x6aYTr}}2fvIr}j3$vpM4VzzPogpq>HIh9{XaQ77pyd4v6yiZ{F*Q5cFh_?v z9)^~2XvRMOWG|-eb6z36Nds-hOSjW#t4gwTY0iLi;-mDxEd{Y7Cz>0TM3dd;x`jT; z_TkLo9O+T^-LyZbY6(mR__;<7BYRcpjUk_wfzf?KTr`Ji7w<>t(+BImTptA5~#8+Eu=6tsV+SFSk#IdlekrWlT~I3se79$ zqLVVWe=b)~Ni%(2uAeGiGE>=?x>jlH<--4Zu~JUdTuZi!tgDiyO!?(hz=6m1!pf*= zVcTz)%!3C_zm(jAh;;(Wet~~03)b70TIyx)??cCESlB?Oa<9EPM#N0ejn zr`P3g_>amfzM6kMY_PC3-~8YOg)%14o>O_7a+>X$at`KzOxoqqvQIjLvJdIy%DsgD zS=o_h?JZF${%hNUVlq8bTGqrJ-Br@GI-$&JYvc1IvsF1wiAiay2O7JQqm;W-U!#?! 
z#!}u9sAHf+x?gY0c(ReFSra7X>DaZqemC@n)#}=2SdoEt9TrAYbE^IVCn@s~M0vVEg|{}> zBb^VA9<@aN_6Vx{kQhvbXXZ5pJ-!Dweoe?QFRQX-t_LZvWHhX{RgQFbOL+y3zSF>{LP0lBn=kpqP|l#ZasN@h4HZ0_!<*|?@C^2z ztj2SP>^&+MabcFYa(k|~fpNJFSE%k*yn&-EFHXyU6ufr;wr~+j=_IaQoX&LaagB@BHy^TzOq*qYHwR^mR(~B zDz(clG_WlBp7}x@Qh?$%D%Z`j{-ruC6N4jXb3 zYBO$W$P8!>zt7zb+(!trJOWN7YHJwcJ4qYcaN#e>?6RlP2IME1lDNebCu(HEZHgjo zWipeh5_~uH9Tn>44{D&Ew@?6|Qs1lJBbd{QS_VQ%+|Q+65jVJaiCXk?&Uf-i+yxF9 zIG!lsXa!wLS z3*?b0pW;~AAxF#;S4#P=o0EDnb<8FcE=j7?y@&=}+0np2UbREf`VgaveIo7fs8UCw zPE<;fONwJ`h(ft-A$}@vy-P`=I44XQG8!iPpl(S#%%wJ-2`Fg67s`X}8!OUDVX)eh z1ac&*ayt1~Ok7!EpjzCo;tMXD5|~BmW*g%UN+fb24TfotBYg{!A&oI@!kaK? z{40V5!X#-jITd*&^+^B|9iLY3yb;p|-D3I%GX=Y*a+rP-@uiNPnx0Hl*n?_QUkFZt zi_*5^ZJ-25auN&f38VX;KujT+PB@es>VRo9Y7zRdiY-G0J5)20ln$5XF;j+-x_n3A z42p#9Nt;LCP23Ingthd`hxOpVyV>yHc$!Hm+!p^+WhKLglv-^c|A3gDb1x~7w2hme zl1m!G!~=WCN8;zwu9Ke}mOzeDk{z3%uP8Uy|ALfK#*}aNLukOthFB(>E>B2U!;aKMF}N@( zGou>uF;$PY9pwQ6L#xpeaAo96%q{4E*DIVRym!ZL+&5&x>f5->$R~d&!leQ}q!MN- zc8BeS4^fxHPa#7ge557nV#G%j7qiK0HTp6Zx4jTki=(Yh##rKV{!o;0g<6_AzA*g^ z_$q-x=0ZV4FR(N01Mz4000Ks;K45@6OZM0{gc6av4X+?SkR^X820E`I^&(SO2m;kH zT}eB^aZE4JG~_4KDeNLFow;?t6MTZXY1>*v8FRg15JH3TKPx~PL5`aIE?q7-oI0AW zO|%5n3B!Otz`+7Z=zYip|Jpt#%%5*MHQ52;jLq=a9HGc+MVAJMeAUaoZe-t<1t846-Wo?|efJOz^Au!m~|^tped@QZ|?K zQifCSmy`rQ1W}3%J@tcz7 zD7+;7AQkHNl)LVw)K8!|YggI@aP+bfMj$xh4@Fp<8H688>}4$@6sGJ&mk@)1#fiU3 zdTDo#n2>Hl-n%^|=fUo5DW`ZN_AX1O^5NhAP!to1G1dsu$vA~u3?rkcxaSDtgqQd} zWL4k}LKb?3+gf5QCU(mb;so}J5L+r@(a)^L(AyA=F|X$B&8DJqj*6>{_`s$o5`E6Fds2)XRXJ8b^u}9UW6-qLYhDd# z^p7kumUWfUl`xJ=V_Zp&2i|2&q;^IaGS`D%`z&E5fImB+m{*`DH-cC^*k!Fz_FGud zABwKQ?4>OQ$6}pmVbIN~*J%pa!|*kScm(~TQ0A0MG z7o&h&hrWW*r7Xq#NXe(9;nc%yDSfyfUeQ!j{GDBfRG~5m6pnhA@Q>DUI*PbtfuggB zBFZ7uPtp>~lVk~rO!W=jN_s>sJkUXQr;gcYkQ-@68`h9d(@eFL5v6F#84gQ%E00Ke zz!^bpB|tf&N!JNuoR*Myq8BG-|4U*sXW7orq($se({kc-_B|~w#e^NRKvA#a&>0XU z5Wh@3lB9zdiI)Y};-^HL_S+L8(qnhV5GsVjrUwZw>3Uii(nG$|7WgBJx*Z#oGl{r? 
zosmCNuG+%Nn8EtEd)aFHs_{~5bvmuzJ7MRYKA^BSi3XX+P~Z{n4p)qf~jJT4;LDE@WY31MMUaP%9Y zZAzKnv|t!`!L3oyly=faDj0_xS`(1I4br!0s}KhLU!7-iu@Jrt^cvL6cLSe@Uc!3| z1^SusDq%nOO!G*H6dO%mB~tk!kY9+bSme*wM>{N}5QE~nBS(RiTrKpP$nTsX%akbF5x=hjxUPkY7?$6P-r^allUz4B-@xIi_~SkBX2wu%jzZ< zxE8W<$g|dOSdA3V)jq6wivA)5dq4GW-;1haW-~vhd}1ok(1{3QhO(>HNEfKj>Qo#*6Y91omH9NHEtgjN%h}lD7^+UF?Y*amlmxD zmcc>F@)c}Eq%6n+9m(nfI>zi(KJz;;F!MFIW#76?Bj_IcOoHNZ5ed@;i!CZKVg-Bv1p2L)}u^hOAOozOfM&1GE+^jBL3FX5d{--l@^CT?eD0U z@oSVRWb5;ZrXSMFfTDz(f!+s~{Dx;1IO zGYP|4tZiVpdd*r3ORG(rx7hD;aNOcnw-PGqVU%!3-G5zz`AH7rnstaoUr`i8UujNX;7Aea>P4kal zDvOB9kLq$Y^$puKR(KrIOhoH%+`-mb=_#vhKeO0uXVB!TB+M;s19ST^i>W`V{ZF(1 ze~NMbuQdBlvHw@EM*tmF6=mV6E(f>+et)eJFVJm6+|lFCt!l zz%*iZ(=@tux;zM#zgC_1;ENocm$@W?T9QC566cv|oxDD56Nww!eA!cb;!T$Gbnw)j zIRoSbb;{+O!HLE8pH$vHN-fkL4#saUi_E&Kx7n=$`mpKC;G7onxyz^YdBE~H`)Oe? z*0Q=aCUW#i`<|5iLki?+{j9Tt`>kL11nkUDRCpWNb#Lx`aq?yA=P>WS^FMJYC*CpH z%yJH+U)J}t?^qRw&}hzDnUeEQpjLvGDfg1uDf)&lV+U3}8=MRroZi&Na(`Cru2C_? zltu^k=LDa0aQ4rs-+1a(7*$p48yAoQ3Txi%akt@v zB6y^s?HZF`K8Xc!@p__11{qT>&wiYb=M_1MEFwPOpN5dTdXfP72ew}v2{*&m{jPW+ z!N%9!I+%05&B(f%b>#|X^?Kg=@!su)1T(GM=e-_01~H5N8CsfIneKNhZ({K4&=C){ zK$SgCt!53I8t_Kb9W8F-?+BZhcEGnvG!7o+_zwLOo;Nqd?3xECH&kC8dpZvg53GLg z=sY5-gDkC!@!P|@56fu=Z-_m9wB0C08Swo&c z=w=kUWL;2LDt}dY%)MZbTXx7AZ@J4R^PbcgM1nYiT{aueW}b0iKC6yfI}hL#&Kln9 zhmRrk-p?7g-`+*M^`iTp#1d>Ck(Ar})Bha8LN%v^djY;~k8f~ZS{uQkZ-OqVjapt* zH`4k&uDE>Ptpfo4J4NU3ZmN(3s@MaL#JkJ`&Ug<#t7aG}za2`moBScB`Hc;(AQe40 zA9eQ4KWD0%{p^EQ*C?oDNj1XEj9Vg$_W*ZugYQgyTpk%VZjrnBohksubbdsl$m-V~ zkNeOD8hKpk)nVc2wDAnLX)JU5xk2z{;S`0|({(WtXhO_?w*bjO9;$trgE6@0w_899NVENb z@LUND@<~l~ZEDg83%-=PvA4Z2%x=l5t^3ynZ#xHEP1}dAwtwXOYqO78!^Z4I6l@&% z;mRNvn3vVG$F48sv-fxhLo!@unrFo(2t(x1A`LAzv1U-Acdv{H<-y-q$3jqCpfGllm z`Si~z%FXYL=?&wbj3EvR;{Elyw{1H!ilOiU+SD0+pG(E*vFB|yeAcvGd#TjF+fDa4 zcOIbKH!d`Dr<|JyWId?nTtSn1P9|2o$e&pk(LA(N0NM4bYWUYFpG(I#hpFTVlog&5 zbSn=1hyj{D_uT!w$?&1)b|P2Dis|y;t~*~#y+XP>dN#g#vfqDtQoacA@amma=NQAI 
zlSc;E&H7DiVxMJ(?OByHrPJPW;d-;4&LLdL=%=4rvqAHKRtoV;Ph0gM(&SO`kjV*YXVtl}8Td+Y(e=K( zzLVYLRA~ybbbF7j!|~-qEwr(DfX3W5_`Y$Grs;)xri*w+@jdf*sXg*h^qJ*pkNSTQ z{J<#N7DP+e&urh0HZQeRrl0o+cbyv>n$5ZEKJn?dt@o+%m@S8#&TsL*{e$_npj?k; zJ>B_y9`NY(^P7TMr*S)dNO)A}#G9heH#7|HUsv0E*>v-20OzQ;)BE@u?aGzvDj_-m z4Z!-mFODDDXEffqpHK`reVfSyo{PGz2q+r4b>C7jW=;s`7n*$(yl`aXeZ5D&JV}PH z&F10o-m5orNdCJ!6+`Ib`z$v)VXCYPA8l z*`imF)nr|w%CY_m88A?`_oveV-(|OFmp?Bdo0(12jZ0gi{qj{^?rRaT|Itt)}T+B zsT1DSZ{3%;!$*gQ&X*YrZp^vd?>i1{N(S#}@&e7cmc^c}Gw#-Ukz@K5ru|)S+InyT zIt6<6V`B5n7LxmS&tV!hf zY`i(Us(4H&nFpkNdSv!pXR@5r@@y&~YheAGu=`Q5C+Y2w2lr1rVlT6iY?bo8P^fdK z=%rgNRz*ot)fZFe0ezUDtaDTN&e3m`Usj=a#cq-Az{I^CsJ?2KfpjtJE+5yNaGVFI zk(8OsnKixRws{tOcVOqN%k-^=rCVku27p(6%hDKZk&#sf*zT+nSAi{?AGH0rtK`epzL4Ikn#eQB zH_gVg*3%M|obn*{{R7(j=#$|W1{e3MPsWF=z1Zi_y|Ox`$v)^xows<`)>k9D<-61e zfXPFCNaMKKa3y8QJQL(*NAqo;J~WOL$1fgm@cHrHjjP|EpVNPRw^xey@_tPJ)y~tp z(q>(cRf>(h{fXb&fM0*W8F2T$>sQiVeDl5fZuWgF5)n(AxRy3|$;5=2Mkfx9cG!IY zO)VKe^=Q^(vP<$yAz+wpHG%YVR@ZdPLtT#DZtnWBA?QUfGJD0t)t^nKH%Pmu?7m}_ zDMB#*i8rFzjQ5r9^PQ}f*6Wr(%WwYzAM6YoxgC0Un0*zN#a+KGRBzW;yV(`d-!aoI zGXZl?OxC(XK8f6x4o(v_rMSYc0ZY>!iCrCa<;|PcH60yPtP7iJ{T@1U#nX08;Xl81#>sv*FWUb@*9N5SS#GnOgjp##Qr z_l|J>#5@36&JP>WKk-}KkAJ4@f*{Zr;(0*C+x!JC| z9V^}15AIqS)bMi2E~8qDYJX=;JtFLO(z%1jtB;)NzEo{J5=NlZ)!u6wxp8syT=^s7 zIC8>QsZGQ5>#-KQi_RAY&nVBDTHDjncWdM3@77o60VlM!%>!)5N2pKSN9F+v^rHdO z-$K_zQ~So+W|wJ>6QyqYq`v4$VY;i{)Ed0YaL4_?AB3e~sLrJ`d2H=eUwVIaJeGOp zl(Hx9L{29RD(9~TvmWadady+6n%`0Geo_1RF)6^o{`7l6X={Ya@P!oR!-L1&PHE2S zTaVYDD;JF{pVH*#T#S18JNJInjdF{U+mCM6KR%lhlD)`_nDDEu6Ma*E-_o_me0TYW zR735k`LmDk_g_s_hV?i4b(4=uf43px{kCF8yX$9Hc8>>K$QsuB?sl88jMN=C`T9IF z>w!Z#eBjP^^1CBuojv>VOgHxyicZf1@-4gEpVdYYFP@73e!@@l0<+^)-i-b1;(zRS zYveVzrMFbyrv>2JFIwauO38N-0&_3z@X6dCv*_kcKJ(0SW`aq^eUr(+Of_1>WVguv z=3>T;UPKdg>}sZRHNz5@roz^jp`VRUM|yx?D`TUdt?%@m2_i+k|4vi3v8Ocusq%|< z!#6K7I}F(4kJG%_&rjSjI@0Mi4B%}!UXbo5-yeF?^AOzWI(+SFN;!I8T86nQ*(XyjOnb( 
z)B~aWvq$d6(`JE_NYMuxh*(SF?6%+hbvXGh5Nw;WozH6h9IXM#cDBIu+>~4tIr(L?=Q_*g)?C;&jor-rZ$LwGTxcH9^;RQiS=6nKZPy$(aDgT!bifhqtn*i zoH_H^;AyMpdYSV8t<=f9TB)KIe1Ao}2NIWk+}>-OHZt;Z_Qh!(3(eA=hI-+-8K0vE z9sN5yG7#4zt^4ACBJnRmn!;Y>G+hTa;=z#8PjL9iqtbc6?6Ugr!&473n8*OJiTW-* z$q(lGf%R!c``%IX#CEd&w{e@{{}epEoO5*}FsS)jUzLtfv#fqj_k8T=F%!;ETjn#9 zpBBUzWe*P;yLsN&Y;KA7l;%*}rfHM5S>wr}Oq%)Bn<5;=#k&qHOS)Xq1y1tNKAgxq z(Y88=^2~A7`+2}5w)_lo^(^+caMq%hWPNsE!xW(&`CJQ;+HvcHd4JI+e(CMPHD+r3 zCs%gfF%RGRJ9p<5{Y*K;W&-*+w&Pou(DVC=D^Ra+#DvZFf#UZ+%B|*xO(s!;=SHRO z$7}~D>mEG&AsN57>yw^$r3by`0^B&Wqsk(nqcIF<%O}tSLn@Y znM^`GCYk9t@+^7E_FW>VWjMh2X=@G=_j$zEe8;+KXLH#_BlXbI0#I68$2{O!{otZ+ zCKC5aZ{+IfWdF$v7f+6dOx8-eqHqpz_-DPW#{*`29my;+9bHNmL^*(U&+3nk$4~50 zc7w|7Uw!9mxfY4dFsH4#XJ=bKd9l9!GMr9^JuqD4^IW`lNom%Sqgew-uk1is#~9O= zM%h2u!rRl*-T6EP?KvXs?j zN&g{}32GJN_K49Gup*zaWAek=!SCsJR*A{Y#V-gk8cFgtMShk}4=nr?<6gPpoco`K z;r^_JVg0*&VA*WQ-GOD()?@zV-!l&d-HG#gc``4|WJjcHU3hLG$mjN+cPy`KWY?d} z)68YDjA7=QSUHHfKng zPKcgzF13E}SwZ|{*r?$wb2@UeI(+)fvjFiUrmW%ZuOOcFS0U&hEEOA5Qhgw1=LwKp^~L@3kuWsBsUTB9isMAKI=xwM`p&^2pld)2FEKr+O94 zXN4vmh7(4WO2txK;ggQf9nW9=v?hvVb=FZOIdg@J%d4ypUL!x!0!NZB*uJ_vb8YzY ztCJ=Vt>(0^PFaoBE61Pjh0|@%-IqNjjh>HNGG*=@E_e{+a~|b~TCpjV2RWG;xAz5p zgU_YwK&1$48feOJJbrea$&}ss_?5E`Wy_{o`<`WFJZ@QR{zW%yNoxtb<;gsNp78kx zNE7V@nB3`}U^i`9ex4~EojmyIQPx;l%UCS-?E1}9R^o(v{VuwFsTQ1f`q8kWZ8|KP z^wv-6uSL(w)cq}`1~$JF4_F7DYJo?;>g*+UdyKssPhIsPrmPcA9o>Ht`TEiCgU(rJ z+a43o&I69+x_3TT)&tYxAcy7CN8`0l5<1sHdlC+lXPnN%%(wmY@JvBGu~zy@I1=Xn z!`@p5#qqW4qJsr@4;q}H2^J&_gkT}K>)`G#0|O+u1qcv4L4vz`a0nqd1RvaC1_BH= z-1&X`*17xa{Z-xb$NA?}ovG^To~h~8tKVMlvz}M_UA+d~eE!QO<`8jqtIerx84Tt+S!JczY)iVHT3Pn~iPou8hs4rG*vVavN zbV3NS^_*x2+v?ts`}R{!?y}>UjP6_puU6DQboD@UASB(`A;U~{!2Vq7{-y}^pI0?RmIn7Tf^5!nohu!d3k-tGOqn^06=ZOA0QU8gn4-N1S zMR}N1?Z%FxvBVkMSVUF0cyT-Q+d~?;CF0f2tP%dx1MEx=qdY~0!R$8HClu3%Gc(_& zqIZiWJ{xCnO1b#)IMH=9`OXBl=YsCZm#;` zB3o6^JJM*%TDtUmVa|~J5ufEI(}#>i`N#0)N8n+`KF^gG5_cVjzghATNse593W`Hg z`e*ba+2Ffj0e-~?8usULJTZoD*G7v<6+1skOA>UgIGMxIC~p#|@5x)X5DBsfcDOz> 
z@lDzp5V0DppsG!*LK>~<5$nq5SOu$CT5|hy^J^(Lu+hFb+d-wUXW(S7?m-^xjp%?o zZFpqu_2=XLU1xHr+ljh@M9VipLH_K~)_1H;;Ib_y1b=!B?7}4VJsi7N6(l!n`O3-Z zL9=Ry2}T`i2dkO<*^7vPwe*9i8Wm^2nqLnXnd{tTBaw5Qnt5C9Y)l>>K)=PCb+dFk z&+o0i-@PMAxx3K@1p$43UP#sF7AE#_S08G}Jp!gH9|0H_j{qbKCg_%33j#$z;91!B z^mu3^2whGjanvJ#wuTF(Hy@G1Yv_mo`0a%P>~$rQv8AOU((q*7wl&P|6ZJE>l%JxX zp<-3w!YwyUQv|uTic?0c=5x11rHiIWkM(agV?xJc37%ghTC>V#mb0D{LJqiVvVd|U z+%P>iccTQuf1|2B%+AZ!9{x-1HaKpa?%DG(x`4zE6?YTw15hIbtH-VeJ><6; zjVj#_Gh*1@hra_F-w8STBsDoC+cfa=ec2Y)t?DwdfPVP0-S;x;PR{<>X=m!2z4dj#0d(|PLLDeVVW>=V<1 z6=2u>arT^WqOO8{l9jeZS`oVaDk?n;cAb$QMT90cr7YpF5gXpA_G=+~n;(Z-n_9>bi_qUzxg0v0mEA!TF+M zy$wY8gftpv8<&eNp0zjS+J@_n(xuI&u`-_%J0fbXsvi>n<}WuO@z79g+wWuda|GoA zv}^q;;wsL~Gh-p^qmvIsiAAjXC&CWTAE5^NDC|)^Tk1Qy@6d3{gsQXw|5PTLRjf?~&Z^WIA%JR}D#&rF=La#)w)IIhWfsM znnDp`1n*v75D>v}h@4JNc;`l}g8V=gcg|n1^!+4%aWD5JGKX-BUd^^ZT-cj$531Mo^hf@~ zN8%F0-Ye5nnbF=K6IL4Q_e!v|+8<4w%A|uS^ZRPLPLBR;8Lo=|e3xG4d3f5*>AC>a^&?+d7-oX$;W?0nx)Yey?j-RrKQS^`rfwt!p zc1s`!)=5JIj*O&hHt2dj#*z<8)2Gb`bp+&`gjqOqc?josnD*ken*Xrk%plCzrEz_4 z!{ysNu{kB_7*HJ|S!MHY5FRl}dDG*QYtUpG?6-(XP1U(B;A*;>(5 zxZal25QvxJAMCtYRbC&+H+73j=$u`^f&;J05jHSI!z5LB(f|k(p|APniHfSeadSg% z*uJl1Wbfq=PZH{SiO6Zc{CeVJ5xCf<=$E?Ys*Nft#hK1Qf;c}OUO^#@S>F0*tgTOa zUO=FqA+R?fK*%bv0h5WYTk3F)AMO^5G-jHW7eEjV?o6x!V@1j^lk z)yW>{A|gP8pUJGz#?<*EtT38HH++=CeXPUPL+AWmn$tTPj&ga!wF9?$zo5U(n}$^q z-dns{+o|1IT`hs$7eIY6`b*&_tZg;XKqof@zgQT=j`+{s24rXzzuzb$5zGUBgD_lH zq&zfOaTri!9bqS^#vhuJcuBme9WEar7SRLUYMnUYfvyuE`k{)6_gGM-=!|3|gx|pJ zKmukaKPZ3;>FvvM*~#cE`I(9=#xih zU*RbqB+;ggNbB7A%;~JSdn(=X&XIZAez9{F_8 z@7SS$si}1OvpOOhuiSuN#hUOChUAv;C~@!_&i1bCTs>JfYnl9+=N0AX8t6HeVOMcJ z0|UxQ}K3(cRqlx-GE8sv<--boKLsG&{^^Zo2rFTJ-TZ z4(@CEp+nkFl-B0@VR@h{a@1K@4*4zDCw$O+fWPquBwG0Bwb%QO&IO;i>_WcgC@%`F zpXuMynd_+xCezu)GXSD1d3GsVT4kJs&)|-Y(+_tpF5~kK5w5uEh9}+a%NIb|IP%$4 zQ2ryJMg5+1ZZmvSIXvQ=vkZ#kx@n|@uVA*Wxws5ls!GRjE(ic}8h=whJ*nARc zmi2obM9Wrb$QYzj)zv(5jqLEWlr}T>%PK}ukX16SGL=HaGekkRk6 z(<$c^qh}ki*?qaiX}ve6uMX`o6Fz11@C;3z;`IKjL!gFMo 
zHrScId&S7CqvYJzYU593i%{{G3(*ap9>t=JanYoSB!t)<5-80LY5OM(cPn`Xe@MUg zxKliA=gnhkpXzB4Heeep9`7IIP3<5Y&n@<7(jmwkx1iy-t`Cf|T^@ko(dIy3C3PoH1nk)$-39H6+8r*y@DkvE)n+y~-w{u*iWh{ zns&8r8~w|@e!!wwfoS{@kUP;gf#9AsV@3$w`5-6r8}edY1Zy9Pq^vfo9OqIaw0Xbl z10YRO+aANGPKN@E85@WxAas*>38OCyhpKOEBXn9A+QSvi9+^e}dT-(MH)<1f%nFO! zqCUaeIE(G(fQe>Qph7n_$q#8&Dr{pSCCRMHRFo#H&VH6++($-?v=52hvP^dloCMiV;0xiJdmE>7N$^Eh#ZI)qDcg z&e~cYNqPPo$+~~G(pkOyIJ&wY0aq?CqGH%HB=z_PAyzkIcmBg?urwc~vwZCVjqD8e zE+)9Q+j%eRuwqa+xYB7xt0TozFV=`Zf#3E{bZSC9>M)2?!NopH?c5E-aol+vq73Op zMFCnMy#h;gNZcgYx-5({_Fin0&`oR3S%imBQazKcZR+G(VHIS>f>Zw?{IA;sS9tdv%i zhDW7a7&Em=2|IodgK=;R^v8Gu$NTr7^liLRQn3jQu2G?(lrT-WA#nu#_z8+6=p1r&rp^Fj3BXLodv zUSEjg9Wc&}dAgG>XwxaKtg%bQFxSOAi=)H1Se9o)F%x6x5SNw|>ItYW*@WCv#%-V? z5_S0}VsIX)Sg#A+yPip(I6{`Q0i(zm`z1)ssq2}wZioLuW;41Sy2W)#66-EY_t8Ub z4>TQ!4sjz488bj+K#fhUOlqTz2g{aC)vf#|!x$hBsl7-w1x5nR&psfD z9x??&2oJi^Cab!i9%@SIXE7(l`$dfKu_da1Ip5K_iD(Hgm!(q>D$6!^e5Iemws*?S zbJv_utjLTnea)~PD^n|)kJR(f;+dlr3y84rIB?0J;V7;TKBqe z{dn8Po|#UxqTqBwp8l&7#}n2@64IuPl~3`*G+MvEGJ0HLpG>|$1;*MQB7nyxGr+&z zmsgx&D8Ts8Qy&$2Pg!9vc+WE3gwN+{zt@qf+1=jWW<|?RY3l$UnI#L+F{cBHy1zJT zA3i3AMGXMSz!+QjXIO@7{`nihpCup>m$cQVW-c5!$}b9b-%GC!XJU}&DOZ=T$VFW1 zP-ML;SmN2zGvagsLUaC}8{M{v_>Io>L>yI|dgjCQdcq`HPB>9>0Tj^&eR#5_GLD+L z<(w+$?1#*DogBz^EHzj2N0+cG@DS@r92?Y41R*$4@kRvdoXAcLIR0=TacHMD?0}z_ zqh-G2j;p)nq9~o)!j{IX>FOnM&Pzr?Szx&-&vp-rio)a|i&0mf5eYt<6J%iF<|xxy zcA>S2qC;v4N05@5;dUam4zD_UbGR~Fiqp^!EB7AVbYhy~vGHFyF5V}0h5J(no3JKi zZJmK$Th-5*uB@(GAH|7$LX;#+^8`@bp|<}{3xTn*{=o&fVT=gsHLCg^wW;GUf2Z4# z{v>vZl@3sDJMf!25|5gh9E1kx1*Q)hqE7Qbd|x(sXtYJV*oeb*J8oK!+Irj4+1eiK z5&7cnH1EO_AInx5cUhBo+a3teu4l342c$l!KMsNmmVh;i+?sU7*foBcEpyb z96%4X$w~h~4@Y@<<&ZcYcYODjh*4l-(|UxF^U}^qZ%3y}&~mmh&*GO;2ey_(`RH`1 zmY*2(DMYG){_oGQRPsKXZ#>zY`)vJJZz1dOtbH+uCcc7FJEf?oSUAhig)hk1RO%YY z1#vz5h@?iKObSfxhn&ACr<{L{A$<6wk3bA{%dKlk`Mz7>mZ)bJg1FEYwM{zOUn#=l)&;|Y5FANSX-Olcv};J}ASBpGt18x6J>(dh+sVKRa3 zeY90cpi_IIVjL_o4djz*iueR5^^MracC5RT3mIaMNU{)@#75Z?mlwZnD2w9yw2ECCmV${BECVUW{Ai_KBN`SSqYhun7_4qjaBJ&Wr- 
zV_p|=S6|>)QLqjqQ98n>(m%s+MhaL|5iaJn~QgwYcEBnCI?Kn z!9?OOiNsgEQ}GYEzKTn|vYT%*)jHE`ndf)mTjTRxQVH{*=TKfH1^Tckb>1&4A)h|P zJ(NJ8*(k9=oNMSD7}*eMut$IjT(sw#LhT}`b7pyh-f>qxA&b-WkUkx)AzV!&Yy6=O z0!?2>e+0OxIu7fin5?m>lhMp3iCo$8s;X7M_P;>rb33{+T0?w}6-NbOzQs2h1?5Ko zq3i**ljQf*shbiv7N;G`9qxC0VDOk;i=IPM`FY)q0}OjZF_af*%e34ul!uSl*knqO z_+ADb8*?K&6f+|aX8Gwf?65;k`rFz%Hz&q;NAtYZHJ;}brGjE0`Nk>K*iQiw7Bw|T zuAc49p1K;6aOY`5B+jf>o6F<#eqzh;>;ut4`XD$+nvn6Oj6Oc>A4>|Eurs5U^kuE- zXD^>0;y*w6G|qn}vKoozc~=Yg4XCbxD*jN%3Hg&KuAeL)E4TeUD&~YG`OqtC+ zAVoMS{P>sW>2ONm0;5e%KlvN)!l1VA+08$t+LwMJmBGI&i431p7HTdlHBNV*>Q7(a z|L%_Es>yx*wIG%YwczQ0+b~9?Denm$0TB*&CiklF*NxIpHQC`{kj#adkQ z?1;^c2R=~fi<0B26Jx98jsFCRi*X{Fy)EmsYeXcniz&}zH9_IB@)Y{YzbS`8rt&E= zppYL-PyNUDnwr&d_cH=Jl)XsPd1dk6$#L0mN_vF$r<&%1QArzE7EQCA!FOUP!bl|C z#c4J>JTO6eV8l_!pIf5to#mS7#*%{Ew2e{EVEPoFDV56gLu#u8TUn(8U9&xi-Ype3q64JR%sta109Rxv+s*D(UHDGbK$%rt z@dZtWO>{HNk3MYa+Z@Wiq_(Gq5K5zRe?d04-$>2c3HmTCca4axOXI@Cn zmDk1~Ee|O6J5k57!z7A!Mlo44aO{MaUqoQnS#l%oH;Gu+KbIui98}UnNn!axIa5<4DFBfxmN7wMY zLqJvF!Kbb1-MDADn@Yv~Eo0%`JFquAzaO|n2c7uXDUOPxFV2LpAGR#H+_y)>&#V|! zHN`K4Sj%(BOLkU$u^2ku{#(Tmv`IVwBD>dtN9IFs?(vzTz~aTlmDEOqRlg5>4$y3x z9aukOVA@YMW^0EWgcHe;4uvd9lfu(`?o>l2GLXbmcC$9qBBn0YHA}DOv!0F@jiH;W z1-j^^emr8ePP*5XV^uZX%?o2=a5K9VBoX|{w_*g#;Jsa`za^o! 
zQPcRv!Hlr4q;6XZ@YUIQ-{-Lxhk@tMhhwUnO6H`bg~k6e)vNl0bf~|}@ixEV9BumJ zZgcS-wQ8S*>$ZO1dXwGpo}brdO-wiMj5MQDR{j<6W8->6H_>$V`{1{x>Onhnxw3`g zhJ2M%CqI}_BSS6Z>S-m|3YH(;O?5A`a4GNvAuy~l28-JGo(xSQb`(6QUt8f^-fLD_ zE`66yKI-5=KtM*aJ<^c6%r6e+bWjNFVYdkixK8=IP@5;J+3qnFl5u_xiSjz5gOD6t z2Uez&f)-VR@zPTkIrXvxaWFq)Go#CF`!P0cY{mVGl7<7<|Jg+Hl~YJ`?e!8}^?A3D-;% z<0Bj`H3-f#^xn6kc4HkG_P$W{@Z43nX@1D1{L<|hiqLep(?WUl??L5=ntr1rfBlX1 z@DnNE#&X=4WCxGxoAcd6x+cuO$mZ(*>tf*f4%Y^4&g~IJ39|6wm5fB0?!92$FnZaA%vZ|wVkdZ3iP_~uL zc4-PrO2KKmQ}h-wFZ#h%I9s!?1b$Fk(WH;PR;M?oBO zwUdQD2B8gL?-}-CH`%oG3q-Vki%u=9CR!x>{#L#>p|6Xm0qB5mMpzZ>Ms2e&oG2DqD2D5i&qbbk8?i z6~gKWu|fnuPaOB&8YC11I5@g(X4|J23@o&0jlR4O$LGrM*pM>}7NN%G3p(hIUCVGq zYO-%RigUA{FSvl#t*dq?ih8VoiAFsr*+)rK_f)JnkjeVhII{L{P6Ig%|Jqj!Iv%XHoI8W-`{lULMUTO{R7ph?T?<+%&6HEHeEK&IGxowHpxeJHfHSERd@S3 zFe%)#E!-=M^>;jhpCce@aBq!K{*ot|JovBXY8;l1`VuTWo!w2@b@pjnF@aD~j0 z<(q}4>G_yuic2k{p0V9(G8Zq^M6JU*N?7xppg>II1o^!byr)kf;%q}bTAC8!w$by1 zzp$hvs!^$D^BbIX^eVtrgu`Gv5;tY_?N1J#rfPgQVzP)2o5RN}Wu^B{bHc3mL@86JS)9Zf5q2WXM~`62z)Rl2$h)k6LJky*FV z)iWz6Pf)5^$5oG8#_GxgZj?b|>#TKS;^~q~YoVbTR@(dMgxpFU`iNJrUK2eZ1ZUnc zfe9d3A&X{0T_D0E-z-QtSik~0rcSc97{CPktmbQKW}0cdm+Ob`Z-v24(vl{N778?6 z$c-s{s=X7}9rJLTJNx`+Q9uzYNueo&o5wYQ+A*Q>2YR(cj(X5nz|?T*3`t`rBVfW?y;MQ6VVb<3o7}iD z7j&Y1GPYV8Ic3`8zWKF!QP$o}jQ+{ICoY%yl!z>qT%#cWbga&FN&9Dp1or4p{>0+y zVnvVH#0YDe%{~IQLDYxc`d!@*>ikS+^`QCTF6-Or?ZiD4y{EXuSYTYl&|&~Alb?hmlE~P;|(4FfU0@~@N@ID7Vxt1_C}Sp z_u}_=an8|OPFxlx2^N@y2>vyxkzeH-pfhM;8+0|E9mSp`bHOK-4=bwj>F$T3+aV^kjxQat9;^apaSX)KuprSW-5?~j6V^YcL#Lni*vr?OWTq#L9 ze>V(*1S|}PE8$vLVo_B^FaB;n7p2;X(BPeKX1Bxx`WLCb=3uM% z>f2V<&X3`JdCSdBnwY5N0*J^|&>Q!aW(cCbI^#r6T*EW1-o0>MiYCa4&4D!~=fmO5 zhS;)V1h|pEtqna^K8#|U=`+#R^wjBX?oMuy$uPv5eQ=r*K}ciUHb_LV1}@;){c7nn zPgGGxSbqtZv`a`V|HJhDd%Q1wx%hLn_|bIbb87vgIU$nSx!*>GHj|jdJd3}k3ey|s zoP|}pFANKoY@dCEC`9dOC?PQP-q;jBfym_r(l(zg%O-DIdB*0_QnV*iKh0cOJZ$F| zQ9G27HJ=8gT9?Imm~QV>C*$`2c;UpdzUr2cHL%>SU5Bcf;7g+rqr4@*F)#Y^(Uf7` z2h1tdc7yvf{D6W_2aV}m zxqf3h7P4}+h%OlMf73U)7&xR&sqpMPL6W-If|Ryljg@mEAuE{iYgMwcCvcHYU03T* 
znTv_KBetRaO8zhS%t~uX=22jCU4fw^XYz{=E57K~mB!sZ8us2ipAx`UuJYRwySY3m zKlp#SmgS2N5ybXg&TRAm7E&bi+}$P(d|vp7B^08c_E9ppZBfjy*1!)9FU4D(?ZMli z)b_diWSJgzx6J;_($UL6Hq1d)?H|Dvxb_~b+PzJ=$us6FQdS{(xlt-fLo3m5jt~!X zs!c3?KzH90g@wGWQ*C3kN?^2<+j)iYjgEFg+!6e%E?;AZ*lbf3veUz+H4}e!n9Pjp zt?oYVgiCbekpMR3(y&cG@gb#Lp2tE$?xDYeWJTEob&L+rXTb`=M~U}|c6OUNVIML# zBXRlXeliC>Ee-X+(VQ_KGD&C?+I9Gq{Q_i#jK_M4PvP4p2Z=&HP@3Tfmcq!Lp3bKp z7C!Nt@A+DXvl%XlepkLENiRr6{A2cP@Q^B%+F$BZc{)ty_%L>p`ZC=eojt$cTXsW* z3Fq1*(Io0;>?s?Q;@d|MC?ZC@0~cbh&xLDP>8R0A_#Zf z*E)kt2G`UxZIj=9N5Xb~Tw9Zs3cfr2jyxHki(BZ<*kQqyuOt({%(vd{3A)>3a%22m zf6twsiH)9e%$s{VCI!l8V6f@cs(yQ=Rb_)dLG|1;hQ9l|pQ2-3H2RG!R(9I^^TH`} zLGgDsyq0!+ke@2XBxZvyIf#2|?M+g3`6qdE$!R`@~k^%!krJ--}1ov^>3gL8a&Ae zMI&$C6iJc`?E!5Nhk&acD&1sK3Qx8`3H0mtWgqKoG%VzBPlvU7;^rU+N>z>R_ZV>{*05U`HLi{E9k&R`EQAyO>Vl?DbNH$!wO} zP0@78F8vYHWK`)wGNTbnYME+MbUG!+cIZ@Q|^%vy?RnMy=%e#Z^JswI6kK6{N($_!^6wPnCUXCu*j?xodrPUr8V0xBqYJ6Wy z01K1aR6dq>E*fEKU>Tt68~>3ln_By1K`JRDQb~$YLAUTS z^iR>995ViIjns-|D6h*zR@HLAF%0mTd7Ae2PGRT7Za?|MMJZQ48KR9Xm66cp_)maN zBTN4KE~|l7;|(i-PemxfSJ&cCXxk&i*O|lfZzYtjH%ES0&eT^~e(|-om9`LSz}id; z%{%1SxZnBHb!uWiuZoT*DKp-#EMuWqqBrKe%4UA4Cx&Z~SiX^{nWinSPQ(GG?p;5mnRU0biG<3DXW2iD5yqB>I7W+4!;WqW_J zttw-IU8=l>N#X15_|Y*SF?5hfgK#WG3=RLHSm>X7{pl6<>R%H1PmQasoiINni=oAh z6Q*ldpe|=Q^8u?IbyD>BKV7~a`LA78S;OS@4Rtg~Kg2P3_#}pgL*DQojSGdkQ+xFN zBWK?K-!99pH$>bOY#)`$DUxZN(SB}uIugpN_^C8BA`H0w=KG{5+p=4fHzqHiHM7L@ z2gl8z;?vhlgQs<@vLhN(Qi&912O)jN5@`w_;iF+{;n{A~Yl<^~8G|V9rl5-_f$`-h zZ`zGw3V`bIWeI zlH$#3Jyh2i=OsO&i|Ev#5f(JE?Ri(MFdU&*N@O3!?;!EQAe{Px$#?7=1CyPWmI@dP zWVJZ)4q1BEyjY}D(|XO;SYE|X|Zhz0n24K{}Rlh$b2|@bN#s>#+IP-r$wB3s_f>tw~$?~?@_h*wwK#c zXV%vBIg!y84u%jW!@X*OtDoS~O^3?ot1~pyz_8t;Zym&R^PZ3dA~a;H#XJLIwHcJNcQY({^hP36sVoVd8d8zuJ`1~_PM|> zLC&-Uws+6ZkY=GYk2fn-2A~Pki&Cf4r`#@WM^{^q&kQdEh8c6|{>JsNI5V~ExTZRO zOGTq@16lnR;Osv)Z`u=iqOa(0FsC?P20?!)5j4=U_TUN?!|AB<7`=LHvA3W`%-&|} z_K5ZGSWDUYt#T8Jt&E~J8h{SK%E!yw&BegQS-`>D#zn)^&E4z8KL!wUfd+uLd} z|NA}!3B7oIf+TOx!XHVl7|ETc6ygi!!~L*oo~=55TTJ7KWkz<06ABUUy7=Y`MvEoZ 
z+0_f?JCG>lE$5w5Rc9#ZFR|5m3zsJ}M}S z>l5|_Wft;gEGcQH7th*yls`PlkFF|Dyi3Zm!-s7M9L7|ESNeG*h>1L5j#T&J!T> zdJRn!SonF_l55+_V0zgC(HC>v<&mh5yX$9vI1ZZ0%9LD03O>O+!LOu3g27&QMTh&$ zyh?>aDJGg^xVhPH4oww8T;@SkY~krg;R_0J@$^`qmgn}=#a8Qu+aqv>oeY-ZbzGKt zw`k7aaS`c%Cw+RBHuQXLvv_szJFg9YcNVzjqpOv|0dI4`E}t~r^F_cxgHO%qJgu?t z{5CouPS0iN`5Ev*VKE8xX>TRzEzZ&EM6QY!TNc_PJz?=o%a39 z`;3%7Ha~e>WgGWoo^Ge`7&57sUj_t&S(iH{4zOL>*XLQ{(c1dyg8od$Lyxq5TfU@z zWurCxx|wKZVW9KMo#?)F)DZrnZDLC<<#K88Yk0t|X7}$`zh>}RfJu%{j_QXX5^eAU zCK{V}q704Mc$92y3z`T2H37>(F*UQA{S(>yn6pb}#$RjiEON!4I0zOAep?vnO*Xer ze9^4&okR~?4RTZPS-k+U{vIBUKUxK*VSDjfb@t*62Luatdh*J|jH#sLFNX{WQp)Gg z*P=%hZti_j1?5dH5Qj+-Sk`LGaH1(Bw|a5?5$oxz^J9*g#q|yWx`tsEEU-!rM(Y1r z<_~QhCfXor{hrw^8BHqzO)#>3quBBbyLyr|L#b7j)!jXNO{eti!Ez`o15j z!2_xQ_|AE8*7ef94yBzIa*JVM1F5*8x znr^Y!24MztdHH0^5es9C0hC4Xp0m$d`O}}TiwjBhboz_FMruh(12bc+<8B~HZTe?# zPmaqgIcDks6()rzA-dA@plaz1`L_;q4mlCJ0pSK~u}Mq_(O1N1A_L)U&L(NtZ+onH z%Q3uPs+GEw&9qM{7ri{rbTY1ju9amF@zyVTx0}Oa&FSo`@3{h1z z`u*hXZukoJ0r7Te%d2#7Nl}=HJhPDli~E%tjBuoJt!maflgT3hGlHK&^RWPH;^$^m z{HJi^G)3CRMkTcGb>H^u;G`c)MV?N7U(>i2WE!i>D{*#6|FL7vf=xWz>b=lI!en$? zfgoLzNfm4BlhhO{q3(ox#Mg3=Ds|EwKWZl3?b+5bDk{-Ycy^+@Se+Y%&+JX5+ON=QfsQLn8lAnsXyisdTptzL{h*e^p)ug##yA07|8;+5dAT+_$_N&$GJW}C z%m7`lY3a{+%IoTy8I^R*&w69+O|{n8IL^8cuZ+3$%jf%Z-5C6ZNyu0hHJ-Ov8^6`} zWSpgXN^4tG`x)#K%$oeBXG5(#cLgxfaLnljC{E`JAb!94)>^Shk9@l8MgqNYQ$r4e z3WHjx7ubl?jk``Y@Qc&N3I??=*=o&SG$I^Teqj8?cHHhdPw7FB1%mA@XGgUr7U zvG%G+jJ}nVx~SUbhjHMnKF00BxAv$6-0dg(+b=?seh8#=7e;j*Jj(079i+HwGYi>E zke}4ei;)`tJ=19r7gyiP+i+}SnaJ{z?G|{foI5(@34{ESqU@=eUpOAgF0t&Uk{6%A zUMr2w(^$a$Lr;C@wuwWN^ZH?K4v2ap{~hY}*{8HnCH?__P#Zbm`G29_|1az2_+Mn* zl}WOA{mdltH7tzGNKEQc(KHT|h)znP6Mbq_`olSi0B9g9>xA6npKr~e`?k&IndyQTD@XZSwrL98@003%@od2Lkem0ic z|0%d~Y^ds1C`i(UDt7Q}vwKif^p9rhyVdfN*C)foU!lTEp`^b3%{?L5N*}&iQ|cuw zIgUnH4{d#+?b#c454e<6p_Vi}e7nPg4Pv%?cA+$=9Ma<%^3eIvmm48!!5CHcbLVHk z=cP6y2-4e-k0;<-?kNWACA#hCWxa{Gu~lsG(V;z0g9fPd%`SAg^d&ywJ@1)m(;{1P zr{frJRyv}! 
z{o&PH^+@8G!R=2**iQ~i(lD?>1>|XD>$A_2-%+xOf(6S>6&$OajkZnQ4~7opoP>zhYxARNhJf<#P+r1ljY{Uvo_U;r<&Ui>mRYJ9b?&Z0>du8YGQ0U-HL$ zL3||0cU8HEChUNNXCJC~SzFZ+7;i1ft8n;L2*3V&iB;WHTd}_6qsIOvW~O;KHUU5k zzaC{%H=vW`MMcBDdMuW2hq3Jg=01Sz@_PL`zd450oKPoSa^ep0l1S64_V9HEqV7=- z*H->2DK@?#EB&jsIDWk^u*dWR!4KwA8G9WP?}3>Qa(2x>CTZ(@cH;uhF`wti&Wd=W9QKP>>-bdU_3DV!O%Vsf>taV0 zd%VC(-{s}7yD3@|o8>{*wk7EmBIJX}gCM;LL*m7WXZM;*!*Z{aFU>vT<_vQ8*Rvsb zX~jky495$x3Th3AOkzqJei*;Jw19tc*^Nr-2yTubKJ&Y=4fzPXh&mdVdOVv!_FJ|r zP9#-y%mw;@yRPUtpMC~5bTCNmEN7o{N;~Vtf&KohGR3L5`d(jG$m@0IRFCMM#?|DU zQmJP<2fg*{{M{fT)$bbQZ*oFh0$Topd>W1LzfHDFCYi{-JO~m9O6%aexWSLVuuWjP zv&j&+zHv=vAcCDzKnxxVj-pRZiOk{TbCl1MCOh{IltnM*nr}1TypF!9;W%S`zb4Xs z?E}|d#|*1O5d0MGk)*%4*+d-8?3Tk9GImaW$buFb^;8hY0o{>)CT>(%2mbD+CY=FB zO!`2(iN)KaBWcOtJ@_R*h!4UoZA_iNamFZicX>cJv&f6{!8{yj2&87Jgci55-e0$Z z1fV@(sjm``3he;eBS|G=reKCYvh8JS;H9&Mdr-$X*!baa;rRM+i)kfk?heSO*7LcB zkueGJ@BqAcQ#9B7$tQP}$qlj-S6=1*%2%QG)C~ zhat((?6spDW)k}wc&Q2Q3bPXu zUPLmRni{d^;wyQq3~0~`hp{jaG|8jm61>IZP9Ac3OU6q;s=`vI_C&hwC5gnSo&>CM zys3(=IL`1KCygJIjw`R~c7jVWh3J;r65tk zP&|;iaa1!s+=9PZfWg0mu5Wv>i@niiJHIpJ+{DX z(84nX9c5*E?Da)UCnzk6BhF=fBcrr}r%MbC@NLv`>>OsN@iI8+G_t#Q)3HTS{XLa_ z$LqkiyjVGdO)-}zm-}I(FU^}2lT0jWHH<1xuJ+~7{X9Oa%@l5p8}%0wr+H1T&$u1dqbvnuz#hA(a)>raxNL*qI$CboCd%F?WsI_q_ z25qQM)T!D`GIfHAzH_^LKIWE%;D-}jmWv}c(WSas2YI8dSMr_jI|Q`jQ-@sX1Y8&HGz5VL3uFS0|pyXi5>c z(xGX;Ke=K!j{ciz**kLVJuE+T_ zZm4)t14Pa;h&}%zeMBt$0|C(jZ#m$4F!!l-2^IJ+w{i~--ixwXBP<;Km-*z4pXI2q|5h)}@ ze&#sK&q)Dr*PytSBAp zYQ9ULD@6C3Au#0=PNSz2iy&Uh8*e>6FzE8=`LyM80l^h1d$FwNeK}U5$WzuK{}-y1VK7f5Wj=^Jit7@@AthwzqQ`8)|r_# zGuJiy-g}>W$GPr%7`n_J(qT?$bB&QPtO!V;1+>oaARuvV%Bys0lOCZQstC)iypuQ; zw+y0tJEo87>oY5h83wtfyG}z+JKz9Tvn<( zmSkH=nu#<{?3%(l+MW_LRf~{ME^GbNQPX7RtGMbY7tw9Cx@=z8Gqvl{NyM4cL%bB_ zdUx|E%eEVlb2Ec5`rbpS1I!>DEyT$1-R^-q^dKlo%Q3Xrd?`{lrnp5xV!wXdzkLs( z>G9Ig`RUE(U})9%q1U~Qtn2BzXFBa12%-u&G9_Cc*aEL=t|z4!e^nEwAKLB)g@;6fZ1?&6-$dQhb`on+!_4*IuQC zG9vnnZ^@BEZP`sZD)wz%D_Q8|4Q|>nXo-Q{OA#J*!-_!YJ29gKgp@QJcU=*LlJ3wz 
zgqBrey1a=A+yURsy~Wq`Smxj_2Au^1wqfE;;zwvIAq4`NoMg3vF-*>=cCK$+kWB79 z!vu#%4-JC4a7!3Dv~lT_hf{_Kp6Lq}Gto^z0{QsW__0g6spF9%hb*B12`$Kvm!#%v zb)f-+ZSHjr{MgU{4umf@yQJPv$YhtGg_UlSMV&89sWl33yFTKJ=xFDAJ7}Yt)^V`d zqQ|dYekg}Yc|!0P!8LJME$rrTF_|- zCi-AoY0cV)42G+MERSSrPf>P;)OfA%<_P)ImpVA(7?_T2Qlx<|ZiFoq_*KH`4=sT5 z(oUw+@v(-X&x`B9gWlW{e#Yt4E~$8ss^q47(~j16kGo{R0?2N2c=kKh2rfc?DPrQ9 zT6&<3bco7B^6(7Mb81|?wi>WYwQ9=LqD4msMmX5m%kH(-$5K92R2^f-DDx^!!_Gyz zGRx3*$mlTJ^(%&K1PEMC2}N;G zM7fY@n;lvcL-ma4=HsYU@@_7dK+b^Jn`^u6m=X+p?<>V|7o$Hu0h&l#QBx9Vw9Gn zW2rXQRv7vxkq8|x)gA|TZ8@ZDM-%J2)R`F;Q+=_0WZW!NQ(nL|u2`lMO=vzg8ABmm ztH%GG?}0?ck-92>SoNmDx`OJG*ej%<43`>2+?fI#_~|7J9=XZtuST2ws!N40lrqg&qN!rsV^k~da3vz?a zS{>l;JoJn29CNN8q&k!*%u|X>8+S`-@+{48rLx^vcj3+%_eFccOMnyhu>`(~@c4PX zvK5+Srgk(TC1ZcuXl8~9S5iVLB?0=Hf2ERChKvv+Q(sl%W*}4KB80h^Z>gg6bIf28 z_;#<{B;a@D@V254X0jed2NPkA(6}41m^k8uT7|ZY?q*G#cP$rs?HYD_z5_jBrS1%h z@%pa=wJ)$!GuVjHiJwpONHDiBC}K1?0pt}xYI<}puCvq*=v5#$D!5^-veL2d5c*9#_QJ8^Q-F(Dx!K0^rtA@lGM z(R3fBEOpb-#Zr!_aLSBp6qS4`H3(6oi}PCWP)y?GBlIgs4O~1O0X>zUQnZ zXh@}!9pkycw>;tG%?L!*8#*}Y`8pUr(t zUm$q(C8JN(FM&<+#p(FRbWyb2e(}~R3m*d)jg63m#=?p1cf(^R*J%PU8TF@CNC;=% zalcTYeJQ}g*{J{#T|=7X!;c#Y#fQwt2;F%1zyKZMtTouD%9f`fLPntJ*f@jTXBAzk zj?+iOYX9CGCv^Of(ATjUgCN$0qC=cbR-r{)P!{J=b62xXUv#;F0q)U#M&g?xpOn@| z#>0>tq7XlgQ)qu)p|=y2U0Q)=!?cq*RNuOF9XZKH%87ggP4(b@DJbp{*Ef6WfFoUz6nMvXqXTpXo@5U#GMy!46 zLki})Y}gVassuy=4`@St8M2lJycHccr4XgmgJwc}nX=VX`Mn@;+DcikUEH=$^kY5~ z#qkeL$eH^gcSiF^4y{wb*Qi({&cxJ>gQv*-WVukMch{Nj_caK=h|DlX&AO=?<%{){ ztGg!?V!GOqirO`@P~?M`xs3{u*^8Z{q#)BBb>4ivF6I@Pws!Nnqs&_7QJE}nZ+!%E zz%db=%fQBM_$f-bl82f{a^}1gWSlCu8BxNEkhm!ezErQv~|pXW;sTIF-= zp7x``O%oJ$Cg6|x_y?&y=4j`-`NkI z48Z4?NmNAuV@0X>P#T&XuscM0hD~P)!=DKZ>*V3aKI?HxJ=5xu^3I(^YH=~@R>1I| zw3avfYCA;Q+?$coXIvf-2(Ka9<^vzBms+z}OdcutojpM`r*$)yV-S&F1OK()11b$J zUSs0`xCKaPHhYavHuHk9Wcurm*b$$QqhoMug%ejB*3h>xZ41@k<$F)WKS?jnP#${f zDjsHT->TI9D6goBY^1liA+KG9oJJm%I$b=-%+6D~7qsRZM~sU{6D0rD-7pDl{ib9I ziT%g@F`-E4Ffi#D=hbVOI^k2a>&r+Mdk5rai{35MFSA>_`%0FNzW@Fz=!1n|vcfB6 
z#z)f6Kvur`T?Z-l0s0RZ;P}}g2ZA%UPlH=B;`Fqw%AO$1} zP&icg<;|!!F`ejH(VAY*RyfhHN0l*AR}PEF@TfHoia^S`hMy4_x!OOw1WS5p4Vdp4 zS=hST0k`7Dsf*}1S!fEZH{VXaYdtJGWFd82yaoaX1ZRYCJXkc^yw9y8 zf_3jy3hhOM)?6P>Dyj#ETTZx@haP^Rn)7CsdC)x%g&cWZ>4=_rO(fF>qk8y+vMqZ*Zx-y=2Bd}h zv+pHLI)pe4l}H2)I3t7|=h*+OKEa)Mvy0SQZo5$1su;2^c=a-pTp1VqLT5-y zYjr_@UJqC$n19xl!W9xhE7L#987t6Qn=WD)0FQcbja+y3y96p_opKucOJ%XBz7QAm zKDkAV>`r*{JAPg%j2xoKD0{3p-oIvE4WzJU$f)Vrcpsxd{^*3X zUmL@m+oWiEAwdYYFmXa3dHU^)^f9$=6SDiVMj4M+0;|D-o%KN~bUpvLEbpdM=bF8~LbJ#%u^}qfauy(@LAb4JjV4 z^m`-4!;6*uOx@Dy|)gFh3hY$3ZmkG)sa!MI?!BU7+_p+ab zTdGfNLW#T^8W!DJ;>yTA+o-i57>YF`vz#SJLFlzk>P-)L8I1^M=nZbJt>#zEo&mb& zwIsLQrx+~3^~)u^X-j8#axu$67~i}nKhH(j+uN^^b~S7Z!!bkK>nmT)K3>p?9ZOx& zL?{)(k1C?D^v@{W-fPkb<@@d=?%z7LZR)=(NHBO1{RZKs>-CJX?p0_0o(`AT0}8)7 zS*Xdyd)6|@OS+KGll8p${dV^=AucXk$NAl%+n&Hu&iDl-_rslnQQck!BRz$8S z{tuoyp?;@YN0;`Fj{w!UaFNjX?^=Yc=Vgn#Fh3c!8r41&kYgoMoB1+#Qdr*U=Qxqn z;J?b;_86nHxhqPhuiw(P0sWWV%-pig5`WuQp812LKV1fM7^ zxL5MkwAZIO6Zfi&yV_=yczue?)4=;V{*Q)r7;to~-Aa%Vd90YVw}yB^TGiItdR*UW zzIsI$SI})-$^Jsb!2V3bku;}0R&!r(el)!~wdyfG7*H_<-nBulY{gdCgEW=k;A zc^xUlY{|24e8OY(`_mxa(Np31qh%!Nl;P*vE8cKc1*FEMs{Guv-Zw&pP-)GbRE!IS z3AUSRy9D~Q!I`|Y-gCa0kz)$RjkF0t<_l6&?o!~DuGi{gmDNR91t&!^kq^z}jXOEJ z8Oh$VKCZ_?E0!lKVOUKIeIH`nU-gCyg-kKHXTUGcbc+a+rCgp@H2D&J;)M9T)ofv?WnK8D+O&4!PkMmiq$ zuEydv9{F#4d-O+tq!&+1(F%O>?^IPkz0!YGRp$=<_W`J?tXkOsRh3uzwvmhBiz7C< z&|!IkWgaPTJ)yL$!u@V7ImVC{yD)P0lsi7AINhNfJibV^?B zB;>H7K-krXJI$L@uH_l76GQ#Ji@cSRNiTkN91FHe!erXe7hozsg7|#D>Y(k4r3~yn>kvv8?3*I?m zw{-nRE)_NA>lHW=bf5w5^%|0Z1by9(8C{u0INp4k$`;Pc;t1tX&f`X=>Ti{BN2tt= zLk~$HLbo#$Luhjl%xgFjpGIe?PVi`!Zz0=bTd6xh@(JInE6&Vct}gJZW=_ht$=_OD zugy)a7`8R-7RJ<8{7OUU@%mtz{^oZC|8Q#`3RAv4EWP)x)r=(Xy6*R%-Fz0!P+k!G z$kh7_3g_EaVZiu*-Z!offKomKKA8f}e~#AwQH|I{3Sa;V%xi^yLov*21B+abQB_vp zJNUvf^FHjckj{}@7kwP-`O=KUjjGO-XjGb5ZZD-7+ni7^b0X(?Z}=)*u$ii&7sxS& zmXZ0|b8U~2G4{s_JST1B41#Vu<(B2mTKYryoBryC?00GfMn`VzmG+v-lAn^1*o%Ik zCH2Gmq4y3f@_ymFF1n&uauq{hYt;O|1*a##UGsl7zSll{#{cFT 
ze|CZYx<;Vq{B>NoM&qBZ(dFDVcGCf_@p>(xv||snTkDh7FV}eI+%+Bs&sF7+G7-@c ztR>;%&`7Jv%r<>o|569I#$l6tau)M`Xu%7X!;j{zGFf)9IC68Sc#1-n>&G^6pTysK zpYX}#4Oae0iAPHQNd?Jag{`4$HMda}=O`Zm-O|WoetjoJADdyp>%sQReUO+_?DUz$ zz{Pudb70eg5@wQi5zHqa#^lwuuiZKLw$-=*#FG898Ch^~yVlZ6D~m?i<`_?k}h57FJ!y+xNTGe@tRSwQbt7_(NlM4LNk({sV!k+ z9QzlGO(sUwbRuLAwvcLWRT<<>`-XI>e3YW$55WHhSkiN{~$+ec$vp@VWE)cT~C~x>dQK@C(XVrPA8~Z zC5Nk*&wMF9is@6xaXaHn0rG1%M8#Pa$XQ7fG{y7W+jv5;#2UP9z+d|#I^S}P|HMs4 zDm6LW{-J0J@K4YSPZ?Np8G;gu5LjhFqM{z1smN5IJfoGU?Hj515gVpKQ3Y{7JT1%i zCm6p?d%C)LL$HH8b78EEA7J5yirE2us; zT{Q%%;-6O*S{q^X|2DY%ytMv1xSaQ;-v{jA63hPoaBzWxX9o0nf8|4P>Bjr;18^o= zKt2TQzwG?`SyvngZ7{WSF;suxVCt-Y?p~DTuM#9`_*_E-2o`{-2q0wQ(s2Xu>AzxQ zes&^QpjcU%ffoRL2!v7)U4nr?oY#M=2Y;afVZ5s?P>Azi|09zLk6vIS9tgye@++sq z1=%3VZ>Gx}um#aFD!o+$U?_mX+`7O5uNuj(6vV>R*u>Q7BFG1pDBRjI>ap207aMpW`pOMDK3s6`rl|%I}P1#wj98Aq&ivgQtsx18#V)rz8Xky8QqV13H?`C0WAEgbo(;= z1^VTRIFF&c)SoUKEUYa0g5oj8Z|h-1(y&E<)rVd%)IfeSTv3a{;;-)cU!XxC8Y);5 z{&-DZ-K4urPyy`Q{i!qjYwvJ%r{^+PJ;Pnd`h_e$LEb39c@dE)$Hf z!4mv5>5nW|7Z{gWR@pCKE`KjKV2g2eW_g)_h3j%LF6!iBo~bN<4RGG)X`0AEDZsRV Kn+F&PK>q{Lf!0F+ literal 0 HcmV?d00001 diff --git a/pydocx/fixtures/nested_lists.docx b/pydocx/fixtures/nested_lists.docx new file mode 100644 index 0000000000000000000000000000000000000000..f4000dfa4c381998acdd3b229ea27d7bf0536140 GIT binary patch literal 4545 zcma)A2{_d2+qN%R6S8G1qCul@i{NJ25IWl_ERY_puCWgdt1T z2vPPX`!{p`=lpZ}zH_ecnd_QsX0G4;oA?M@Kdhhg2s!Rt&_~ zcfFvl@QYHU=b{vSJxX;(4g>br0iFHERW1rx4_>S@y_$r-fwH(zFd2@vVbe{F z>bSX*FCz_m{aDD}z^F^vR=DC-GHjxXkQs6*H%EGC*cl6{r+B+}eP3QKfV2EuxTw8V zo<&K8f)oeH->%>Kxqm)v$5|$w(a+NHAn=(2BW~@M%(XuCV*hnBg*#Q%>0&P-I=)9v zG9E#_4PCDNRHvt zx!Mu7 zO&(f zhC4aeJ=Xb}KNQyqWVHU!>BL<7VgBjEA(MzDe_99yiCcdr8v#CYGBO{^f8&<#FK+)M zIXjPgK5kHVZxX@(kzaD}i8dgsHsOJGW}`ktT`TMUS&eEVRl&SLS~->!v0Va9S^>CS zJL7Wd6#}q3<}n|ZqiFf9ZIYHW>ursXjn3SBZw|aDwVSZa3Z4SlHO2A?;b-Q#yeA(^ zz65!T)y7t;bBM-QrR;&n9`Q)w8nm-I*R#e9Xy)$TG#X}fH9N0XP~RFdQs)$aP?D5l zOV5eQ#ZA$ABd~6<*{u}`6ts)pL5dCyb=Xs5p4jTvkzJ9ptXt^*9@Bl9X^5?B<2g?5 
zgWbt}auOrjOpE?l;(j^)e`<60akGVbIk`KK$oQ6JknkAD-bql=4#z_=A#GZkIulyX zr`=xS_(DTd1rh2ay7pBnT7h9w0f@<0rx!V#d9mS6Y;i8uh9--nQ}F%31QmhoPm?O5 z>vk+&>@Ppu+RD$nNyaz;e8_9yUvsw!mQLSxmGM9!qUBaLnbCL75_cAOjh>0%`8s-T z3iM9vttu`DxUqIC=ees{dEB8dJf41uPSnlxz-wkjnq||F38kRg3!XV8Wc@sOo73)Q zNQCEi+MgIH6dvs>gyKtW-^=vYo|_TpaSwPIyEj=cBPG`BcP0%q$AI*578tyKcel zC*S6mKzh?Ro&Q#KJN{}5S8&TBrH%k_H?*HN?yB_``B4!IFe13c`E(aqjwE*VqH1In zd8EJ9)kos(Q4gOB#wgEk-YUOwFY9#fpyM7V+MFQwFtd1XY^tM%Pw~h%686%yxhYs+ z(%M4eQx{`P!1`B7DEVEPDu8fFVy#Jgi7K*MI*c>Bpu1gJ)HVaI* z=wj|yLP~NV(x}-lD_(TP5m26eJ;RvTm$`^az1m5?ZzxP3B!GXM2!EH~GB|Z&$W^b1 z^TMsebbAZX<>D8pjKOyRr{Q^-Wf^^yowA`>7E(LKo=&M|A(~6M$iErP$-fNdxV5}D z>2@CnvNjR`RFh{UVXPlEbJ+6zZ4pgf5JI>#OK?4+y>RvY zSr+(_C%du&*ex@KJH|tp9jtpBXNP+Bs%y1EiJhzEQkR8p*0ThjtV3^GqU?()0$R+^v-X*tz5KUOPkQOsuZ`}OmLfBe2B2j zDML3=UHMoOOui$Z9M6J|NDnF5z)fs#mJRiNWL5-)$*EXp3w@5Gi;xoLow>!Ox+XNk zw})bB8rf+p6XXlrdroj#4U0H9g`-*B-pT1ZVeS)OctL;4vgqfxhDW{g9OdRkCZ<^D zMyqD|H*?<#fhB;TU{(HA&At+F{Gmrc2_oozHT{6dVD^@u@`H9l4z^&q4&dKK-!lbz zT;?{O2!0^^W^ifCzy!&*2(JsUJGXSQs^$fMwUqC9-h7GV5?^@z%v*Pcm{dD{Ci>=5 z=1(B<+;FCjYIugR81-!><03;jPq_;*oBh+3q3~pT=yn^GF+qoAyZ&TiwzH6{6^Q;+ z=HBb^@GA3G;59_;QY4wu0MbdRcw!%^@G&t^plW=P<^7Yo7i|Hi_cUnl-yD#nwCyt3 zUjvELT1+D~j~K%eW=hQ>7MOh3X-V43jv?fpA=K2PZt^A z^%w)PhD^PR+6Segi@OyJ0A{SAjc+g357Fy= zczS#5tB!*bzV^E+58gD;19Nzi_ebAc!$;(1U>_7+K9ON|#M|HHc$HGWnJMea`9PlQ zm)EYHJCMH)QkrR;T%6F(n!Ggtf4nW!P=ve)kcREF&u;zm;8NQiZzO{p+ zO!H>_@y%hkZIvh8jMWPj!|~>&KeRPvOl9^dNxDpfJ#v5$bs0qWZ@Oe9>XK-8U@s3( z_(dB}Pm(a7_DM5@0a>Z18=E!CU0frL&V4mf^evsx^m19mNi#7sUayBg?3~9%I}D5VYYol75A14wQ!@y zQD;q&GrtqR#oq9R9V3{(`V;{`WzgFBU+Ap}PbZfg2|7f~0!$c=V$Cn*1h0>}!8dq@ zPA=!e`Lm3=Rm5ti94AQ|T*9CdpA(nQK%5-^x$#J*{@)GW^YHp(Aa_f)bioL+BUcH4 zhlJ>?0mE~;7v%z^<+uu;-}6N~SArrI&B{JqE{+pkn~-18!>yCQ$* ztj#N=liZ88jQKm3eT6&JByvY$o*wcOi9PyPa!(P-g=i9^4si~@d)wa=3b!N)s4?w5 z2m@pdUk#4Inx@rws?muokw>510^_i6<>Moc=6v+_eiTBYSOrgr%-VKuybG!@TnVUp zn>V7S_01B4dm|2L(JiTj$gbGHAXa5&O8KYj$?hZ#+hBIn+GH2R0!t|X0xkWz3(q;C 
zt6&{rNd~sm`?iXWa|88MQt9FjrBzMmE$}B|YNM7wcO1Dw;;fTQ{h=N2Ta!>sr-Zm9 zwb2iPKI3W4cW(6H8fgO0ti@5qh3E(TFkDZdVi_b-vFSF>LJF4E97?Tn*nps~EKqo%(Q{AaKgFFSOR2rik#E~UT%_i-g$!^$O zr1TO#K(Ro)E`UYS=~z<_g&@0rZL)rQtL1x_VMdz=vHeX%<%!#T?5Q(;?A}N`i zf}QMF-aei?NqPJ4bu4}V4m=*JNU`)+u@S%Oe}~lH@y8D9{i;|@xOLhih8@nti9b-wB;aIaqNRg!sSsIEF z$+3hOCE1rz3^DvO=lti->HL5H^Ig|m^Ig~6-+a&g-p~Esuc27Ov7m%U|IwR>3oL0uDXq@H-T1shKillx-_lITc&% z$a(HAy6gOM1x(TZL9VFVMe|o0PGWWCss59Vq^xMA!UDzN5zlU$W+iM>a`-*X60$&SEje(%~0b21fO22Rb4UOslWQ3FQix6Lw3)Icm zC-9P^6VmlWs5i1Ywao`70)9!-WN`8={R!2WH!os{Ev<94wqbq@ z0V+r0g%s;Z)6;GPaNQSG?(|j9Ft)EFr<3wCJ@Z!T6tYTp?`vwq2CQ0)J~Hmo!Y@2x z3eIRC`;HjbJ}kLXpf?64b8fovC#B$lbzJ(t_g$)4uo{w_+imT_yEL~!_o52Fu~1o4 zUY*t1qHq%WQ`Q9kV(m{cx%fHSSfwXFBHJBYmo^xA-Rn}4v+Cf&~u~6 z8(BWl3egf~%M${oLpOo!!tNnkxsJWx$|RbKzofrq-GI^Ew9fJwq`T$ZZ0}w8C~f0L zS{#Bn;{uOY9X|=tsj)G74l(mD;7?Td37_F+H^atQpXiS`Ywa?YgT2UjiABmoo$=8E zN@N8|JleRhdv3ZPov1C;s%v`8EnbteNkBm%bRLGf5p@^3&qojtvW`mI_F3nL8%s#q zMKogRs0^UV7^i2H{!(avrW=mG7&zSfij|%3xB_=1;qlq9awb`CTAoV4N`0J(Vc>Y3 ze_m(0vq5sqpncoYI};gsn=?^%5@~NHiggJ0pF=AYLPe_r{%Y3{37Khkyo~l zAlM@^9bM69(akC*fO6_0HC|GI{Lt~mvcf-EOP}%6tFtNkapeE%XwE$IJW~t zF`3;Rv@B8%ses6!K;2YvR-)Ee>#^{nxtg!@5XB1E>Qc#3ZQ0c4n%4#;B9&VXQb5s( zmWYT9*kB+1J+2Pc+vNSLrg7>=U7bqfyUA7GQA5O#I zy({aA(nmKQWAmqBGUnCfETwhqiN|uFlQ&CCRvCM88toQu0qd8yf^Sascal1r<5$j2 zTh6R|$$y&dYn=1M87jMb$C^ZsbiH?zisKrc-`IHa+GC18I?>5>I|jS5bzM46VnE;V zq6|_)c+K)%Zs6ch@x>C%=V!#1U$o!BHLK2ixO?p&-B}m@>o8mDxrQ;xn3C2@7w+%3 z?5^3#ZMrChdEI{`83cE@uJ`2h|)nsuJV|8`5XHPiXe0sNZlueN2_>L`) zE`@;V1=;1#JlaaQZK`WMDQSFJKvv7l6%jO~HETU8HIL`#UEEb_R@S+5ay=1L#^+RK zc_5O1j<}sm6XsxKA?Jv_bmUnY0fAvi*8r+*G+ef)j1D-OP>$X+ z;TpIiJ|1<$7zM6M_TD*9NB&x;Gk6DJGw7``{VAfgnNT3{G^AOZ9*RI8 z=d0Vl+zG%`;GezkTwd^>5~#iLOE+t58l(=+*_<1Jm4D?z0?7K!N#5zrnjKm6hnVL$ z(+1v$89jY^rHp3Dr%&pajGlTkSpb&4nWG;;1R`~NbPFL)n#o%ivy+&R6UYHAW@`c;TnZJtrW9ebmD6{xaOWh0n}7dSTV7Z#@BstFzt zUryG!xXCv~JY7i0WXY9dE|WNZ^wcI~r|NC6`T zC0r+Yao|Q!4m~?!dz@r!w{#!h#Kvpu_C5~WZ4$5Zx|P4HZkpEiu^K4#?6+mEjf&EM 
zS-A%loOoD?-o7dvIpD=56yYOwGW-~qi0X;V_18h&VQW!A8iQU_xz$r>?Xm0O4{9w7 zan|#_cv4$^YSae_l#lvr42|Q~g?s4-oxJ2FaeEE%#f)CH>EVQt#~=~mAmCLH%_EtEkEXX_HRI!FuC5k*bG|vcM zX}nYIF5*PEsacPWBwzEgFD26s5$s5bXKpV~170OtX#$@4985J%EW(;+)-^TM;wlfPGYCb$NI7O8@^AACi4;q=Rg}69GK4bq>lpGojU*yn z21gOOY3>k%CI)e*w_J_vXY3TjHeC@lM>mXNH{4@i*TzU-3<2yf-e1%YG?!D8C5Ipk zV$vYi3Jw;X^@ZENDXR-<*hcyvfzH!+bXKiro&Th{7{7>pG>=bR+AZHE?<@ofDvt(l z%Q2xXrIpiZbpaWhfMDcu9umJBx5-T8D{?hKnDBaG9RmCD6E{=tG9Os5_Rowl?8_9_ zjp@@X`~ALIlSy+Lj!aKyw$F6l3~}EBQptS*=voq{5PR@ba=9q<+vrfjA|>zGUkdYc z4X~pM=y`gd3GUCt(`|`4<#!e&xj-8){uSKc{VumtY(I4rv$b+n6)bv|+o-E6Z8m$y zjjP$(=XF-KXkl=pTe)73Qq7>O_ioN)!{`o!jY?wWWO#d0J+GHa=R8!+K=z2CdNn0~Pc>C(rdjShbdVw!lN>&z=oF@EPXmP8x-h#!_gbo^HBe&YY<2;Z zoT*yII=FaW)CTL<&K9vd+e6NN_F#LpRs~9>x3bLBflBWL9d$Tq0o*j-&nSnpI`xe5 z_d0Z9`4MH$h5Sli{*b+`;k0Hj=$u`+$#?s7SkR=kSFjG-O_9aQy$P$w! zS&Fh|&n}^?WoPuyoO3!k{r~BIzU!K6zU!L%oA0^b`?=ryMH;Z~;%5Q^flTv(w>6k{ z6es<=EfItBgiA9%^X{HCVAbHdF;Cm0H8+Sm7WTSjMMBd}Vi^z`H#67P0+4sf`!=M( zK}ANSzN+y1x**(vZ`n>YN+gjqYO`H5JY?FbGOoVwRpI;X-odHo)gn6)aW<0KL!l4s zckAB?t2dm``;%ZqiQ#jMAk?*%?sbo?5Ec!@> zwXV-M7X`O~Hx;jN?H9&K%@1n(pi3!80|0QUjrl+yy`uM7S#@AIA3QI! 
z6gsdvxcb{|+UPq=MF$vP#h5X)QP5w+Y8)OBlxVx(C((puI0P)ZCna(%?Mm-tHm4fz zvDxJ8T;G%qBuImL22c!Q-FVJ7qQEDi6u|4!PXbmV0fM{p zi{oh3@RR-W0$K&uh)Mwyj}#%23c-JzpT~q6Y$e?ipl*fki>Dg1n{ys=QDM)YCW_7C z7Wb7-Z|aYStRy}wB2U+ISTI|Dm=pS7?y8!nT$OR)n7YPs4*`f^(8SMmgv4OspuoT(WzWTs#*XmZ99k%5%xIAe}h z<<&Wgxunsp$$q4rI%SW$bz7%~s6+~V8^&(Z&>Kt6h0k0b5{x19ffuu3QWP!gt(I>R z<3?`fq@IjOdvP`lLPK1Ru!V&+AB!!0ruFim5yoU@OqqgSubaS%9qKyJqY&rf7K z4M>~0*j7EWv)t2`w-77_5t}5DJok^9jsmVHv5hj19q+*Mkl9B-&IK{r%#!Br%u-*v zgAPVW0@)0|ZVQO*S5B2vY~eBoZTNqVYqLT>DS4F;H`4sn5G@q*L^fBe<}6BO-=O^X zMm@g5J_}4yh@^bjC>_bV6s#SZBgvifFw0M6I%B9)*4h6E-ZwrBp^1zz<+eiS>tHmM z4|HI*qhG6hwl(nbr$%*`j!VwrIpl8Ln`J7CVC_*o*3GkdCFgwNIl=oO^OgoV4x(n) zM)lgNtEw(I#-q~RDz}~-&q`>I#GQsgx!B+9-}t<^9Ho^TrRQ9HVj^bskKPy-oX zs(@UgPC7MTUs%81HlOHaTLz)d@{~imWrZ86H)=P6XEBgFy6Mlj$Fv;v}|rdUNm;`QH9ee*Ah+rx50?+TZ+ zQ@cul6;DIFm+{GiamaM8B~9ITWOq-iE2hba#egtY?uQD9gAWrT29iAeP8KZ_ufLds z%I8-vuLiL&=3p0<8QVyA0U3c`%>mE%Iq>xI!(lub4p4Fq<<>95A2|FZY|Dt%`56BR z^DvMJs8B!S^vI!dNX~8idOZ8aXQ!JFp|;i?c(_MEV0DXfPa~~XdSA^kp)rJRuFpFR zbCY(44MAe1BHsJPaWPIgwltV7?oOR(L&_Y9x35y2!=~b)32)(BD^(6HgxuR;0!4Sa zrMu~a-C#HFrDmKtG+HdmAca&gJUetl*}bvB`(>)t{&!)~@cc9f^lE=sx1F7%x>LsU z(1FX;35OI)jZ1QFSld3_EOC~wDX%txUw1a;6Pul0fe z*dkMZA@s(Rz{!4O#(d`LV&k4ni8jX#oR zI4D#`;KBOm;AyQT7(}KbGjSXzYmC|^`-t9u1+K&-O@B!`L`j;=>y@X-DohD0@j&dahALQS?*XDUU1d(1@UxuzZw2~-%KDq0lOsZWt}rc zS-&Zf=k>s^e<&1{m#Jv@RCq`#Y z9+hn}UGxE4Y#Ot+yDiDvXv{d!;M}O@72xLJ3b0kS(@kNUnIXQQDVaO!bn&5qzY-sW zEdB8hJ9o^X42cZT|sT4gz}$w1gfDyuc0Uc}urg?%zr@hKS8BD%g9&?oMpt)4yF zSvfvl^XhUoibj!wJUEbBUwA}^-TAeOu;azplc@8J7u*gp zyT~O1ag&H9XWa#ry;%*NOZ*id`D?|p=97Yt#oY&>{UqNMg+?<=^#*JQj~)A+^Bc)Z zE+ufW5AXR!sEhY=mZxaEX`||q@?qy8MuXF}ad*<`uh02^cbUH%baW&B5l-+?DC1f& zenx5ouSyl?ki_vmX?@q*>+0eK=Yhh%IjjRmsIdX*P!;s&{5GBqrwy*GbI>E=E!rp&E#z6dOFe!>u38wn0qbCCCyF7U z%6rX5wSQ)p*XJop?&`pU)6C%qmLM&!dlTv;wxfC|E3=j-A>!)%i2C}w6LITUzUNki zx3PKRsh3KznL3Sf1szB6oAHAsJ?m`Nilpp8{}<%qy%!bhrr^qFl&|tyDDlJ?sq7<% z6@yh*HPs?S*DNiQJ*RQIpkHI=&b)Gjm6u*JYRt{y(U{`RF4a&sqwH@j70tV2u!Tw 
zu)L$}nRzMM=2M5GwuV0j@u<$NeGTXBpPLQ#_>D!hx}LHrl3bB9a*8NHi{pV3C6UU} z3y=rf;?`967vO-+iAGvn+2ggjLPa=(-t0%Nb_{yovoeO0nT4O}$7y3{>Smla{#iTD z9zO$j22VyP|53Z?fA!xH{b&5n*v5#bKT418BY)ukC9MAZq@5cdBj)`m1ij%up0qRc z{S4n}U5wrFN13w!1^=;E{tVt}3k-MrQLS_*{M!ot^17eVJ9YohH!lF~Kz}!qpW!># gl`$MY3QS+h|1l~^12%d~OiV!fMWe40j*D^kA64`ZU;qFB literal 0 HcmV?d00001 diff --git a/pydocx/fixtures/resized_image.docx b/pydocx/fixtures/resized_image.docx new file mode 100644 index 0000000000000000000000000000000000000000..913099c4a3d2b9cc25d8857fad91373a3c8f6ad7 GIT binary patch literal 7903 zcmaKRc|278+y4w>%-CkePGYQ~>_S2_6B1!838|1hJ7vkfZ)K1*YV6r}AvBh3A^Vnf zvWF~L3qS7X`Tp*n`}ce9YhH6+=Z|xJ&bi*_dSBP)TyLxzDTp2bfj|Hi{+BR-KaBG1 z{E@S{t&528`LQTgU5yk?74qYl;pWDa?d7nJ-aRgi1J^EjQ1aSFUoW|Yb+Tq-m2a3ph) zq6xn!Rb+GOhac1KTChgpy2vZ&lm~lsOIP%Vc$a=Pe(Ey&@KNNq5B`Yq9JQ+( z3pe-9FmeA^)Y$%m+J7aJnZx6!cINi3=g|FEV#R!>XcmPl?fWPtHK-RLxzwk^X7yId zTU}$pugP!IA3rCJfMFfyoF6VtC+8(vO5o^1-ldsi8F=B9%}*x;!5^N{`K+ua7apP`9c6vVW`{o&N=|g8$E;nz8O}2 zc}9WFcK+F>femKCA02~o%|i&z!Fetr`BvdAPs0kIMHD}YY_pAOx4?CJyeM;xZncm3 z@8lvAe z2Br7Mq!S`Cx#M?% zw*{Y4irUf(+f#}M6H6viOFJ@)JJQNJ)5=CN%iG?Rk0w=);VZ|pt2)xFCQ@rA-@Wh3 zd_PrE{W-g4I8Lw`x*OzFqLyr$XrABW1DhRd6Xg)PLQ z)^G1y`ad*}R(%?)ZJnuTn<2Cg*0l|OXeU;65*s>38an42yT(3sj@NWA)b=bAJ})-) ze68zUB7FJQ*)!GhWvZojrL}LmZD6)#aIvd@sbh%vX?Ue;WWHncd+*5i?y-gy4Wrh@68PTQuE}$^u`)gNc12E4+(e@c zfKYKslfKBF;-YHhJB+t0%}+@{05}Y)>hTg{Q<*0G|Od!4(xCi#~Kq1t_pK@bs^193h9imKUq(-6?A6)iVm=0*#3elyCEt{@-j5O7QEUT~(d*0I z{-&~9w;Eu^TAzEl_oXVIm_!)DoF3H0l6W8Dv%8pj&Uyi!pxedh4?$=wMMWlI)6tH>OX*ywn(_X&?4VgH2YnPsg4VN{klM zU(TdrBJD?{p%fLe08w(`cx=c8i&{=Lb`Mh`T900 z>&ZrPkwvj~r-DT(=b8tlXfcXCOiKfG@DiTcd>n6+wKi7nMK_+;u|3VD?;}V(lamz^ z9!m@nm@I>d5I8S|x=Vxw!cP)dt3}~p2JvJ#1H%M=TaJQ~1WsNV$kHHT-+BTezo*M;_vY{$&hG0JY1jv_F{%;rd(Rk%ThsABTxpq z$%vCzq=D{w%O;&y%B?VAjKpC-4Rk4GYYzegp;@OOl5HIr(?p|!g)mO!!!dYR{S!q* z^UVNOtFcqPfsl?ME5HLuhHrs;S^CN{$-y)gcbc8fr3&sF|Y?tZh z1LgYtAq7nGe1r}F3m(YeNAQ2AYegzJ$$oPqJ%=)Ei*v#lsE@L2?B@-#C-euR%9~o@ 
zyowhDmWSEN25toKI3YD4FSFnoFph0|WD4XZ6QUf(4rr$%^9Mv}z{$7XFOtNdgQH4efmUBGf%-yk48*819hrfO?~uu`Vz>cbvjLRpx2TH!aoc&6GRrq1 zmjVKz{%bTc!Nw5QfNy2~lFrCzdI*Cz{&P?(my8~LnA|&+09&MDC|?*eTAVBs(u$(6 zJ#hpH&zIpe4|#M!Ch?jo{y1_%m>Fr_2#(KL5>RS%u0`^^aDrrL4ywBZjr};ne$9> z6a>g~X#Ak!UK80HERqg79_m}#{?cMXlv74AOb*G+=5v%u<8uch7{DD7%U*_@Qw&in zi|Y_%47$)%HtWM?MH$G(ZGH&^tftu3c>{Qb+%^t19xqez_j^pM3*gqo3BrT7c|xe2 zXf&b+CmgIq-KHB#l(&;z7vnQj)E#K{p=1lOBu(f(`4r zQ}`0p3n0t5NqZ%LvmRnE4b9M^#Wy2IUlpL0U7FwUpsx0k3=1HdKoqtSWhahT0LZU6 zy}mdSaXgT^22bf<$weZENG&XmfEUgQ%h(|yw&n%!37>(&cv%&h=fA#20AfL}c_1u) z668UXE!6gMbC83hE5Y@o^tw3IZfU`T4OtM=OQ0V!&35S3B5BtwZxQI7zTmsBbdWpd zVcxfm?<*4-Q|ovkz9O`!*}hSZZ7U&YY&y4 zPV{m(iSXQA|0u=+62-uT_i~|%?iq|%FRMgY-oO8wf-+o;z2hWGC?GsZ5Fvwc?2wV- z1oM{8lHA^d4$2K12d;5`k)I|#W9d_vnx&=&hiet0!kr41)@i|`zJAi=^NuGhgn4N>z{o0z8d&nr7&ppBRfGMzb6mvB&ThpS`{BD48` z(!bG)jF&)^cRM^7FiE)XyB)(b!p`V0Aom*`qtV;)u5sRJ@TP+x=+4VZG~Y2in&cKE z6{n)3OvH*r=G}!8nNU@hfssTe9;Te{$|dt2a8YG^yJ)OYpNTsB5n)-F9KkzqfiiPT zs9_3X(hd@wheT^$oTsYggVHv^FM}+7A1v+PZrXqewi$2=koEE)Xy34fRpwA=tu4Wc zpWpOxu43QDtnlAICh04A%G80zMHZdN?NAoxfTr%DI|+`|UZUvZ@J9SR&~ z6;=w`1JX#}fhsFn;cir-v)PeEN{&tP>ULKO1i3E8c2~+7mJYjmi$nt;LM8@_B51r` zGVMsb-z5!)+#(V3gYE#7lKkv+=nct$FdPY6S1{a9?;1)P-tH&U^9{y^7k~kL88QGM zf52-JcZAb(DkoCW06#x6QoG^H7nu=bjsZMakeM<0i&BVV9~sS%5@-vegp}gf2PGik zGo>KsXP%li-7lY-Eg{@^dnmc*ww7u;pxl=0#XGR$dCl zONoMOgz_z75DGQ$lU1aN0%s6ulQb&{lommuS|e+S@@k0Vz07sp`zD;|r}U$WB(o3R z#KbmM3g-ZUer5@fBFSCR;k0u!QCP0WkIB$H7D@c@6n^%WoA5Hn>josGegG}MIn+Y}oP!7a6AQ)! 
z@D(g;K!Fr?KyE2xDv%SU$W31AO|pFy&B`*Jj0(IBXTm@Pf)*mtVD5{?FsKuZq83Nt zVL;~u8|kA=--JBFlPAF``;5au=`gBV+(ISnVjeEj0zL(&%0yO=Qx7WEfMgi&BRJUbK-Vd5VF72JsOBh`0{t=x`=4EzWzQ$+Z16 z2F@V6uw*MB&94qRx}{`lCs+dxVmu2n13)TfaaqZX@0#dC}t@2`_-57N6mG6cCQO?f9ILLZi#)U8CS4r+)m zNT9IAeY~rMf^2rlJR5hMZK?*VZe$uV))ikfZwUyYt5z)9&mp+WNDhVw;D#quiT+j8 z8n^7p@4Lq=m6VzXxNnYDP1Ydn_EV43=rbRl(iSvS$CssuFCh`yJRf2muQqk>FoD<{ zbLbX%6E3SAamQ(1tX0QU2K;n*S_LZze_Qq9bKneR!tSS+VYIb`c}R)$=Xe_IPh z@SS(4lmPw-o7V_V*Hg0MVC3vBoB90_=euYYRgiQRvG-J7YoBiSX@*n`ty#(SQ>_Mb zj3RUT%8kN^a^2R8zXPvIk^x0klt!+W6P@gtZ3m1QhF@-zuhei;i8rG?8*CaG_hVkV`1zV>s+>pf$=ei}fI8kcW@!r_f#x~gz`IGELiiJA z^>w^3=OO_m=ySLCw{O?Ai}HA$JG4e}0&lD899Z&<>R4~wW#9nv(f^Knd%vmYA`_QB z#MOe5?|u5*db%ehBe`!WSr~>6TI*V1j^x?U5jZ}V+U3TF8{tzrn{gu(rpJ>xm zc=o$30$Sj;T`iBC1?P{_^10TbOyO-nYslARfzO*9=C|DTk~FE~EZkDp236Xf=D*D) zJHjX=XnueA%>|+b6Kn)RE!r>|deQHk%j+t+% z2VDr*gbxV876RE>7Mks=Tb{`9kwpLCSNQ$L;F$cJ&MYs#_u@#;#$wQaqw_i6|C+G6 zxO&){yZkj?)wOe&5QU#KK0+K`r^dgKzN#Ar^$!~3nc#4|O=CJ-CiK=C|Iz1}N*&sS za=o#2>(I;Xi<MPS8` zr?53Nl5C?fOAArfZ75H1Q&qg%f8&+%O0QJcyz$hCZ3`w@_d+}}=JMOf(y#XW&1^Mh z-|CF9MUj%G3y~MzF^>m6Yq-ih#@c6)EX=t-E`sE$-hKe0{4BF!cdidfQ_KtgIc!Auhyu!w( zk3}`VqG;SC{(<`AeDg){wlxa6gt))WOs$`LyHvSFy73XwP~>i#o#>MIff-l0x5ldopAC@nL)l(>Qj?>b4{1-+1Y{fN?#Xel z#(%y>9L8Fq-SgX33OVIP7Xo%+5o)UJm)hGF8}H zi{nR%k1`IylE-{j38`yOk-LN+F+WdH&Qs7O&l$dUJk(FX${_+4CMLs>dE_8e+d3@l zW|67iE$db~b<^2~ zFnQaV;dozNV5s5Sx2E>z`45g?3BfbC3qO_dD}k*#WZ7)CmhBIf%wnz@$|EpBQehc( zG^^}ZAp{!J1W;5JbMlgEmzDCSEMs2V&`00Al&6t~;%~jT#_e3TS%(qd^IX_dv^(YbYiF#c&jVa(Nsi7udwfdZzXf>S75{I5 zj~$%<(kbj>9z1D5(ue%mzZ|e1K0%Pen2Jbwib*kF$$adVVVQTu2VZP`dTcsjvCi*4 zx8?06n%7S2pVcc@#iR12*yj&*)- zZU2^%zj}|)J08%wnpsE7?hZ6O|>G$|O6WXap|xOO*D;_=6GXobx?so2h7J^fc`>CT|NfAdWL zIg>CR>3TStyF55gpN2R!%@$F3(3v_GQ^G%;K#JkFhN-ZnhdL^^O=mG~-K-W^|7e+p z3Z}5}&^6-Xy)H3RxS$*PVkPqGnV3;D9;IpEuP2*rcdz2tVotqTds|T;sz$zs8a$pC!_EGD-r$fM z|Kz@q*0n?1j%Jrsn1*STt69#{hviJ(wluffJg4uEsRe~l7o2C}DiSZn(SG&$JjUF6No9$Z? 
zvjXO#-+ugyuFP(yjpAo;BSGic3?!ik{4GWPDaoBnk^j^F5GMcW_@^p$uGsx;v}d>a zfAzb6`u{25oNHHqo61>b`OE)*QL_HI(w|${bHV3tdvMkX|61vv^3Ok)__J-Ev-)q# zK8x+ICH~y`{?q->dU)Pj|29{~|91bo)Be->&suX{GXJ&-=0EDoe;3eydjCnM|M}z} qSpM+-Pj&jI`=6 ziM=HoTQZi5^omyXm1)c^Jvz5$8@sJxTSx=7F4S;OXc55rXy>kIgD7 z<-yF?0&M%O3Ia+z_MBx>sr}6z4}%`cQ}=$=m(h3!eG;%?B5z#tB2~B$q3(C=B;yv` z-O%CEZ!s6wlixPZzJ-D70Lef05%Z1X^Hn4vB7*-bBMkTzer}$&683Jc-Uik-E_Psl zq)T~XvuhhS==JVpfQ?%oE4GsoRp*VBl35|m-ngJXm(b3m-y9OdC{bQi!~TAEYWE`Q z@WY^qtPMY7!<;Vr8`eNh(|L*5&}>UCzwGNIGk0muGGowxKD<4)jHS9vFSeG8E%V0= zz$u`08o(+FQe3_dkAmhRn*?vv7_FouzvxPDYB0^9*!va~Url9ju& zv-W+8s;#9CWTh`t%-)8~#^u7(87u9uipIZ))$dt#6h%xSfZiBCvM;%c9B-o)iDy zvc~a)wf~CA*6pSb($3YJK;3_ZmDo+$3IVC@-d0O*gdw1+nZA6=FZ8Z(KOO|i(j^Jw zc2A{X;eV(-kWRj^3waZp#nH)JzFKXAR+R)9D33NevvQOkdXlO&b<)babLGB#B8Q6L zTx9W{&Jr)Y<}F+sR|RDjykC>F51&Y51NSzlWxn3XoX|bBV0l$1`+L=?t>Y(iW$8b#-yir4QL87j-x1#?KT(ps_dh1Ii7t1?qrOs`b+DD zE~Dfz!MeA2Ps4OrKO@;K7%2p19_qKnrO91jCdu}RbO-Sl3kqz?xeErINlG?5x->Km z+inc&cGB`Gvnq`c*d(30v+6<7$XAj=j;!l$99g!9Zax<@d;#c{!+A{a;~78X3YaPX zc*A#X=ZsNYM0Yx~43bYOG}78wovyiP;Un)k6XL9(R^PysZq>Nq+MG)Ltif8-KD)^J zgKoBKEwSb%ZQMSQ!#5D7mjG)68M3k2PN~Cbz83(5zPBG$4X4Rw-1S|&nx)XJH{q;PpLti-r_kJH*+?6KbQ%% zOU@eel%{%=igEe#ST-1`VsuQ5JR%{8Z_P0OkkG)i@cR1qCPx3G98VVFnz5xmns{^j<4fo7G3*1{e6n_|(i%kLSkL zXYhPAAT>6ofpdtiEl=Hx*Cy@VM9c1EON$m{gEW5P928evuBg149w96b;0|nw((l@{EwDBPqg45#(U2^oZ3f;rWzTi{HctG`j)s=WVg0=!#Bvg?*_|WHya+u?(qV zrI@(d4Szn?&}+3u;kr5ClEZ-*)O9VOfgvB7MN;%`KbpN7q`~mka3#u()CQ9qh?PyH z%WQZcb%|QFaEV+XKAb7{)^#lC+Ow_1(DvuCxLqbrPGm&&eREtROzF8v9Vb!h?L4gg zJC0HHkO%#jIB2;N%DcO}g-(%SGnbBU@iQ!9DMrZ90V+IwrW}& zJ4g>cHaIr7v}_(g@2Bgpm*~2>y5@2U6d$|~8FzD3i}Ag!G;~&^$S*M7;mLHnizi(+ z(-UG1p`Jdw!f8ht6$zq%l09nD(xY0ZM+`hNsZV(9z0B!F+C8+=>kPG5MGg2<-qZIE zy$JBVWe$@<$B9_8E~VW9H{BFD`_Aaq9W?H$@=N#r-C(d=Q5q+e&1D~$QIYYrvocZ+ z7dWM1+z0n)rniBxui3XAxMDOGBnFCcQR>TQ2!#U_V@|{23x^qB8~^LLEq1ta}?EY{J z=4A!1*gR)H3^;Fqnwo|q+r(##UB8K%Z?rmG@nEeitl%{Qhmg{CxDNQ>=;`K^`;8==Bgz<~rsFk3oQ%+#K5)kvhlI{{>b5vr@|(`Fp6PS? 
zKsC>q`O;;+nRPG1RNg>SUU^3)=i97@QJddVFd0Or>jK)E_PbaEf}=^RxhpjdO<-xe2qZIjKO2?8_btQ}`=U!f0)U7~eVNA{f~tJ_q{yqAy| zKh1q%Jt85{FL@qI6DA@z&4dBXe`;jeUiIQ(;;?5&z`~mk*JQG1^El$#j+qe6e@<-5 zI&QCImQwppnXs#h8}IAhW7$IV2=x|RrcP|NRcXxt1bKXf(g+oAq*~Vt&=eJy<^!PvNDfm5|Qt@Y;-=7_S0F% zd#`A1(#YbgSu&QjyByWODx5BPJ+#Htu*B5HU9z5YM=aw3&^AD{yD}n`SbQw_=+2zj z4XWcfQz=Zy#yHaJ8{07bYKhm`OuaXX!nHGw(}V<9+AU*^5xONu-Z-{ruOlGA6r2;ZPIb(QWLx9O2Q-&Fau@*k)AXunve}Hp zx!4YjIwmJMuxQj0^V#$>*3dPZL3p?A9G{-~hL8z!hvmj|SndmCubKjVV$pgBCQh_;X=d!U4#zi3n|IOgD3A52d)vkV zNBsGfmjk7Mkz1dmBWPfeBi3!Wl-Bdh z!a+|-#JFDdYb_Tr$JBVVhog0E00%Y-jSB?93-o{fwoL}R~;zMD9JJ&Z*eFYTeXc^KE|cQ=O%}m8{0s*CtNz!H{)#_oNeEW# zXK3QP%6I&~7_(oWbaKA@=T1yX|9a9t49&0jlgBKfoPWkB;4l2ineTiFmyr*y@Mjs14s*95GjJvJJKYyAxM=X z0Vx7Qn;;Tt@+UKIM#ld=Pu9BYX62rHzq7N?{?5K2EkYs&JW^6pJip8+CAw#TE9q z2Y&hE>No`Xy-+tdK3=J}!db~nbgWg>C1742$njtT*lMDXJb4dpDNuomDzc^1gvIn} zZuL`M%_U0*n{H0a<3y7iFAqpE2p#I=h76&KMW=Ajw-zlM4L&QHa4a6msqI#pF2x< zgbcU$yNy{d$Bz{C&R#hv0BHe8H{RoO&tmJ9C&0r4{ZB;buvhqdK*2x<4|g9UTRS&< zkpPHWRZ6FO?=>0(MuFJQqwsRY09jJ2PsL4%O~PEXjQTRHkJGR-G?pG%RoZyvBW3|3 zmvr`N!c5AJhrWGT@5 z3dTW~lv?is>Ms!<7x{9^YOJvWxt`8Yi8=8IY$D^T(nfnC(d}>7Yb*mjiM^$$V%fy% zFiT4g@8wl{UO5uIYEHBKEp#a%FT*8wr%p8M#Yu^Rvc!AS_It}DC;0L@uP^zfH=KD) zYt>X1-!D*`r8%Sf>cEja*L6&{o0P?OwUQ#r(9 z67WmZuKtbMUt|J%JoJUwyZhjv`-@m9!(=_8G-{YIwX6;u3nkTTKW^nFgFDw=P5`8+ zQ~8fEBxx0(cWRm9&tx#7gK;@m2NJBuY^@D!twF|cRgEV95>$k z?vOl$bC~L^RXAUM+F`}*GN;6U`0Zm%5u*YyQF*jdxJ;=!vZUX{1VuCJSLVKz%dWufyG_;blfhs*M}^#;+#ruzg^)M@^7Sya(vT>QsT^1HACDX-fnMuo4R!b#~0h~4L!mU0GK;K9VI5E={ z#$ofxVMGqKtAIHH9*2d+wB&{V6P9)U)^-U&aE!nXHL-D%|P{D9v5IW!``!Img<#A<^X3`Va z(jJdHmLF#`+N>W`oxVyqZA|(xg|04BylX=#%O;fUDzN0lCF#Gw_|Ff zH!n&PzF1mzN~No8OEbnrS?^4plRQ-K_`!=#&+qE2yUNjl+y_79Te7WW*WWF1B9C_c z&qEVsI-Aq&>cMlKQ`(@QtU_AXGJya?o%c;SFhHpjSjd@-%c%FdQy)cZ)?2x~edVzy z;6%2_XvC_^Bc|3t$|^X|XGLg5x_*g#;l0llLNfNYO8C$D${HI-#Hb2YrfNw7O2$)7 zulYWaUlo|?F-y&TFhW$b^rk$}wWx8eU>jHx16>fgFIN(OUvMZPQdQ&A&b^)-Yn|+E 
zn(B!O?l~(V=4>B~w!18J4ems?S@6nkZQAdwd3wv$)$(gtLB^Xx4iUhSN0lw1M8BlqvA_q2nhGQX))suk<^AksPFDb>;WF_^#KB1hgT$9{gEf|`5VhN`y7>|i;2bp!4&HZL8hlc)UL5CKb~ z{}>*nu#;J3J4vE(?C6%C=wOAyafv7whl32Seyq z7!BQIGcPiqi7dN(BdEvN9j|IjG#PZDNMvZkypL!zzn3h?vX)9Z>dQRkS`}7MQwUIQ zU1sfh5+Arq9x6_K6c^it{v7x8{F-(+0gn3?S$7mWu-uQP`4#u6|K`58PoSHkO(a(MCj`LeoNHu#9r=oQKvRux8T@k!$-Se;klNU z@|glb%6G>%R1zZmcRRQvG012(9@;>@*dx0rLR$=Gs!Fvskm`er5qQ~`O-N( zrYBe~@!=4eP^4~x+Q{C0MEcUvSk=l<^GDfdoV1xh-+cKXmA(|{pp}uGX{NIBkwf>#I)37b zt{GR@`Cs7M8(g;&G4xq7d*&3Y`&zj(!D3X0Gt@`WS44$0@>Eh&<0%ujbd%J6DnfYxnh1XnPXwx0hzxQ7#;Mw2;C=Gc72SY0{&!@M zZFpIclrO(Uu&jIZIVt3OmwWj)mQOg7!aC(C0n%kJq5gUL>(n;>C`4YErN3&^ z_3-ot+Io88$eA}PM*diohIpx?Q@PgF?WqCJ?*_8|)e9<6m#&YRP{uQi&W*{Am6h)P z;0mj=G+~+|?#jLzel-%O0okiA=neNHHE@cgxje11uvAPDgz9&8r|2#!y9}a)1{V1G zy&)&{KV+H85t9cD?VlNXua)oRMGC?*0pLJ^;ku}FeBqgp?~%)b_sP%W&2JWjqGuuA z`>a#6Tjk#D*#?91{4I-4C|rS?y>M7DcKPJ^zbX*7@%&TZLl5YmBQqq$O0V}C18&rz z#0b|Li)+qXIr-U2h}AeC$VYnmew zZ($3qfxl#stH~w-uY#zssfWa*6E(YV&Z{_!p~T0`jzZ1Xy7}~9$gef3=g|XPzi-?>8)-#Dz_c0<<5Zr0n;)`5j2?7;9?hnjZ zvAI83-Q|&9uvuO{xnkm_sN-v7xEn!C zqsj1IkoxK7ZaK01k%yhZ|KzxF2L?N&X}+S*)d$&eRk(50Az z!OHUJjbdIZacWJT^QNPPIT22SPv~`yB3MSioug*9XnMhVxjDiKBwo_ua`FjVe*2dV zLMYc6oZ12^0OCz2rFr(9)8aUwM>61Kul7XT%izmd{McKp+XIkRtX@|V!UFw(2*+f6 z0tUPvj`*T8#5v-B#|3}w7pZWj8Q2l(T zixc9%S7J{1pF91_i2R&?ahJul^N$!I{x4JI&*>M15VsZoh(WA0{#nS6jrr%~iz0*5 z${%5m{Wty*mj5W{&)FBd=-(?>r?`;)w=DgfevucrR{Igfl>fEyKw88kxQi}fKe$&3 IG=J9ne?Dezx&QzG literal 0 HcmV?d00001 diff --git a/pydocx/fixtures/simple_lists.docx b/pydocx/fixtures/simple_lists.docx new file mode 100644 index 0000000000000000000000000000000000000000..c09ad744bc219c4c920f3eaec0dbe9262ce445f5 GIT binary patch literal 4426 zcma)A2UJtrwxxqK=_0*LFG>?cDS~JaLJ@%gLLf*Yy-5{;P^5(3t5gAL(lJ5GrO72o zZc$ME|>vw=zb?IU{?Wv-jL{&An!T074=LJTfveJUB8$1@Bx?V6Pv! 
zKda-c~P<~gM#!D%rh(e&_U%<#QbQ{j>sc&;Vg-RQ6m zt*v5dDd$(|+%`ITT{ply)kU$+({%?a{#SFdrA9_!-JrJwwa1z#aHs*z< z71i?MjM`q-0~UE+#mgp2(3qTrAF;Hno z-(F;w-GKQ*L|<WxaS&rX+>Y?54K-m=-0 zSTCyhx}qjBn#T*h-^s~z;jyhS2;kcN?b(6vyM@RbDJ%&xDLeQt9KXDE*JOQiql(!< zkm|A{YFm%{6-<{<%q~;_7Uj<3d(sp{Hae17%}Cd=W%2?NcY~a2@bgJrbnx(VL`vqc zrd>n;3Pkldw1oJM=a+RA$c?)V%!|qZ87)J(rfAhGek0oZ92s!E`MY$F$K`CL)&@(A zN551U>kL3RWTu#i@9qJHBSZ%RqA6^M6(%ALfX`S*g!;cCJG3CB{+#k`s7#fS@fO{~ zY)!`dqO_cc1(@9dlW_2G<)-;5){p8$ug7nLO^8$KSMJK^uJW&0c3e^Jib z(aPN(;^2k@_+Rvk?ImxMq)|V3rk;Yj4^p|E=E194uXl^9aEMg)Vw~X70ZBp$@DKIm zYw_0)BzwZs*}56aHfzB#x20$dRL4-T%WS2mE`;jM-PH0$j> zn_y!^jS8bcR9)OLa59NS9F0^@>)c73)FD}Vq@_1XX=i*zx#VrD|9BJBJ3vuPoGvjt zEC)SH>K4##ACc8sjUXUhbMsYrjBM(rpLFW3Zyi74uS(O;9eDrnM0yScwnOnTT|PaU zIl;#vqE5Bu)s0;*1(u0Xnq1Tw^s*R}A zD!4tj769iCHHciJLx_1XfoYDCYwc2lrD_&>)+n z4cLC5>`^oS6R#dviAAlo@%{GXU`TOpO%k~YC~ed^9||oIEGr8fjV=621vN<%zPI@* z#~lI{8;UQ$4J#)%(McOkY|b~r4l4@u;Jd#GnAH<~kl>LYIGS8gE4B^_F9;tz61LsW zhbrN2>V`I`x&~l(M4^gTK}A9>S2lOIkF!~wdGk|OXn4sHxqKIDD*7ixb)e>O>bzy2 zFqZaSBJk1WKil2ION$xh)2~5}k9WuVn9~>A`~+?`7$a?T9<*G_1gkH}Xsw2STRiCd zR-9$vj#sy=$1XJ>VeY|NdGy^~2;!?n1Q#(ScH>DJVs3>u?p2%XRHO4JDvBmTp+~LR zb@3Qn@$kC26gZ8j%A?*Hn=c#1A`V5RGF#RT%JpE7`k&5ZwQ$2joOH; zyDU07m+=U0sQZutIx4IFtfS>+zloG--d{#i?_y8Jy1C=*w#P7$eb+PEH&<;RMPE@6 z>0WW6D*=l?&)8{6(d}X=7O3SjZOHDU6H$$6NcemlJkXbY+|{)T`1mC^&> z>EPE)u`MJR4mVyi<%_w-0f9;4>V_VtteWzCG5H*K{K5@EA9q<^6)*|EnV)168@zf) zBmtln!@15?#yNH?KKSAOUdDcdXrRPF*rS=Ibm;sb za=@P7Liv~x5fKJKr#dr(<5bW)TCBi5#{zpayWga{k!?ClDZO(3=SIrV(=)FW*O%k@ z7np}P%~-0w1d|JQIy@1qpHHd~lQUi~_xyy)Zj2mndmJk&`tkDuix2zDn)G+062Y48 zQbX^J#aQbZlgSd37o|o;P7_=7D{1xN(_@y>8e6&kJ!zSAhG<91r;5&2k^EspQA72h z_<**~sR^p#8F!gFbB9-4+nrLhJYZMapk;(NooH)#Vt+DlqY(|Ka0tE^EQfzbPQ8_P zzY7*i0?=-BXh@LK$wI2hKJXMZy&yB7H&rODJz{9%Yj4+{9R&&Dw2v#5nE=P?ooMZS zVmc;9*`OvCq6C1bE=Uv-&Nga)bNZmQ!fY`x(lTK%Ijgjt6r?Q*bw0@B*)VAVNIjIzGMFG;poIH)bp-X#wzFSX=JSahCV%Ho_ zfNqYRPM8^Jd2k~Tm}xS8k1A;X3QWFDMT%*}HKZ(hj~Yoe`PrjL&-0MrNf6U(KUc0a zxr`G+99b{2yQMi{@-1)CYSehN0p#DiEXz68*FFI;ZWT= 
zR>nk$R#i+kEZ3fTgTpqYg&Leh6j{rhx_a-ut;S~srsB@wJqF|oLpxXTc4mM`YBH&{ zmvC=2JP}`P-1jVCLBx;(6Jc^S+iz#W-t`O12<>LED@U4MkCI^HtnCbLf$Qz5(RtYE zQ(!CdpIc*`-u`ESR*o*;J7fD;vnTCb3?bVGB7p}l1S|_QG8eN}m_v;y- zD4PwP9jDgxP3nGbTRi^ev}aI?+K6YDmxwCY&7w_szM`QyJ+g)}D*_~ZYpJn74FK~9 zJ!@c}gdK$BsLeC0<^qYbZ^!N@OtILjRWexh*XN}4^SreB2FU<(CzZD+?KqC&i<22! z5C>dfV&(`g&Wu{iXYe(gXqDQ_Q6`Pv747nW9XD?=zY^u@sY2@XeimM2EpT9t z9+sq>0BW0V-Rk0$=yx2_*Dvm>Sa_bckze>1J_T2tU`|&jx~I_B({AjQkfwRQbF6y0 z1k_6>0;19VI2a?OvFhw-y88!mpeN+Il*{gpJQJznb?s_y#%9r?4R4#i?^0LCWCZ8H z0*m=Axb#?XL26jffo+H%8F)ECT+MI7`HmkNor+^whq$!u9VgUqXp z6^k#u#WRW-wQN61Xp>nL^r<8e;cOYWyPC)NqRzP^C`JcNd;(TLEe*aU5>FI*Tv^wA z#dMfFyfJKD+sKyLKhh%Rp%ik_QUOg;CR@8U42$>F{CXcUPL`{ziaNdNoZ z`?@;_GKwl`AY{4XdGlrs(em!ECt$$)VSLXUK%DXYInLQ&AmHc3^X>-c z4}Sz5_ErDiE&iH+-V5M7(2sDys>=8Le{e*-9d|qU5TKOZiu)FaeVfnXm{+fNhihiEC q80ER_zh&vy^z*#HmD-O8r~0pj2Lupf5y8W|fc@b9F*u;c-TfZ{GiSR1 literal 0 HcmV?d00001 diff --git a/pydocx/fixtures/special_chars.docx b/pydocx/fixtures/special_chars.docx new file mode 100644 index 0000000000000000000000000000000000000000..b4b9287f37c6bc22cf4e673d3687e92ebeb1e80a GIT binary patch literal 3566 zcmaJ@c{r47A2xQ4WyT&EYe(4z*|$QPkR_CztjEYY_I-&6EwY5{OZHNhB3oo<8W~w9 zA|ZQXXbj&>-|29C-|6>W*L%HxJoocH&;8ub{k!$G$;g>WsHv$*9OL6vNsbhd@NJE7 zaPfkQ5#N;$b+pM;X>YILS=3iCF2b?h$Zb9~cfKvEsPyGk6q4#3yz=0q>WK#ZxZJMB zfX($&?_H7mxw@HzY1_--s-EJ54dTxJ6UtET8$-|rW0ePJt#k{avX4>uHjJA0V>_-a zmr6-^mOSQl)HCQ(vgNBTPV~goVbj7Sb8{qyKR9=rzb384UEY(G31odC5cP+n)f0VqgB_YxOS4J2KBmCSE_E1N6H*X^wTNej0 ze^-~v#1^-99#AJ%g~HZ7_hec3iF*y+Ws*{hWSJZCmuBvF@EW#+++l)N7T0l3Vli07 zd-zX7rZTnyOpP;ooINK4xy)vvabcO(+7lJ$`{+p|0FMem2A-;GM$i>>YTY!EuL10D^gLDLYXvb%~0Y1$~sSJd{b)i?jN+D zR}@^&(HH~a8Mhoc5+3wYRWn^08ZfF_G(n%9$2E8K+P`jr?uF+501{b~UY%3lCUD~a zTh>nfV(q_TvUj)hadmL>CQ|p8u-@smi_^xe1;q))V4TMJc*%8w0EJwya=(%A`V2W~ zo;rQo&0P-^)>+8wIXdXOhgUI$#7 zS2IKBvc|^^&8qY>lOEYbMd$&#S*dtCDw}fG_ERo?=tl#lY!1eq zA4Emu&YrZQ_Zxv3*l)Q5|Kjc_Yj+KM+$O|9O;}-a2*T^kYh7?jDni2GJE|=l!*HV{ 
z@nTfGcwyx$>#4l60jl9`Z1*V7tu>9+1n!C}D3S!ux%7OwFW=XyWh^|Ay8Ld{XS-jL zJE}6;msQJ`{GC-9v^=jQYm&2ym!EP8X#7HXyseKuVf|czfWlgacoLHNDLgr9unHV; zdP!{oh=idEmfjdtcsgC=VKdL>S-Khz{kF#mL-FZ1H5)eNjM5(roo82`qr;Za(=S1? z$k*VYOM;gC$;w0hn~EIwl>WRJb{}!q_SM5iQTwdOiu{e1`XB7BVSzIKk`o^22Xq~( z3lDb}J|`FphaxZQ?R_cWiW{N?P60J>D`2H*fXNoml>_A>*MO=&?p!fNtxdmCGIiHW%Wa6Y?Di~19Uf=L}qCOamo`ik^kdT1Lf9FUH zzx3hd9pK{N_0x{7S-X#mgTkhYWA}8KP#;L0OF&FQjJ8SRp(KLxvC($sDnl2}RwXN})nsnGKW%yg>#XX+- zz);lfEkD?6D*CwQ^doAVNEcHDEF-1$T$O}j6b5S})w%o!N0nnyRM94|{bkB4iB871 z8=Z@|tR7XHq?KjLugeXb<_w0;-Nl|)qGQJ_1a8#x>qJsj*)C$W;Y;<%Xy9seZxjOn zCVT0P-S&`kA_YU{b_U`sQ})vkHI?iBWfm0D!v+&iYP%0QaWuj3{H8S>`$%rPhvh6C zjERHv^$Y#1F``YMvwCJy&gf?q$8ZEuC{#qNJPXElJ^l{Y9WSO|PW*mO<0^$%8n9Mn zmZXfi*$zkT-%{88#fN3ru#F}&h@DE#T0N&T0Mj{xwb%ISJZ0DheqsGJdEow=sZ%>) zbT?R&t4D^{jf~cJ-Uk%;1q}yiha*M(D+}WBT9Cc_zFWg8885=xlJP+K7I^z^!Mx<) z*lJ(JLHvTDly&%%DarwhTt$XL_zt_~0oTJ#-JKGw1A=y(B+X-1XM;w9GR77yD}`aN z8fCVZaq;2UTe8+`r=jo78$Hmg+QN)yzqBfUHhaaPswuP93LRMs<;V|c8{{TRGaBV` z*NR|Uan!$)CWs(Sf@|p`+&#RYHXa^CZDzfb0NxS@QA{_rsJ*!95@R6n)j+|o3Zss2 zZk^Obu;8&eXVFcwvv0bB$}I88qM&^KijL=JG$@_A6;3&CEV%7z*e6PxDTCESWn%MP z>AympZnx6grjjSrvZv2$^*UWzS70seM1N;){KDMMQ@WNFA)Nk*%057-uR1CPAUYbn zA2B0*9e5CLCYckmG3M&^jbj+RRO$uEFz8V}Q$OYOk+{Ld_VoJ&gyjPX_wV1+0MVQO z-k_a3;%69eO|;Z&=V2zMBs#2UjbTBd16zhAyUvx&u^FxiJD3PJz~W_?`(mBK0Hc^( zBZD4&W?=J>WgP`~vNW9#Fj7*yP0L}NvI)wz%hjv)`O-HehZ71yno(VDCMQ%8W9-hg zQ^I3^tD9WN;+BkrLYyav6Q#sVW^;ReC~#m_{|Bq0ShGLy72jvpo3R$(x;+`cxJl+0 z7{qBVHiyWbC~QSVP%@E;#WNnfFzaZ#6)&s%=_J>iB@Zwp>CPmD^wp3lHoVK{+xyy$ zp#hY>!TB^&28 z5RmN;)q;?Up6io+Z*{uhB9J;EouovLoJV$BevZpO`yRvYT{A8MuLr;7mWjZvxjG?b z5rTrXQGkbomnBg@O-TcUr~ryu3yJGCPL}hyKz(`ssh+!0Sq8QVP^5>^TCrnYX zC$hmE#)j=L_h-T9s{4Pg ooQLKJ`j?L!haWvx;^jE3;1mD%s_1J|P!b1G6CONK5I;}+KX_NcmW)HHH3)XdoVhQG9gNo zC^1?DQQyqHxtHX~H`311sa=UBx0IBErI}O_ag9RHW$NCN7rt zu7U!X@6rTSWn7snpud|`;8eGXYij&Fn^sT-RkLxdu>rRTFUBn{w*?4Bm? 
zuiH%TU_)owY8jD{GM&Y?GR{Jy4MKKa)6#;hrtbwCbnnECDWw|TES?R?F{64A8`h<= zQ3Morewog0rLNiYJCvicFy48l=HOZ2t?Vq}@d-Nwq!Fk3`2LBwm=A3QX9%~Iae-l3 zrGx;DnzzNMX|8vX^LIO7(iJZwn^WHx5?6+{wSbDlGOxXN^dY!uxGckrQ(+4RK-l9&d!;#D*u3JbAa>{_k<;naeqg*A*H4`vRe zVopE3*B66wQa3HAGxpN^FdHliMg(SwH`gBuR`&qqB^w#z_O3O&m^&c7oI9MNl1S|B*amR5aD?2%M`>NufX zUXrVmCI5lqlvK721 z6eCaIIXWPK7lYr*r-&rpJ`n1SNM}OOlzyp(#({(>v}Gro?dX|aox0%4w;%u#POO=A zuf=0Rn3u1)&8G9`tGV&iM^?$ua7Wi99D}El83cx!@zM!+I7+bd&m2v^Wx6}m*!s?`A0_%?l7pOi>-q-hK#{v^@opzu07p4 zzDk93jy8Z9747RF0p?FPaq}2nndWe=W%(aB3zv5ZgFSblhs|5&35x6%jg94f^pVc4)Nmm(#nJ^=$<>W6@-Y7*pAW`EfhW~ph6`SqTD-&H}2y*0aV(TKQrXY7Fj(m zS0U{SGaMsZ9_fGB9`=Yw?a&~ygwkCpKSW&riT|XLnSxRb)w4vVd@9aASgk+ry@L>> zKa5}3lzA1xuHXO(MiJvzlNJF(s4MHG8|g&SlB2W>@EuuJQM11Lo684Pd^$kgf{%VA z(mcpW=3rYp`drhs+?c1!z!+B3UJ)DGkgO>|TQli0;!p?c9@IkXOjEoAQ$S=szg96;g-i<6fU=mH zKFQM;QNDB(d@EJMJ+M4a`89vij|nq24obb`nEzJir5rK?8w<;k__9a*s&>l1)qYv$ zgW9UG(`*#Y2fBJ6E0MNSuS?u@zJv2w4CJRa$W4W7;)7&@P^y9c?em;T#eM5Vblcmr z1HK~N0X_jK9E{J(nY5ooJY}*}(Rf*J$G7Km*5Ct5bq9@uf}k`FLXnxZteM>v1XIcd zhM)O(AhJ(&xWkPesFCT}e2jSX+Y4I>OItbKPVJYjYAM$W^9V;&yi2gJhkt!x z>#TB{o4;x3>o8JCCgb3t)N7O>>@fX|t!scW7Z}3p#`nf}bG*@s*mJUVW#qT2jDf1=x=7OxdH|3%A~gG+1Y2=Rd`B0Xm25y) z%lOw1Z+kq6b*R@xrPUaEPNeuI&SJd$Kf{hm%V(E(xl+TFy}E5#m1}2E9_$`Fz}*z& zc}3n~KhN|%pF(oF&^Ye0fB%Y zcN4M24@N1>w5W$_-Ils*zqGE?(|tZkE)XZj8J=XB3%EONf>FTfecX&rv;sQu{!Ibo ze=ES%&D-A671P?)I@1mdLKOaUWnm|3auw6E8gftc{*ag5P^w~`=fo{vM<(~c-<6q2 zrFhhbs7|3s)w*4!t*tr+QOZNj)+30#u=`GRkcMT<{4P78D{Uc%K)yI{n) zBPxbpdD-LUlhK4pJH9?jC4M>c5;foc;DYSy9QG&yaxP(x$7iBN{I^S@ME2(Ropa$e zTxwzpvs>B(I1gdD;$U0d53B2SmBhD2BZ$o@Q!B3lMU$x=iPK6Ao8E?I_D9igWro9V zQ-Xoi7C?@>)fPpyd14dH%fA=&CTbzLan$SilvFuW8q}hv?3IOECMvcS`BN8OL}``R zR6NU{2<-N$>0=sJl0IW5;aQF zm`xKvY0%y?LSm5J)i-y2qvKOJEwjf)RlnzU!az>SwUHh^tHfz`mu-Ge@852VVY}-a zkF`&8JC6sqTb&)DR()~6q_4D~g*F`#wgr5PHAWP)%Hjl#P3_SqYXNs^t2Q`}Vx{`M zk4Y8?VM2jUdfm5X{29IlV*8VP28>^s))uk08Hzg3E=u@hk_7bZV4s^=t#i<7gWsII z%F4FrlNGftB3;}6G6QBWO1AlJ1DMe^KX7Ju^40@+T6)GY2qF=j(I7qM2OnF=jj2nV 
z;bK&+ua2NkMW0u(B>$#r3bd-xmIiilbaE9mb8^DS`uVUh>0==Z{Q2fq*@}nuPc=E$ zH6=aY%*eUewS9cxLUVec$~)G)xY*v~TWoX+=cmZIUeZ<0BTrz1kYuvkS#|g>ZxJtQ zAWbRDFBX;q1FW;ygth^o$#_xK*HTxM`fL=oC25Pg$M&u^eZJbsR ze`QD#Hb2tuEO>!Whx9zs;8vFZ&Ig$5F2gwGmm*h|G|gUVp87ePS;08f+4wfUB$X&Y8=HZ*S6j&Ub-L+;catdKNUX z=l@DBHJW~i96B|lgP)1Ex09u-5k^4G@IkdsA&QVK{|JOGyw2$!5!V`a`1L*T5TaT< z`pNl{yYlgV2_%eyjf86f+Pgd8SE;e#Q&U}lyaz%Vbq>AaF_AS9pBi;PA1wjXqe(r3tTb?s^=j4#hu#N;WUOYs3>s;KzmnyvF<*9`HK4*3x;;_BmlA_SnTrPkJ`2)`L+V3+{0_>Wfc2-z( zVw1h@ey?K=rRJ-Wn!U++4OZ4*M;LWl^o)76U>!FM3H#L4mtAb5cN9V zsyG&L*8F-JIog6&E+J9z61U86XrfL3Jb< zy(lX5qyBr|xs1m&(dEpG$@&*{AMGlC;{Q+b|MgCn6V}gfVu1VOPCq5)U-6gEXG{*e zC^7sW_{*v6SNLTk#2n%mB|-QDesQG#3chSI7_Yo20NPjoF_s_B`78Qz7ybO^TbI#) j+tRP_%eug{+C`~c`JaskR>mj5TttliU}oNWGR*2fPRztu literal 0 HcmV?d00001 diff --git a/pydocx/fixtures/table_col_row_span.docx b/pydocx/fixtures/table_col_row_span.docx new file mode 100644 index 0000000000000000000000000000000000000000..856abfdf3e887748e96bf304d228fb6a4bd8a50c GIT binary patch literal 4057 zcmai12UHX5)+MwA442TGNXJ46V5BG@MT#^*dK0M78q%ZSZ{$N3e zJ9!pw%B4k_bbRbS-+kfp%5BdHmI(H?bUb>HEe*!5>BH1|6kq$Sn@XF!t4h4^(p=T+ z1P(*`eX9TH^7+nGcy~?HOL+tsBcwh7U~4f%kVr?5e0u@$ycN7Z^1O28J$($-~Er=r_<^qU@rGC)i6-*JrM z$MDypj=p^{m9*h!sGqvU(ZS}+X*LZF3rMr#@=DVqop?lZjwv%{H{1W~eBl{+I-%u^ z!eZ~tK3yusS~Xw=71@4{C(l*&S#~Jz)31MsJMv#z#Dv;nJPHQ5&gCUdqhkpu$)2lOSeJkqDjBN{6S+|r(InW^w2+R{|pheD=V|g zI|NL;e~a4Lzft=)ne33Zp04%?4xBn54_S`+MomA^j`+58|Ih*mH!5c8LM`p>0KC;KQbylufuj4iX@CmVWPQ z$%qJGjuLFD3H8MQe#aZSzY#cH`*Nz4{*)M`d16&x?BP%nHFd;7L4iB1R||N2)Llz| zI2E>-3+TTUa5KIli%}?m@rrDPc6_&BT@#{@LZ^p|el(dksV!kEeJ*<*1_^>;I;vL> zYwBbcq$4B9=t;DwzF&*J;EwI_Ps92Gp2U;WjYH&Oiq#IV!LfHJ`4R$`xI=kfu+i)R za&Jb5Sc7BbWy~IJr(-rN=4;IsV)8JU+L*WGC_Ot{<4|yg=n%f%_8AYjnia|126ymH zecg9myA&=}^uklsG)972FNsQ--(*o+K`RWAawn4NlRtH6Mbuni5H78=EeLGz(p|7C z;MQX`pE=jUG z*Mu?AaJLyqcf@DrCA8lrc~bw(>iRb4gm~@Dg4k`#n+-oSC-kxKw~eylA3qO&A5&4T zcE8WWQYHMy%&bY;n+ryQE}qwxZ}-E#X@p!f1%bF;O3$g1^m`Xi+c)b#i3TCG%{ zv$tHJXx&kB{V0$K}_%Y}C{lKQp#tqbk1nNbHx2v5cy3I 
z4wmVA>ct`vP-{HjdikO7xA~2Yw<(#!jAvJ$7rEx*>h{P+Zv+Sz?26Ni=B`%Bvr?uIpTHhA2LHjLn9;2ej%>KG}n+d9;}!2*98-1ma%a=>*vn3Xx&px=f10%bsNF z4MX{JdEqhkGoLO`nX3x(y%9s(vArrEsl=2-oBWs_uMnE75O;h-0!OYssht{PhmVWBJ5e4h4Xu#FVvvA|e9WN^L$A>VrveB=4!uqMuz#Q?8d4PHx~D~Uv{kO$ z7fEW~=PElnk;z}>Ri{J_b;JmNAd&FEhwDWQK5H{zGmYI2&m`5 z$=ocE3Zr=J+t6cUWLX{5SvKZ8Wd1lP5|D1ZNKc)4M9S+RG@(*j8a9oUT(4DP8k&f- z(#Uv)&da@`0fRe3f}aYXp&BR}VFIzGB;R~G)|1WD=E6EwI+xpAxeaW|JP$9bbc5uZ z7coCbe~~LVk|HvQ+lJ-cnL}`AQYN6!l?&YJ4fISQr*+&N#%o&sc-dJ)!))O&`V`Wx z6ROMi6fCb6(6_~!!<~B{uJvvcId^T~=r{+b2bM~3n zOV%-W6v85cR+YresJlK^J$Q&&+ka7JUz4(2$&r8TZi(OKs}VY=-vZO)(1y6ZC3wFo z(;4iF2o`YyZz32yAgGyTMPQOx`~Ta({}&xwB>FGk=NfBytA(49XozX>A{Cb%;*u;K z?^_c*tiy+Eq#vV9%*77bxF@zdhSd z`Ix;Wa^J#llNqJb&N&FFnTeg|?;5kaq4tt7zVB!k!KfaTO-3U~X~8UTY+D)j0BDR% zdw==rqoHq?2;HUTj8o2jBXl3vPIL5ZcY1cu)z9g**8U{`WZVQ=QO0gj!Ox} z_h_Sw1Z+neO)?mK_kR$OgWM7&$}eC*-G zTmY@y%I;<;-SEnCklQuVb4_gt2b>x8MU%2<-lONV9Oz5BGKp)DoMRsIT2wQQ-@$Ch z(evtp-IX%Qzu+y*bGmzu0PP5&wdiUBNEu0f&T*&b8R8uGpK)rc`!(^jr6-O%KZBm| zs(%hYzviDd#6;u&Gt>zh^jH3W83TS@>2%j58t9*)K`8jIl}_8~U(-*^9 z(tkGEUz1O(0#O-%#xm7Eb@JEj)4c!BFF!-ri+||Iuj!}vmH2b~jC&XU`S~x2?3?SK@kvq zfRy}Zd?C;O|9NJuS@*8{ojc#zv(Ns{2C8A>P-EcZ<71@MaVTJ13L@0Cg}aTDhX6nN zyD~*X4O^Z#a`&7@ac9AaCwgG?kVV0Tq3d{4Q(yqn-ei=YGKovZ-UgGqrF1G%IrRhTJv`C@pHC8bJYbg%knxX3NH zvTSBsEl|IV6-(W-WVeOJqbI>a1^Kt8XB-AhTCo~VmCwY*{AgdWhO^t6mzq}BN$}HX z`dUvsDE5WB9Xp7m6GP1G&ixA|h)2KZh}<8Oukzh9k}zy+O6M#wx#x3XFX9p~+}7_j zVZItSQq(iY@JBvS4S>Hnj(KwqRj(`-1_tmy5urq_@Nsds7O-`3_SChsalUG*x(!sCZ8N_tfep#Mw8i1>$mETmX~eE zW$$)Be}wbAInDHS;BtI!hC|MFo#2zFrzLU>Ox7+By}&lko@nU)Bv$e;VYeW;(ovAo6NH9|{M{_?n+i>!+c!!l0b*n+oF_+k zY30CIN}0l`l1GArvDw!KXezfFtdj2DBG*-Ti*UGl?fJPowo>~5se~(2UUQW=H2nGo zo#$dUUx}tC=gYWyc^dYF#*|awdT~L zs5BoxWqN*e!RQjeGjsqPm)BJX#R6=421q?>YZ;)LcO7Wznmu8w&C)5F7&15$SvIk9 zLa@?aJwI7I!$c#ZM6&5SfU1|~w}|}1BWEwLm5sZ-^CL7Eh$kb^Y(eUXqd>y&DVv9` zA%YW0a|u)Y<@}{&%a|PUSdX1JVC{j`34>Z3G_&#*75;(HwO#%S#XJkSPealm{-H>l zpl1Al#20N`s*S^EOD@$*pe3r5nlN3GV%S9#;f2lDjpKUTsFPgxabH&)sx6%lyCqI< 
z@nZtC99d{J)UJL8Gni8muq_qp^u%R|OVVKFctyC9vzD>5eHC|Q_q~#mYWiJfYEk0= z5i)GA;#KkBc{tlw?1j)RHv+^>cp5&qWf(Dow3Yw*f?cl(cU1huZSfR^_&JmBQst(; z_x9lxgShbFCp049n?k`6mWKDpI-ad>vhW&VX8U)0`=t%vjhlX%Tc?vf`O-K=xP0Qh8Qf&dqX{2ssl9dpw^ldCgxRQW`#6RH38dCa@pnQ*i)fzqU z!bk|*Tn?I&)dWWA>Dsh$gZ|iX_VbLC>Pz9isi4QJV$#;wxSw)jATlVgN?3pVp$4yIx^=)O~Hdg9bID+GC|UN{pGea1MVekwRb*Y`KPKx zNBFC_$BI(DQfMz-Z-`mUNbI#xr8v-!x@*RjJ8i6OOY9U*Emy_b=J4J#6`RmIh(7Kl zX#mFl9KhT}1%oitcOQ=FHF`TDj^A&NxI4ZL^U{CJM!_WnRVMdNabQLeUQwf?P(H>p+$NH7Qndmv6_-y3J zE%{Yd=;nKr$=}DOD8-WBIFI#j8_`j)$dDR zKAxyt8)|+pmC8(!DWE6ANUF6)`II&j+*M|@yMK2|Tm~q5U^0y0lQ5L7QOd0(Y$Z}-^UxWIKO8L+1LMZJW^OQ-*lsitGT+a{ADQjBHUnfC zjYJkLz+E?b`o4K_fGfO+_x#pIwr6O|;3J35k2So76C5)b7&)I}+JM-t_;oy&jZ*Dm zG^-Rk;!Vahm;*g|y!aLIL(fE2lwTP3J*1Vi!9sw zSe{Ju_2s`Ch+pid-W2Tz!930tNOx#m04LhSwsBXofMkprB>VL4?*{@*k0vq=R_emi z$7EUpSH#&AABrmtgJJYa_hmY#SFg1D6T8j#1~z@BWVC$CjN5&dEw|oz4j)}K{7Nyw zFY)MpON9E&wVojW8MN;}TRAzA8jm>=QTgidTa+f@^rEz_d*`jxyzYFjl`}b4SXRL4 z+lq^rO+}HTJHj8sZ21S70B=a!60P@%Qr0vf)jqc+M_VzTgp!ng3!y{0^xDbgdBEL# z8sU@n_=z2|BmjxBQg@$RZ6uk6&thM0kg1PUKk|D;LezD`Q>{q4M5$aYWnY(XV=PB! 
z7~R^z;iq)-(k*8MXl};N?pa%)xcL$PcibdLaT6uNKzA2c4*^S8S2Q_u$8Hfl79_`A zMsz5=aCC|Sv3>$c`P3{Zx;u2fS8=B~KhofuLaeQI_4}8bou~1U7u~Grt>sk0vm21Q z=CHTve5_=hENU!Ep};2=T?8im#Ap}UNotjW6W>4wL#hth-TNX%3-6oygSzblb zZf+{V9Wvy%5uJ3vRgzK z_^}6;Sd7{}5$5j-L?1Q(6!_4^{l~ZrPBwGwxj`Mdd&CoV6qD5{rC=o>=66esuA=y% zPthYdZ%{#%{l&TZyzK%fWMwZfK*BF;!~AHDGyphLA5Dv6^B_EZ$dk{{q_uC8eY%<1Dx(U2I`i^VIF$-7&4|NgWTHzG})4Ll4l~ZtTPxb*yEH3Aw2nUKcgy%=(nZ z8{r?UYfIx=)Oj>Q14O>%TX`}b&D7mztQZt~Kb)MM*k22og}Y_Ni+p>LQ{Ex`@HQIT z^()67*HO4${1#g(6t*UcC?A3vr7d)QU2QzfehiUmBbq&e%Ha&8~Ow**;>% z-24MLI?y1V5OJ~YrFME)VG>P#gOF|2YH^Vv67DaOU*I@B5bxx=ti)msS-FhD8{(og$2V8U0by(2g!R{y~y~uTAerR}zebikXg2sVFF5Z30FR4W3s*{BX_->S#i*qBc#8FFLxBZ&TEumT>rS@AD*#{CCi9o$(n1ZNn>Q477dxQBx5Zk!jNsoU~E@0$v(!KB@x|0$og3- zOZF5Z!nN;YO}3Kcn{lr`IrsMc-t(UGp7*@J|NPeHc?PXb&%gsOU`iSTpp;Cc$db1 zrmriKyhu@%P5Xf2L9I;4Bn=OX-fcgfHVNl-sJfob_PDXL@df(jjp~z2hVg$kbb67{ zPkxgiyd2o@4)qWfbCvW@bcGt$4^Ep(4Itt6p(oWtjWlWtpBXwnh1!kbbKWO+&lz)j z)pwp%$k%arObF~ZXq9>|uB@F{I3Cy-|6Thn?4Xy4=PQTRU8`?Y?VK>)v+0>34R7487Muy)zT1fVa+` zVK+-PwZKwYXf+Yf%|x$BtFO`IeEA zEhe-!6Z2cY=9e)*03ezk06_o$hPq+g-R-e56q z{XO@7P)t%(s4)(Qtj%j_->KaB`bi+b$C9_*#&V=2ElNl?m6yJJ9^**|Y3#J2=RV?< zUZcEbCUs3qI5YNFs<+##9%+_SH_g5=hIeA-}6LnL3D5)cfBvJG?ilSc0<^ z#+8~IEj1mIDo33dH8nL`TsDk!6zQ3YS@^T_hmGlQ>lW{KsPQr}Ge=3O`ctDz`epo_d-1!udpluv z`_h1np?WZ7J~N*sCEkYIBSql;xE4HcK#t%>9|<%=&E$5e1%zbV4OwM~#(k zs|fi$oM2lj7vNCw{}dN<)9d#x7J4yR9sOdVAY1{bMRg#MFGPabfUe$!>1?GayW(w_ z2fvqC1^2LBy$eKrI8t1sK?J+e}+|EK^r%VGC$JL zOK;-}o>a=7>?^3_6)pLe6amuP99%QmG(Gp(!^7_d`<=|e@f$3PA6u>!$b!aPuFB&8 z)^U8XPKj4v?#Rulzl|vE+SE9KU@IKuLKpSu$m^6h*`~A+RrM8Eu{!-N(y6i#wIu1`W+*t@FOYOJtpv?=uIz(O0^q&_du`c{BL*h(iWS z`aiZ$?(-J4hNT<#*IJRy0>!-taJnXB18@3Lx`!d(!5VaCUBBIG=fj#-)#TJ4_{V|1#~30)uB4F{t@BJ_xuClfD!F4` zFBgvp**++gw^v)d!QOpYI9^ic$+?eDde|-doK=j+m3+%sPBen9rgtKaMnOMEbQ8ubl zt(@|+1wuOjT(snRfpP=<e|56$xnDRrdiWRFzeRm?WHJ6roI1tll?$Iybxg#s0L zCi!(u&N0=ahz(%{&~Q#d@+GvUwD*8qI+2Tep)C38Lf)iQ_F@pFm^a8oXfjLe1dmZ3g4Y@&a4&aJ1s3bCKT9nL^p|*li2B7})!MC4MEeG*^At 
z-V}5X-34RLJ^^JaJgk&=OGK;gk8&w`dK4R}%+D`=e#73@BMU5L$7qORAw6|>C^oIh zIPbhsW2Khs;t|gv&6DmtC)bv^q9r6HYNF`G1J9h~e0T{FQ+rKWS5X(l$Ch~=)OPO8 z35W54;=xbL_BH)BDTl$Qjezrfx;uYj`6V?kN#TN=zT(GEL}S<<#P|mv3d{AZXqH~i zoX+J--W`nOZKb(9Y7WP#%=Yit&rSWp>;ew!PE{xthTbk1H**^Jm&UbRq0R%!53<6m z0Fwr*=~RpsG4M{&ljJnb$BU+t;x|4PZ8bl34UOv0u$vzIeLG6(KM{*z3Jj+>PT(nQw$OF;=U<(m5eDNEy;fF{QO!Ch@zD|b*Gj(GK1|BMY%1m6kndMzjK(2f6896EKI3=j?-w?Cg)2~Ds3NHP}Ma*bFI zZTT>YzUZoS;n2jaBnNX6K_g`qG=qp-WU+JVH_4d^&1mP==A7aBqSW<3GMwU*YS}dz zZT?$($7xaNp2*;B5eoyj0*@$kIk^~r@%uA$OaJWsi0hl5`|M!73K8!*RyEt0vWUl2 z395P}zp2)bTo7qke;|HKyKQSuFy3o_^UALGgIJ}F=yCy1~C zuUQQWhuDxRQq~jJZ92{F7Dqg!?|G6uWJ%#wy7?is`0B&+#@E7|4TIEII9^vXmuw%$}&Ro3Y1m@JBr=E z!Gp=nF}T}wJP6XEHqT=kn(QB3S><;ipwP=)Zq7e(v9wJ^%f) VjMiqPmIeS|p>VTkNRNXb~TL?|+{W{}W`Otu)tGO{Pvq+#sYYqH(QmZBtUaT!a- zo+aD0G)RM+o#8*@E~)#!oq3*lW}f-ZZ@%yS-t(UKKn>{`xM-M|m}vNtm9%LN1PkS} zowp;xM;1(dFG(~uq}Mteu}J2=NE$^*V_FDnV%kWtRRB7Dfz(9+oViu9H>L%wf<|TG zYXZJ53HG=XcC#)&!eX@=3vX$8$#qxBx%z+9kQKSnD_do$8Q*7+ZgZ+|44q@osUL@F zGFm85l=pi2P}Ir946hCsE6-2x8m(AQ3suO13N{ zQ(Ar&%zZh)q5DQ|K!Mk$t77tD|LZQ~pzO1U2}|aRmpZkc1uR>iwW@fXEcFDY>$i7H z5gFWGgGY4RPGUQ98ixc{GNFb5rr9pq<3p75YS7WpK>rgFPRa^Dq_=~t6Vd~9#U74u z1pB)qN)qZl8jpdR*EJ8pky(6&Ex_0+RH1_69Q~tZRo%(BCQ~X()1LD5^}dc^@(njZ1NT^rc)b?bq!mn%at8Ry78bjN7yt#?P2XRUD*EO{{RM*Sy+{xNvp`xZlnz^_} z!NmWks0sd!+Mi@{K;HCqcl1C}q5G3q3GKiJIS^$14kWGC7^bC@ar?OTYqRsm@_GTv z?1@ra>x@Z-(BB}bXCA1o%e6*56l~!x`TPnVuOkn-qCHUS$|qPv_NIr_wXmJ_6nR|v zOeGdAG{cJ;dnohdGD_+V_N5lLWK2ckHgq_JA55r$WHc{l44W`c+ZmaCIE=79aiOsK zUFcxdt-!EzGGNZ+OiUJG9DoXIamPM>R~}0Tm_-GvIoDLR92xd(dHrs1OX5X_c}{nm z<&NS6434N3Kgvhm8rz|zA_8HZ4QQd9m;0xP{KF#;Uw62p_bm@+Dj2nCA7dZNfg%>S zxtI!41;SO)F)0h91|PA7g%#`*hfZiUFb;_i)rH_0y$8)`pEV^bO@&6+n<{SpKDQa3 zOo#+pUs3lZCll3@oGomZLgse^g97wyE-9mlzZRc09v#0@)%j7@gvffn%j(|=^y7Dg$ z9qY$^NI3(7;CzXh0(1#oO5ZAq@!1Lf7Mxyzj>|_|(!J}6_^P}~=O!kkU-m}K9h-{! 
zts=u+*av6BDz{-2`Qp!!v|HYqQCX!eg>cIkeS>+dGuA9>RD zZpaf)zq8|NoAYJu0FqK>U=i_M+-3T{WRnCy4vX>z0K|CCK1L!diB~% zxXLdHK;2MU^OAS)nr^kngpZLhTW+hbK`eUfRP>AfqH_-1);?nGqMd<@bNxQH z!Fg2=X5w=lnwRz$N~#`xh6C1OvNZa{;6UIE3xB>vD=g!sCKbbt}4H*cfrgEm&gv(Sm1P714vaMSj1xzns4{o z*_}+pflo`|ZN=-;*PX_gTH{QR29oqDSMP%LZl@-qKXabSe)!9dhNi5r9z0<#O24b- zn6pP~?-qV4%@Vp6Unf5wvY7guYyRG`f z6a^3(09%He9u`t$WlaoOfTZ5q!t#Up)aom^akxTOyH$~-NB&bX*DuLgIA=|Ns0xQF ztXLS$zA;J}lY3rzM!E{7W*HHad2QFtK_t=POY9y)|Ip&7qiS0FT9k9^19_3O)0ov` z5`E67QaVVB)@8IC=}87WJ+9mO0&&=$c|0FWdRZPOkXt#1P!_(4VO;h5S~==bUyC30 zShtHyN##`~daUJd3J6;@hGU-vg!rX@u-@2w?%f%%VyOuOTh8fhbQn#_g2bd!+ens= zE$EM_+~Egg2$yEf6la7@E7?MH6~COobU{Ec@?5{XRY~%cMGaR}Yy9fK?hTrJRaZ%O zLAp`BAp4O@Qo2dNt=N43!d|VeIiar4doJXCtue+n43^STZkyp}x6S_dZTp}C5RN|k z4a>sa< zW2LujByyI6XQUUy$;Gp;0j3GDs78zT~P|)NHlXLxW zU!7x>mHkQ#x!D`Z!j{5@FwJ+9y12&jROh`mkHn-}tlN0=L!j@-F3P^Z!J{y;dyKC? zPv!!LN4fg1eZ@OhMW<&uv-yJ38u<-=a|{(9#sPUW5z*s*J#E+UAf)Bk2oe#NF^pKe zI_RL%Tj}&>DomRnD(NVervL#wR*L*IJ1w@-Ub))G)pKdxjHrMHwTvgCb%OnLKXnUM z4hTslYp)4%M!}5lxAjBMjYla{5rdK&o23J%qUlfilZx+&@zoBc6>wA9)ksw zs-07pm_D+d1bsTcA_^dx_-?+-=)~M^R)zJ*IFx(o=sY9X`~gz4V{06vsq>ZDCE|LQk;LZIh2gOEgu|g1y+@d< z12ZM@Pg(9V58j9h1>IaRjs7&Hn#}*&`JoyvVrc{4>v=ca_4MljX5ZZXuwqCo;4RPO0eBMmW@A>FOZ`ZdY`rh2+b>owLwNq2?@Ii&w$w@Mxoa4n!FQg!h zE-h+;uFJC?n-GTtrE3}-+A_+R9Cx;PVV8)vZ15C@=vTbyM1wXL-6gf71cuex0QYsjc?cm zZLy~qoFjVyWGCJ|V{K8tNBQN`b8(By-O>3rBvdNm9l+7}o{KJ&PS>iy2$sF>lsuL# zuHzxTSR4lYBAVqGVh+TkSC{5X=ta2E4xse6=%Ox3UA!C{oIc||;@Er89qZBnL4$EP zzDWg1OGKZ39?>EZCm7ez*P)BLVjwR)8n-AJ#>K1Fbp!rZF?S=b{}f5cCd3QaIm={w zzi27x{WSP#`uw)$XKbXvR{qA+%%(fPl7-XGDcYytI9EyaB_;(vhJ~)dlh!=!aUH;Z z)oh4?OGa2^XhP%A)>b6IEm3LRd-~I7FB#zq*AXXUgJ9ds{WFC5 zefEq9j@1iOfZh8kxJM}9!Y)!wH^n#FT?z1X^trz8n36iE@S+z(qFOAIDm^bSNzBnk z9$$eHT3)Hd+}oS>HQZh+hG9U*fD)7N)<50{mzypGR=mm^yrA>tdLyCer1V?;(w8u$ z1$!@;?F;L31w7R>R`DO~8@G}gl!#J6&*`K^-gfH~bGaicyqdz}P2h)i;A*wgJ=F~0 zGwk}}d#^V~ zhryy|zb_L-a^h0_&u>-=MI4cSd2X1#mp~IcJ{i9)^tk5hEWNiFIh`HmeF`d(%mjL{ 
zD+=54Z&32zUM$-_<7M`hjP0)5_Xma3l3J%JK;PK!7_@X;G~Xt_gA)*S^7}px%z;qD z?-LIW0aRoAEjTHE_3v@%$NYnqnK~+b3q6Xe?C1Z-;PB&<4%(mZcXEyXuP1$HxPQz) z*ifnE{4E9#{U6KpWBNfMq;}nJ(G2)2{aX+IG5Mg#P_^<~7y}Oo%U{a*WA?!+`hMqN n%3c0TmVQh>$O~$zeGBa2|66!a!$XYJMa+~Jb!6UPrGEP#?seVJ literal 0 HcmV?d00001 diff --git a/pydocx/lxmlparser.py b/pydocx/lxmlparser.py index 94b130d3..bd866645 100644 --- a/pydocx/lxmlparser.py +++ b/pydocx/lxmlparser.py @@ -9,15 +9,15 @@ # visited already. #if el in visited_nodes: #continue -with zipfile.ZipFile('/Users/samportnow/Documents/pydocx/helloworld.docx') as f: - document = f.read('word/document.xml') - numbering= f.read('word/numbering.xml') -parser=etree.XMLParser(ns_clean=True) -document=StringIO(document) -numbering=StringIO(numbering) -numbering_tree=etree.parse(numbering,parser) -numbering_namespace=numbering_tree.getroot().nsmap['w'] -visited_els=[] +#with zipfile.ZipFile('/Users/samportnow/Documents/pydocx/helloworld.docx') as f: +# document = f.read('word/document.xml') +# numbering= f.read('word/numbering.xml') +#parser=etree.XMLParser(ns_clean=True) +#document=StringIO(document) +#numbering=StringIO(numbering) +#numbering_tree=etree.parse(numbering,parser) +#numbering_namespace=numbering_tree.getroot().nsmap['w'] +#visited_els=[] def get_parsed(): parser=etree.XMLParser(ns_clean=True) @@ -108,4 +108,4 @@ def get_list_style(numval): if i.find('{%s}numFmt' %numbering_namespace) is not None: return i.find('{%s}numFmt' %numbering_namespace).attrib -print get_parsed() +#print get_parsed() diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index bfaad2a6..47d1dca6 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -11,11 +11,21 @@ def parsed(self): self._parsed = self._parsed.replace('


    ', '

    ') self._parsed = self._parsed.replace('


    - ''') @@ -123,15 +105,13 @@ def test_simple_list(): 'simple_lists.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
      + assert_html_equal(actual_html, BASE_HTML % ''' +
      1. One
      • two
      - ''') @@ -143,8 +123,8 @@ def test_inline_tags(): 'inline_tags.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' -

      This sentence has some bold, some italics and some underline, as well as a hyperlink.

      ''') # noqa + assert_html_equal(actual_html, BASE_HTML % ''' +

      This sentence has some bold, some italics and some underline, as well as a hyperlink.

      ''') # noqa def test_unicode(): @@ -167,8 +147,8 @@ def test_special_chars(): 'special_chars.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' -

      & < > link

      ''') # noqa + assert_html_equal(actual_html, BASE_HTML % ''' +

      & < > link

      ''') # noqa def test_table_col_row_span(): @@ -179,9 +159,8 @@ def test_table_col_row_span(): 'table_col_row_span.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
      @@ -204,7 +183,7 @@ def test_table_col_row_span():
      AAA
      - +
      @@ -227,7 +206,6 @@ def test_table_col_row_span():
      1 213
      - ''') @@ -239,16 +217,15 @@ def test_nested_table_rowspan(): 'nested_table_rowspan.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
      ' + text + '' - - def table_cell(self, text, col='', row=''): - slug = '' - - def indent(self, text, just='', firstLine='', left='', right=''): - slug = '' - - def indent_table(self, text, just, left='', right='', firstLine='', column = 0): - return '' diff --git a/pydocx/parsers/Docx2XML.py b/pydocx/parsers/Docx2XML.py deleted file mode 100644 index be0a4953..00000000 --- a/pydocx/parsers/Docx2XML.py +++ /dev/null @@ -1,16 +0,0 @@ -__author__ = 'samportnow' - -from pydocx.parsers.Docx2Html import Docx2Html - - -class Docx2XML(Docx2Html): - - def insertion(self, text, author, date): - return ("{text}" - ).format(author=author, date=date, text=text) - - def deletion(self, text, author, date): - return ("{text}" - ).format(author=author, date=date, text=text) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py deleted file mode 100644 index 815d2ef2..00000000 --- a/pydocx/tests/test_docx.py +++ /dev/null @@ -1,868 +0,0 @@ -#import mock -import tempfile -import shutil -from os import path -#from zipfile import ZipFile -from nose.plugins.skip import SkipTest -#from nose.tools import assert_raises - -from pydocx.tests import assert_html_equal -from pydocx.parsers.Docx2Html import Docx2Html - - -class TestDocx2HTML(Docx2Html): - def head(self): - return '' - - def table(self, text): - return '
      AAA
      BBB - +
      @@ -260,7 +237,6 @@ def test_nested_table_rowspan():
      CCC DDD
      - ''') @@ -273,9 +249,8 @@ def test_nested_tables(): ) actual_html = convert(file_path) # Find out why br tag is there. - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
      @@ -283,7 +258,7 @@ def test_nested_tables(): ' + text + '' - def table_cell(self, text, last, column_index, row_index, col='', row=''): + def table_cell(self, text, col='', row=''): slug = ' Date: Tue, 21 May 2013 15:42:24 -0400 Subject: [PATCH 238/404] fixed break tag --- pydocx/parsers/Docx2Html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 19f76a00..c8d38238 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -165,7 +165,7 @@ def indent(self, text, just='', firstLine='', left='', right=''): } def break_tag(self): - return '
      ' + return '
      ' def indent_table(self, text, just, left='', right='', firstLine='', column = 0): return '' From 6e8bf4fe73f56adec5767e1b487ba250a3fceb24 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 15:49:41 -0400 Subject: [PATCH 239/404] updating --- pydocx/parsers/Docx2LaTex.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index a018e5c1..2e5ea106 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -121,13 +121,8 @@ def table(self, text): def table_row(self, text): return '%s \\\\ ' % text - def table_cell(self, text, last, col_index, row_index, col='', row=''): - if last is True: - self.cols = col_index + 1 - self.rows = row_index + 1 - return text - else: - return '%s & ' % text + def table_cell(self, text, col='', row=''): + return '%s & ' % text def page_break(self): return '\\newpage ' From d584137ba4c04f8334e3b08ae429a0546280e514 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 16:07:41 -0400 Subject: [PATCH 240/404] refs #30: Updated the tests for expected behaviour --- pydocx/tests/test_xml.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 4e5cf1a0..16b274c7 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -641,7 +641,7 @@ def test_performance(self): class NonStandardTextTagsTestCase(_TranslationTestCase): expected_output = ''' -

      insert +

      insert smarttag

      ''' @@ -674,8 +674,8 @@ def get_xml(self): class DeleteTagInList(_TranslationTestCase): expected_output = '''
        -
      1. AAA
        - BBB +
      2. AAA + BBB
      3. CCC
      @@ -696,8 +696,7 @@ def get_xml(self): class InsertTagInList(_TranslationTestCase): expected_output = '''
        -
      1. AAA
        - BBB +
      2. AAABBB
      3. CCC
      @@ -719,8 +718,7 @@ def get_xml(self): class SmartTagInList(_TranslationTestCase): expected_output = '''
        -
      1. AAA
        - BBB +
      2. AAABBB
      3. CCC
      @@ -875,8 +873,7 @@ def get_xml(self): class SDTTestCase(_TranslationTestCase): expected_output = '''
        -
      1. AAA
        - BBB +
      2. AAABBB
      3. CCC
      From c9c76fb0208f3ab27aa79a3025a4b29e2300db71 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 16:08:38 -0400 Subject: [PATCH 241/404] refs #30: no longer adding invalid attributes to insert and delete tags --- pydocx/parsers/Docx2Html.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 782be941..525bdd9c 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -54,8 +54,7 @@ def heading(self, text, heading_value): def insertion(self, text, author, date): return ( - "%(text)s" + "%(text)s" ) % { 'author': author, 'date': date, @@ -88,8 +87,7 @@ def image(self, path, x, y): def deletion(self, text, author, date): return ( - "%(text)s" + "%(text)s" ) % { 'author': author, 'date': date, From 70fc06feaef7306b6e8bc285975e1dcbc31e5652 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 16:10:18 -0400 Subject: [PATCH 242/404] refs #30: stopped break separating tags that were inline like (insert, delete, smart) --- pydocx/DocxParser.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index c57817f0..33c322d2 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -576,12 +576,22 @@ def parse_p(self, el, text): def _should_append_break_tag(self, next_el): paragraph_like_tags = [ 'p', - 'sdt', + ] + inline_like_tags = [ + 'smartTag', + 'ins', + 'delText', ] if next_el.is_list_item: return False if next_el.previous is None: return False + tag_is_inline_like = any( + next_el.has_descendant_with_tag(tag) for + tag in inline_like_tags + ) + if tag_is_inline_like: + return False if next_el.previous.is_last_list_item_in_root: return False if next_el.previous.tag not in paragraph_like_tags: From 6dde9350297081530749d3f6237d954a7391f828 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 16:12:31 -0400 Subject: [PATCH 243/404] added test; made size 
search cleaner --- pydocx/DocxParser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index dcb80705..df15e2d9 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -705,8 +705,10 @@ def _get_image_size(self, el): """ sizes = el.find_first('ext') if sizes is not None and sizes.get('cx'): - x = self._convert_image_size(int(sizes.get('cx'))) - y = self._convert_image_size(int(sizes.get('cy'))) + if sizes.get('cx'): + x = self._convert_image_size(int(sizes.get('cx'))) + if sizes.get('cy'): + y = self._convert_image_size(int(sizes.get('cy'))) return ( '%dpx' % x, '%dpx' % y, From 4095602444565bb725b94f2e5ea6ef6210cc4d3a Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 16:18:05 -0400 Subject: [PATCH 244/404] updating tests --- pydocx/tests/document_builder.py | 36 ++++++++++++++++++++++++------ pydocx/tests/templates/drawing.xml | 6 ++++- pydocx/tests/templates/pict.xml | 17 +++++++++++--- pydocx/tests/test_xml.py | 4 ++-- 4 files changed, 50 insertions(+), 13 deletions(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index d3415f18..4e951608 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -125,14 +125,36 @@ def _tr(rows, text): return template.render(table_rows=trs) @classmethod - def drawing(self, r_id, height=0, width=0, size=True): + def drawing(self, r_id, height=None, width=None, size=True): template = env.get_template(templates['drawing']) - kwargs = { - 'r_id': r_id, - 'size': size, - 'height': height * EMUS_PER_PIXEL, - 'width': width * EMUS_PER_PIXEL, - } + if height and width: + kwargs = { + 'r_id': r_id, + 'size': size, + 'height': height * EMUS_PER_PIXEL, + 'width': width * EMUS_PER_PIXEL, + } + elif height: + kwargs = { + 'r_id': r_id, + 'size': size, + 'height': height * EMUS_PER_PIXEL, + 'width': width, + } + elif width: + kwargs = { + 'r_id': r_id, + 'size': size, + 'width': width * 
EMUS_PER_PIXEL, + 'height': height, + } + else: + kwargs = { + 'r_id': r_id, + 'size': size, + 'width': width, + 'height': height, + } return template.render(**kwargs) @classmethod diff --git a/pydocx/tests/templates/drawing.xml b/pydocx/tests/templates/drawing.xml index 75cd5f75..fe950155 100644 --- a/pydocx/tests/templates/drawing.xml +++ b/pydocx/tests/templates/drawing.xml @@ -39,8 +39,12 @@ - {% if size %} + {% if width != None and height != None %} + {% elif width != None %} + + {% elif height != None %} + {% endif %} diff --git a/pydocx/tests/templates/pict.xml b/pydocx/tests/templates/pict.xml index 354356ed..b096bc1d 100644 --- a/pydocx/tests/templates/pict.xml +++ b/pydocx/tests/templates/pict.xml @@ -9,11 +9,22 @@ - {%if size%} - + {% if width != None and height != None %} + {% if r_id %}{% endif %} - {%endif%} + {% elif width != None %} + + {% if r_id %}{% endif %} + + {% elif height != None %} + + {% if r_id %}{% endif %} + + {% endif %} diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 990066a6..11d9722b 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -146,8 +146,8 @@ class ImageLocal(_TranslationTestCase): ''' def get_xml(self): - drawing = DXB.drawing(r_id='rId0', size=False) - pict = DXB.pict(r_id='rId1', size=False) + drawing = DXB.drawing(r_id='rId0', width=None, height=None) + pict = DXB.pict(r_id='rId1', width=None, height=None) tags = [ drawing, pict, From 319b8b12c878751c779074d6f2aab26d54d1d3b4 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 16:30:21 -0400 Subject: [PATCH 245/404] fixed formatting --- pydocx/DocxParser.py | 76 ++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 5ebf193a..8410c5fd 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -43,8 +43,8 @@ def remove_namespaces(document): # remove namespaces def has_child(self, tag): """ -Determine if 
current element has a child. Stop at first child. -""" + Determine if current element has a child. Stop at first child. + """ return True if self.find(tag) is not None else False @@ -58,22 +58,22 @@ def has_descendant_with_tag(self, tag): def find_first(self, tag): """ -Find the first occurrence of a tag beneath the current element. -""" + Find the first occurrence of a tag beneath the current element. + """ return self.find('.//' + tag) def find_all(self, tag): """ Find all occurrences of a tag -""" + """ return self.findall('.//' + tag) def el_iter(el): """ -Go through all elements -""" + Go through all elements + """ try: return el.iter() except AttributeError: @@ -82,8 +82,8 @@ def el_iter(el): def find_ancestor_with_tag(self, tag): """ -Find the first ancestor with that is a `tag`. -""" + Find the first ancestor with that is a `tag`. + """ el = self while el.parent: el = el.parent @@ -190,9 +190,9 @@ def add_parent(el): # if a parent, make that an attribute #divide by 20 to get to pt (Office works in 20th's of a point) """ -see http://msdn.microsoft.com/en-us/library/documentformat -.openxml.wordprocessing.indentation.aspx -""" + see http://msdn.microsoft.com/en-us/library/documentformat + .openxml.wordprocessing.indentation.aspx + """ if self.root.find_first('pgSz') is not None: self.page_width = int(self.root. find_first('pgSz').attrib['w']) / 20 @@ -226,12 +226,12 @@ def _set_list_attributes(self, el): def _generate_num_id(self, el): ''' -Fun fact: It is possible to have a list in the root, that holds a table -that holds a list and for both lists to have the same numId. When this -happens we should namespace the nested list with the number of tables -it is in to ensure it is considered a new list. Otherwise all sorts of -terrible html gets generated. -''' + Fun fact: It is possible to have a list in the root, that holds a table + that holds a list and for both lists to have the same numId. 
When this + happens we should namespace the nested list with the number of tables + it is in to ensure it is considered a new list. Otherwise all sorts of + terrible html gets generated. + ''' num_id = el.find_first('numId').attrib['val'] # First, go up the parent until we get None and count the number of @@ -457,12 +457,12 @@ def parse_table_cell(self, el, text): def parse_list(self, el, text): """ -All the meat of building the list is done in _parse_list, however we -call this method for two reasons: It is the naming convention we are -following. And we need a reliable way to raise and lower the list_depth -(which is used to determine if we are in a list). I could have done -this in _parse_list, however it seemed cleaner to do it here. -""" + All the meat of building the list is done in _parse_list, however we + call this method for two reasons: It is the naming convention we are + following. And we need a reliable way to raise and lower the list_depth + (which is used to determine if we are in a list). I could have done + this in _parse_list, however it seemed cleaner to do it here. + """ self.list_depth += 1 parsed = self._parse_list(el, text) self.list_depth -= 1 @@ -603,10 +603,10 @@ def parse_list_item(self, el, text): def _should_parse_next_as_content(el): """ -Get the contents of the next el and append it to the -contents of the current el (that way things like tables -are actually in the li tag instead of in the ol/ul tag). -""" + Get the contents of the next el and append it to the + contents of the current el (that way things like tables + are actually in the li tag instead of in the ol/ul tag). + """ next_el = el.next if next_el is None: return False @@ -721,10 +721,10 @@ def _convert_image_size(self, size): def _get_image_size(self, el): """ -If we can't find a height or width, return 0 for whichever is not -found, then rely on the `image` handler to strip those attributes. This -functionality can change once we integrate PIL. 
-""" + If we can't find a height or width, return 0 for whichever is not + found, then rely on the `image` handler to strip those attributes. This + functionality can change once we integrate PIL. + """ localDpi = False sizes = el.find_first('ext') if sizes is not None: @@ -764,10 +764,10 @@ def parse_image(self, el): def _is_style_on(self, el): """ -For b, i, u (bold, italics, and underline) merely having the tag is not -sufficient. You need to check to make sure it is not set to "false" as -well. -""" + For b, i, u (bold, italics, and underline) merely having the tag is not + sufficient. You need to check to make sure it is not set to "false" as + well. + """ return el.get('val') != 'false' def parse_t(self, el, parsed): @@ -784,8 +784,8 @@ def parse_insertion(self, el, parsed): def parse_r(self, el, parsed): """ -Parse the running text. -""" + Parse the running text. + """ block = False text = parsed if not text: From 1699da1c41ee0818f66c7bf95ef32f53ea48d28c Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 16:36:24 -0400 Subject: [PATCH 246/404] updated test --- pydocx/tests/test_xml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 11d9722b..86f310f3 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -137,11 +137,13 @@ def get_xml(self): class ImageLocal(_TranslationTestCase): relationship_dict = { - 'rId0': 'media/image1.jpeg' + 'rId0': 'media/image1.jpeg', + 'rId1': 'media/image2.jpeg', } expected_output = '''

      +

      ''' From 988b767a0f04d78c37a091fd53d5f25f00d0ffb0 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 22:59:07 -0400 Subject: [PATCH 247/404] fixed tests --- pydocx/DocxParser.py | 3 +- pydocx/tests/document_builder.py | 9 ++---- pydocx/tests/templates/drawing.xml | 12 ++++---- pydocx/tests/templates/pict.xml | 46 ++++++++++++------------------ pydocx/tests/test_xml.py | 6 ++-- 5 files changed, 31 insertions(+), 45 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index df15e2d9..4f486400 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -714,12 +714,13 @@ def _get_image_size(self, el): '%dpx' % y, ) shape = el.find_first('shape') - if shape is not None: + if shape is not None and shape.get('style') is not None: # If either of these are not set, rely on the method `image` to not # use either of them. x = 0 y = 0 styles = shape.get('style').split(';') + for s in styles: if s.startswith('height:'): y = s.split(':')[1] diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 4e951608..a9b49aa4 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -125,44 +125,39 @@ def _tr(rows, text): return template.render(table_rows=trs) @classmethod - def drawing(self, r_id, height=None, width=None, size=True): + def drawing(self, r_id, height=None, width=None): template = env.get_template(templates['drawing']) if height and width: kwargs = { 'r_id': r_id, - 'size': size, 'height': height * EMUS_PER_PIXEL, 'width': width * EMUS_PER_PIXEL, } elif height: kwargs = { 'r_id': r_id, - 'size': size, 'height': height * EMUS_PER_PIXEL, 'width': width, } elif width: kwargs = { 'r_id': r_id, - 'size': size, 'width': width * EMUS_PER_PIXEL, 'height': height, } else: kwargs = { 'r_id': r_id, - 'size': size, 'width': width, 'height': height, } return template.render(**kwargs) @classmethod - def pict(self, height=0, width=0, r_id=None, size=True): + def pict(self, r_id=None, 
height=None, width=None): template = env.get_template(templates['pict']) kwargs = { 'r_id': r_id, - 'size': size, 'height': height, 'width': width, } diff --git a/pydocx/tests/templates/drawing.xml b/pydocx/tests/templates/drawing.xml index fe950155..dfd470b4 100644 --- a/pydocx/tests/templates/drawing.xml +++ b/pydocx/tests/templates/drawing.xml @@ -39,13 +39,11 @@ - {% if width != None and height != None %} - - {% elif width != None %} - - {% elif height != None %} - - {% endif %} + diff --git a/pydocx/tests/templates/pict.xml b/pydocx/tests/templates/pict.xml index b096bc1d..3efda80e 100644 --- a/pydocx/tests/templates/pict.xml +++ b/pydocx/tests/templates/pict.xml @@ -1,30 +1,20 @@ - - - - - - - - - - - {% if width != None and height != None %} - - {% if r_id %}{% endif %} - - {% elif width != None %} + + + + + + + + + + - {% if r_id %}{% endif %} - - {% elif height != None %} - - {% if r_id %}{% endif %} - - {% endif %} - - - + {%if width != None or height != None %} style={% if width != None %} + "width:{{ width }}pt;{%endif%}{% if height != None %}height:{{ height }}pt" + {%endif%}{% endif %}> + {% if r_id %}{% endif %} + + + + diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 86f310f3..f577fe75 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -148,8 +148,8 @@ class ImageLocal(_TranslationTestCase): ''' def get_xml(self): - drawing = DXB.drawing(r_id='rId0', width=None, height=None) - pict = DXB.pict(r_id='rId1', width=None, height=None) + drawing = DXB.drawing(height=None, width=None, r_id='rId0') + pict = DXB.pict(height=None, width=None, r_id='rId1') tags = [ drawing, pict, @@ -159,6 +159,7 @@ def get_xml(self): body += el xml = DXB.xml(body) + print xml return xml @@ -233,6 +234,7 @@ def test_get_image_sizes(self): ('40px', '20px'), ('41pt', '21pt'), ] + print image_ids self.assertEqual( set(image_ids), set(expected), From 315e8440337901c988092727e41e97097848809c Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: 
Tue, 21 May 2013 23:02:54 -0400 Subject: [PATCH 248/404] removed not needed files for this branch --- pydocx/HtmlConverter.py | 74 --- pydocx/parsers/Docx2Html.py | 171 ------- pydocx/parsers/Docx2XML.py | 16 - pydocx/tests/test_docx.py | 868 ------------------------------------ 4 files changed, 1129 deletions(-) delete mode 100644 pydocx/HtmlConverter.py delete mode 100644 pydocx/parsers/Docx2Html.py delete mode 100644 pydocx/parsers/Docx2XML.py delete mode 100644 pydocx/tests/test_docx.py diff --git a/pydocx/HtmlConverter.py b/pydocx/HtmlConverter.py deleted file mode 100644 index 1ff1d7c3..00000000 --- a/pydocx/HtmlConverter.py +++ /dev/null @@ -1,74 +0,0 @@ -__author__ = 'samportnow' - -from .tests import document_builder -from pydocx.DocxParser import ElementTree - - -class converter(): - - def __init__(self, html): - self.html = ElementTree.fromstring(html) - self.build() - - def build(self): - def add_parent(el): - for child in el.getchildren(): - setattr(child, 'parent', el) - add_parent(child) - add_parent(self.html) - self.set_list_attributes() - self.parse(self.html.find_first('body')) - - def find_all_by_tags(self, html, *args): - list_elements = [] - for el in html.iter(): - if el.tag in args: - list_elements.append(el) - return list_elements - - def check_for_lst_parent(self, el): - lst_parent = False - if el.parent.tag != 'body': - if el.parent.tag == 'ol' or el.parent.tag == 'ul': - lst_parent = True - self.check_for_lst_parent(el.parent) - else: - return lst_parent - - def set_list_attributes(self): - ilvl = 0 - numId = -1 - lsts = self.find_all_by_tags(self.html, 'ol', 'ul') - for lst in lsts: - lst.getchildren()[0].is_first_list_item = True - lst.getchildren()[-1].is_last_list_item = True - for el in self.html.find_first('body').iter(): - if el.tag == 'li': - if self.check_for_lst_parent(el.parent) \ - is False and el.is_first_list_item is True: - numId += 1 - ilvl = 0 - if el.is_first_list_item is True: - ilvl += 1 - el.ilvl = ilvl - el.num_id = 
numId - - def parse(self, el): - for child in el.getchildren(): - parsed = '' - self.parse(child) - if child.tag == 'b': - self.bold = True - else: - self.bold = False - if child.tag == 'i': - pass - if child.tag == 'u': - pass - if child.tag == 'li': - parsed = document_builder.DocxBuilder.li( - child.text, child.ilvl, child.num_id, self.bold) - if child.tag == 'p': - parsed = document_builder.DocxBuilder.p_tag( - child.text, self.bold) - return parsed diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py deleted file mode 100644 index c8d38238..00000000 --- a/pydocx/parsers/Docx2Html.py +++ /dev/null @@ -1,171 +0,0 @@ -from pydocx.DocxParser import DocxParser - -import xml.sax.saxutils -import textwrap - - -class Docx2Html(DocxParser): - - @property - def parsed(self): - content = self._parsed - content = "%(head)s%(content)s" % { - 'head': self.head(), - 'content': content, - } - return unicode(content) - - def head(self): - return "%(style)s" % { - 'style': self.style(), - } - - def style(self): - return textwrap.dedent('''''') % { - 'width': (self.page_width * (4 / 3)), - } - #multiple by (4/3) to get to px - - def escape(self, text): - return xml.sax.saxutils.quoteattr(text)[1:-1] - - def linebreak(self, pre=None): - return '
      ' - - def paragraph(self, text, pre=None): - return '

      ' + text + '

      ' - - def heading(self, text, heading_value): - return '<%(tag)s>%(text)s' % { - 'tag': heading_value, - 'text': text, - } - - def insertion(self, text, author, date): - return ( - "%(text)s" - ) % { - 'author': author, - 'date': date, - 'text': text, - } - - def hyperlink(self, text, href): - if text == '': - return '' - return '%(text)s' % { - 'href': href, - 'text': text, - } - - def image_handler(self, path): - return path - - def image(self, path, x, y): - src = self.image_handler(path) - if not src: - return '' - if all([x, y]): - return '' % ( - src, - y, - x, - ) - else: - return '' % src - - def deletion(self, text, author, date): - return ( - "%(text)s" - ) % { - 'author': author, - 'date': date, - 'text': text, - } - - def list_element(self, text): - return "
    1. %(text)s
    2. " % { - 'text': text, - } - - def ordered_list(self, text, list_style): - return "
        %(text)s
      " % { - 'text': text, - } - - def unordered_list(self, text): - return "
        %(text)s
      " % { - 'text': text, - } - - def bold(self, text): - return '' + text + '' - - def italics(self, text): - return '' + text + '' - - def underline(self, text): - return '' + text + '' - - def tab(self): - # Insert before the text right?? So got the text and just do an insert - # at the beginning! - return '    ' - - def table(self, text): - return '
      AAA BBB
      CCC - +
      @@ -296,7 +271,6 @@ def test_nested_tables():
      DDD EEE
      - ''') @@ -308,12 +282,11 @@ def test_list_in_table(): 'list_in_table.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
      -
        +
        1. AAA
        2. BBB
        3. CCC
        4. @@ -321,7 +294,6 @@ def test_list_in_table():
      - ''') @@ -333,12 +305,11 @@ def test_tables_in_lists(): 'tables_in_lists.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
        + assert_html_equal(actual_html, BASE_HTML % ''' +
        1. AAA
        2. BBB - +
          @@ -351,7 +322,6 @@ def test_tables_in_lists():
        3. GGG
        4. - ''') @@ -363,8 +333,8 @@ def test_track_changes_on(): 'track_changes_on.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' -

          This was some content.

          + assert_html_equal(actual_html, BASE_HTML % ''' +

          This was some content.

          ''') @@ -376,8 +346,7 @@ def test_headers(): 'headers.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

          This is an H1

          This is an H2

          This is an H3

          @@ -388,7 +357,6 @@ def test_headers():
          This is an H8
          This is an H9
          This is an H10
          - ''') @@ -415,8 +383,8 @@ def test_split_headers(): new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) - assert_html_equal(actual_html, ''' -

          AAA

          BBB

          CCC

          + assert_html_equal(actual_html, BASE_HTML % ''' +

          AAA

          BBB

          CCC

          ''') @@ -435,10 +403,8 @@ def test_has_image(): actual_html = convert(new_file_path) # Ignore height, width for now. - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

          AAA

          - ''') @@ -452,10 +418,8 @@ def test_local_dpi(): ) new_file_path, dp = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

          - ''') @@ -476,45 +440,11 @@ def test_has_image_using_image_handler(): def image_handler(*args, **kwargs): return 'test' actual_html = convert(new_file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

          AAA

          - ''') -#def test_attachment_is_tiff(): -# filename = 'attachment_is_tiff.docx' -# file_path = path.join( -# path.abspath(path.dirname(__file__)), -# '..', -# 'fixtures', -# 'attachment_is_tiff.docx', -# ) -# # preserve_images must be true in order for the image to not be removed. -# # This is handled in build_import, however here we need to manually set it -# # to True. -# new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) -# -# # First open the file and verify that the image attachment is a tiff. -# try: -# zf = ZipFile(new_file_path) -# # Get the document data. -# _, meta_data = _get_document_data(zf) -# finally: -# zf.close() -# # Find the path to the image. -# image_file = None -# for file_path in meta_data.relationship_dict.values(): -# if file_path.endswith('.gif'): -# image_file = file_path -# assert image_file is not None -# with open(image_file) as f: -# magic_number = f.read()[:4] -# # Make sure the image is actually a gif. -# assert magic_number == 'GIF8' - - def test_headers_with_full_line_styles(): raise SkipTest('This test is not yet passing') # Show that if a natural header is completely bold/italics that @@ -526,12 +456,10 @@ def test_headers_with_full_line_styles(): 'headers_with_full_line_styles.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

          AAA

          BBB

          CCC

          - ''') @@ -546,17 +474,16 @@ def test_convert_p_to_h(): 'convert_p_to_h.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

          AAA

          BBB

          CCC

          -
            +
            1. DDD
            2. EEE
            3. FFF
            -
          CCC DDD
          +
          @@ -566,43 +493,9 @@ def test_convert_p_to_h():
          GGG HHHJJJ
          - ''') -#def test_bigger_font_size_to_header(): -# # Show when it is appropriate to convert p tags to h tags based on font -# # size. -# if not DETECT_FONT_SIZE: -# raise SkipTest('Font size detection is disabled.') -# file_path = path.join( -# path.abspath(path.dirname(__file__)), -# '..', -# 'fixtures', -# 'bigger_font_size_to_header.docx', -# ) -# actual_html = convert(file_path) -# assert_html_equal(actual_html, ''' -# -#

          Paragraphs:

          -#

          Header

          -#

          paragraph 1

          -#

          Lists:

          -#
            -#
          1. bigger
          2. -#
          3. smaller
          4. -#
          -#

          Tables:

          -# -# -# -# -# -#
          biggersmaller
          -# -# ''') - - def test_fake_headings_by_length(): raise SkipTest('This test is not yet passing') # Show that converting p tags to h tags has a length limit. If the p tag is @@ -615,14 +508,12 @@ def test_fake_headings_by_length(): 'fake_headings_by_length.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

          Heading.

          Still a heading.

          This is not a heading because it is too many words.

          - ''') @@ -637,15 +528,14 @@ def test_shift_enter(): # Test just the convert without clean_html to make sure the first # break tag is present. actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

          AAA
          BBB

          CCC

          -
            +
            1. DDD
              EEE
            2. FFF
            - +
            @@ -655,7 +545,6 @@ def test_shift_enter():
            GGG
            HHH
            III
            JJJ
            LLL
            - ''') @@ -667,17 +556,16 @@ def test_lists_with_styles(): 'lists_with_styles.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
              + assert_html_equal(actual_html, BASE_HTML % ''' +
              1. AAA
              2. BBB -
                  +
                  1. CCC
                  2. DDD -
                      +
                      1. EEE -
                          +
                          1. FFF
                          @@ -686,7 +574,6 @@ def test_lists_with_styles():
                      - ''') @@ -701,25 +588,23 @@ def test_list_to_header(): actual_html = convert(file_path) # It should be noted that list item `GGG` is upper roman in the word # document to show that only top level upper romans get converted. - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                      AAA

                      -
                        +
                        1. BBB

                        CCC

                        -
                          +
                          1. DDD

                          EEE

                          -
                            +
                            1. FFF -
                                +
                                1. GGG
                              - ''') @@ -731,11 +616,10 @@ def test_has_title(): 'has_title.docx', ) actual_html = convert(file_path) - assert_html_equal( - actual_html, - '''

                              Title

                              -

                              Text

                              ''', - ) + assert_html_equal(actual_html, BASE_HTML % ''' +

                              Title

                              +

                              Text

                              + ''') def test_upper_alpha_all_bold(): @@ -747,12 +631,10 @@ def test_upper_alpha_all_bold(): 'upper_alpha_all_bold.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                              AAA

                              BBB

                              CCC

                              - ''') @@ -764,14 +646,12 @@ def test_simple_table(): 'simple_table.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - + assert_html_equal(actual_html, BASE_HTML % ''' +
                              Cell1
                              Cell3
                              Cell2
                              And I am writing in the table
                              Cell4
                              - ''') @@ -783,8 +663,7 @@ def test_justification(): 'justification.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - + assert_html_equal(actual_html, BASE_HTML % '''

                              Center Justified

                              @@ -808,7 +687,6 @@ def test_justification(): Left justified and pushed in from left

                              - ''') From bf2705bd2f23e700e7e9e7910b295f46994e6cb0 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 13:23:44 -0400 Subject: [PATCH 223/404] refs #29: updated the xml based tests for the new expected html --- pydocx/tests/__init__.py | 17 +++++- pydocx/tests/test_docx.py | 13 +--- pydocx/tests/test_xml.py | 124 ++++++++++++++------------------------ 3 files changed, 60 insertions(+), 94 deletions(-) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index b636109d..74685f1e 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -11,6 +11,17 @@ ) from unittest import TestCase +STYLE = '' # noqa + +BASE_HTML = ''' + + + %s + + %%s + +''' % STYLE + def assert_html_equal(actual_html, expected_html): assert collapse_html( @@ -99,8 +110,8 @@ def _parse_styles(self): '1': 'decimal', }, '2': { - '0': 'none', - '1': 'none', + '0': 'lowerLetter', + '1': 'lowerLetter', }, } @@ -136,4 +147,4 @@ def test_expected_output(self): numbering_dict=self.numbering_dict, ).parsed - assert_html_equal(html, self.expected_output) + assert_html_equal(html, BASE_HTML % self.expected_output) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index a55df801..70b2e09f 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -6,24 +6,13 @@ from nose.plugins.skip import SkipTest #from nose.tools import assert_raises -from pydocx.tests import assert_html_equal +from pydocx.tests import assert_html_equal, BASE_HTML from pydocx.parsers.Docx2Html import Docx2Html def convert(path): return Docx2Html(path).parsed -STYLE = '' # noqa - -BASE_HTML = ''' - - - %s - - %%s - -''' % STYLE - def test_extract_html(): file_path = path.join( diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index c1a5bf8a..6d1464d0 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -15,10 +15,8 @@ class BoldTestCase(_TranslationTestCase): expected_output = """ -

                              AAA

                              BBB

                              - """ def get_xml(self): @@ -40,9 +38,7 @@ class HyperlinkVanillaTestCase(_TranslationTestCase): } expected_output = ''' -

                              link.

                              - ''' def get_xml(self): @@ -61,9 +57,7 @@ class HyperlinkWithMultipleRunsTestCase(_TranslationTestCase): } expected_output = ''' -

                              link.

                              - ''' def get_xml(self): @@ -81,8 +75,6 @@ class HyperlinkNoTextTestCase(_TranslationTestCase): } expected_output = ''' - - ''' def get_xml(self): @@ -99,9 +91,7 @@ class HyperlinkNotInRelsDictTestCase(_TranslationTestCase): } expected_output = ''' -

                              link.

                              - ''' def get_xml(self): @@ -120,9 +110,7 @@ class HyperlinkWithBreakTestCase(_TranslationTestCase): } expected_output = ''' -

                              link

                              - ''' def get_xml(self): @@ -141,14 +129,12 @@ class ImageTestCase(_TranslationTestCase): 'rId1': 'media/image2.jpeg', } expected_output = ''' -

                              - ''' def get_xml(self): @@ -217,8 +203,6 @@ class ImageNotInRelsDictTestCase(_TranslationTestCase): # 'rId0': 'media/image1.jpeg', } expected_output = ''' - - ''' def get_xml(self): @@ -271,8 +255,7 @@ def get_xml(self): class TableTag(_TranslationTestCase): expected_output = ''' - - +
                              @@ -282,7 +265,6 @@ class TableTag(_TranslationTestCase):
                              AAA BBBDDD
                              - ''' def get_xml(self): @@ -299,8 +281,7 @@ def get_xml(self): class NestedTableTag(_TranslationTestCase): expected_output = ''' - - +
                              @@ -308,7 +289,7 @@ class NestedTableTag(_TranslationTestCase): ' + text + '' From 8f387f39892ef0e9d2981447a4228f077293f6af Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:09:34 -0400 Subject: [PATCH 226/404] refs #28: updated test based on merged master --- pydocx/tests/test_xml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 0b09a84f..c0703c45 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -722,7 +722,7 @@ class DeleteTagInList(_TranslationTestCase): expected_output = '''
                                -
                              1. AAA
                                +
                              2. AAA
                                BBB
                              3. CCC
                              4. @@ -746,7 +746,7 @@ class InsertTagInList(_TranslationTestCase): expected_output = '''
                                  -
                                1. AAA
                                  +
                                2. AAA
                                  BBB
                                3. CCC
                                4. @@ -771,7 +771,7 @@ class SmartTagInList(_TranslationTestCase): expected_output = '''
                                    -
                                  1. AAA
                                    +
                                  2. AAA
                                    BBB
                                  3. CCC
                                  4. From cbea7a986a0eeb5d852ba4fce45c00dddc13b8d3 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:10:58 -0400 Subject: [PATCH 227/404] refs #29: updated tests based on merged master --- pydocx/tests/test_xml.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 6cc1ddfc..16014fdd 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -673,14 +673,12 @@ def get_xml(self): class DeleteTagInList(_TranslationTestCase): expected_output = ''' - -
                                      +
                                      1. AAA
                                        BBB
                                      2. CCC
                                      - ''' def get_xml(self): @@ -697,14 +695,12 @@ def get_xml(self): class InsertTagInList(_TranslationTestCase): expected_output = ''' - -
                                        +
                                        1. AAA
                                          BBB
                                        2. CCC
                                        - ''' def get_xml(self): @@ -722,14 +718,12 @@ def get_xml(self): class SmartTagInList(_TranslationTestCase): expected_output = ''' - -
                                          +
                                          1. AAA
                                            BBB
                                          2. CCC
                                          - ''' def get_xml(self): From 1c72947f4370d779822d175d9ba825411b4fc333 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:12:33 -0400 Subject: [PATCH 228/404] refs #28: updated tests based on merged master --- pydocx/tests/test_xml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 48c16cbd..fb1a47fe 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -923,7 +923,7 @@ class SDTTestCase(_TranslationTestCase): expected_output = '''
                                            -
                                          1. AAA
                                            +
                                          2. AAA
                                            BBB
                                          3. CCC
                                          4. From e605958b8792a9025bb047961b57d46fb4e8d8ef Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:13:27 -0400 Subject: [PATCH 229/404] refs #29: updated tests based on merged master --- pydocx/tests/test_xml.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 85cfea1c..17609d1a 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -874,14 +874,12 @@ def get_xml(self): class SDTTestCase(_TranslationTestCase): expected_output = ''' - -
                                              +
                                              1. AAA
                                                BBB
                                              2. CCC
                                              - ''' def get_xml(self): From e254e81b5fbd2fcef8117d752d9bf2411d762fbb Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:20:29 -0400 Subject: [PATCH 230/404] refs #28: split up a line into multiple lines --- pydocx/tests/test_docx.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index cc2094af..9db462d0 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -143,8 +143,13 @@ def test_inline_tags(): 'inline_tags.docx', ) actual_html = convert(file_path) - assert_html_equal(actual_html, ''' -

                                              This sentence has some bold, some italics and some underline, as well as a hyperlink.

                                              ''') # noqa + assert_html_equal(actual_html, ( + '

                                              This sentence has some bold, ' + 'some italics and some ' + 'underline, ' + 'as well as a hyperlink' + '.

                                              ' + )) def test_unicode(): From 04c407dc913595010779477d92fa8795e44c9690 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:22:13 -0400 Subject: [PATCH 231/404] refs #28: updated how we are doing underline --- pydocx/parsers/Docx2Html.py | 3 ++- pydocx/tests/test_docx.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index e342a04d..9aa78354 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -25,6 +25,7 @@ def style(self): {{color:red; text-decoration:line-through}}.center {{text-align:center}}.right{{text-align:right}} .left{{text-align:left}} .comment{{color:blue}} + .pydocx-underline {text-decoration: underline;} body{{width:%(width)spx; margin:0px auto; }}''') % { 'width': (self.page_width * (4 / 3)), @@ -112,7 +113,7 @@ def italics(self, text): return '' + text + '' def underline(self, text): - return '' + text + '' + return '' + text + '' def tab(self): # Insert before the text right?? So got the text and just do an insert diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index 9db462d0..815d2ef2 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -146,7 +146,7 @@ def test_inline_tags(): assert_html_equal(actual_html, ( '

                                              This sentence has some bold, ' 'some italics and some ' - 'underline, ' + 'underline, ' 'as well as a hyperlink' '.

                                              ' )) From 8c5b39ccf694a02831916d13d45322e2c8e5d4bc Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:26:49 -0400 Subject: [PATCH 232/404] refs #28: Added css stuff to the README --- README.md | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b4453cf8..91bd33e3 100644 --- a/README.md +++ b/README.md @@ -113,8 +113,7 @@ DocxParser includes abstracts methods that each parser overwrites to satsify its @abstractmethod def table(self, text): - return text - + return text @abstractmethod def table_row(self, text): return text @@ -161,4 +160,16 @@ OR, let's say FOO is your new favorite markup language. Simply customize your ow def linebreak(self): return '!!!!!!!!!!!!' # because linebreaks in are denoted by '!!!!!!!!!!!!' - # with the FOO markup langauge :) \ No newline at end of file + # with the FOO markup langauge :) + +#Styles + +The base parser `Docx2Html` relies on certain css class being set for certain behaviour to occur. Currently these include: + +* class `insert` -> Turns the text green. +* class `delete` -> Turns the text red and draws a line through the text. +* class `center` -> Aligns the text to the center. +* class `right` -> Aligns the text to the right. +* class `left` -> Aligns the text to the left. +* class `comment` -> Turns the text blue. +* class `pydocx-underline` -> Underlines the text. 
From a0de8a97933edf045e8ce9d90dd09906a09639b5 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 14:52:58 -0400 Subject: [PATCH 233/404] refs #29: namespaced all the css classes --- README.md | 12 ++++++------ pydocx/parsers/Docx2Html.py | 19 ++++++++++--------- pydocx/tests/__init__.py | 13 +++++++------ pydocx/tests/test_docx.py | 15 ++++++++------- pydocx/tests/test_xml.py | 6 +++--- 5 files changed, 34 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 91bd33e3..de86d68f 100644 --- a/README.md +++ b/README.md @@ -166,10 +166,10 @@ OR, let's say FOO is your new favorite markup language. Simply customize your ow The base parser `Docx2Html` relies on certain css class being set for certain behaviour to occur. Currently these include: -* class `insert` -> Turns the text green. -* class `delete` -> Turns the text red and draws a line through the text. -* class `center` -> Aligns the text to the center. -* class `right` -> Aligns the text to the right. -* class `left` -> Aligns the text to the left. -* class `comment` -> Turns the text blue. +* class `pydocx-insert` -> Turns the text green. +* class `pydocx-delete` -> Turns the text red and draws a line through the text. +* class `pydocx-center` -> Aligns the text to the center. +* class `pydocx-right` -> Aligns the text to the right. +* class `pydocx-left` -> Aligns the text to the left. +* class `pydocx-comment` -> Turns the text blue. * class `pydocx-underline` -> Underlines the text. 
diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 4b6c6154..782be941 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -21,12 +21,13 @@ def head(self): def style(self): result = ( - '' @@ -53,7 +54,7 @@ def heading(self, text, heading_value): def insertion(self, text, author, date): return ( - "%(text)s" ) % { 'author': author, @@ -87,7 +88,7 @@ def image(self, path, x, y): def deletion(self, text, author, date): return ( - "%(text)s" ) % { 'author': author, @@ -150,7 +151,7 @@ def page_break(self): def indent(self, text, just='', firstLine='', left='', right=''): slug = '.insert {color:green;}' - '.delete {color:red;text-decoration:line-through;}' - '.center {text-align:center;}' - '.right {text-align:right;}' - '.left {text-align:left;}' - '.comment {color:blue;}' + '' diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index e3207ec8..72b98f5b 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -166,13 +166,13 @@ def test_table_col_row_span():
                              AAA BBB
                              CCC - +
                              @@ -321,7 +302,6 @@ class NestedTableTag(_TranslationTestCase):
                              DDD EEE
                              - ''' def get_xml(self): @@ -344,8 +324,7 @@ def get_xml(self): class TableWithInvalidTag(_TranslationTestCase): expected_output = ''' - - +
                              @@ -355,7 +334,6 @@ class TableWithInvalidTag(_TranslationTestCase):
                              AAA BBBDDD
                              - ''' def get_xml(self): @@ -374,11 +352,10 @@ def get_xml(self): class TableWithListAndParagraph(_TranslationTestCase): expected_output = ''' - - +
                              -
                                +
                                1. AAA
                                2. BBB
                                @@ -387,7 +364,6 @@ class TableWithListAndParagraph(_TranslationTestCase):
                              - ''' def get_xml(self): @@ -416,13 +392,11 @@ def get_xml(self): class SimpleListTestCase(_TranslationTestCase): expected_output = ''' - -
                                +
                                1. AAA
                                2. BBB
                                3. CCC
                                - ''' # Ensure its not failing somewhere and falling back to decimal @@ -448,11 +422,9 @@ def get_xml(self): class SingleListItemTestCase(_TranslationTestCase): expected_output = ''' - -
                                  +
                                  1. AAA
                                  - ''' # Ensure its not failing somewhere and falling back to decimal @@ -476,11 +448,10 @@ def get_xml(self): class ListWithContinuationTestCase(_TranslationTestCase): expected_output = ''' - -
                                    +
                                    1. AAA
                                      BBB
                                    2. CCC - +
                                      @@ -493,7 +464,6 @@ class ListWithContinuationTestCase(_TranslationTestCase):
                                    3. HHH
                                    4. - ''' def get_xml(self): @@ -520,15 +490,14 @@ def get_xml(self): class ListWithMultipleContinuationTestCase(_TranslationTestCase): expected_output = ''' - -
                                        +
                                        1. AAA -
                                      DDD EEE
                                      +
                                      BBB
                                      - +
                                      @@ -536,7 +505,6 @@ class ListWithMultipleContinuationTestCase(_TranslationTestCase):
                                    5. DDD
                                    6. - ''' def get_xml(self): @@ -562,18 +530,16 @@ def get_xml(self): class MangledIlvlTestCase(_TranslationTestCase): expected_output = ''' - -
                                        +
                                        1. AAA
                                        -
                                          +
                                          1. BBB -
                                              +
                                              1. CCC
                                            - ''' def get_xml(self): @@ -592,17 +558,15 @@ def get_xml(self): class SeperateListsTestCase(_TranslationTestCase): expected_output = ''' - -
                                              +
                                              1. AAA
                                              -
                                                +
                                                1. BBB
                                                -
                                                  +
                                                  1. CCC
                                                  - ''' def get_xml(self): @@ -624,19 +588,17 @@ def get_xml(self): class InvalidIlvlOrderTestCase(_TranslationTestCase): expected_output = ''' - -
                                                    +
                                                    1. AAA -
                                                        +
                                                        1. BBB -
                                                            +
                                                            1. CCC
                                                        - ''' def get_xml(self): @@ -686,10 +648,8 @@ def test_performance(self): class NonStandardTextTagsTestCase(_TranslationTestCase): expected_output = ''' -

                                                        insert smarttag

                                                        - ''' def get_xml(self): @@ -705,7 +665,7 @@ def get_xml(self): class RTagWithNoText(_TranslationTestCase): - expected_output = '' + expected_output = '' def get_xml(self): p_tag = DXB.p_tag(None) # No text @@ -720,12 +680,10 @@ def get_xml(self): class SingleListItem(_TranslationTestCase): expected_output = ''' - -
                                                          +
                                                          1. AAA

                                                          BBB

                                                          - ''' numbering_dict = { @@ -748,11 +706,23 @@ def get_xml(self): class SimpleTableTest(_TranslationTestCase): expected_output = ''' - -
                                      CCC
                                      - - -
                                      BlankColumn 1Column 2
                                      Row 1FirstSecond
                                      Row 2ThirdFourth
                                      ''' + + + + + + + + + + + + + + + + +
                                      BlankColumn 1Column 2
                                      Row 1FirstSecond
                                      Row 2ThirdFourth
                                      ''' def get_xml(self): table = DXB.table(num_rows=3, num_columns=3, text=chain( @@ -773,14 +743,12 @@ def get_xml(self): class MissingIlvl(_TranslationTestCase): expected_output = ''' - -
                                        +
                                        1. AAA
                                          BBB
                                        2. CCC
                                        - ''' def get_xml(self): @@ -800,13 +768,12 @@ def get_xml(self): class SameNumIdInTable(_TranslationTestCase): expected_output = ''' - -
                                          +
                                          1. AAA - +
                                            @@ -815,7 +782,6 @@ class SameNumIdInTable(_TranslationTestCase):
                                          2. CCC
                                          3. - ''' # Ensure its not failing somewhere and falling back to decimal numbering_dict = { From 061d8d18f2c56dfd359bf2929e9e692353aae1d0 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 13:26:01 -0400 Subject: [PATCH 224/404] refs #29: updated white space --- pydocx/tests/test_xml.py | 151 +++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 79 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 6d1464d0..145f0d39 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -15,8 +15,8 @@ class BoldTestCase(_TranslationTestCase): expected_output = """ -

                                            AAA

                                            -

                                            BBB

                                            +

                                            AAA

                                            +

                                            BBB

                                            """ def get_xml(self): @@ -74,8 +74,7 @@ class HyperlinkNoTextTestCase(_TranslationTestCase): 'rId0': 'www.google.com', } - expected_output = ''' - ''' + expected_output = '' def get_xml(self): run_tags = [] @@ -90,9 +89,7 @@ class HyperlinkNotInRelsDictTestCase(_TranslationTestCase): # 'rId0': 'www.google.com', missing } - expected_output = ''' -

                                            link.

                                            - ''' + expected_output = '

                                            link.

                                            ' def get_xml(self): run_tags = [] @@ -109,9 +106,7 @@ class HyperlinkWithBreakTestCase(_TranslationTestCase): 'rId0': 'www.google.com', } - expected_output = ''' -

                                            link

                                            - ''' + expected_output = '

                                            link

                                            ' def get_xml(self): run_tags = [] @@ -129,12 +124,12 @@ class ImageTestCase(_TranslationTestCase): 'rId1': 'media/image2.jpeg', } expected_output = ''' -

                                            - -

                                            -

                                            - -

                                            +

                                            + +

                                            +

                                            + +

                                            ''' def get_xml(self): @@ -202,8 +197,7 @@ class ImageNotInRelsDictTestCase(_TranslationTestCase): relationship_dict = { # 'rId0': 'media/image1.jpeg', } - expected_output = ''' - ''' + expected_output = '' def get_xml(self): drawing = DXB.drawing(height=20, width=40, r_id='rId0') @@ -255,16 +249,16 @@ def get_xml(self): class TableTag(_TranslationTestCase): expected_output = ''' -
                                            -
                                              +
                                              1. BBB
                                            - - - - - - - - -
                                            AAABBB
                                            CCCDDD
                                            + + + + + + + + + +
                                            AAABBB
                                            CCCDDD
                                            ''' def get_xml(self): @@ -324,16 +318,16 @@ def get_xml(self): class TableWithInvalidTag(_TranslationTestCase): expected_output = ''' - - - - - - - - - -
                                            AAABBB
                                            DDD
                                            + + + + + + + + + +
                                            AAABBB
                                            DDD
                                            ''' def get_xml(self): @@ -392,11 +386,11 @@ def get_xml(self): class SimpleListTestCase(_TranslationTestCase): expected_output = ''' -
                                              -
                                            1. AAA
                                            2. -
                                            3. BBB
                                            4. -
                                            5. CCC
                                            6. -
                                            +
                                              +
                                            1. AAA
                                            2. +
                                            3. BBB
                                            4. +
                                            5. CCC
                                            6. +
                                            ''' # Ensure its not failing somewhere and falling back to decimal @@ -422,9 +416,9 @@ def get_xml(self): class SingleListItemTestCase(_TranslationTestCase): expected_output = ''' -
                                              -
                                            1. AAA
                                            2. -
                                            +
                                              +
                                            1. AAA
                                            2. +
                                            ''' # Ensure its not failing somewhere and falling back to decimal @@ -448,22 +442,22 @@ def get_xml(self): class ListWithContinuationTestCase(_TranslationTestCase): expected_output = ''' -
                                              -
                                            1. AAA
                                              BBB
                                            2. -
                                            3. CCC - - - - - - - - - -
                                              DDDEEE
                                              FFFGGG
                                              -
                                            4. -
                                            5. HHH
                                            6. -
                                            +
                                              +
                                            1. AAA
                                              BBB
                                            2. +
                                            3. CCC + + + + + + + + + +
                                              DDDEEE
                                              FFFGGG
                                              +
                                            4. +
                                            5. HHH
                                            6. +
                                            ''' def get_xml(self): @@ -490,21 +484,21 @@ def get_xml(self): class ListWithMultipleContinuationTestCase(_TranslationTestCase): expected_output = ''' -
                                              -
                                            1. AAA - - - - -
                                              BBB
                                              - - - - -
                                              CCC
                                              -
                                            2. -
                                            3. DDD
                                            4. -
                                            +
                                              +
                                            1. AAA + + + + +
                                              BBB
                                              + + + + +
                                              CCC
                                              +
                                            2. +
                                            3. DDD
                                            4. +
                                            ''' def get_xml(self): @@ -616,8 +610,7 @@ def get_xml(self): class DeeplyNestedTableTestCase(_TranslationTestCase): - expected_output = ''' - ''' + expected_output = '' run_expected_output = False def get_xml(self): From 2aa5922d04f45c1ce93277b3506593dedb674326 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 21 May 2013 13:27:12 -0400 Subject: [PATCH 225/404] refs #29: updated the parser for valid values --- pydocx/parsers/Docx2Html.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index f97e39be..0a2249a6 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -24,12 +24,7 @@ def head(self): } def style(self): - return textwrap.dedent('''''') % { + return textwrap.dedent('') % { # noqa 'width': (self.page_width * (4 / 3)), } #multiple by (4/3) to get to px @@ -99,8 +94,9 @@ def list_element(self, text): } def ordered_list(self, text, list_style): - return "
                                              %(text)s
                                            " % { + return '
                                              %(text)s
                                            ' % { 'text': text, + 'list_style': list_style, } def unordered_list(self, text): @@ -123,7 +119,7 @@ def tab(self): return '    ' def table(self, text): - return '' + text + '
                                            ' + return '' + text + '
                                            ' def table_row(self, text): return '
                              -
                              EEE +
                              EEE
                              FFF
                              -
                              GGG +
                              GGG
                              @@ -611,7 +611,7 @@ def test_has_title(): actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

                              Title

                              -

                              Text

                              +

                              Text

                              ''') @@ -667,18 +667,19 @@ def test_justification(): actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

                              -

                              Center Justified
                              +
                              Center Justified

                              -

                              Right justified
                              +
                              Right justified

                              -

                              +
                              Right justified and pushed in from right

                              -

                              +
                              Center justified and pushed in from left and it is great and it is the coolest thing of all time and I like it and I think it is cool diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 7538ca1b..4e5cf1a0 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -641,7 +641,7 @@ def test_performance(self): class NonStandardTextTagsTestCase(_TranslationTestCase): expected_output = ''' -

                              insert +

                              insert smarttag

                              ''' @@ -675,7 +675,7 @@ class DeleteTagInList(_TranslationTestCase): expected_output = '''
                              1. AAA
                                - BBB + BBB
                              2. CCC
                              @@ -697,7 +697,7 @@ class InsertTagInList(_TranslationTestCase): expected_output = '''
                              1. AAA
                                - BBB + BBB
                              2. CCC
                              From 0abe0a30552eca57dc0d88cd7d0ae943002995af Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 14:57:48 -0400 Subject: [PATCH 234/404] updated main --- main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index d4f7b882..f5d813c1 100644 --- a/main.py +++ b/main.py @@ -3,10 +3,10 @@ from pydocx.tests import document_builder import xml.etree.ElementTree as ElementTree -print docx2latex('/Users/samportnow/Documents/motor-dediff-prod.docx') +print docx2latex('/Users/samportnow/Documents/Nosek Articles/NHF2012.docx') with open('test.tex', 'w') as f: - f.write(docx2latex('/Users/samportnow/Documents/motor-dediff-prod.docx')) + f.write(docx2latex('/Users/samportnow/Documents/Nosek Articles/NHF2012.docx').encode('utf8')) #print BeautifulSoup( # ElementTree.tostring( # Docx2Html('./pydocx/fixtures/simple.docx').root, From 058a0f559f3df614c0284990dbb463220a752ea6 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 15:13:11 -0400 Subject: [PATCH 235/404] added latex parser --- pydocx/parsers/Docx2LaTex.py | 200 +++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 pydocx/parsers/Docx2LaTex.py diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py new file mode 100644 index 00000000..fcd14ebe --- /dev/null +++ b/pydocx/parsers/Docx2LaTex.py @@ -0,0 +1,200 @@ +from pydocx.DocxParser import DocxParser +from mako.template import Template +import xml.sax.saxutils + + +class Docx2LaTex(DocxParser): + + def __init__(self, *args, **kwargs): + self.rows = 0 + self.cols = 0 + self.current_col = 0 + self.table_info = [] + self.columns = {} + super(Docx2LaTex, self ).__init__(*args, **kwargs) + + @property + def parsed(self): + content = "%(head)s\\begin{document}%(content)s\\end{document}" % { + 'head': self.head(), + 'content': self._parsed} + return unicode(content) + + def escape(self, text): + chars = ['%', '&', '#', '$', '~', 
'_', '^', '{', '}',] + for ch in chars: + if ch in text: + text = text.replace(ch, '\\'+ch) + return text + + def linebreak(self): + return '\\\\ ' + + def bold(self, text): + return '\\textbf {%s}' %text + + def italics(self, text): + return '\\emph {%s}' %text + + def underline(self, text): + return '\\underline {%s}' %text + + def list_element(self, text): + return '\\item {%s}' %text + + def ordered_list(self, text, list_style): + return '\\begin{itemize}{%s}\\end{{enumerate}}'%text + + def unordered_list(self, text): + return '\\begin{itemize}{%s}\\end{{itemize}}'%text + + def head(self): + return "\\documentclass{article}\\usepackage{hyperref}"\ + "\\usepackage{graphicx}\\usepackage{changes}\\usepackage{changepage} "\ + "\\usepackage[paperwidth=%spt]{geometry}\\usepackage{hanging}" %self.page_width + + def paragraph(self, text, pre=None): + return '\\par{'+text+'} ' + + def heading(self, text, heading_value): + #TODO figure out what to do for headings + return text + + def insertion(self, text, author, date): + return '\\added[id='+author+',remark='+date+']{%s}' %text + + def hyperlink(self, text, href): + if text == '': + return '' + return '\\href{%(href)s}{%(text)s}' % { + 'href': href, + 'text': text, + } + + def image_handler(self, path): + return path + + def image(self, path, x, y): + src = self.image_handler(path) + if not src: + return '' + if all([x, y]): + x = x.replace('px','') + y = y.replace('px','') + x = float(x) + y = float(y) + x = x * float(3) / float(4) + y = y * float(3) / float(4) + return '\\includegraphics[height=%spt, width=%spt] {%s}' % ( + y, + x, + src) + else: + return '\\includegraphics {%s}' % src + + def tab(self): + return '\\qquad ' + + def table(self, text): + center = False + right = False + setup_cols = '' + print self.cols + for i in range(self.cols): + for column in self.table_info: + if column['Column'] == i and column['justify'] == 'center': + center = True + elif column['Column'] == i and column['justify'] == 'right': 
+ right = True + if center is True: + setup_cols += 'c' + center = False + elif right is True: + setup_cols += 'r' + right = False + else: + setup_cols += 'l' + self.table_info = [] + return '\\\\\\begin{tabular} {%s} %s \\end{tabular}\\\\ '%(setup_cols, text) + + def table_row(self, text): + return '%s \\\\ '%text + + def table_cell(self, text, last, col_index, row_index, col='', row=''): + if last is True: + self.cols = col_index + 1 + self.rows = row_index + 1 + return text + else: + return '%s & '%text + + def page_break(self): + return '\\newpage ' + + def indent_table(self, just='', firstLine='', left='', right='', column = 0): + self.columns = {} + self.columns['Column'] = column + self.columns['justify'] = just + if self.columns not in self.table_info: + self.table_info.append(self.columns) + return '' + + + + def indent(self, text, just='', firstLine='', left='', right='', hanging = 0): + raggedright = False + raggedleft = False + center = False + slug = '{' + if hanging: + slug += '\\begin{hangpara}{%spt}{1} ' %(hanging) + if left and not right: + left = float(left) + left = left * float(3) / float(4) + slug += '\\begin{adjustwidth}{}{%spt}' %(left) + if right and not left: + right = float(right) + right = right * float(3) / float(4) + slug += '\\begin{adjustwidth}{%spt}{}' %(right) + if right and left: + left = float(left) + right = float(right) + left = left * float(3) / float(4) + right = right * float(3) / float(4) + slug += '\\begin{adjustwidth}{%spt}{%spt}' %(left,right) + if firstLine: + slug += '\\setlength{\\parindent}{'+firstLine+'pt}\\indent ' + if just: + if just == 'left': + raggedright = True + slug += '\\begin{flushright} ' + elif just == 'center': + center = True + slug += '\\begin{center} ' + elif just == 'right': + raggedleft = True + slug += '\\begin{flushleft} ' + slug += text + if left or right: + slug += '\\end{adjustwidth}' + if hanging: + slug += '\\end{hangpara}' + if raggedright: + slug += '\\end{flushright}' + if center: + slug 
+= '\\end{center}' + if raggedleft: + slug += '\\end{flushleft}' + slug+= '}' + return slug + #TODO left and right + + def break_tag(self): + return '' +# return '\\\\ ' + + def deletion(self, text, author, date): + return '\\deleted[id='+author+',remark='+date+']{%s}' % text + + def insertion(self, text, author, date): + return '\\added[id='+author+',remark='+date+']{%s}' %text \ No newline at end of file From 3f073e40df33fb37654ed0d1dbb869d96b4ac982 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 15:33:25 -0400 Subject: [PATCH 236/404] flake8 compliant --- pydocx/DocxParser.py | 74 ++++++++++++++++++------------------ pydocx/parsers/Docx2LaTex.py | 62 ++++++++++++++---------------- 2 files changed, 66 insertions(+), 70 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 82e38a29..5ebf193a 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -19,22 +19,22 @@ 'drawing', 'delText', 'ins', - ) +) TAGS_HOLDING_CONTENT_TAGS = ( 'p', 'tbl', 'sdt', - ) +) -def remove_namespaces(document): # remove namespaces +def remove_namespaces(document): # remove namespaces root = ElementTree.fromstring(document) for child in el_iter(root): child.tag = child.tag.split("}")[1] child.attrib = dict( (k.split("}")[-1], v) - for k, v in child.attrib.items() + for k, v in child.attrib.items() ) return ElementTree.tostring(root) @@ -117,7 +117,7 @@ def find_ancestor_with_tag(self, tag): @contextmanager -def ZipFile(path): # This is not needed in python 3.2+ +def ZipFile(path): # This is not needed in python 3.2+ f = zipfile.ZipFile(path) yield f f.close() @@ -134,18 +134,18 @@ def _build_data(self, path, *args, **kwargs): self.fonts = f.read('/word/fontTable.xml') except KeyError: self.fonts = None - try: # Only present if there are lists + try: # Only present if there are lists self.numbering_text = f.read('word/numbering.xml') except KeyError: self.numbering_text = None - try: # Only present if there are comments + try: # Only present 
if there are comments self.comment_text = f.read('word/comments.xml') except KeyError: self.comment_text = None self.relationship_text = f.read('word/_rels/document.xml.rels') self.root = ElementTree.fromstring( - remove_namespaces(self.document_text), # remove the namespaces + remove_namespaces(self.document_text), # remove the namespaces ) self.numbering_root = None if self.numbering_text: @@ -183,7 +183,7 @@ def __init__(self, *args, **kwargs): self.page_width = 0 self._build_data(*args, **kwargs) - def add_parent(el): # if a parent, make that an attribute + def add_parent(el): # if a parent, make that an attribute for child in el.getchildren(): setattr(child, 'parent', el) add_parent(child) @@ -197,7 +197,7 @@ def add_parent(el): # if a parent, make that an attribute self.page_width = int(self.root. find_first('pgSz').attrib['w']) / 20 - add_parent(self.root) # create the parent attributes + add_parent(self.root) # create the parent attributes #all blank when we init self.comment_store = None @@ -205,12 +205,12 @@ def add_parent(el): # if a parent, make that an attribute self.list_depth = 0 self.rels_dict = self._parse_rels_root() self.styles_dict = self._parse_styles() - self.parse_begin(self.root) # begin to parse + self.parse_begin(self.root) # begin to parse def _filter_children(self, element, tags): return [ - el for el in element.getchildren() - if el.tag in tags + el for el in element.getchildren() + if el.tag in tags ] def _set_list_attributes(self, el): @@ -261,7 +261,7 @@ def _set_table_attributes(self, el): if ( v_merge is not None and 'continue' == v_merge.get('val', '') - ): + ): child.vmerge_continue = True def _set_text_attributes(self, el): @@ -286,10 +286,10 @@ def _set_first_list_item(self, num_ids, ilvls, list_elements): for num_id in num_ids: for ilvl in ilvls: filtered_list_elements = [ - i for i in list_elements - if ( - i.num_id == num_id and - i.ilvl == ilvl + i for i in list_elements + if ( + i.num_id == num_id and + i.ilvl == ilvl ) ] 
if not filtered_list_elements: @@ -304,8 +304,8 @@ def _set_last_list_item(self, num_ids, list_elements): # non list elements into the first root level list. for num_id in num_ids: filtered_list_elements = [ - i for i in list_elements - if i.num_id == num_id + i for i in list_elements + if i.num_id == num_id ] if not filtered_list_elements: continue @@ -326,7 +326,7 @@ def _set_headers(self, elements): 'heading 8': 'h6', 'heading 9': 'h6', 'heading 10': 'h6', - } + } for element in elements: # This element is using the default style which is not a heading. if element.find_first('pStyle') is None: @@ -350,7 +350,7 @@ def _get_children_with_content(el): for child in self._filter_children(el, TAGS_HOLDING_CONTENT_TAGS): has_descendant_with_tag = any( child.has_descendant_with_tag(tag) for - tag in TAGS_CONTAINING_CONTENT + tag in TAGS_CONTAINING_CONTENT ) if has_descendant_with_tag: children.append(child) @@ -385,8 +385,8 @@ def parse_begin(self, el): # Find the first and last li elements body = el.find_first('body') list_elements = [ - child for child in body.find_all('p') - if child.is_list_item + child for child in body.find_all('p') + if child.is_list_item ] num_ids = set([i.num_id for i in list_elements]) ilvls = set([i.ilvl for i in list_elements]) @@ -394,7 +394,7 @@ def parse_begin(self, el): self._set_first_list_item(num_ids, ilvls, list_elements) self._set_last_list_item(num_ids, list_elements) p_elements = [ - child for child in body.find_all('p') + child for child in body.find_all('p') ] self._set_headers(p_elements) self._set_next(body) @@ -539,13 +539,13 @@ def should_parse_last_el(last_el, first_el): # Will be handled when the ilvls do match (nesting issue) if last_el.ilvl != first_el.ilvl: return False - # We only care about last items that have not been parsed before + # We only care about last items that have not been parsed before # (first list items are always parsed at the beginning of this # method.) 
return ( not last_el.is_first_list_item and last_el.is_last_list_item_in_root - ) + ) if should_parse_last_el(next_el, el): parsed += self.parse(next_el) @@ -577,7 +577,7 @@ def _should_append_break_tag(self, next_el): paragraph_like_tags = [ 'p', 'sdt', - ] + ] if next_el.is_list_item: return False if next_el.previous is None: @@ -613,7 +613,7 @@ def _should_parse_next_as_content(el): if ( not next_el.is_list_item and not el.is_last_list_item_in_root - ): + ): return True if next_el.is_first_list_item: if next_el.num_id == el.num_id: @@ -644,9 +644,9 @@ def _get_rowspan(self, el, v_merge): # We only want table cells that have a higher row_index that is greater # than the current_row and that are on the current_col tcs = [ - tc for tc in tbl.find_all('tc') - if tc.row_index >= current_row and - tc.column_index == current_col + tc for tc in tbl.find_all('tc') + if tc.row_index >= current_row and + tc.column_index == current_col ] restart_in_v_merge = False if v_merge is not None and 'val' in v_merge.attrib: @@ -737,7 +737,7 @@ def _get_image_size(self, el): return ( '%dpx' % x, '%dpx' % y, - ) + ) shape = el.find_first('shape') if shape is not None: # If either of these are not set, rely on the method `image` to not @@ -793,7 +793,7 @@ def parse_r(self, el, parsed): run_tag_property = el.find('rPr') if run_tag_property is not None: fns = [] - if run_tag_property.has_child('b'): # text styling + if run_tag_property.has_child('b'): # text styling if self._is_style_on(run_tag_property.find('b')): fns.append(self.bold) if run_tag_property.has_child('i'): @@ -808,7 +808,7 @@ def parse_r(self, el, parsed): just = '' if paragraph_tag_property is not None: jc = paragraph_tag_property.find('jc') - if jc is not None: # text alignments + if jc is not None: # text alignments if jc.attrib['val'] == 'right': just = 'right' elif jc.attrib['val'] == 'center': @@ -843,7 +843,7 @@ def parse_r(self, el, parsed): block = False self.block_text += text text = self.indent(self.block_text, 
just, - firstLine, left, right) + firstLine, left, right) self.block_text = '' else: block = True @@ -957,4 +957,4 @@ def page_break(self): @abstractmethod def indent(self, text, left='', right='', firstLine=''): - return text # TODO JUSTIFIED JUSTIFIED TEXT \ No newline at end of file + return text diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index fcd14ebe..a018e5c1 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -1,6 +1,4 @@ from pydocx.DocxParser import DocxParser -from mako.template import Template -import xml.sax.saxutils class Docx2LaTex(DocxParser): @@ -11,7 +9,7 @@ def __init__(self, *args, **kwargs): self.current_col = 0 self.table_info = [] self.columns = {} - super(Docx2LaTex, self ).__init__(*args, **kwargs) + super(Docx2LaTex, self).__init__(*args, **kwargs) @property def parsed(self): @@ -21,7 +19,7 @@ def parsed(self): return unicode(content) def escape(self, text): - chars = ['%', '&', '#', '$', '~', '_', '^', '{', '}',] + chars = ['%', '&', '#', '$', '~', '_', '^', '{', '}'] for ch in chars: if ch in text: text = text.replace(ch, '\\'+ch) @@ -31,27 +29,29 @@ def linebreak(self): return '\\\\ ' def bold(self, text): - return '\\textbf {%s}' %text + return '\\textbf {%s}' % text def italics(self, text): - return '\\emph {%s}' %text + return '\\emph {%s}' % text def underline(self, text): - return '\\underline {%s}' %text + return '\\underline {%s}' % text def list_element(self, text): - return '\\item {%s}' %text + return '\\item {%s}' % text def ordered_list(self, text, list_style): - return '\\begin{itemize}{%s}\\end{{enumerate}}'%text + return '\\begin{itemize}{%s}\\end{{enumerate}}' % text def unordered_list(self, text): - return '\\begin{itemize}{%s}\\end{{itemize}}'%text + return '\\begin{itemize}{%s}\\end{{itemize}}' % text def head(self): return "\\documentclass{article}\\usepackage{hyperref}"\ - "\\usepackage{graphicx}\\usepackage{changes}\\usepackage{changepage} "\ - 
"\\usepackage[paperwidth=%spt]{geometry}\\usepackage{hanging}" %self.page_width + "\\usepackage{graphicx}\\usepackage{changes}" \ + "\\usepackage{changepage} "\ + "\\usepackage[paperwidth=%spt]{geometry}" \ + "\\usepackage{hanging}" % self.page_width def paragraph(self, text, pre=None): return '\\par{'+text+'} ' @@ -61,7 +61,7 @@ def heading(self, text, heading_value): return text def insertion(self, text, author, date): - return '\\added[id='+author+',remark='+date+']{%s}' %text + return '\\added[id='+author+',remark='+date+']{%s}' % text def hyperlink(self, text, href): if text == '': @@ -69,7 +69,7 @@ def hyperlink(self, text, href): return '\\href{%(href)s}{%(text)s}' % { 'href': href, 'text': text, - } + } def image_handler(self, path): return path @@ -79,8 +79,8 @@ def image(self, path, x, y): if not src: return '' if all([x, y]): - x = x.replace('px','') - y = y.replace('px','') + x = x.replace('px', '') + y = y.replace('px', '') x = float(x) y = float(y) x = x * float(3) / float(4) @@ -115,10 +115,11 @@ def table(self, text): else: setup_cols += 'l' self.table_info = [] - return '\\\\\\begin{tabular} {%s} %s \\end{tabular}\\\\ '%(setup_cols, text) + return '\\\\\\begin{tabular} {%s} %s \\end{tabular}\\\\ ' \ + % (setup_cols, text) def table_row(self, text): - return '%s \\\\ '%text + return '%s \\\\ ' % text def table_cell(self, text, last, col_index, row_index, col='', row=''): if last is True: @@ -126,12 +127,12 @@ def table_cell(self, text, last, col_index, row_index, col='', row=''): self.rows = row_index + 1 return text else: - return '%s & '%text + return '%s & ' % text def page_break(self): return '\\newpage ' - def indent_table(self, just='', firstLine='', left='', right='', column = 0): + def indent_table(self, just='', firstLine='', left='', right='', column=0): self.columns = {} self.columns['Column'] = column self.columns['justify'] = just @@ -139,29 +140,28 @@ def indent_table(self, just='', firstLine='', left='', right='', column = 0): 
self.table_info.append(self.columns) return '' - - - def indent(self, text, just='', firstLine='', left='', right='', hanging = 0): + def indent(self, text, just='', firstLine='', + left='', right='', hanging=0): raggedright = False raggedleft = False center = False slug = '{' if hanging: - slug += '\\begin{hangpara}{%spt}{1} ' %(hanging) + slug += '\\begin{hangpara}{%spt}{1} ' % (hanging) if left and not right: left = float(left) left = left * float(3) / float(4) - slug += '\\begin{adjustwidth}{}{%spt}' %(left) + slug += '\\begin{adjustwidth}{}{%spt}' % (left) if right and not left: right = float(right) right = right * float(3) / float(4) - slug += '\\begin{adjustwidth}{%spt}{}' %(right) + slug += '\\begin{adjustwidth}{%spt}{}' % (right) if right and left: left = float(left) right = float(right) left = left * float(3) / float(4) right = right * float(3) / float(4) - slug += '\\begin{adjustwidth}{%spt}{%spt}' %(left,right) + slug += '\\begin{adjustwidth}{%spt}{%spt}' % (left, right) if firstLine: slug += '\\setlength{\\parindent}{'+firstLine+'pt}\\indent ' if just: @@ -173,7 +173,7 @@ def indent(self, text, just='', firstLine='', left='', right='', hanging = 0): slug += '\\begin{center} ' elif just == 'right': raggedleft = True - slug += '\\begin{flushleft} ' + slug += '\\begin{flushleft} ' slug += text if left or right: slug += '\\end{adjustwidth}' @@ -185,16 +185,12 @@ def indent(self, text, just='', firstLine='', left='', right='', hanging = 0): slug += '\\end{center}' if raggedleft: slug += '\\end{flushleft}' - slug+= '}' + slug += '}' return slug #TODO left and right def break_tag(self): return '' -# return '\\\\ ' def deletion(self, text, author, date): return '\\deleted[id='+author+',remark='+date+']{%s}' % text - - def insertion(self, text, author, date): - return '\\added[id='+author+',remark='+date+']{%s}' %text \ No newline at end of file From a1d8080351231300ba2ead3a4a52ed5bf0ae4cfd Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 21 May 2013 
15:38:31 -0400 Subject: [PATCH 237/404] updating --- pydocx/parsers/Docx2Html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 5070f4a6..19f76a00 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -126,7 +126,7 @@ def table(self, text): def table_row(self, text): return '
      ' + text + '
      ' - - def table_row(self, text): - return '
      ' + text + '
      ' - - def ordered_list(self, text, list_style): - list_type_conversions = { - 'decimal': 'decimal', - 'decimalZero': 'decimal-leading-zero', - 'upperRoman': 'upper-roman', - 'lowerRoman': 'lower-roman', - 'upperLetter': 'upper-alpha', - 'lowerLetter': 'lower-alpha', - 'ordinal': 'decimal', - 'cardinalText': 'decimal', - 'ordinalText': 'decimal', - } - return '
        {text}
      '.format( - list_style=list_type_conversions.get(list_style, 'decimal'), - text=text, - ) - - -def convert(path): - return TestDocx2HTML(path).parsed - - -def test_extract_html(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'simple.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -

      - Simple text -

      -
        -
      1. one
      2. -
      3. two
      4. -
      5. three
      6. -
      - - - - - - - - - -
      Cell1Cell2
      Cell3Cell4
      - - ''') - - -def test_nested_list(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'nested_lists.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
        -
      1. one
      2. -
      3. two
      4. -
      5. three -
          -
        1. AAA
        2. -
        3. BBB
        4. -
        5. CCC -
            -
          1. alpha
          2. -
          -
        6. -
        -
      6. -
      7. four
      8. -
      -
        -
      1. xxx -
          -
        1. yyy
        2. -
        -
      2. -
      -
        -
      • www -
          -
        • zzz
        • -
        -
      • -
      - - ''') - - -def test_simple_list(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'simple_lists.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
        -
      1. One
      2. -
      -
        -
      • two
      • -
      - - ''') - - -def test_inline_tags(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'inline_tags.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ( - '

      This sentence has some bold, ' - 'some italics and some ' - 'underline, ' - 'as well as a hyperlink' - '.

      ' - )) - - -def test_unicode(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'greek_alphabet.docx', - ) - actual_html = convert(file_path) - assert actual_html is not None - assert u'\u0391\u03b1' in actual_html - - -def test_special_chars(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'special_chars.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' -

      & < > link

      ''') # noqa - - -def test_table_col_row_span(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'table_col_row_span.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - - - - - - - - - - - - - - - - - - -
      AAA
      BBBCCC
      DDD
      -
      EEE -
      FFF
      -
      GGG -
      - - - - - - - - - - - - - - - - - - - - - - -
      1234
      567
      89
      10111213
      - - ''') - - -def test_nested_table_rowspan(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'nested_table_rowspan.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - - - - - - - - -
      AAA
      BBB - - - - - - - - -
      CCCDDD
      EEE
      -
      - - ''') - - -def test_nested_tables(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'nested_tables.docx', - ) - actual_html = convert(file_path) - # Find out why br tag is there. - assert_html_equal(actual_html, ''' - - - - - - - - - - -
      AAABBB
      CCC - - - - - - - - - -
      DDDEEE
      FFFGGG
      -
      - - ''') - - -def test_list_in_table(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'list_in_table.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - - - - -
      -
        -
      1. AAA
      2. -
      3. BBB
      4. -
      5. CCC
      6. -
      -
      - - ''') - - -def test_tables_in_lists(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'tables_in_lists.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
        -
      1. AAA
      2. -
      3. BBB - - - - - - - - - -
        CCCDDD
        EEEFFF
        -
      4. -
      5. GGG
      6. -
      - - ''') - - -def test_track_changes_on(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'track_changes_on.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' -

      This was some content.

      - ''') - - -def test_headers(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'headers.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -

      This is an H1

      -

      This is an H2

      -

      This is an H3

      -

      This is an H4

      -
      This is an H5
      -
      This is an H6
      -
      This is an H7
      -
      This is an H8
      -
      This is an H9
      -
      This is an H10
      - - ''') - - -def _copy_file_to_tmp_dir(file_path, filename): - # Since the images need to be extracted from the docx, copy the file to a - # temp directory so we do not clutter up repo. - dp = tempfile.mkdtemp() - new_file_path = path.join(dp, filename) - shutil.copyfile(file_path, new_file_path) - return new_file_path, dp - - -def test_split_headers(): - filename = 'split_header.docx' - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'split_header.docx', - ) - # preserve_images must be true in order for the image to not be removed. - # This is handled in build_import, however here we need to manually set it - # to True. - new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) - - actual_html = convert(new_file_path) - assert_html_equal(actual_html, ''' -

      AAA

      BBB

      CCC

      - ''') - - -def test_has_image(): - filename = 'has_image.docx' - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'has_image.docx', - ) - # preserve_images must be true in order for the image to not be removed. - # This is handled in build_import, however here we need to manually set it - # to True. - new_file_path, dp = _copy_file_to_tmp_dir(file_path, filename) - - actual_html = convert(new_file_path) - # Ignore height, width for now. - assert_html_equal(actual_html, ''' - -

      AAA

      - - ''') - - -def test_local_dpi(): - filename = 'localDpi.docx' - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'localDpi.docx', - ) - new_file_path, dp = _copy_file_to_tmp_dir(file_path, filename) - actual_html = convert(new_file_path) - assert_html_equal(actual_html, ''' - -

      - - ''') - - -def test_has_image_using_image_handler(): - raise SkipTest('This needs to be converted to an xml test') - filename = 'has_image.docx' - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'has_image.docx', - ) - # preserve_images must be true in order for the image to not be removed. - # This is handled in build_import, however here we need to manually set it - # to True. - new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) - - def image_handler(*args, **kwargs): - return 'test' - actual_html = convert(new_file_path) - assert_html_equal(actual_html, ''' - -

      AAA

      - - ''') - - -#def test_attachment_is_tiff(): -# filename = 'attachment_is_tiff.docx' -# file_path = path.join( -# path.abspath(path.dirname(__file__)), -# '..', -# 'fixtures', -# 'attachment_is_tiff.docx', -# ) -# # preserve_images must be true in order for the image to not be removed. -# # This is handled in build_import, however here we need to manually set it -# # to True. -# new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) -# -# # First open the file and verify that the image attachment is a tiff. -# try: -# zf = ZipFile(new_file_path) -# # Get the document data. -# _, meta_data = _get_document_data(zf) -# finally: -# zf.close() -# # Find the path to the image. -# image_file = None -# for file_path in meta_data.relationship_dict.values(): -# if file_path.endswith('.gif'): -# image_file = file_path -# assert image_file is not None -# with open(image_file) as f: -# magic_number = f.read()[:4] -# # Make sure the image is actually a gif. -# assert magic_number == 'GIF8' - - -def test_headers_with_full_line_styles(): - raise SkipTest('This test is not yet passing') - # Show that if a natural header is completely bold/italics that - # bold/italics will get stripped out. - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'headers_with_full_line_styles.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -

      AAA

      -

      BBB

      -

      CCC

      - - ''') - - -def test_convert_p_to_h(): - raise SkipTest('This test is not yet passing') - # Show when it is correct to convert a p tag to an h tag based on - # bold/italics - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'convert_p_to_h.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -

      AAA

      -

      BBB

      -

      CCC

      -
        -
      1. DDD
      2. -
      3. EEE
      4. -
      5. FFF
      6. -
      - - - - - - - - - -
      GGGHHH
      IIIJJJ
      - - ''') - - -#def test_bigger_font_size_to_header(): -# # Show when it is appropriate to convert p tags to h tags based on font -# # size. -# if not DETECT_FONT_SIZE: -# raise SkipTest('Font size detection is disabled.') -# file_path = path.join( -# path.abspath(path.dirname(__file__)), -# '..', -# 'fixtures', -# 'bigger_font_size_to_header.docx', -# ) -# actual_html = convert(file_path) -# assert_html_equal(actual_html, ''' -# -#

      Paragraphs:

      -#

      Header

      -#

      paragraph 1

      -#

      Lists:

      -#
        -#
      1. bigger
      2. -#
      3. smaller
      4. -#
      -#

      Tables:

      -# -# -# -# -# -#
      biggersmaller
      -# -# ''') - - -def test_fake_headings_by_length(): - raise SkipTest('This test is not yet passing') - # Show that converting p tags to h tags has a length limit. If the p tag is - # supposed to be converted to an h tag but has more than seven words in the - # paragraph do not convert it. - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'fake_headings_by_length.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -

      Heading.

      -

      Still a heading.

      -

      - This is not a heading because it is too many words. -

      - - ''') - - -def test_shift_enter(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'shift_enter.docx', - ) - - # Test just the convert without clean_html to make sure the first - # break tag is present. - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -

      AAA
      BBB

      -

      CCC

      -
        -
      1. DDD
        EEE
      2. -
      3. FFF
      4. -
      - - - - - - - - - -
      GGG
      HHH
      III
      JJJ
      KKKLLL
      - - ''') - - -def test_lists_with_styles(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'lists_with_styles.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -
        -
      1. AAA
      2. -
      3. BBB -
          -
        1. CCC
        2. -
        3. DDD -
            -
          1. EEE -
              -
            1. FFF
            2. -
            -
          2. -
          -
        4. -
        -
      4. -
      - - ''') - - -def test_list_to_header(): - raise SkipTest('This test is not yet passing') - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'list_to_header.docx', - ) - actual_html = convert(file_path) - # It should be noted that list item `GGG` is upper roman in the word - # document to show that only top level upper romans get converted. - assert_html_equal(actual_html, ''' - -

      AAA

      -
        -
      1. BBB
      2. -
      -

      CCC

      -
        -
      1. DDD
      2. -
      -

      EEE

      -
        -
      1. FFF -
          -
        1. GGG
        2. -
        -
      2. -
      - - ''') - - -def test_has_title(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'has_title.docx', - ) - actual_html = convert(file_path) - assert_html_equal( - actual_html, - '''

      Title

      -

      Text

      ''', - ) - - -def test_upper_alpha_all_bold(): - raise SkipTest('This test is not yet passing') - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'upper_alpha_all_bold.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -

      AAA

      -

      BBB

      -

      CCC

      - - ''') - - -def test_simple_table(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'simple_table.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - - - - -
      Cell1
      Cell3
      Cell2
      - And I am writing in the table
      Cell4
      - - ''') - - -def test_justification(): - file_path = path.join( - path.abspath(path.dirname(__file__)), - '..', - 'fixtures', - 'justification.docx', - ) - actual_html = convert(file_path) - assert_html_equal(actual_html, ''' - -

      -

      Center Justified
      -

      -

      -

      Right justified
      -

      -

      -

      - Right justified and pushed in from right -
      -

      -

      -

      - Center justified and pushed in from left and it is - great and it is the coolest thing of all time and I like it and - I think it is cool -
      -

      -

      -

      - Left justified and pushed in from left -
      -

      - - ''') - - -def _converter(*args, **kwargs): - # Having a converter that does nothing is the same as if abiword fails to - # convert. - pass - - -#def test_converter_broken(): -# file_path = 'test.doc' -# assert_raises( -# ConversionFailed, -# lambda: convert(file_path, converter=_converter), -# ) - - -def test_fall_back(): - raise SkipTest('This test is not yet passing') - file_path = 'test.doc' - - def fall_back(*args, **kwargs): - return 'success' - html = convert(file_path, fall_back=fall_back, converter=_converter) - assert html == 'success' - - -#@mock.patch('docx2html.core.read_html_file') -#@mock.patch('docx2html.core.get_zip_file_handler') -#def test_html_files(patch_zip_handler, patch_read): -def test_html_files(): - raise SkipTest('This test is not yet passing') - - def raise_assertion(*args, **kwargs): - raise AssertionError('Should not have called get_zip_file_handler') - #patch_zip_handler.side_effect = raise_assertion - - def return_text(*args, **kwargs): - return 'test' - #patch_read.side_effect = return_text - - # Try with an html file - file_path = 'test.html' - - html = convert(file_path) - assert html == 'test' - - # Try again with an htm file. 
- file_path = 'test.htm' - - html = convert(file_path) - assert html == 'test' From 615bad903d9dbd55447b97bd63384d573ff29f74 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 22 May 2013 09:49:48 -0400 Subject: [PATCH 249/404] removed unecessary imports --- pydocx/__init__.py | 11 +++-------- pydocx/tests/__init__.py | 1 - 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index 743227cc..c630b304 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -1,9 +1,4 @@ -from .parsers import Docx2Html, Docx2Markdown +from .parsers import Docx2LaTex - -def docx2html(path): - return Docx2Html(path).parsed - - -def docx2markdown(path): - return Docx2Markdown(path).parsed +def docx2latex(path): + return Docx2LaTex(path).parsed \ No newline at end of file diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index e509c397..93f13ada 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -2,7 +2,6 @@ import re from contextlib import contextmanager -from pydocx.parsers.Docx2Html import Docx2Html from pydocx.DocxParser import ( remove_namespaces, # We are only importing this from DocxParse since we have added methods to From 07e145ba6a3a6347ba2a1bcf830b8bb90aae8059 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 22 May 2013 10:04:33 -0400 Subject: [PATCH 250/404] updating --- pydocx/tests/document_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index a9b49aa4..22219571 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -14,8 +14,8 @@ 'style': 'style.xml', 'styles': 'styles.xml', 'table': 'table.xml', - 'tc': 'tc.xml', 'tr': 'tr.xml', + 'tc': 'tc.xml', } env = Environment( From 4937a55b354e768d7a80645abaa214bc1590f7de Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 22 May 2013 18:27:52 -0400 Subject: [PATCH 251/404] updating --- 
pydocx/parsers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/parsers/__init__.py b/pydocx/parsers/__init__.py index 7684ae65..f1c6bb15 100644 --- a/pydocx/parsers/__init__.py +++ b/pydocx/parsers/__init__.py @@ -1,4 +1,4 @@ from pydocx.parsers.Docx2Html import Docx2Html from pydocx.parsers.Docx2Markdown import Docx2Markdown - +from pydocx.parsers.Docx2LaTex import Docx2LaTex __all__ = (Docx2Html, Docx2Markdown) From 72bf87ac5ee7b663ca07f5854b5b492822ee14fe Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 22 May 2013 18:32:05 -0400 Subject: [PATCH 252/404] comment fixes --- pydocx/tests/document_builder.py | 33 +++++++++----------------------- pydocx/tests/test_xml.py | 1 - 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 22219571..5a89b4bd 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -127,30 +127,15 @@ def _tr(rows, text): @classmethod def drawing(self, r_id, height=None, width=None): template = env.get_template(templates['drawing']) - if height and width: - kwargs = { - 'r_id': r_id, - 'height': height * EMUS_PER_PIXEL, - 'width': width * EMUS_PER_PIXEL, - } - elif height: - kwargs = { - 'r_id': r_id, - 'height': height * EMUS_PER_PIXEL, - 'width': width, - } - elif width: - kwargs = { - 'r_id': r_id, - 'width': width * EMUS_PER_PIXEL, - 'height': height, - } - else: - kwargs = { - 'r_id': r_id, - 'width': width, - 'height': height, - } + if height is not None: + height = height * EMUS_PER_PIXEL + if width is not None: + width = width * EMUS_PER_PIXEL + kwargs = { + 'r_id': r_id, + 'height': height * EMUS_PER_PIXEL, + 'width': width * EMUS_PER_PIXEL, + } return template.render(**kwargs) @classmethod diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index f577fe75..7ba6760b 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -159,7 +159,6 @@ def 
get_xml(self): body += el xml = DXB.xml(body) - print xml return xml From d556e96cdf6cff09fc4c6b63a07e9f903053dc70 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 22 May 2013 18:40:19 -0400 Subject: [PATCH 253/404] added import --- pydocx/tests/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index a498bfd4..d8247ffb 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -2,6 +2,7 @@ import re from contextlib import contextmanager +from pydocx.parsers.Docx2Html import Docx2Html from pydocx.DocxParser import ( remove_namespaces, # We are only importing this from DocxParse since we have added methods to From 92e7b7db7bb91bdb770f9102d3e05415c4855a1a Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 22 May 2013 18:41:30 -0400 Subject: [PATCH 254/404] fixed error --- pydocx/tests/document_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 5a89b4bd..b56d47b5 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -133,8 +133,8 @@ def drawing(self, r_id, height=None, width=None): width = width * EMUS_PER_PIXEL kwargs = { 'r_id': r_id, - 'height': height * EMUS_PER_PIXEL, - 'width': width * EMUS_PER_PIXEL, + 'height': height, + 'width': width, } return template.render(**kwargs) From a43be1ab9841d847815f7de12502d34c5376e140 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 22 May 2013 18:48:00 -0400 Subject: [PATCH 255/404] fixed some errors --- pydocx/__init__.py | 13 +++++++++++-- pydocx/parsers/__init__.py | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index c630b304..f027a817 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -1,4 +1,13 @@ -from .parsers import Docx2LaTex +from .parsers import Docx2LaTex, Docx2Html, Docx2Markdown + + +def docx2html(path): + return 
Docx2Html(path).parsed + + +def docx2markdown(path): + return Docx2Markdown(path).parsed + def docx2latex(path): - return Docx2LaTex(path).parsed \ No newline at end of file + return Docx2LaTex(path).parsed diff --git a/pydocx/parsers/__init__.py b/pydocx/parsers/__init__.py index f1c6bb15..f6bb520f 100644 --- a/pydocx/parsers/__init__.py +++ b/pydocx/parsers/__init__.py @@ -1,4 +1,4 @@ from pydocx.parsers.Docx2Html import Docx2Html from pydocx.parsers.Docx2Markdown import Docx2Markdown from pydocx.parsers.Docx2LaTex import Docx2LaTex -__all__ = (Docx2Html, Docx2Markdown) +__all__ = (Docx2Html, Docx2Markdown, Docx2LaTex) From ecda5d2d3fbae6d5d9ad50fc1f066463386fbdb1 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 22 May 2013 21:54:44 -0400 Subject: [PATCH 256/404] fixed test --- pydocx/tests/__init__.py | 1 - pydocx/tests/test_xml.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index d8247ffb..76caa5e9 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -161,5 +161,4 @@ def test_expected_output(self): rels_dict=self.relationship_dict, numbering_dict=self.numbering_dict, ).parsed - assert_html_equal(html, BASE_HTML % self.expected_output) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 0fb09341..e757779c 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -124,10 +124,8 @@ class ImageLocal(_TranslationTestCase): 'rId1': 'media/image2.jpeg', } expected_output = ''' -

      - ''' def get_xml(self): From 2b896dffecee182be9446456db025a55b395619a Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 11:39:33 -0400 Subject: [PATCH 257/404] refs #32: small refactor --- pydocx/tests/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index d8247ffb..73bce714 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -136,6 +136,8 @@ class _TranslationTestCase(TestCase): relationship_dict = None numbering_dict = DEFAULT_NUMBERING_DICT run_expected_output = True + parser = XMLDocx2Html + use_base_html = True def get_xml(self): raise NotImplementedError() @@ -156,10 +158,13 @@ def test_expected_output(self): tree = self.get_xml() # Verify the final output. - html = XMLDocx2Html( + html = self.parser( document_xml=tree, rels_dict=self.relationship_dict, numbering_dict=self.numbering_dict, ).parsed - assert_html_equal(html, BASE_HTML % self.expected_output) + if self.use_base_html: + assert_html_equal(html, BASE_HTML % self.expected_output) + else: + assert_html_equal(html, self.expected_output) From 31fe2a10d28fda30836beaebfa9f0c539224773f Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 11:40:02 -0400 Subject: [PATCH 258/404] refs #32: code updated based on deprications --- pydocx/DocxParser.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index c57817f0..f684111c 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -85,7 +85,7 @@ def find_ancestor_with_tag(self, tag): Find the first ancestor with that is a `tag`. 
""" el = self - while el.parent: + while el.parent is not None: el = el.parent if el.tag == tag: return el @@ -276,7 +276,7 @@ def _set_text_attributes(self, el): def _set_is_in_table(self, el): paragraph_elements = el.find_all('p') for p in paragraph_elements: - if p.find_ancestor_with_tag('tc'): + if p.find_ancestor_with_tag('tc') is not None: p.is_in_table = True def _set_first_list_item(self, num_ids, ilvls, list_elements): @@ -360,12 +360,12 @@ def _assign_next(children): # Populate the `next` attribute for all the child elements. for i in range(len(children)): try: - if children[i + 1]: + if children[i + 1] is not None: children[i].next = children[i + 1] except IndexError: pass try: - if children[i - 1]: + if children[i - 1] is not None: children[i].previous = children[i - 1] except IndexError: pass @@ -620,7 +620,7 @@ def _should_parse_next_as_content(el): return True return False - while el: + while el is not None: if _should_parse_next_as_content(el): el = el.next next_elements_content = self.parse(el) @@ -683,7 +683,7 @@ def _should_parse_next_as_content(el): return False if next_el.is_in_table: return True - while el: + while el is not None: if _should_parse_next_as_content(el): el = el.next next_elements_content = self.parse(el) From d0cf19bfbdf9e561114ac4e2448dc1f2d8e1b0a4 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 11:40:23 -0400 Subject: [PATCH 259/404] refs #32: can now do underline/italics --- pydocx/tests/document_builder.py | 29 ++++++++++++++++++++++++++--- pydocx/tests/templates/r.xml | 6 +++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index cf67f2cc..9d463e6e 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -36,10 +36,23 @@ def xml(self, body): return template.render(body=body) @classmethod - def p_tag(self, text, bold=False, val=None): + def p_tag( + self, + text, + bold=False, + 
underline=False, + italics=False, + val=None, + ): if isinstance(text, str): # Use create a single r tag based on the text and the bold - run_tag = DocxBuilder.r_tag(text, bold, val) + run_tag = DocxBuilder.r_tag( + text, + is_bold=bold, + is_underline=underline, + is_italics=italics, + val=val, + ) run_tags = [run_tag] elif isinstance(text, list): run_tags = text @@ -53,11 +66,21 @@ def p_tag(self, text, bold=False, val=None): return template.render(**kwargs) @classmethod - def r_tag(self, text, is_bold=False, val=None, include_linebreak=False): + def r_tag( + self, + text, + is_bold=False, + is_underline=False, + is_italics=False, + val=None, + include_linebreak=False, + ): template = env.get_template(templates['r']) kwargs = { 'text': text, 'is_bold': is_bold, + 'is_underline': is_underline, + 'is_italics': is_italics, 'val': val, 'include_linebreak': include_linebreak, } diff --git a/pydocx/tests/templates/r.xml b/pydocx/tests/templates/r.xml index 4f0d6f5c..8fc7ecb1 100644 --- a/pydocx/tests/templates/r.xml +++ b/pydocx/tests/templates/r.xml @@ -1,8 +1,8 @@ - {% if is_bold %} - - {% endif %} + {% if is_bold %}{% endif %} + {% if is_underline %}{% endif %} + {% if is_italics %}{% endif %} {% if include_linebreak %}{% endif %} {% if text %}{% include 't.xml' %}{% endif %} From 27de3f000e4dada62eaf17fbc790e739f090bef3 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 12:01:02 -0400 Subject: [PATCH 260/404] refs #32: it is now possible to test headings --- pydocx/tests/__init__.py | 9 +++++++- pydocx/tests/document_builder.py | 2 ++ pydocx/tests/templates/p.xml | 2 +- pydocx/tests/test_xml.py | 37 ++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index 73bce714..31a0b53c 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -89,6 +89,7 @@ def _build_data( document_xml=None, rels_dict=None, numbering_dict=None, + styles_dict=None, 
*args, **kwargs): self._test_rels_dict = rels_dict if numbering_dict is None: @@ -104,6 +105,8 @@ def _build_data( # width that we are looking for in the test. self.page_width = 612 + self.styles_dict = styles_dict + def _parse_rels_root(self, *args, **kwargs): if self._test_rels_dict is None: return {} @@ -116,7 +119,9 @@ def get_list_style(self, num_id, ilvl): return 'decimal' def _parse_styles(self): - return {} + if self.styles_dict is None: + return {} + return self.styles_dict DEFAULT_NUMBERING_DICT = { @@ -134,6 +139,7 @@ def _parse_styles(self): class _TranslationTestCase(TestCase): expected_output = None relationship_dict = None + styles_dict = None numbering_dict = DEFAULT_NUMBERING_DICT run_expected_output = True parser = XMLDocx2Html @@ -162,6 +168,7 @@ def test_expected_output(self): document_xml=tree, rels_dict=self.relationship_dict, numbering_dict=self.numbering_dict, + styles_dict=self.styles_dict, ).parsed if self.use_base_html: diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 9d463e6e..58a83016 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -42,6 +42,7 @@ def p_tag( bold=False, underline=False, italics=False, + style='style0', val=None, ): if isinstance(text, str): @@ -62,6 +63,7 @@ def p_tag( kwargs = { 'run_tags': run_tags, + 'style': style, } return template.render(**kwargs) diff --git a/pydocx/tests/templates/p.xml b/pydocx/tests/templates/p.xml index 1954e6c7..629d4a80 100644 --- a/pydocx/tests/templates/p.xml +++ b/pydocx/tests/templates/p.xml @@ -1,6 +1,6 @@ - + {% if is_list %} {% if ilvl != None %} diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 4e5cf1a0..187730aa 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -890,3 +890,40 @@ def get_xml(self): xml = DXB.xml(body) return xml + + +class HeadingTestCase(_TranslationTestCase): + expected_output = ''' +

      AAA

      +

      BBB

      +

      CCC

      +

      DDD

      +
      EEE
      +
      GGG
      +

      HHH

      + ''' + styles_dict = { + 'style0': 'heading 1', + 'style1': 'heading 2', + 'style2': 'heading 3', + 'style3': 'heading 4', + 'style4': 'heading 5', + 'style5': 'heading 6', + } + + def get_xml(self): + p_tags = [ + DXB.p_tag(text='AAA', style='style0'), + DXB.p_tag(text='BBB', style='style1'), + DXB.p_tag(text='CCC', style='style2'), + DXB.p_tag(text='DDD', style='style3'), + DXB.p_tag(text='EEE', style='style4'), + DXB.p_tag(text='GGG', style='style5'), + DXB.p_tag(text='HHH', style='garbage'), + ] + body = '' + for tag in p_tags: + body += tag + + xml = DXB.xml(body) + return xml From add38afa9f00713cedab53dac9d4452be9c725dd Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 14:27:58 -0400 Subject: [PATCH 261/404] refs #33: Updated the test for expected image behaviour --- pydocx/tests/test_docx.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index 72b98f5b..df719f74 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -395,10 +395,12 @@ def test_has_image(): new_file_path, dp = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) - # Ignore height, width for now. assert_html_equal(actual_html, BASE_HTML % ''' -

      AAA

      - ''') +

      + AAA + +

      + ''' % dp) def test_local_dpi(): @@ -412,8 +414,9 @@ def test_local_dpi(): new_file_path, dp = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) assert_html_equal(actual_html, BASE_HTML % ''' -

      - ''') +

      + ''' % dp) + assert path.isfile('%s/word/media/image1.jpeg' % dp) def test_has_image_using_image_handler(): From 9329b0af13feb175b77a03f21a132a9f7ce7a829 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 14:29:05 -0400 Subject: [PATCH 262/404] refs #33: updated DocxParser to extract images --- pydocx/DocxParser.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 33c322d2..aaf73f71 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -1,3 +1,4 @@ +import os from abc import abstractmethod, ABCMeta import zipfile import logging @@ -128,6 +129,7 @@ class DocxParser: def _build_data(self, path, *args, **kwargs): with ZipFile(path) as f: + self.zip_path, _ = os.path.split(f.filename) self.document_text = f.read('word/document.xml') self.styles_text = f.read('word/styles.xml') try: @@ -143,6 +145,18 @@ def _build_data(self, path, *args, **kwargs): except KeyError: self.comment_text = None self.relationship_text = f.read('word/_rels/document.xml.rels') + try: + files = [ + e for e in f.infolist() + if e.filename.startswith('word/media/') + ] + for e in files: + f.extract( + e.filename, + self.zip_path, + ) + except KeyError: + pass self.root = ElementTree.fromstring( remove_namespaces(self.document_text), # remove the namespaces @@ -769,6 +783,12 @@ def parse_image(self, el): src = self.rels_dict.get(rId) if not src: return '' + if hasattr(self, 'zip_path'): + src = os.path.join( + self.zip_path, + 'word', + src, + ) src = self.escape(src) return self.image(src, x, y) From 737d3515ef2ee2cb954490bcfff28841e5787693 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 14:52:46 -0400 Subject: [PATCH 263/404] refs #20 code cleanup, removed print statment --- pydocx/tests/templates/pict.xml | 5 +---- pydocx/tests/test_xml.py | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/pydocx/tests/templates/pict.xml b/pydocx/tests/templates/pict.xml index 
3efda80e..26f772a3 100644 --- a/pydocx/tests/templates/pict.xml +++ b/pydocx/tests/templates/pict.xml @@ -9,10 +9,7 @@ - + {% if r_id %}{% endif %} diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index e757779c..f1700ffa 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -212,7 +212,6 @@ def test_get_image_sizes(self): ('40px', '20px'), ('41pt', '21pt'), ] - print image_ids self.assertEqual( set(image_ids), set(expected), From 2cba0a4a7f7ab5e096670789de967da15b4149dd Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 15:06:26 -0400 Subject: [PATCH 264/404] refs #33: name change, import clenaup --- pydocx/DocxParser.py | 7 ++++--- pydocx/tests/test_docx.py | 16 ++++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index aad3aed6..f54843e2 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -1,9 +1,10 @@ +import logging import os -from abc import abstractmethod, ABCMeta +import xml.etree.ElementTree as ElementTree import zipfile -import logging + +from abc import abstractmethod, ABCMeta from contextlib import contextmanager -import xml.etree.ElementTree as ElementTree from xml.etree.ElementTree import _ElementInterface from pydocx.utils import NamespacedNumId diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index df719f74..bfd0a67a 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -356,10 +356,10 @@ def test_headers(): def _copy_file_to_tmp_dir(file_path, filename): # Since the images need to be extracted from the docx, copy the file to a # temp directory so we do not clutter up repo. 
- dp = tempfile.mkdtemp() - new_file_path = path.join(dp, filename) + directory_path = tempfile.mkdtemp() + new_file_path = path.join(directory_path, filename) shutil.copyfile(file_path, new_file_path) - return new_file_path, dp + return new_file_path, directory_path def test_split_headers(): @@ -392,7 +392,7 @@ def test_has_image(): # preserve_images must be true in order for the image to not be removed. # This is handled in build_import, however here we need to manually set it # to True. - new_file_path, dp = _copy_file_to_tmp_dir(file_path, filename) + new_file_path, directory_path = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) assert_html_equal(actual_html, BASE_HTML % ''' @@ -400,7 +400,7 @@ def test_has_image(): AAA

      - ''' % dp) + ''' % directory_path) def test_local_dpi(): @@ -411,12 +411,12 @@ def test_local_dpi(): 'fixtures', 'localDpi.docx', ) - new_file_path, dp = _copy_file_to_tmp_dir(file_path, filename) + new_file_path, directory_path = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) assert_html_equal(actual_html, BASE_HTML % '''

      - ''' % dp) - assert path.isfile('%s/word/media/image1.jpeg' % dp) + ''' % directory_path) + assert path.isfile('%s/word/media/image1.jpeg' % directory_path) def test_has_image_using_image_handler(): From e5975fc6132e1efaeb7e484d15ee2a50707a776d Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 15:09:05 -0400 Subject: [PATCH 265/404] refs #33: removed lying comments, added a comment --- pydocx/tests/test_docx.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index bfd0a67a..ae1478a8 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -370,9 +370,6 @@ def test_split_headers(): 'fixtures', 'split_header.docx', ) - # preserve_images must be true in order for the image to not be removed. - # This is handled in build_import, however here we need to manually set it - # to True. new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) @@ -389,9 +386,6 @@ def test_has_image(): 'fixtures', 'has_image.docx', ) - # preserve_images must be true in order for the image to not be removed. - # This is handled in build_import, however here we need to manually set it - # to True. new_file_path, directory_path = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) @@ -404,6 +398,8 @@ def test_has_image(): def test_local_dpi(): + # The image in this file does not have a set height or width, show that the + # html will generate without it. filename = 'localDpi.docx' file_path = path.join( path.abspath(path.dirname(__file__)), @@ -428,9 +424,6 @@ def test_has_image_using_image_handler(): 'fixtures', 'has_image.docx', ) - # preserve_images must be true in order for the image to not be removed. - # This is handled in build_import, however here we need to manually set it - # to True. 
new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) def image_handler(*args, **kwargs): From 196522a08b18c63030dac169dfd471c67eca6cb9 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 15:10:17 -0400 Subject: [PATCH 266/404] refs #33: on the rest of the image test cases, made sure the image was a real file --- pydocx/tests/test_docx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index ae1478a8..72c70157 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -395,6 +395,7 @@ def test_has_image():

      ''' % directory_path) + assert path.isfile('%s/word/media/image1.gif' % directory_path) def test_local_dpi(): From a25affc1bf4458ad4077a21eefb289c131f70d75 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 15:12:08 -0400 Subject: [PATCH 267/404] refs #33: good catch on the KeyError --- pydocx/DocxParser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index f54843e2..12f225d4 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -146,11 +146,11 @@ def _build_data(self, path, *args, **kwargs): except KeyError: self.comment_text = None self.relationship_text = f.read('word/_rels/document.xml.rels') + files = [ + e for e in f.infolist() + if e.filename.startswith('word/media/') + ] try: - files = [ - e for e in f.infolist() - if e.filename.startswith('word/media/') - ] for e in files: f.extract( e.filename, From 5776117e392c455134d038f53334f5897130509c Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 15:57:12 -0400 Subject: [PATCH 268/404] refs #33: name change, no longer need try/except --- pydocx/DocxParser.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 12f225d4..93c2f8d3 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -146,18 +146,15 @@ def _build_data(self, path, *args, **kwargs): except KeyError: self.comment_text = None self.relationship_text = f.read('word/_rels/document.xml.rels') - files = [ + zipped_image_files = [ e for e in f.infolist() if e.filename.startswith('word/media/') ] - try: - for e in files: - f.extract( - e.filename, - self.zip_path, - ) - except KeyError: - pass + for e in zipped_image_files: + f.extract( + e.filename, + self.zip_path, + ) self.root = ElementTree.fromstring( remove_namespaces(self.document_text), # remove the namespaces From 999c3a29351a77cafa509ce5605ae0b6a3e2de8f Mon Sep 17 00:00:00 2001 From: Jason Ward 
Date: Thu, 23 May 2013 16:04:12 -0400 Subject: [PATCH 269/404] refs #33: assume zip_path is always set --- pydocx/DocxParser.py | 11 +++++------ pydocx/tests/__init__.py | 1 + pydocx/tests/test_xml.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 93c2f8d3..0a594326 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -779,12 +779,11 @@ def parse_image(self, el): src = self.rels_dict.get(rId) if not src: return '' - if hasattr(self, 'zip_path'): - src = os.path.join( - self.zip_path, - 'word', - src, - ) + src = os.path.join( + self.zip_path, + 'word', + src, + ) src = self.escape(src) return self.image(src, x, y) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index 76caa5e9..7c0447df 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -99,6 +99,7 @@ def _build_data( self.root = ElementTree.fromstring( remove_namespaces(document_xml), ) + self.zip_path = '' # This is the standard page width for a word document, Also the page # width that we are looking for in the test. diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index b54a9bb9..6ad53f41 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -124,8 +124,8 @@ class ImageLocal(_TranslationTestCase): 'rId1': 'media/image2.jpeg', } expected_output = ''' -

      -

      +

      +

      ''' def get_xml(self): @@ -150,10 +150,10 @@ class ImageTestCase(_TranslationTestCase): } expected_output = '''

      - +

      - +

      ''' From cdd88466d66acaec62a931a26ce961624b46b4d6 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 16:10:40 -0400 Subject: [PATCH 270/404] refs #32: removed the != None part for the style --- pydocx/DocxParser.py | 2 +- pydocx/tests/templates/p.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 88867a25..ce01f784 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -343,7 +343,7 @@ def _set_headers(self, elements): # This element is using the default style which is not a heading. if element.find_first('pStyle') is None: continue - style = element.find_first('pStyle').attrib['val'] + style = element.find_first('pStyle').attrib.get('val', '') style = self.styles_dict.get(style) # Check to see if this element is actually a header. diff --git a/pydocx/tests/templates/p.xml b/pydocx/tests/templates/p.xml index 629d4a80..778a8866 100644 --- a/pydocx/tests/templates/p.xml +++ b/pydocx/tests/templates/p.xml @@ -1,6 +1,6 @@ - + {% if is_list %} {% if ilvl != None %} From 5dec2fb0162cfd2072dd731656de18af72da6454 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 16:16:22 -0400 Subject: [PATCH 271/404] Bumped to version 0.1.3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 781c2f02..9108c8c6 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def get_description(): setup( name="PyDocX", - version="0.1.2", + version="0.1.3", description="docx (OOXML) to html converter", author="Jason Ward, Sam Portnow", author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", From 8897f03ddf0daa5250fb30c01efd717a9ad916f1 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 16:56:39 -0400 Subject: [PATCH 272/404] bumped to version 0.1.4 --- MANIFEST.in | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 414f7b3d..65584456 100644 --- 
a/MANIFEST.in +++ b/MANIFEST.in @@ -3,3 +3,4 @@ include LICENSE include MANIFEST.in include README.md include pydocx/fixtures/* +include pydocx/tests/templates* diff --git a/setup.py b/setup.py index 9108c8c6..f457c719 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def get_description(): setup( name="PyDocX", - version="0.1.3", + version="0.1.4", description="docx (OOXML) to html converter", author="Jason Ward, Sam Portnow", author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", From 2875d87f5cbbfdd575964aa86225b5b0d815ff93 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 17:00:38 -0400 Subject: [PATCH 273/404] bumped to version 0.1.5 --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 65584456..b26c0b98 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,4 @@ include LICENSE include MANIFEST.in include README.md include pydocx/fixtures/* -include pydocx/tests/templates* +include pydocx/tests/templates/* From a620d81cff2732ccd49d303e47a4e3c5b1fa5e11 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 17:01:39 -0400 Subject: [PATCH 274/404] bumped to version 0.1.6 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f457c719..5dd61588 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def get_description(): setup( name="PyDocX", - version="0.1.4", + version="0.1.6", description="docx (OOXML) to html converter", author="Jason Ward, Sam Portnow", author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", From 89e996f745526217031b3307f95fa4573de15ffd Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 23 May 2013 17:20:59 -0400 Subject: [PATCH 275/404] bumped to version 0.1.7 --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5dd61588..b6e0b9fc 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def get_description(): setup( 
name="PyDocX", - version="0.1.6", + version="0.1.7", description="docx (OOXML) to html converter", author="Jason Ward, Sam Portnow", author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", @@ -32,6 +32,11 @@ def get_description(): platforms=["any"], license="BSD", packages=find_packages(), + package_data={ + 'pydocx': [ + 'tests/templates/*.xml', + ], + }, scripts=[], zip_safe=False, install_requires=[], From bf79516a3778529468846228e14337bbc00da4ec Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 15:08:39 -0400 Subject: [PATCH 276/404] refs #35: updated the readme so PyPi likes it. --- README.md | 235 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 122 insertions(+), 113 deletions(-) diff --git a/README.md b/README.md index de86d68f..642ccaa9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ +====== pydocx ====== -[![Build Status](https://travis-ci.org/OpenScienceFramework/pydocx.png?branch=master)](https://travis-ci.org/OpenScienceFramework/pydocx) +.. image:: https://travis-ci.org/OpenScienceFramework/pydocx.png?branch=master + :align: left + :target: https://travis-ci.org/OpenScienceFramework/pydocx pydocx is a parser that breaks down the elements of a docxfile and converts them into different markup languages. Right now, HTML is supported. Markdown and LaTex @@ -8,161 +11,167 @@ will be available soon. You can extend any of the available parsers to customize to your needs. You can also create your own class that inherits DocxParser to create your own methods for a markup language not yet supported. 
-#Currently Supported - -tables -* nested tables -* rowspans -* colspans -* lists in tables - -lists -* list styles -* nested lists -* list of tables -* list of pragraphs - -justification - -images - -bold - -italics +Currently Supported +################### + +* tables + * nested tables + * rowspans + * colspans + * lists in tables +* lists + * list styles + * nested lists + * list of tables + * list of pragraphs +* justification +* images +* styles + * bold + * italics + * underline + * hyperlinks +* headings + +Usage +##### -underline +DocxParser includes abstracts methods that each parser overwrites to satsify its own needs. The abstract methods are as follows: -hyperlinks +:: -headings + class DocxParser: + @property + def parsed(self): + return self._parsed -#Usage + @property + def escape(self, text): + return text -DocxParser includes abstracts methods that each parser overwrites to satsify its own needs. The abstract methods are as follows: + @abstractmethod + def linebreak(self): + return '' - class DocxParser: + @abstractmethod + def paragraph(self, text): + return text - @property - def parsed(self): - return self._parsed + @abstractmethod + def heading(self, text, heading_level): + return text - @property - def escape(self, text): - return text + @abstractmethod + def insertion(self, text, author, date): + return text - @abstractmethod - def linebreak(self): - return '' + @abstractmethod + def hyperlink(self, text, href): + return text - @abstractmethod - def paragraph(self, text): - return text + @abstractmethod + def image_handler(self, path): + return path - @abstractmethod - def heading(self, text, heading_level): - return text + @abstractmethod + def image(self, path, x, y): + return self.image_handler(path) - @abstractmethod - def insertion(self, text, author, date): - return text + @abstractmethod + def deletion(self, text, author, date): + return text - @abstractmethod - def hyperlink(self, text, href): - return text + @abstractmethod + def 
bold(self, text): + return text - @abstractmethod - def image_handler(self, path): - return path + @abstractmethod + def italics(self, text): + return text - @abstractmethod - def image(self, path, x, y): - return self.image_handler(path) + @abstractmethod + def underline(self, text): + return text - @abstractmethod - def deletion(self, text, author, date): - return text + @abstractmethod + def tab(self): + return True - @abstractmethod - def bold(self, text): - return text + @abstractmethod + def ordered_list(self, text): + return text - @abstractmethod - def italics(self, text): - return text + @abstractmethod + def unordered_list(self, text): + return text - @abstractmethod - def underline(self, text): - return text + @abstractmethod + def list_element(self, text): + return text - @abstractmethod - def tab(self): - return True + @abstractmethod + def table(self, text): + return text + @abstractmethod + def table_row(self, text): + return text - @abstractmethod - def ordered_list(self, text): - return text + @abstractmethod + def table_cell(self, text): + return text - @abstractmethod - def unordered_list(self, text): - return text + @abstractmethod + def page_break(self): + return True - @abstractmethod - def list_element(self, text): - return text + @abstractmethod + def indent(self, text, left='', right='', firstLine=''): + return text - @abstractmethod - def table(self, text): - return text - @abstractmethod - def table_row(self, text): - return text +Docx2Html inherits DocxParser and implements basic HTML handling. Ex. - @abstractmethod - def table_cell(self, text): - return text +:: - @abstractmethod - def page_break(self): - return True + class Docx2Html(DocxParser): - @abstractmethod - def indent(self, text, left='', right='', firstLine=''): - return text + # Escape '&', '<', and '>' so we render the HTML correctly + def escape(self, text): + return xml.sax.saxutils.quoteattr(text)[1:-1] + # return a line break + def linebreak(self, pre=None): + return '
      ' -Docx2Html inherits DocxParser and implements basic HTML handling. Ex. + # add paragraph tags + def paragraph(self, text, pre=None): + return '

      ' + text + '

      ' - class Docx2Html(DocxParser): - def escape(self, text): - return xml.sax.saxutils.quoteattr(text)[1:-1] # Escape '&', '<', and '>' so we - # render the HTML correctly - def linebreak(self, pre=None): - return '
      ' # return a line break +However, let's say you want to add a specific style to your HTML document. In order to do this, you want to make each paragraph a class of type `my_implementation`. Simply extend docx2Html and add what you need. - def paragraph(self, text, pre=None): - return '

      ' + text + '

      ' # add paragraph tags +:: + class My_Implementation_of_Docx2Html(Docx2Html): -However, let's say you want to add a specific style to your HTML document. In order to do this, you want to make each paragraph a class of type "my_implementation". Simply extend docx2Html and add what you need. + def paragraph(self, text, pre = None): + return

      + text + '

      ' - class My_Implementation_of_Docx2Html(Docx2Html): - def paragraph(self, text, pre = None): - return

      + text + '

      ' +OR, let's say FOO is your new favorite markup language. Simply customize your own new parser, overwritting the abstract methods of DocxParser +:: -OR, let's say FOO is your new favorite markup language. Simply customize your own new parser, overwritting the abstract methods of DocxParser + class Docx2Foo(DocxParser): - class Docx2Foo(DocxParser): + # because linebreaks in are denoted by '!!!!!!!!!!!!' with the FOO markup langauge :) + def linebreak(self): + return '!!!!!!!!!!!!' - def linebreak(self): - return '!!!!!!!!!!!!' # because linebreaks in are denoted by '!!!!!!!!!!!!' - # with the FOO markup langauge :) -#Styles +Styles +###### The base parser `Docx2Html` relies on certain css class being set for certain behaviour to occur. Currently these include: From fe0c875806d971e71e8d0795c8d05985f00ba9ff Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 15:09:49 -0400 Subject: [PATCH 277/404] refs #35: hopefully the name change will force github to render with REST --- README.md => README.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename README.md => README.rst (100%) diff --git a/README.md b/README.rst similarity index 100% rename from README.md rename to README.rst From fb471339f3f9afc9d3aeca2dbd9ddf0cb573f294 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 15:11:39 -0400 Subject: [PATCH 278/404] refs #35: Peg PyPi support for 2.6 and 2.7 --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index b6e0b9fc..23be5543 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,9 @@ def get_description(): classifiers=[ "Development Status :: 3 - Alpha", "Programming Language :: Python", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 2 :: Only", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", From 4ae6ee6f6afbf352b5efb09022eee9010715397f Mon Sep 17 
00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 15:23:25 -0400 Subject: [PATCH 279/404] bumped to version 0.1.8 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 23be5543..ccb35074 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def get_description(): setup( name="PyDocX", - version="0.1.7", + version="0.1.8", description="docx (OOXML) to html converter", author="Jason Ward, Sam Portnow", author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", From 766f768944684acb322d014c377eee694a824ebf Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 15:24:53 -0400 Subject: [PATCH 280/404] Fixed the manifest --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index b26c0b98..c3b3f1fb 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,6 @@ include AUTHORS include LICENSE include MANIFEST.in -include README.md +include README.rst include pydocx/fixtures/* include pydocx/tests/templates/* From 6d3a372a7fe77dfd49f5bead99fcd51815edd4bd Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 15:25:53 -0400 Subject: [PATCH 281/404] Fixed a broken filename --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ccb35074..4ba31e1a 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ def get_file(filename): def get_description(): - return get_file('README.md') + return get_file('README.rst') setup( name="PyDocX", From 8fd0b87d210e3ba37171b6fd31a7aef9ab278765 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 28 May 2013 16:02:51 -0400 Subject: [PATCH 282/404] adding test; cleaned up parser --- pydocx/DocxParser.py | 39 ++- pydocx/tests/__init__.py | 118 ++++++- pydocx/tests/test_latex.py | 620 +++++++++++++++++++++++++++++++++++++ 3 files changed, 766 insertions(+), 11 deletions(-) create mode 100644 pydocx/tests/test_latex.py diff --git a/pydocx/DocxParser.py 
b/pydocx/DocxParser.py index 8410c5fd..83911828 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -112,6 +112,7 @@ def find_ancestor_with_tag(self, tag): setattr(_ElementInterface, 'row_index', None) setattr(_ElementInterface, 'column_index', None) setattr(_ElementInterface, 'is_last_text', False) +setattr(_ElementInterface, 'is_last_row_item', False) # End helpers @@ -180,6 +181,12 @@ def _parse_rels_root(self): def __init__(self, *args, **kwargs): self._parsed = '' self.block_text = '' + self.last_row_item = False + self.line_break_in_table = False + self.is_table = False + self.indent_table = False + self.column_index = 0 + self.cols = 0 self.page_width = 0 self._build_data(*args, **kwargs) @@ -254,6 +261,8 @@ def _set_table_attributes(self, el): continue for i, row in enumerate(rows): tcs = self._filter_children(row, ['tc']) + self.cols = len(tcs) + tcs[-1].is_last_row_item = True for j, child in enumerate(tcs): child.row_index = i child.column_index = j @@ -453,6 +462,10 @@ def parse_table_cell(self, el, text): return '' colspan = self.get_colspan(el) rowspan = self._get_rowspan(el, v_merge) + if el.is_last_row_item: + self.last_row_item = True + else: + self.last_row_item = False return self.table_cell(text, colspan, rowspan) def parse_list(self, el, text): @@ -566,7 +579,9 @@ def parse_p(self, el, text): if el.is_list_item: return self.parse_list_item(el, text) if el.is_in_table: + self.is_table = True return self.parse_table_cell_contents(el, text) + self.is_table = False parsed = text # No p tags in li tags if self.list_depth == 0: @@ -816,25 +831,26 @@ def parse_r(self, el, parsed): elif jc.attrib['val'] == 'left': just = 'left' ind = paragraph_tag_property.find('ind') - right = '' - left = '' - firstLine = '' + right = None + left = None + firstLine = None + hanging = None if ind is not None: - right = None - left = None - firstLine = None + if 'hanging' in ind.attrib: + hanging = ind.attrib['hanging'] + hanging = (float(hanging)/20) if 
'right' in ind.attrib: right = ind.attrib['right'] # divide by 20 to get to pt. multiply by (4/3) to get to px - right = (int(right) / 20) * float(4) / float(3) + right = (float(right) / 20) * float(4) / float(3) right = str(right) if 'left' in ind.attrib: left = ind.attrib['left'] - left = (int(left) / 20) * float(4) / float(3) + left = (float(left) / 20) * float(4) / float(3) left = str(left) if 'firstLine' in ind.attrib: firstLine = ind.attrib['firstLine'] - firstLine = (int(firstLine) / 20) * float(4) / float(3) + firstLine = (float(firstLine) / 20) * float(4) / float(3) firstLine = str(firstLine) if jc is not None or ind is not None: t_els = el.find_all('t') @@ -842,8 +858,11 @@ def parse_r(self, el, parsed): if el.is_last_text: block = False self.block_text += text + if el.parent.find('tbl') is not None: + tbl = el.parent.find('tbl') + self.column_index = tbl.find('tc').column text = self.indent(self.block_text, just, - firstLine, left, right) + firstLine, left, right, hanging) self.block_text = '' else: block = True diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index d8247ffb..b8a3acd1 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -3,6 +3,7 @@ from contextlib import contextmanager from pydocx.parsers.Docx2Html import Docx2Html +from pydocx.parsers.Docx2LaTex import Docx2LaTex from pydocx.DocxParser import ( remove_namespaces, # We are only importing this from DocxParse since we have added methods to @@ -33,6 +34,12 @@ ''' % STYLE +BASE_LATEX = r'''\documentclass{article}\usepackage{hyperref} +\usepackage{graphicx}\usepackage{changes}\usepackage{changepage} +\usepackage[paperwidth=612pt]{geometry}\usepackage{hanging} +\usepackage{multirow}\begin{document}''' + "%s" + r'''\end{document} +''' + def assert_html_equal(actual_html, expected_html): assert collapse_html( @@ -42,6 +49,34 @@ def assert_html_equal(actual_html, expected_html): ), actual_html +def assert_latex_equal(actual_latex, expected_latex): + assert 
collapse_latex( + actual_latex, + ) == collapse_latex( + expected_latex + ), actual_latex + + +def collapse_latex(latex): + + def smart_space(match): + # Put a space in between lines, unless exactly one side of the line + # break butts up against a tag. + before = match.group(1) + after = match.group(2) + space = ' ' + return before + space + after + # Replace newlines and their surrounding + # whitespace with a single space (or + # empty string) + latex = re.sub( + r'(>?)\s*\s*(CCC'], + [DXB.p_tag('DDD')], + )) + body = table + xml = DXB.xml(body) + return xml + + +class TableWithListAndParagraph(_LatexTranslationTestCase): + expected_output = r''' + \begin{tabular}{ l } + \pbox{20cm} + {CCC \\ DDD} \\ + \end{tabular} + ''' + + def get_xml(self): + els = [ + DXB.p_tag('CCC'), + DXB.p_tag('DDD'), + ] + td = '' + for el in els: + td += el + table = DXB.table(num_rows=1, num_columns=1, text=chain( + [td], + )) + body = table + xml = DXB.xml(body) + return xml + + +class SimpleListTestCase(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA + \item BBB + \item CCC + \end {enumerate} + ''' + + # Ensure its not failing somewhere and falling back to decimal + numbering_dict = { + '1': { + '0': 'lowerLetter', + } + } + + def get_xml(self): + li_text = [ + ('AAA', 0, 1), + ('BBB', 0, 1), + ('CCC', 0, 1), + ] + lis = '' + for text, ilvl, numId in li_text: + lis += DXB.li(text=text, ilvl=ilvl, numId=numId) + + xml = DXB.xml(lis) + return xml + + +class SingleListItemTestCase(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA + \end {enumerate} + ''' + + # Ensure its not failing somewhere and falling back to decimal + numbering_dict = { + '1': { + '0': 'lowerLetter', + } + } + + def get_xml(self): + li_text = [ + ('AAA', 0, 1), + ] + lis = '' + for text, ilvl, numId in li_text: + lis += DXB.li(text=text, ilvl=ilvl, numId=numId) + + xml = DXB.xml(lis) + return xml + + +class 
ListWithContinuationTestCase(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA \\ BBB + \item CCC + \begin{tabular} {ll} + DDD & EEE \\ + FFF & GGG \\ + \end{tabular} + \item HHH + \end{enumerate} + ''' + + def get_xml(self): + table = DXB.table(num_rows=2, num_columns=2, text=chain( + [DXB.p_tag('DDD')], + [DXB.p_tag('EEE')], + [DXB.p_tag('FFF')], + [DXB.p_tag('GGG')], + )) + tags = [ + DXB.li(text='AAA', ilvl=0, numId=1), + DXB.p_tag('BBB'), + DXB.li(text='CCC', ilvl=0, numId=1), + table, + DXB.li(text='HHH', ilvl=0, numId=1), + ] + body = '' + for el in tags: + body += el + + xml = DXB.xml(body) + return xml + + +class ListWithMultipleContinuationTestCase(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA + \begin{tabular} {l} + BBB\\ + \end{tabular} + \begin{tabular} {l} + CCC\\ + \end{tabular} + \item DDD + \end{enumerate} + ''' + + def get_xml(self): + table1 = DXB.table(num_rows=1, num_columns=1, text=chain( + [DXB.p_tag('BBB')], + )) + table2 = DXB.table(num_rows=1, num_columns=1, text=chain( + [DXB.p_tag('CCC')], + )) + tags = [ + DXB.li(text='AAA', ilvl=0, numId=1), + table1, + table2, + DXB.li(text='DDD', ilvl=0, numId=1), + ] + body = '' + for el in tags: + body += el + + xml = DXB.xml(body) + return xml + + +class MangledIlvlTestCase(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA + \end{enumerate} + \begin{enumerate} + \item BBB + \begin{enumerate} + \item CCC + \end{enumerate} + \end{enumerate} + ''' + + def get_xml(self): + li_text = [ + ('AAA', 0, 2), + ('BBB', 1, 1), + ('CCC', 0, 1), + ] + lis = '' + for text, ilvl, numId in li_text: + lis += DXB.li(text=text, ilvl=ilvl, numId=numId) + + xml = DXB.xml(lis) + return xml + + +class SeperateListsTestCase(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA + \end{enumerate} + \begin{enumerate} + \item BBB + \end{enumerate} + \begin{enumerate} + \item CCC + 
\end{enumerate} + ''' + + def get_xml(self): + li_text = [ + ('AAA', 0, 2), + # Because AAA and CCC are part of the same list (same list id) + # and BBB is different, these need to be split into three + # lists (or lose everything from BBB and after. + ('BBB', 0, 1), + ('CCC', 0, 2), + ] + lis = '' + for text, ilvl, numId in li_text: + lis += DXB.li(text=text, ilvl=ilvl, numId=numId) + + xml = DXB.xml(lis) + return xml + + +class InvalidIlvlOrderTestCase(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA + \begin{enumerate} + \item BBB + \begin{enumerate} + \item CCC + \end {enumerate} + \end{enumerate} + \end{enumerate} + ''' + + def get_xml(self): + tags = [ + DXB.li(text='AAA', ilvl=1, numId=1), + DXB.li(text='BBB', ilvl=3, numId=1), + DXB.li(text='CCC', ilvl=2, numId=1), + ] + body = '' + for el in tags: + body += el + + xml = DXB.xml(body) + return xml + + +class NonStandardTextTagsTestCase(_LatexTranslationTestCase): + expected_output = r''' + \added[id=, remark=]{insert} smarttag + ''' + + def get_xml(self): + run_tags = [DXB.r_tag(i) for i in 'insert '] + insert_tag = DXB.insert_tag(run_tags) + run_tags = [DXB.r_tag(i) for i in 'smarttag'] + smart_tag = DXB.smart_tag(run_tags) + + run_tags = [insert_tag, smart_tag] + body = DXB.p_tag(run_tags) + xml = DXB.xml(body) + return xml + + +class RTagWithNoText(_LatexTranslationTestCase): + expected_output = '' + + def get_xml(self): + p_tag = DXB.p_tag(None) # No text + run_tags = [p_tag] + # The bug is only present in a hyperlink + run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] + body = DXB.p_tag(run_tags) + + xml = DXB.xml(body) + return xml + + +class DeleteTagInList(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA \\ + \deleted[id=, remark=]{BBB} + \item CCC + \end{enumerate} + ''' + + def get_xml(self): + delete_tags = DXB.delete_tag(['BBB']) + p_tag = DXB.p_tag([delete_tags]) + + body = DXB.li(text='AAA', ilvl=0, numId=0) + 
body += p_tag + body += DXB.li(text='CCC', ilvl=0, numId=0) + + xml = DXB.xml(body) + return xml + + +class InsertTagInList(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA \\ + \added[id=,remark=]{BBB} + \item CCC + \end{enumerate} + ''' + + def get_xml(self): + run_tags = [DXB.r_tag(i) for i in 'BBB'] + insert_tags = DXB.insert_tag(run_tags) + p_tag = DXB.p_tag([insert_tags]) + + body = DXB.li(text='AAA', ilvl=0, numId=0) + body += p_tag + body += DXB.li(text='CCC', ilvl=0, numId=0) + + xml = DXB.xml(body) + return xml + + +class SmartTagInList(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA \\ + BBB + \item CCC + \end{enumerate} + ''' + + def get_xml(self): + run_tags = [DXB.r_tag(i) for i in 'BBB'] + smart_tag = DXB.smart_tag(run_tags) + p_tag = DXB.p_tag([smart_tag]) + + body = DXB.li(text='AAA', ilvl=0, numId=0) + body += p_tag + body += DXB.li(text='CCC', ilvl=0, numId=0) + + xml = DXB.xml(body) + return xml + + +class SingleListItem(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA + \end{enumerate}''' + '\n' + 'BBB' + + numbering_dict = { + '1': { + '0': 'lowerLetter', + } + } + + def get_xml(self): + li = DXB.li(text='AAA', ilvl=0, numId=1) + p_tags = [ + DXB.p_tag('BBB'), + ] + body = li + for p_tag in p_tags: + body += p_tag + xml = DXB.xml(body) + return xml + + +class SimpleTableTest(_LatexTranslationTestCase): + expected_output = r''' + \begin{tabular} { lll } + Blank & + Column 1 & + Column 2 \\ + Row 1 & + First & + Second \\ + Row 2 & + Third & + Fourth \\ + \end{tabular}''' + + def get_xml(self): + table = DXB.table(num_rows=3, num_columns=3, text=chain( + [DXB.p_tag('Blank')], + [DXB.p_tag('Column 1')], + [DXB.p_tag('Column 2')], + [DXB.p_tag('Row 1')], + [DXB.p_tag('First')], + [DXB.p_tag('Second')], + [DXB.p_tag('Row 2')], + [DXB.p_tag('Third')], + [DXB.p_tag('Fourth')], + ), merge=True) + body = table + + xml = DXB.xml(body) + return 
xml + + +class MissingIlvl(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA \\ + BBB + \item CCC + \end{enumerate} + ''' + + def get_xml(self): + li_text = [ + ('AAA', 0, 1), + ('BBB', None, 1), # Because why not. + ('CCC', 0, 1), + ] + lis = '' + for text, ilvl, numId in li_text: + lis += DXB.li(text=text, ilvl=ilvl, numId=numId) + body = lis + xml = DXB.xml(body) + return xml + + +class SDTTestCase(_LatexTranslationTestCase): + expected_output = r''' + \begin{enumerate} + \item AAA \\ + BBB + \item CCC + \end{enumerate} + ''' + + def get_xml(self): + body = '' + body += DXB.li(text='AAA', ilvl=0, numId=0) + body += DXB.sdt_tag(p_tag=DXB.p_tag(text='BBB')) + body += DXB.li(text='CCC', ilvl=0, numId=0) + + xml = DXB.xml(body) + return xml + +#TODO: WORKOUT IMAGE CONVERSIONS +#TODO: IMAGE NOSIZE TESTCASE From b8efd3932146944f429fe5368e7d91a58db785a5 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 28 May 2013 16:04:08 -0400 Subject: [PATCH 283/404] updating parser --- pydocx/parsers/Docx2Html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 782be941..bf553c47 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -148,7 +148,7 @@ def table_cell(self, text, col='', row=''): def page_break(self): return '
      ' - def indent(self, text, just='', firstLine='', left='', right=''): + def indent(self, text, just='', firstLine='', left='', right='', hanging = ''): slug = ' Date: Tue, 28 May 2013 16:16:33 -0400 Subject: [PATCH 284/404] refs #34: updated the tests for expected behaviour for base 64 encoding of image data. --- pydocx/tests/__init__.py | 10 +++++++++- pydocx/tests/test_docx.py | 37 +++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index 237a9406..837fb6db 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -92,6 +92,9 @@ def _build_data( styles_dict=None, *args, **kwargs): self._test_rels_dict = rels_dict + if rels_dict: + for value in rels_dict.values(): + self._image_data['word/%s' % value] = 'word/%s' % value if numbering_dict is None: numbering_dict = {} self.numbering_dict = numbering_dict @@ -165,7 +168,12 @@ def test_expected_output(self): tree = self.get_xml() # Verify the final output. 
- html = self.parser( + parser = self.parser + + def image_handler(self, src, *args, **kwargs): + return src + parser.image_handler = image_handler + html = parser( document_xml=tree, rels_dict=self.relationship_dict, numbering_dict=self.numbering_dict, diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index 72c70157..c57c5b6a 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -1,13 +1,14 @@ -#import mock -import tempfile +import base64 import shutil +import tempfile + from os import path -#from zipfile import ZipFile + from nose.plugins.skip import SkipTest -#from nose.tools import assert_raises from pydocx.tests import assert_html_equal, BASE_HTML from pydocx.parsers.Docx2Html import Docx2Html +from pydocx.DocxParser import ZipFile def convert(path): @@ -378,6 +379,22 @@ def test_split_headers(): ''') +def get_image_data(docx_file_path, image_name): + """ + Return base 64 encoded data for the image_name that is stored in the + docx_file_path. + """ + with ZipFile(docx_file_path) as f: + images = [ + e for e in f.infolist() + if e.filename == 'word/media/%s' % image_name + ] + if not images: + raise AssertionError('%s not in %s' % (image_name, docx_file_path)) + data = f.read(images[0].filename) + return base64.b64encode(data) + + def test_has_image(): filename = 'has_image.docx' file_path = path.join( @@ -389,13 +406,13 @@ def test_has_image(): new_file_path, directory_path = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) + image_data = get_image_data(new_file_path, 'image1.gif') assert_html_equal(actual_html, BASE_HTML % '''

      AAA - +

      - ''' % directory_path) - assert path.isfile('%s/word/media/image1.gif' % directory_path) + ''' % image_data) def test_local_dpi(): @@ -410,10 +427,10 @@ def test_local_dpi(): ) new_file_path, directory_path = _copy_file_to_tmp_dir(file_path, filename) actual_html = convert(new_file_path) + image_data = get_image_data(new_file_path, 'image1.jpeg') assert_html_equal(actual_html, BASE_HTML % ''' -

      - ''' % directory_path) - assert path.isfile('%s/word/media/image1.jpeg' % directory_path) +

      + ''' % image_data) def test_has_image_using_image_handler(): From 5f4f6498e6decfdb99755c7f2fa6a755582ce852 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 16:17:34 -0400 Subject: [PATCH 285/404] refs #34: store and pass around the image data, instead of the image path --- pydocx/DocxParser.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index ce01f784..f2a39550 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -130,7 +130,6 @@ class DocxParser: def _build_data(self, path, *args, **kwargs): with ZipFile(path) as f: - self.zip_path, _ = os.path.split(f.filename) self.document_text = f.read('word/document.xml') self.styles_text = f.read('word/styles.xml') try: @@ -151,10 +150,7 @@ def _build_data(self, path, *args, **kwargs): if e.filename.startswith('word/media/') ] for e in zipped_image_files: - f.extract( - e.filename, - self.zip_path, - ) + self._image_data[e.filename] = f.read(e.filename) self.root = ElementTree.fromstring( remove_namespaces(self.document_text), # remove the namespaces @@ -193,6 +189,7 @@ def __init__(self, *args, **kwargs): self._parsed = '' self.block_text = '' self.page_width = 0 + self._image_data = {} self._build_data(*args, **kwargs) def add_parent(el): # if a parent, make that an attribute @@ -780,12 +777,12 @@ def parse_image(self, el): if not src: return '' src = os.path.join( - self.zip_path, 'word', src, ) - src = self.escape(src) - return self.image(src, x, y) + if src in self._image_data: + return self.image(self._image_data[src], x, y) + return '' def _is_style_on(self, el): """ From 82735a58f4ef5967cdb50d7cd712bfdfdae7e60d Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 16:17:55 -0400 Subject: [PATCH 286/404] refs #34: image handler now deals with image data and base 64 encodes it instead. 
--- pydocx/parsers/Docx2Html.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 525bdd9c..8f6bf5ef 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -1,7 +1,8 @@ -from pydocx.DocxParser import DocxParser - +import base64 import xml.sax.saxutils +from pydocx.DocxParser import DocxParser + class Docx2Html(DocxParser): @@ -69,11 +70,15 @@ def hyperlink(self, text, href): 'text': text, } - def image_handler(self, path): - return path + def image_handler(self, image_data): + b64_encoded_src = 'data:image/png;base64,%s' % ( + base64.b64encode(image_data), + ) + b64_encoded_src = self.escape(b64_encoded_src) + return b64_encoded_src - def image(self, path, x, y): - src = self.image_handler(path) + def image(self, image_data, x, y): + src = self.image_handler(image_data) if not src: return '' if all([x, y]): From 5056af45abeba93ea3c486a177281d7ed274639f Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 16:19:53 -0400 Subject: [PATCH 287/404] refs #34: since we no longer write the image to disk, we no longer need to copy the file to a temp dir. --- pydocx/tests/test_docx.py | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index c57c5b6a..543b34a0 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -1,6 +1,4 @@ import base64 -import shutil -import tempfile from os import path @@ -354,26 +352,15 @@ def test_headers(): ''') -def _copy_file_to_tmp_dir(file_path, filename): - # Since the images need to be extracted from the docx, copy the file to a - # temp directory so we do not clutter up repo. 
- directory_path = tempfile.mkdtemp() - new_file_path = path.join(directory_path, filename) - shutil.copyfile(file_path, new_file_path) - return new_file_path, directory_path - - def test_split_headers(): - filename = 'split_header.docx' file_path = path.join( path.abspath(path.dirname(__file__)), '..', 'fixtures', 'split_header.docx', ) - new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) - actual_html = convert(new_file_path) + actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

      AAA

      BBB

      CCC

      ''') @@ -396,17 +383,15 @@ def get_image_data(docx_file_path, image_name): def test_has_image(): - filename = 'has_image.docx' file_path = path.join( path.abspath(path.dirname(__file__)), '..', 'fixtures', 'has_image.docx', ) - new_file_path, directory_path = _copy_file_to_tmp_dir(file_path, filename) - actual_html = convert(new_file_path) - image_data = get_image_data(new_file_path, 'image1.gif') + actual_html = convert(file_path) + image_data = get_image_data(file_path, 'image1.gif') assert_html_equal(actual_html, BASE_HTML % '''

      AAA @@ -418,16 +403,14 @@ def test_has_image(): def test_local_dpi(): # The image in this file does not have a set height or width, show that the # html will generate without it. - filename = 'localDpi.docx' file_path = path.join( path.abspath(path.dirname(__file__)), '..', 'fixtures', 'localDpi.docx', ) - new_file_path, directory_path = _copy_file_to_tmp_dir(file_path, filename) - actual_html = convert(new_file_path) - image_data = get_image_data(new_file_path, 'image1.jpeg') + actual_html = convert(file_path) + image_data = get_image_data(file_path, 'image1.jpeg') assert_html_equal(actual_html, BASE_HTML % '''

      ''' % image_data) @@ -435,18 +418,16 @@ def test_local_dpi(): def test_has_image_using_image_handler(): raise SkipTest('This needs to be converted to an xml test') - filename = 'has_image.docx' file_path = path.join( path.abspath(path.dirname(__file__)), '..', 'fixtures', 'has_image.docx', ) - new_file_path, _ = _copy_file_to_tmp_dir(file_path, filename) def image_handler(*args, **kwargs): return 'test' - actual_html = convert(new_file_path) + actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

      AAA

      ''') From 6b63d895ec3a985d28f401baa9cd34577daf3999 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 28 May 2013 16:34:16 -0400 Subject: [PATCH 288/404] updating --- pydocx/parsers/Docx2LaTex.py | 177 ++++++++++++++++++----------------- 1 file changed, 92 insertions(+), 85 deletions(-) diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index 2e5ea106..fcb11df3 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -4,11 +4,7 @@ class Docx2LaTex(DocxParser): def __init__(self, *args, **kwargs): - self.rows = 0 - self.cols = 0 - self.current_col = 0 - self.table_info = [] - self.columns = {} + self.table_info = {} super(Docx2LaTex, self).__init__(*args, **kwargs) @property @@ -26,50 +22,50 @@ def escape(self, text): return text def linebreak(self): - return '\\\\ ' + return '\n\n' def bold(self, text): - return '\\textbf {%s}' % text + return r'\textbf{%s}' % text def italics(self, text): - return '\\emph {%s}' % text + return r'\emph{%s}' % text def underline(self, text): - return '\\underline {%s}' % text + return r'\underline{%s}' % text def list_element(self, text): - return '\\item {%s}' % text + return r'\item %s' % text + '\n' def ordered_list(self, text, list_style): - return '\\begin{itemize}{%s}\\end{{enumerate}}' % text + return r'\begin{enumerate} %s \end{enumerate}' % text def unordered_list(self, text): - return '\\begin{itemize}{%s}\\end{{itemize}}' % text + return r'\begin{itemize} %s \end{itemize}' % text def head(self): - return "\\documentclass{article}\\usepackage{hyperref}"\ - "\\usepackage{graphicx}\\usepackage{changes}" \ - "\\usepackage{changepage} "\ - "\\usepackage[paperwidth=%spt]{geometry}" \ - "\\usepackage{hanging}" % self.page_width + return r'''\documentclass{article}\usepackage{hyperref} + \usepackage{graphicx}\usepackage{changes} + \usepackage{changepage} + \usepackage[paperwidth=%spt]{geometry} + \usepackage{hanging}\usepackage{multirow}''' % self.page_width def 
paragraph(self, text, pre=None): - return '\\par{'+text+'} ' + return text + '\n\n' def heading(self, text, heading_value): #TODO figure out what to do for headings return text def insertion(self, text, author, date): - return '\\added[id='+author+',remark='+date+']{%s}' % text + return r'\added[id='+author+',remark='+date+']{%s}' % text def hyperlink(self, text, href): if text == '': return '' - return '\\href{%(href)s}{%(text)s}' % { + return r'\href{%(href)s}{%(text)s}' % { 'href': href, 'text': text, - } + } def image_handler(self, path): return path @@ -85,12 +81,12 @@ def image(self, path, x, y): y = float(y) x = x * float(3) / float(4) y = y * float(3) / float(4) - return '\\includegraphics[height=%spt, width=%spt] {%s}' % ( + return r'\includegraphics[height=%spt, width=%spt]{%s}' % ( y, x, src) else: - return '\\includegraphics {%s}' % src + return r'\includegraphics {%s}' % src def tab(self): return '\\qquad ' @@ -99,7 +95,6 @@ def table(self, text): center = False right = False setup_cols = '' - print self.cols for i in range(self.cols): for column in self.table_info: if column['Column'] == i and column['justify'] == 'center': @@ -115,77 +110,89 @@ def table(self, text): else: setup_cols += 'l' self.table_info = [] - return '\\\\\\begin{tabular} {%s} %s \\end{tabular}\\\\ ' \ - % (setup_cols, text) + return r'\begin{tabular}{%s}' % setup_cols\ + + '\n'+ r'%s\end{tabular}'\ + % text + '\n' def table_row(self, text): - return '%s \\\\ ' % text + return text def table_cell(self, text, col='', row=''): + slug = '' + if col: + slug += r'\multicolumn{%s}{*}' % col + if row: + slug += r'\multirow{%s}{*}' % row + if self.line_break_in_table: + slug += r'\pbox{20cm}{' + text + '}' + else: + slug += text + if self.last_row_item: + slug += r' \\' + '\n' + return slug + self.line_break_in_table = False return '%s & ' % text def page_break(self): - return '\\newpage ' - - def indent_table(self, just='', firstLine='', left='', right='', column=0): - self.columns = {} 
- self.columns['Column'] = column - self.columns['justify'] = just - if self.columns not in self.table_info: - self.table_info.append(self.columns) - return '' + return r'\newpage ' def indent(self, text, just='', firstLine='', - left='', right='', hanging=0): - raggedright = False - raggedleft = False - center = False - slug = '{' - if hanging: - slug += '\\begin{hangpara}{%spt}{1} ' % (hanging) - if left and not right: - left = float(left) - left = left * float(3) / float(4) - slug += '\\begin{adjustwidth}{}{%spt}' % (left) - if right and not left: - right = float(right) - right = right * float(3) / float(4) - slug += '\\begin{adjustwidth}{%spt}{}' % (right) - if right and left: - left = float(left) - right = float(right) - left = left * float(3) / float(4) - right = right * float(3) / float(4) - slug += '\\begin{adjustwidth}{%spt}{%spt}' % (left, right) - if firstLine: - slug += '\\setlength{\\parindent}{'+firstLine+'pt}\\indent ' - if just: - if just == 'left': - raggedright = True - slug += '\\begin{flushright} ' - elif just == 'center': - center = True - slug += '\\begin{center} ' - elif just == 'right': - raggedleft = True - slug += '\\begin{flushleft} ' - slug += text - if left or right: - slug += '\\end{adjustwidth}' - if hanging: - slug += '\\end{hangpara}' - if raggedright: - slug += '\\end{flushright}' - if center: - slug += '\\end{center}' - if raggedleft: - slug += '\\end{flushleft}' - slug += '}' - return slug - #TODO left and right + left='', right='', hanging= ''): + if not self.indent_table: + raggedright = False + raggedleft = False + center = False + slug = '' + if hanging: + return r'\begin{hangparas}{%spt}{1} %s \end{hangparas}' % (hanging, text) + if right and left: + left = float(left) + right = float(right) + left = left * float(3) / float(4) + right = right * float(3) / float(4) + slug += r'\begin{adjustwidth}{%spt}{%spt}' % (left, right) + elif left: + left = float(left) + left = left * float(3) / float(4) + slug += 
r'\begin{adjustwidth}{}{%spt}' % (left) + elif right: + right = float(right) + right = right * float(3) / float(4) + slug += r'\begin{adjustwidth}{%spt}{}' % (right) + if firstLine: + slug += r'\setlength{\parindent}{'+firstLine+r'pt}\indent ' + if just: + if just == 'left': + raggedright = True + slug += r'\begin{flushright} ' + elif just == 'center': + center = True + slug += r'\begin{center} ' + elif just == 'right': + raggedleft = True + slug += r'\begin{flushleft} ' + slug += text + if left or right: + slug += r'\end{adjustwidth}' + if raggedright: + slug += r'\end{flushright}' + if center: + slug += r'\end{center}' + if raggedleft: + slug += r'\end{flushleft}' + return slug + else: + self.columns = {} + self.columns['Column'] = self.column_index + self.columns['justify'] = just + if self.columns not in self.table_info: + self.table_info.append(self.columns) + return '' def break_tag(self): - return '' + if self.is_table: + self.line_break_in_table = True + return r'\\' def deletion(self, text, author, date): - return '\\deleted[id='+author+',remark='+date+']{%s}' % text + return r'\deleted[id='+author+',remark='+date+']{%s}' % text From 9370edd750880e8762def7ff18ec37fa7be06788 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 28 May 2013 16:40:52 -0400 Subject: [PATCH 289/404] flake8 --- pydocx/parsers/Docx2Html.py | 3 ++- pydocx/parsers/Docx2LaTex.py | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 24bf35f3..c49be709 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -146,7 +146,8 @@ def table_cell(self, text, col='', row=''): def page_break(self): return '
      ' - def indent(self, text, just='', firstLine='', left='', right='', hanging = ''): + def indent(self, text, just='', firstLine='', left='', + right='', hanging=''): slug = ' Date: Tue, 28 May 2013 17:19:17 -0400 Subject: [PATCH 290/404] refs #37: Added tests showing what should happen with upper roman numerals if a flag is passed in --- pydocx/tests/__init__.py | 2 ++ pydocx/tests/test_docx.py | 9 +++---- pydocx/tests/test_xml.py | 56 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index 237a9406..a8295240 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -145,6 +145,7 @@ class _TranslationTestCase(TestCase): run_expected_output = True parser = XMLDocx2Html use_base_html = True + convert_root_level_upper_roman = False def get_xml(self): raise NotImplementedError() @@ -166,6 +167,7 @@ def test_expected_output(self): # Verify the final output. html = self.parser( + convert_root_level_upper_roman=self.convert_root_level_upper_roman, document_xml=tree, rels_dict=self.relationship_dict, numbering_dict=self.numbering_dict, diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index 72c70157..612826e2 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -10,8 +10,8 @@ from pydocx.parsers.Docx2Html import Docx2Html -def convert(path): - return Docx2Html(path).parsed +def convert(path, *args, **kwargs): + return Docx2Html(path, *args, **kwargs).parsed def test_extract_html(): @@ -568,14 +568,13 @@ def test_lists_with_styles(): def test_list_to_header(): - raise SkipTest('This test is not yet passing') file_path = path.join( path.abspath(path.dirname(__file__)), '..', 'fixtures', 'list_to_header.docx', ) - actual_html = convert(file_path) + actual_html = convert(file_path, convert_root_level_upper_roman=True) # It should be noted that list item `GGG` is upper roman in the word # document to show that only 
top level upper romans get converted. assert_html_equal(actual_html, BASE_HTML % ''' @@ -590,7 +589,7 @@ def test_list_to_header():

      EEE

      1. FFF -
          +
          1. GGG
          diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 14864b02..4cb5686c 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -949,3 +949,59 @@ def get_xml(self): xml = DXB.xml(body) return xml + + +class RomanNumeralToHeadingTestCase(_TranslationTestCase): + convert_root_level_upper_roman = True + numbering_dict = { + '1': { + '0': 'upperRoman', + '1': 'decimal', + '2': 'upperRoman', + }, + '2': { + '0': 'upperRoman', + '1': 'decimal', + '2': 'upperRoman', + }, + '3': { + '0': 'upperRoman', + '1': 'decimal', + '2': 'upperRoman', + }, + } + expected_output = ''' +

          AAA

          +
            +
          1. BBB
          2. +
          +

          CCC

          +
            +
          1. DDD
          2. +
          +

          EEE

          +
            +
          1. FFF +
              +
            1. GGG
            2. +
            +
          2. +
          + ''' + + def get_xml(self): + li_text = [ + ('AAA', 0, 1), + ('BBB', 1, 1), + ('CCC', 0, 2), + ('DDD', 1, 2), + ('EEE', 0, 3), + ('FFF', 1, 3), + ('GGG', 2, 3), + ] + body = '' + for text, ilvl, numId in li_text: + body += DXB.li(text=text, ilvl=ilvl, numId=numId) + + xml = DXB.xml(body) + return xml From eab44aba219e9a37c158ed3d321c3b7f534a3f94 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 28 May 2013 17:20:05 -0400 Subject: [PATCH 291/404] refs #37: it is now possible to convert root level upper roman lists to headers --- pydocx/DocxParser.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index ce01f784..e6c99ec5 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -194,6 +194,10 @@ def __init__(self, *args, **kwargs): self.block_text = '' self.page_width = 0 self._build_data(*args, **kwargs) + self.convert_root_level_upper_roman = kwargs.get( + 'convert_root_level_upper_roman', + False, + ) def add_parent(el): # if a parent, make that an attribute for child in el.getchildren(): @@ -355,6 +359,42 @@ def _set_headers(self, elements): # Prime the heading_level element.heading_level = headers[style.lower()] + def _convert_upper_roman(self, body): + if not self.convert_root_level_upper_roman: + return + first_root_list_items = [ + # Only root level elements. 
+ el for el in body.getchildren() + # And only first_list_items + if el.is_first_list_item + ] + visited_num_ids = [] + for root_list_item in first_root_list_items: + if root_list_item.num_id in visited_num_ids: + continue + visited_num_ids.append(root_list_item.num_id) + lst_style = self.get_list_style( + root_list_item.num_id.num_id, + root_list_item.ilvl, + ) + if lst_style != 'upperRoman': + continue + ilvl = min( + el.ilvl for el in body.find_all('p') + if el.num_id == root_list_item.num_id + ) + root_upper_roman_list_items = [ + el for el in body.find_all('p') + if el.num_id == root_list_item.num_id and + el.ilvl == ilvl + ] + for list_item in root_upper_roman_list_items: + list_item.is_list_item = False + list_item.is_first_list_item = False + list_item.is_last_list_item = False + + list_item.heading_level = 'h2' + def _set_next(self, body): def _get_children_with_content(el): # We only care about children if they have text in them. @@ -409,6 +449,7 @@ def parse_begin(self, el): child for child in body.find_all('p') ] self._set_headers(p_elements) + self._convert_upper_roman(body) self._set_next(body) self._parsed += self.parse(el) From f0842d925cc5be46204986372600453822e04d7a Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 29 May 2013 13:43:54 -0400 Subject: [PATCH 292/404] refs #34: passed along the filename, correctly created the src for image (using the correct file extension) --- pydocx/DocxParser.py | 7 ++++--- pydocx/parsers/Docx2Html.py | 10 ++++++---- pydocx/tests/test_docx.py | 4 ++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index f2a39550..de65df1b 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -781,7 +781,8 @@ def parse_image(self, el): src, ) if src in self._image_data: - return self.image(self._image_data[src], x, y) + filename = os.path.split(src)[-1] + return self.image(self._image_data[src], filename, x, y) return '' def _is_style_on(self, el): @@ -926,8 
+927,8 @@ def image_handler(self, path): return path @abstractmethod - def image(self, path, x, y): - return self.image_handler(path) + def image(self, data, filename, x, y): + return self.image_handler(data) @abstractmethod def deletion(self, text, author, date): diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 8f6bf5ef..73b3c9b6 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -70,15 +70,17 @@ def hyperlink(self, text, href): 'text': text, } - def image_handler(self, image_data): - b64_encoded_src = 'data:image/png;base64,%s' % ( + def image_handler(self, image_data, filename): + extension = filename.split('.')[-1].lower() + b64_encoded_src = 'data:image/%s;base64,%s' % ( + extension, base64.b64encode(image_data), ) b64_encoded_src = self.escape(b64_encoded_src) return b64_encoded_src - def image(self, image_data, x, y): - src = self.image_handler(image_data) + def image(self, image_data, filename, x, y): + src = self.image_handler(image_data, filename) if not src: return '' if all([x, y]): diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index 543b34a0..211c750b 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -395,7 +395,7 @@ def test_has_image(): assert_html_equal(actual_html, BASE_HTML % '''

          AAA - +

          ''' % image_data) @@ -412,7 +412,7 @@ def test_local_dpi(): actual_html = convert(file_path) image_data = get_image_data(file_path, 'image1.jpeg') assert_html_equal(actual_html, BASE_HTML % ''' -

          +

          ''' % image_data) From 38c6d4809515b75174689288384f2906ad2d1097 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 29 May 2013 13:53:43 -0400 Subject: [PATCH 293/404] refs #37: updates based on code review --- README.rst | 5 +++++ pydocx/DocxParser.py | 17 ++++++++++------- pydocx/tests/__init__.py | 5 +++++ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 642ccaa9..68e90b59 100644 --- a/README.rst +++ b/README.rst @@ -182,3 +182,8 @@ The base parser `Docx2Html` relies on certain css class being set for certain be * class `pydocx-left` -> Aligns the text to the left. * class `pydocx-comment` -> Turns the text blue. * class `pydocx-underline` -> Underlines the text. + +Optional Arguments +################## + +You can pass in `convert_root_level_upper_roman=True` to the parser and it will convert all root level upper roman lists to headings instead. diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index e6c99ec5..0e4737c4 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -27,6 +27,7 @@ 'tbl', 'sdt', ) +UPPER_ROMAN_TO_HEADING_VALUE = 'h2' def remove_namespaces(document): # remove namespaces @@ -189,15 +190,17 @@ def _parse_rels_root(self): rels_dict[rId] = target return rels_dict - def __init__(self, *args, **kwargs): + def __init__( + self, + path, + convert_root_level_upper_roman=False, + *args, + **kwargs): self._parsed = '' self.block_text = '' self.page_width = 0 - self._build_data(*args, **kwargs) - self.convert_root_level_upper_roman = kwargs.get( - 'convert_root_level_upper_roman', - False, - ) + self._build_data(path, *args, **kwargs) + self.convert_root_level_upper_roman = convert_root_level_upper_roman def add_parent(el): # if a parent, make that an attribute for child in el.getchildren(): @@ -393,7 +396,7 @@ def _convert_upper_roman(self, body): list_item.is_first_list_item = False list_item.is_last_list_item = False - list_item.heading_level = 'h2' + list_item.heading_level = 
UPPER_ROMAN_TO_HEADING_VALUE def _set_next(self, body): def _get_children_with_content(el): diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index a8295240..00f52b59 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -84,8 +84,13 @@ class XMLDocx2Html(Docx2Html): Create the object without passing in a path to the document, set them manually. """ + def __init__(self, *args, **kwargs): + # Pass in nothing for the path + super(XMLDocx2Html, self).__init__(path=None, *args, **kwargs) + def _build_data( self, + path, document_xml=None, rels_dict=None, numbering_dict=None, From 8a649f1a3152d24b2f382df4c4d693af1e0ad1fb Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 29 May 2013 14:07:12 -0400 Subject: [PATCH 294/404] bumped to version 0.2.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4ba31e1a..7706c81e 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def get_description(): setup( name="PyDocX", - version="0.1.8", + version="0.2.0", description="docx (OOXML) to html converter", author="Jason Ward, Sam Portnow", author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", From dcfb4b455c31da00d80055a68c852d2146edf9ba Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 30 May 2013 12:14:38 -0400 Subject: [PATCH 295/404] some changes to tables --- pydocx/DocxParser.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 51625ac9..be783f43 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -273,11 +273,12 @@ def _set_table_attributes(self, el): continue for i, row in enumerate(rows): tcs = self._filter_children(row, ['tc']) - self.cols = len(tcs) tcs[-1].is_last_row_item = True for j, child in enumerate(tcs): child.row_index = i child.column_index = j + if self.cols <= j: + self.cols = j v_merge = child.find_first('vMerge') if ( v_merge is not None and @@ -883,9 +884,10 @@ def 
parse_r(self, el, parsed): if el.is_last_text: block = False self.block_text += text - if el.parent.find('tbl') is not None: - tbl = el.parent.find('tbl') - self.column_index = tbl.find('tc').column + if el.find_ancestor_with_tag('tc') is not None: + self.indent_table = True + tc = el.find_ancestor_with_tag('tc') + self.column_index = tc.column_index text = self.indent(self.block_text, just, firstLine, left, right, hanging) self.block_text = '' From 4c15915e7c81cc61ad8882abd0d9149bae2355b6 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 30 May 2013 12:15:06 -0400 Subject: [PATCH 296/404] some changes to tables --- pydocx/parsers/Docx2LaTex.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index a180f636..db0af3ee 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -4,12 +4,12 @@ class Docx2LaTex(DocxParser): def __init__(self, *args, **kwargs): - self.table_info = {} + self.table_info = [] super(Docx2LaTex, self).__init__(*args, **kwargs) @property def parsed(self): - content = "%(head)s\\begin{document}%(content)s\\end{document}" % { + content = r"%(head)s\begin{document}%(content)s\end{document}" % { 'head': self.head(), 'content': self._parsed} return unicode(content) @@ -47,7 +47,8 @@ def head(self): \usepackage{graphicx}\usepackage{changes} \usepackage{changepage} \usepackage[paperwidth=%spt]{geometry} - \usepackage{hanging}\usepackage{multirow}''' % self.page_width + \usepackage{hanging}\usepackage{multirow} + \usepackage{pbox}''' % self.page_width def paragraph(self, text, pre=None): return text + '\n\n' @@ -95,11 +96,11 @@ def table(self, text): center = False right = False setup_cols = '' - for i in range(self.cols): + for i in range(self.cols + 1): for column in self.table_info: - if column['Column'] == i and column['justify'] == 'center': + if column['Column'] == i and column['justify'] == 'center': center = True - 
elif column['Column'] == i and column['justify'] == 'right': + elif column['Column'] == i and column['justify'] == 'right': right = True if center is True: setup_cols += 'c' @@ -110,7 +111,7 @@ def table(self, text): else: setup_cols += 'l' self.table_info = [] - return r'\begin{tabular}{%s}' % setup_cols\ + return '\n' + r'\begin{tabular}{%s}' % setup_cols\ + '\n' + r'%s\end{tabular}'\ % text + '\n' @@ -120,11 +121,12 @@ def table_row(self, text): def table_cell(self, text, col='', row=''): slug = '' if col: - slug += r'\multicolumn{%s}{*}' % col + slug += r'\multicolumn{%s}{l}' % col if row: slug += r'\multirow{%s}{*}' % row if self.line_break_in_table: - slug += r'\pbox{20cm}{' + text + '}' + slug += '{' + text + '}' +# slug += r'\pbox{20cm}{' + text + '}' else: slug += text if self.last_row_item: @@ -145,7 +147,7 @@ def indent(self, text, just='', firstLine='', slug = '' if hanging: return r'\begin{hangparas}{%spt}{1} %s ' \ - r'\end{hangparas}' % (hanging, text) + r'\end{hangparas}' % (hanging, text) + '\n' if right and left: left = float(left) right = float(right) @@ -188,12 +190,12 @@ def indent(self, text, just='', firstLine='', self.columns['justify'] = just if self.columns not in self.table_info: self.table_info.append(self.columns) - return '' + return text def break_tag(self): if self.is_table: self.line_break_in_table = True - return r'\\' + return '' def deletion(self, text, author, date): return r'\deleted[id='+author+',remark='+date+']{%s}' % text From 0d11f13baf01584acbbb48f5d66a6c84b77f3f9f Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 30 May 2013 12:20:07 -0400 Subject: [PATCH 297/404] flake8 --- pydocx/parsers/Docx2LaTex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index db0af3ee..3e316f58 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -98,9 +98,9 @@ def table(self, text): setup_cols = '' for i in range(self.cols + 1): 
for column in self.table_info: - if column['Column'] == i and column['justify'] == 'center': + if column['Column'] == i and column['justify'] == 'center': center = True - elif column['Column'] == i and column['justify'] == 'right': + elif column['Column'] == i and column['justify'] == 'right': right = True if center is True: setup_cols += 'c' From 2d1292da3346222c6d5128269d36121ddf005258 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 30 May 2013 15:05:29 -0400 Subject: [PATCH 298/404] refs #38: refactored the test code to do r tags correctly --- pydocx/tests/document_builder.py | 37 +++++++++++++++++++++------- pydocx/tests/templates/linebreak.xml | 1 + pydocx/tests/templates/r.xml | 5 ++-- pydocx/tests/test_xml.py | 24 +++++++++--------- 4 files changed, 44 insertions(+), 23 deletions(-) create mode 100644 pydocx/tests/templates/linebreak.xml diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index ba18ccae..7cd73a08 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -6,6 +6,7 @@ 'drawing': 'drawing.xml', 'hyperlink': 'hyperlink.xml', 'insert': 'insert.xml', + 'linebreak': 'linebreak.xml', 'main': 'base.xml', 'p': 'p.xml', 'pict': 'pict.xml', @@ -15,9 +16,10 @@ 'smartTag': 'smart_tag.xml', 'style': 'style.xml', 'styles': 'styles.xml', + 't': 't.xml', 'table': 'table.xml', - 'tr': 'tr.xml', 'tc': 'tc.xml', + 'tr': 'tr.xml', } env = Environment( @@ -48,7 +50,7 @@ def p_tag( if isinstance(text, str): # Use create a single r tag based on the text and the bold run_tag = DocxBuilder.r_tag( - text, + [DocxBuilder.t_tag(text)], is_bold=bold, is_underline=underline, is_italics=italics, @@ -58,7 +60,7 @@ def p_tag( elif isinstance(text, list): run_tags = text else: - run_tags = [self.r_tag(None)] + run_tags = [self.r_tag([])] template = env.get_template(templates['p']) kwargs = { @@ -67,24 +69,36 @@ def p_tag( } return template.render(**kwargs) + @classmethod + def linebreak(self): + template = 
env.get_template(templates['linebreak']) + kwargs = {} + return template.render(**kwargs) + + @classmethod + def t_tag(self, text): + template = env.get_template(templates['t']) + kwargs = { + 'text': text, + } + return template.render(**kwargs) + @classmethod def r_tag( self, - text, + elements, is_bold=False, is_underline=False, is_italics=False, val=None, - include_linebreak=False, ): template = env.get_template(templates['r']) kwargs = { - 'text': text, + 'elements': elements, 'is_bold': is_bold, 'is_underline': is_underline, 'is_italics': is_italics, 'val': val, - 'include_linebreak': include_linebreak, } return template.render(**kwargs) @@ -133,12 +147,17 @@ def sdt_tag(self, p_tag): def li(self, text, ilvl, numId, bold=False): if isinstance(text, str): # Use create a single r tag based on the text and the bold - run_tag = DocxBuilder.r_tag(text, bold) + run_tag = DocxBuilder.r_tag([DocxBuilder.t_tag(text)], bold) run_tags = [run_tag] elif isinstance(text, list): run_tags = [] for run_text, run_bold in text: - run_tags.append(DocxBuilder.r_tag(run_tags, run_bold)) + run_tags.append( + DocxBuilder.r_tag( + [DocxBuilder.t_tag(run_tags)], + run_bold, + ), + ) else: raise AssertionError('text must be a string or a list') template = env.get_template(templates['p']) diff --git a/pydocx/tests/templates/linebreak.xml b/pydocx/tests/templates/linebreak.xml new file mode 100644 index 00000000..ab92e811 --- /dev/null +++ b/pydocx/tests/templates/linebreak.xml @@ -0,0 +1 @@ + diff --git a/pydocx/tests/templates/r.xml b/pydocx/tests/templates/r.xml index 8fc7ecb1..ed97150e 100644 --- a/pydocx/tests/templates/r.xml +++ b/pydocx/tests/templates/r.xml @@ -4,6 +4,7 @@ {% if is_underline %}{% endif %} {% if is_italics %}{% endif %} - {% if include_linebreak %}{% endif %} - {% if text %}{% include 't.xml' %}{% endif %} + {% for element in elements %} + {{ element }} + {% endfor %} diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 4cb5686c..ced5fe5d 100644 
--- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -43,9 +43,9 @@ class HyperlinkVanillaTestCase(_TranslationTestCase): def get_xml(self): run_tags = [] - run_tags.append(DXB.r_tag('link', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('link')], is_bold=False)) run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag('.', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('.')], is_bold=False)) body = DXB.p_tag(run_tags) xml = DXB.xml(body) return xml @@ -61,9 +61,9 @@ class HyperlinkWithMultipleRunsTestCase(_TranslationTestCase): ''' def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'link'] + run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'link'] run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag('.', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('.')], is_bold=False)) body = DXB.p_tag(run_tags) xml = DXB.xml(body) return xml @@ -93,9 +93,9 @@ class HyperlinkNotInRelsDictTestCase(_TranslationTestCase): def get_xml(self): run_tags = [] - run_tags.append(DXB.r_tag('link', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('link')], is_bold=False)) run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag('.', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('.')], is_bold=False)) body = DXB.p_tag(run_tags) xml = DXB.xml(body) return xml @@ -110,8 +110,8 @@ class HyperlinkWithBreakTestCase(_TranslationTestCase): def get_xml(self): run_tags = [] - run_tags.append(DXB.r_tag('link')) - run_tags.append(DXB.r_tag(None, include_linebreak=True)) + run_tags.append(DXB.r_tag([DXB.t_tag('link')])) + run_tags.append(DXB.r_tag([DXB.linebreak()])) run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] body = DXB.p_tag(run_tags) xml = DXB.xml(body) @@ -671,9 +671,9 @@ class NonStandardTextTagsTestCase(_TranslationTestCase): ''' def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'insert '] + run_tags = 
[DXB.r_tag([DXB.t_tag(i)]) for i in 'insert '] insert_tag = DXB.insert_tag(run_tags) - run_tags = [DXB.r_tag(i) for i in 'smarttag'] + run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'smarttag'] smart_tag = DXB.smart_tag(run_tags) run_tags = [insert_tag, smart_tag] @@ -728,7 +728,7 @@ class InsertTagInList(_TranslationTestCase): ''' def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'BBB'] + run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'BBB'] insert_tags = DXB.insert_tag(run_tags) p_tag = DXB.p_tag([insert_tags]) @@ -750,7 +750,7 @@ class SmartTagInList(_TranslationTestCase): ''' def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'BBB'] + run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'BBB'] smart_tag = DXB.smart_tag(run_tags) p_tag = DXB.p_tag([smart_tag]) From 1db516e7c806ac704b684a61115483aed895170c Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 30 May 2013 15:21:08 -0400 Subject: [PATCH 299/404] refs #38: added a test showing the duplicated content issue --- pydocx/tests/document_builder.py | 2 ++ pydocx/tests/templates/p.xml | 1 + pydocx/tests/test_xml.py | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 7cd73a08..f71e6f06 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -46,6 +46,7 @@ def p_tag( italics=False, style='style0', val=None, + jc=None, ): if isinstance(text, str): # Use create a single r tag based on the text and the bold @@ -66,6 +67,7 @@ def p_tag( kwargs = { 'run_tags': run_tags, 'style': style, + 'jc': jc, } return template.render(**kwargs) diff --git a/pydocx/tests/templates/p.xml b/pydocx/tests/templates/p.xml index 778a8866..7a78a060 100644 --- a/pydocx/tests/templates/p.xml +++ b/pydocx/tests/templates/p.xml @@ -11,6 +11,7 @@ {% endif %} {% endif %} + {% if jc %}{% endif %} {% for run_tag in run_tags %} {{ run_tag }} diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 
ced5fe5d..47099c0b 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -1005,3 +1005,22 @@ def get_xml(self): xml = DXB.xml(body) return xml + + +class MultipleTTagsInRTag(_TranslationTestCase): + expected_output = ''' +

          ABC

          + ''' + + def get_xml(self): + r_tag = DXB.r_tag( + [DXB.t_tag(letter) for letter in 'ABC'], + ) + p_tag = DXB.p_tag( + [r_tag], + jc='start', + ) + body = p_tag + + xml = DXB.xml(body) + return xml From 138684a9d6a2a351145b17b7b0ea77c1f97d8dac Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 30 May 2013 15:29:26 -0400 Subject: [PATCH 300/404] refs #38: fixed justifications --- pydocx/DocxParser.py | 88 ++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 49 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index c5bd69ec..466d37d5 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -609,9 +609,47 @@ def should_parse_last_el(last_el, first_el): return self._build_list(el, parsed) + def parse_justification(self, el, text): + paragraph_tag_property = el.find('pPr') + just = '' + if paragraph_tag_property is None: + return text + + jc = paragraph_tag_property.find('jc') + if jc is not None: # text alignments + if jc.attrib['val'] == 'right': + just = 'right' + elif jc.attrib['val'] == 'center': + just = 'center' + elif jc.attrib['val'] == 'left': + just = 'left' + + ind = paragraph_tag_property.find('ind') + right = '' + left = '' + firstLine = '' + if ind is not None: + if 'right' in ind.attrib: + right = ind.attrib['right'] + # divide by 20 to get to pt. 
multiply by (4/3) to get to px + right = (int(right) / 20) * float(4) / float(3) + right = str(right) + if 'left' in ind.attrib: + left = ind.attrib['left'] + left = (int(left) / 20) * float(4) / float(3) + left = str(left) + if 'firstLine' in ind.attrib: + firstLine = ind.attrib['firstLine'] + firstLine = (int(firstLine) / 20) * float(4) / float(3) + firstLine = str(firstLine) + if any([just, firstLine, left, right]): + return self.indent(text, just, firstLine, left, right) + return text + def parse_p(self, el, text): if text == '': return '' + text = self.parse_justification(el, text) if el.is_first_list_item: return self.parse_list(el, text) if el.heading_level: @@ -853,7 +891,6 @@ def parse_r(self, el, parsed): """ Parse the running text. """ - block = False text = parsed if not text: return '' @@ -871,54 +908,7 @@ def parse_r(self, el, parsed): fns.append(self.underline) for fn in fns: text = fn(text) - paragraph_tag_property = el.parent.find('pPr') - just = '' - if paragraph_tag_property is not None: - jc = paragraph_tag_property.find('jc') - if jc is not None: # text alignments - if jc.attrib['val'] == 'right': - just = 'right' - elif jc.attrib['val'] == 'center': - just = 'center' - elif jc.attrib['val'] == 'left': - just = 'left' - ind = paragraph_tag_property.find('ind') - right = '' - left = '' - firstLine = '' - if ind is not None: - right = None - left = None - firstLine = None - if 'right' in ind.attrib: - right = ind.attrib['right'] - # divide by 20 to get to pt. 
multiply by (4/3) to get to px - right = (int(right) / 20) * float(4) / float(3) - right = str(right) - if 'left' in ind.attrib: - left = ind.attrib['left'] - left = (int(left) / 20) * float(4) / float(3) - left = str(left) - if 'firstLine' in ind.attrib: - firstLine = ind.attrib['firstLine'] - firstLine = (int(firstLine) / 20) * float(4) / float(3) - firstLine = str(firstLine) - if jc is not None or ind is not None: - t_els = el.find_all('t') - for el in t_els: - if el.is_last_text: - block = False - self.block_text += text - text = self.indent(self.block_text, just, - firstLine, left, right) - self.block_text = '' - else: - block = True - self.block_text += text - if block is False: - return text - else: - return '' + return text def get_list_style(self, num_id, ilvl): ids = self.numbering_root.find_all('num') From 948a4fe08c33e290fd7b37d59f316008026a21d2 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 30 May 2013 16:17:44 -0400 Subject: [PATCH 301/404] refs #38: removed dead code --- pydocx/DocxParser.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 466d37d5..0927a43c 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -114,7 +114,6 @@ def find_ancestor_with_tag(self, tag): setattr(_ElementInterface, 'vmerge_continue', None) setattr(_ElementInterface, 'row_index', None) setattr(_ElementInterface, 'column_index', None) -setattr(_ElementInterface, 'is_last_text', False) # End helpers @@ -280,15 +279,6 @@ def _set_table_attributes(self, el): ): child.vmerge_continue = True - def _set_text_attributes(self, el): - # find the ppr. 
look thru all the elements within and find the text - #if it's the last item in the list, it's the last text - paragraph_tag_property = el.find_all('pPr') - for el in paragraph_tag_property: - for i, t in enumerate(el.parent.find_all('t')): - if i == (len(el.parent.find_all('t')) - 1): - t.is_last_text = True - def _set_is_in_table(self, el): paragraph_elements = el.find_all('p') for p in paragraph_elements: @@ -431,7 +421,6 @@ def _assign_next(children): def parse_begin(self, el): self._set_list_attributes(el) self._set_table_attributes(el) - self._set_text_attributes(el) self._set_is_in_table(el) # Find the first and last li elements From 2cfe7e61d866a812fd1decd6a3c823fa8ffbb746 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 30 May 2013 16:22:08 -0400 Subject: [PATCH 302/404] refs #38: added a comment showing what still needs to be done. --- pydocx/DocxParser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 0927a43c..5dd27380 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -638,6 +638,9 @@ def parse_justification(self, el, text): def parse_p(self, el, text): if text == '': return '' + # TODO This is still not correct, however it fixes the bug. We need to + # apply the classes/styles on p, td, li and h tags instead of inline, + # but that is for another ticket. 
text = self.parse_justification(el, text) if el.is_first_list_item: return self.parse_list(el, text) From 139dce22bbc6866a038496e15399c4ad2c007d93 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 30 May 2013 17:20:05 -0400 Subject: [PATCH 303/404] refs #38: code cleanup --- pydocx/DocxParser.py | 57 +++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 5dd27380..5b8c34f7 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -29,6 +29,14 @@ ) UPPER_ROMAN_TO_HEADING_VALUE = 'h2' +JUSTIFY_CENTER = 'center' +JUSTIFY_LEFT = 'left' +JUSTIFY_RIGHT = 'right' + +INDENTATION_RIGHT = 'right' +INDENTATION_LEFT = 'left' +INDENTATION_FIRST_LINE = 'firstLine' + def remove_namespaces(document): # remove namespaces @@ -598,41 +606,40 @@ def should_parse_last_el(last_el, first_el): return self._build_list(el, parsed) - def parse_justification(self, el, text): + def justification(self, el, text): paragraph_tag_property = el.find('pPr') - just = '' if paragraph_tag_property is None: return text - jc = paragraph_tag_property.find('jc') - if jc is not None: # text alignments - if jc.attrib['val'] == 'right': - just = 'right' - elif jc.attrib['val'] == 'center': - just = 'center' - elif jc.attrib['val'] == 'left': - just = 'left' - - ind = paragraph_tag_property.find('ind') - right = '' - left = '' - firstLine = '' - if ind is not None: - if 'right' in ind.attrib: - right = ind.attrib['right'] + _justification = paragraph_tag_property.find('jc') + indentation = paragraph_tag_property.find('ind') + if _justification is None and indentation is None: + return text + alignment = None + right = None + left = None + firstLine = None + if _justification is not None: # text alignments + value = _justification.attrib['val'] + if value in [JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT]: + alignment = value + + if indentation is not None: + if INDENTATION_RIGHT in indentation.attrib: 
+ right = indentation.attrib[INDENTATION_RIGHT] # divide by 20 to get to pt. multiply by (4/3) to get to px right = (int(right) / 20) * float(4) / float(3) right = str(right) - if 'left' in ind.attrib: - left = ind.attrib['left'] + if INDENTATION_LEFT in indentation.attrib: + left = indentation.attrib[INDENTATION_LEFT] left = (int(left) / 20) * float(4) / float(3) left = str(left) - if 'firstLine' in ind.attrib: - firstLine = ind.attrib['firstLine'] + if INDENTATION_FIRST_LINE in indentation.attrib: + firstLine = indentation.attrib[INDENTATION_FIRST_LINE] firstLine = (int(firstLine) / 20) * float(4) / float(3) firstLine = str(firstLine) - if any([just, firstLine, left, right]): - return self.indent(text, just, firstLine, left, right) + if any([alignment, firstLine, left, right]): + return self.indent(text, alignment, firstLine, left, right) return text def parse_p(self, el, text): @@ -641,7 +648,7 @@ def parse_p(self, el, text): # TODO This is still not correct, however it fixes the bug. We need to # apply the classes/styles on p, td, li and h tags instead of inline, # but that is for another ticket. - text = self.parse_justification(el, text) + text = self.justification(el, text) if el.is_first_list_item: return self.parse_list(el, text) if el.heading_level: From 525857cce88e5c130a20aff789ba4ffd190bd501 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 30 May 2013 20:53:43 -0400 Subject: [PATCH 304/404] minor change --- pydocx/DocxParser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index c5bd69ec..221f2587 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -195,6 +195,7 @@ def __init__( self._parsed = '' self.block_text = '' self.page_width = 0 + self.bookmark = False self.convert_root_level_upper_roman = convert_root_level_upper_roman self._image_data = {} self._build_data(path, *args, **kwargs) @@ -462,7 +463,6 @@ def parse(self, el): for child in el: # recursive. 
So you can get all the way to the bottom parsed += self.parse(child) - if el.tag == 'br' and el.attrib.get('type') == 'page': return self.parse_page_break(el, parsed) elif el.tag == 'tbl': @@ -476,6 +476,7 @@ def parse(self, el): elif el.tag == 't': return self.parse_t(el, parsed) elif el.tag == 'br': + print 'ummm wheres the break' return self.parse_break_tag(el, parsed) elif el.tag == 'delText': return self.parse_deletion(el, parsed) @@ -489,7 +490,6 @@ def parse(self, el): return self.parse_image(el) else: return parsed - def parse_page_break(self, el, text): #TODO figure out what parsed is getting overwritten return self.page_break() @@ -673,6 +673,7 @@ def _should_parse_next_as_content(el): next_el = el.next if next_el is None: return False + return False if ( not next_el.is_list_item and not el.is_last_list_item_in_root From 9faa36c8c9c74d6382ab93d33392bf349db8a471 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Fri, 31 May 2013 09:09:55 -0400 Subject: [PATCH 305/404] merged with latex --- pydocx/DocxParser.py | 113 ++++++++++++--------------- pydocx/tests/document_builder.py | 39 ++++++--- pydocx/tests/templates/linebreak.xml | 1 + pydocx/tests/templates/p.xml | 1 + pydocx/tests/templates/r.xml | 5 +- pydocx/tests/test_xml.py | 43 +++++++--- 6 files changed, 115 insertions(+), 87 deletions(-) create mode 100644 pydocx/tests/templates/linebreak.xml diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 9b4a4beb..3667923c 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -29,6 +29,14 @@ ) UPPER_ROMAN_TO_HEADING_VALUE = 'h2' +JUSTIFY_CENTER = 'center' +JUSTIFY_LEFT = 'left' +JUSTIFY_RIGHT = 'right' + +INDENTATION_RIGHT = 'right' +INDENTATION_LEFT = 'left' +INDENTATION_FIRST_LINE = 'firstLine' + def remove_namespaces(document): # remove namespaces @@ -290,15 +298,6 @@ def _set_table_attributes(self, el): ): child.vmerge_continue = True - def _set_text_attributes(self, el): - # find the ppr. 
look thru all the elements within and find the text - #if it's the last item in the list, it's the last text - paragraph_tag_property = el.find_all('pPr') - for el in paragraph_tag_property: - for i, t in enumerate(el.parent.find_all('t')): - if i == (len(el.parent.find_all('t')) - 1): - t.is_last_text = True - def _set_is_in_table(self, el): paragraph_elements = el.find_all('p') for p in paragraph_elements: @@ -441,7 +440,6 @@ def _assign_next(children): def parse_begin(self, el): self._set_list_attributes(el) self._set_table_attributes(el) - self._set_text_attributes(el) self._set_is_in_table(el) # Find the first and last li elements @@ -623,9 +621,49 @@ def should_parse_last_el(last_el, first_el): return self._build_list(el, parsed) + def justification(self, el, text): + paragraph_tag_property = el.find('pPr') + if paragraph_tag_property is None: + return text + + _justification = paragraph_tag_property.find('jc') + indentation = paragraph_tag_property.find('ind') + if _justification is None and indentation is None: + return text + alignment = None + right = None + left = None + firstLine = None + if _justification is not None: # text alignments + value = _justification.attrib['val'] + if value in [JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT]: + alignment = value + + if indentation is not None: + if INDENTATION_RIGHT in indentation.attrib: + right = indentation.attrib[INDENTATION_RIGHT] + # divide by 20 to get to pt. 
multiply by (4/3) to get to px + right = (int(right) / 20) * float(4) / float(3) + right = str(right) + if INDENTATION_LEFT in indentation.attrib: + left = indentation.attrib[INDENTATION_LEFT] + left = (int(left) / 20) * float(4) / float(3) + left = str(left) + if INDENTATION_FIRST_LINE in indentation.attrib: + firstLine = indentation.attrib[INDENTATION_FIRST_LINE] + firstLine = (int(firstLine) / 20) * float(4) / float(3) + firstLine = str(firstLine) + if any([alignment, firstLine, left, right]): + return self.indent(text, alignment, firstLine, left, right) + return text + def parse_p(self, el, text): if text == '': return '' + # TODO This is still not correct, however it fixes the bug. We need to + # apply the classes/styles on p, td, li and h tags instead of inline, + # but that is for another ticket. + text = self.justification(el, text) if el.is_first_list_item: return self.parse_list(el, text) if el.heading_level: @@ -869,7 +907,6 @@ def parse_r(self, el, parsed): """ Parse the running text. """ - block = False text = parsed if not text: return '' @@ -887,59 +924,7 @@ def parse_r(self, el, parsed): fns.append(self.underline) for fn in fns: text = fn(text) - paragraph_tag_property = el.parent.find('pPr') - just = '' - if paragraph_tag_property is not None: - jc = paragraph_tag_property.find('jc') - if jc is not None: # text alignments - if jc.attrib['val'] == 'right': - just = 'right' - elif jc.attrib['val'] == 'center': - just = 'center' - elif jc.attrib['val'] == 'left': - just = 'left' - ind = paragraph_tag_property.find('ind') - right = None - left = None - firstLine = None - hanging = None - if ind is not None: - if 'hanging' in ind.attrib: - hanging = ind.attrib['hanging'] - hanging = (float(hanging)/20) - if 'right' in ind.attrib: - right = ind.attrib['right'] - # divide by 20 to get to pt. 
multiply by (4/3) to get to px - right = (float(right) / 20) * float(4) / float(3) - right = str(right) - if 'left' in ind.attrib: - left = ind.attrib['left'] - left = (float(left) / 20) * float(4) / float(3) - left = str(left) - if 'firstLine' in ind.attrib: - firstLine = ind.attrib['firstLine'] - firstLine = (float(firstLine) / 20) * float(4) / float(3) - firstLine = str(firstLine) - if jc is not None or ind is not None: - t_els = el.find_all('t') - for el in t_els: - if el.is_last_text: - block = False - self.block_text += text - if el.find_ancestor_with_tag('tc') is not None: - self.indent_table = True - tc = el.find_ancestor_with_tag('tc') - self.column_index = tc.column_index - text = self.indent(self.block_text, just, - firstLine, left, right, hanging) - self.block_text = '' - else: - block = True - self.block_text += text - if block is False: - return text - else: - return '' + return text def get_list_style(self, num_id, ilvl): ids = self.numbering_root.find_all('num') diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index ba18ccae..f71e6f06 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -6,6 +6,7 @@ 'drawing': 'drawing.xml', 'hyperlink': 'hyperlink.xml', 'insert': 'insert.xml', + 'linebreak': 'linebreak.xml', 'main': 'base.xml', 'p': 'p.xml', 'pict': 'pict.xml', @@ -15,9 +16,10 @@ 'smartTag': 'smart_tag.xml', 'style': 'style.xml', 'styles': 'styles.xml', + 't': 't.xml', 'table': 'table.xml', - 'tr': 'tr.xml', 'tc': 'tc.xml', + 'tr': 'tr.xml', } env = Environment( @@ -44,11 +46,12 @@ def p_tag( italics=False, style='style0', val=None, + jc=None, ): if isinstance(text, str): # Use create a single r tag based on the text and the bold run_tag = DocxBuilder.r_tag( - text, + [DocxBuilder.t_tag(text)], is_bold=bold, is_underline=underline, is_italics=italics, @@ -58,33 +61,46 @@ def p_tag( elif isinstance(text, list): run_tags = text else: - run_tags = [self.r_tag(None)] + run_tags = 
[self.r_tag([])] template = env.get_template(templates['p']) kwargs = { 'run_tags': run_tags, 'style': style, + 'jc': jc, + } + return template.render(**kwargs) + + @classmethod + def linebreak(self): + template = env.get_template(templates['linebreak']) + kwargs = {} + return template.render(**kwargs) + + @classmethod + def t_tag(self, text): + template = env.get_template(templates['t']) + kwargs = { + 'text': text, } return template.render(**kwargs) @classmethod def r_tag( self, - text, + elements, is_bold=False, is_underline=False, is_italics=False, val=None, - include_linebreak=False, ): template = env.get_template(templates['r']) kwargs = { - 'text': text, + 'elements': elements, 'is_bold': is_bold, 'is_underline': is_underline, 'is_italics': is_italics, 'val': val, - 'include_linebreak': include_linebreak, } return template.render(**kwargs) @@ -133,12 +149,17 @@ def sdt_tag(self, p_tag): def li(self, text, ilvl, numId, bold=False): if isinstance(text, str): # Use create a single r tag based on the text and the bold - run_tag = DocxBuilder.r_tag(text, bold) + run_tag = DocxBuilder.r_tag([DocxBuilder.t_tag(text)], bold) run_tags = [run_tag] elif isinstance(text, list): run_tags = [] for run_text, run_bold in text: - run_tags.append(DocxBuilder.r_tag(run_tags, run_bold)) + run_tags.append( + DocxBuilder.r_tag( + [DocxBuilder.t_tag(run_tags)], + run_bold, + ), + ) else: raise AssertionError('text must be a string or a list') template = env.get_template(templates['p']) diff --git a/pydocx/tests/templates/linebreak.xml b/pydocx/tests/templates/linebreak.xml new file mode 100644 index 00000000..ab92e811 --- /dev/null +++ b/pydocx/tests/templates/linebreak.xml @@ -0,0 +1 @@ + diff --git a/pydocx/tests/templates/p.xml b/pydocx/tests/templates/p.xml index 778a8866..7a78a060 100644 --- a/pydocx/tests/templates/p.xml +++ b/pydocx/tests/templates/p.xml @@ -11,6 +11,7 @@ {% endif %} {% endif %} + {% if jc %}{% endif %} {% for run_tag in run_tags %} {{ run_tag }} diff --git 
a/pydocx/tests/templates/r.xml b/pydocx/tests/templates/r.xml index 8fc7ecb1..ed97150e 100644 --- a/pydocx/tests/templates/r.xml +++ b/pydocx/tests/templates/r.xml @@ -4,6 +4,7 @@ {% if is_underline %}{% endif %} {% if is_italics %}{% endif %} - {% if include_linebreak %}{% endif %} - {% if text %}{% include 't.xml' %}{% endif %} + {% for element in elements %} + {{ element }} + {% endfor %} diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 4cb5686c..47099c0b 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -43,9 +43,9 @@ class HyperlinkVanillaTestCase(_TranslationTestCase): def get_xml(self): run_tags = [] - run_tags.append(DXB.r_tag('link', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('link')], is_bold=False)) run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag('.', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('.')], is_bold=False)) body = DXB.p_tag(run_tags) xml = DXB.xml(body) return xml @@ -61,9 +61,9 @@ class HyperlinkWithMultipleRunsTestCase(_TranslationTestCase): ''' def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'link'] + run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'link'] run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag('.', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('.')], is_bold=False)) body = DXB.p_tag(run_tags) xml = DXB.xml(body) return xml @@ -93,9 +93,9 @@ class HyperlinkNotInRelsDictTestCase(_TranslationTestCase): def get_xml(self): run_tags = [] - run_tags.append(DXB.r_tag('link', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('link')], is_bold=False)) run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag('.', is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('.')], is_bold=False)) body = DXB.p_tag(run_tags) xml = DXB.xml(body) return xml @@ -110,8 +110,8 @@ class HyperlinkWithBreakTestCase(_TranslationTestCase): def get_xml(self): 
run_tags = [] - run_tags.append(DXB.r_tag('link')) - run_tags.append(DXB.r_tag(None, include_linebreak=True)) + run_tags.append(DXB.r_tag([DXB.t_tag('link')])) + run_tags.append(DXB.r_tag([DXB.linebreak()])) run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] body = DXB.p_tag(run_tags) xml = DXB.xml(body) @@ -671,9 +671,9 @@ class NonStandardTextTagsTestCase(_TranslationTestCase): ''' def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'insert '] + run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'insert '] insert_tag = DXB.insert_tag(run_tags) - run_tags = [DXB.r_tag(i) for i in 'smarttag'] + run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'smarttag'] smart_tag = DXB.smart_tag(run_tags) run_tags = [insert_tag, smart_tag] @@ -728,7 +728,7 @@ class InsertTagInList(_TranslationTestCase): ''' def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'BBB'] + run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'BBB'] insert_tags = DXB.insert_tag(run_tags) p_tag = DXB.p_tag([insert_tags]) @@ -750,7 +750,7 @@ class SmartTagInList(_TranslationTestCase): ''' def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'BBB'] + run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'BBB'] smart_tag = DXB.smart_tag(run_tags) p_tag = DXB.p_tag([smart_tag]) @@ -1005,3 +1005,22 @@ def get_xml(self): xml = DXB.xml(body) return xml + + +class MultipleTTagsInRTag(_TranslationTestCase): + expected_output = ''' +

          ABC

          + ''' + + def get_xml(self): + r_tag = DXB.r_tag( + [DXB.t_tag(letter) for letter in 'ABC'], + ) + p_tag = DXB.p_tag( + [r_tag], + jc='start', + ) + body = p_tag + + xml = DXB.xml(body) + return xml From 6e188f4e1027c713735ac4125f56bedce5ab0f1e Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 31 May 2013 11:57:40 -0400 Subject: [PATCH 306/404] refs #38: Added a changelog --- CHANGELOG | 9 +++++++++ MANIFEST.in | 1 + setup.py | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 CHANGELOG diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 00000000..a47af6a6 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,9 @@ + +Changelog +========= + +* 0.2.1 + * Added a changelog + * Added the version in pydocx.__init__ + * Fixed an issue with duplicating content if there was indentation or + justification on a p element that had multiple t tags. diff --git a/MANIFEST.in b/MANIFEST.in index c3b3f1fb..88fbbf67 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ include AUTHORS +include CHANGELOG include LICENSE include MANIFEST.in include README.rst diff --git a/setup.py b/setup.py index 7706c81e..40adffc9 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ def get_file(filename): def get_description(): - return get_file('README.rst') + return get_file('README.rst') + get_file('CHANGELOG') setup( name="PyDocX", From a0f1daa2821285ba65a44bd30e2f2bcd19d17791 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 31 May 2013 12:00:44 -0400 Subject: [PATCH 307/404] bumped to version 0.2.1 --- pydocx/__init__.py | 3 +++ setup.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index 743227cc..c68a472e 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -7,3 +7,6 @@ def docx2html(path): def docx2markdown(path): return Docx2Markdown(path).parsed + + +VERSION = '0.2.1' diff --git a/setup.py b/setup.py index 40adffc9..74be1e3a 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,8 
@@ def get_description(): setup( name="PyDocX", - version="0.2.0", + # Edit here and pydocx.__init__ + version="0.2.1", description="docx (OOXML) to html converter", author="Jason Ward, Sam Portnow", author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", From 4ff701dd18ccc31ea452e03e6af2ab59bfc210c7 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 31 May 2013 12:12:33 -0400 Subject: [PATCH 308/404] refs #41: refactor and started using spans inline instead of divs --- pydocx/DocxParser.py | 24 ++++++++++++++---------- pydocx/parsers/Docx2Html.py | 10 +++++----- pydocx/tests/test_docx.py | 26 +++++++++++++------------- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 5b8c34f7..67ca6848 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -624,20 +624,24 @@ def justification(self, el, text): if value in [JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT]: alignment = value + def _scale_indent_to_pixels(value): + if value is None: + return None + return (float(value) / 20.0) * (4.0 / 3.0) + if indentation is not None: if INDENTATION_RIGHT in indentation.attrib: - right = indentation.attrib[INDENTATION_RIGHT] - # divide by 20 to get to pt. 
multiply by (4/3) to get to px - right = (int(right) / 20) * float(4) / float(3) - right = str(right) + right = _scale_indent_to_pixels( + indentation.attrib.get(INDENTATION_RIGHT), + ) if INDENTATION_LEFT in indentation.attrib: - left = indentation.attrib[INDENTATION_LEFT] - left = (int(left) / 20) * float(4) / float(3) - left = str(left) + left = _scale_indent_to_pixels( + indentation.attrib.get(INDENTATION_LEFT), + ) if INDENTATION_FIRST_LINE in indentation.attrib: - firstLine = indentation.attrib[INDENTATION_FIRST_LINE] - firstLine = (int(firstLine) / 20) * float(4) / float(3) - firstLine = str(firstLine) + firstLine = _scale_indent_to_pixels( + indentation.attrib.get(INDENTATION_FIRST_LINE), + ) if any([alignment, firstLine, left, right]): return self.indent(text, alignment, firstLine, left, right) return text diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 73b3c9b6..9b9983eb 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -154,19 +154,19 @@ def page_break(self): return '
          ' def indent(self, text, just='', firstLine='', left='', right=''): - slug = ' -
          EEE -
          + EEE + FFF -
          GGG -
          + GGG + @@ -605,7 +605,7 @@ def test_has_title(): actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

          Title

          -

          Text

          +

          Text

          ''') @@ -661,28 +661,28 @@ def test_justification(): actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

          -

          Center Justified
          + Center Justified

          -

          Right justified
          + Right justified

          -

          + Right justified and pushed in from right -
          +

          -

          Center justified and pushed in from left and it is great and it is the coolest thing of all time and I like it and I think it is cool -
          +

          -

          + Left justified and pushed in from left -
          +

          ''') From 3ab25699232026283272611d986b2229c2a17c54 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Mon, 3 Jun 2013 17:29:47 -0400 Subject: [PATCH 309/404] refs #43: switched to using lxml --- pydocx/DocxParser.py | 584 ++++++++++++++++++++++----------------- pydocx/tests/__init__.py | 15 +- pydocx/tests/test_xml.py | 17 +- 3 files changed, 359 insertions(+), 257 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 67ca6848..ba66c21a 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -1,11 +1,14 @@ import logging import os -import xml.etree.ElementTree as ElementTree +try: + from lxml.etree import fromstring + from lxml.etree import ElementBase +except ImportError: + from xml.etree.ElementTree import fromstring import zipfile from abc import abstractmethod, ABCMeta from contextlib import contextmanager -from xml.etree.ElementTree import _ElementInterface from pydocx.utils import NamespacedNumId logging.basicConfig(level=logging.DEBUG) @@ -38,197 +41,125 @@ INDENTATION_FIRST_LINE = 'firstLine' -def remove_namespaces(document): # remove namespaces +# Since I can't actually set attribute (reliably) on each element. I have to +# keep track of the next and previous elements here. +next_previous_states = {} - root = ElementTree.fromstring(document) - for child in el_iter(root): - child.tag = child.tag.split("}")[1] - child.attrib = dict( - (k.split("}")[-1], v) - for k, v in child.attrib.items() - ) - return ElementTree.tostring(root) - -# Add some helper functions to Element to make it slightly more readable - - -def has_child(self, tag): - """ - Determine if current element has a child. Stop at first child. - """ - return True if self.find(tag) is not None else False - - -def has_descendant_with_tag(self, tag): - """ - Determine if there is a child ahead in the element tree. - """ - # Get child. stop at first child. 
- return True if self.find('.//' + tag) is not None else False - - -def find_first(self, tag): - """ - Find the first occurrence of a tag beneath the current element. - """ - return self.find('.//' + tag) +def get_list_style(numbering_root, num_id, ilvl): + # This is needed on both the custom lxml parser and the pydocx parser. So + # make it a function. + ids = numbering_root.find_all('num') + for _id in ids: + if _id.attrib['numId'] != num_id: + continue + abstractid = _id.find('abstractNumId') + abstractid = abstractid.attrib['val'] + style_information = numbering_root.find_all( + 'abstractNum', + ) + for info in style_information: + if info.attrib['abstractNumId'] == abstractid: + for i in el_iter(info): + if ( + 'ilvl' in i.attrib and + i.attrib['ilvl'] != ilvl): + continue + if i.find('numFmt') is not None: + return i.find('numFmt').attrib['val'] -def find_all(self, tag): - """ - Find all occurrences of a tag - """ - return self.findall('.//' + tag) +class PydocxLXMLParser(ElementBase): -def el_iter(el): - """ - Go through all elements - """ - try: - return el.iter() - except AttributeError: - return el.findall('.//*') + @property + def is_first_list_item(self): + return self.attrib.get('is_first_list_item', '') == str(True) + @property + def is_last_list_item_in_root(self): + return self.attrib.get('is_last_list_item_in_root', '') == str(True) -def find_ancestor_with_tag(self, tag): - """ - Find the first ancestor with that is a `tag`. 
- """ - el = self - while el.parent is not None: - el = el.parent - if el.tag == tag: - return el - return None - - -#make all of these attributes of _ElementInterface -setattr(_ElementInterface, 'has_child', has_child) -setattr(_ElementInterface, 'has_descendant_with_tag', has_descendant_with_tag) -setattr(_ElementInterface, 'find_first', find_first) -setattr(_ElementInterface, 'find_all', find_all) -setattr(_ElementInterface, 'find_ancestor_with_tag', find_ancestor_with_tag) -setattr(_ElementInterface, 'parent', None) -setattr(_ElementInterface, 'is_first_list_item', False) -setattr(_ElementInterface, 'is_last_list_item_in_root', False) -setattr(_ElementInterface, 'is_list_item', False) -setattr(_ElementInterface, 'ilvl', None) -setattr(_ElementInterface, 'num_id', None) -setattr(_ElementInterface, 'heading_level', None) -setattr(_ElementInterface, 'is_in_table', False) -setattr(_ElementInterface, 'previous', None) -setattr(_ElementInterface, 'next', None) -setattr(_ElementInterface, 'vmerge_continue', None) -setattr(_ElementInterface, 'row_index', None) -setattr(_ElementInterface, 'column_index', None) - -# End helpers + @property + def is_list_item(self): + return self.attrib.get('is_list_item', '') == str(True) + @property + def num_id(self): + if not self.is_list_item: + return None + return self._generate_num_id(self) -@contextmanager -def ZipFile(path): # This is not needed in python 3.2+ - f = zipfile.ZipFile(path) - yield f - f.close() + @property + def ilvl(self): + return self.attrib.get('ilvl') + @property + def heading_level(self): + return self.attrib.get('heading_level') -class DocxParser: - __metaclass__ = ABCMeta + @property + def is_in_table(self): + return self.attrib.get('is_in_table') - def _build_data(self, path, *args, **kwargs): - with ZipFile(path) as f: - self.document_text = f.read('word/document.xml') - self.styles_text = f.read('word/styles.xml') - try: - self.fonts = f.read('/word/fontTable.xml') - except KeyError: - self.fonts = None 
- try: # Only present if there are lists - self.numbering_text = f.read('word/numbering.xml') - except KeyError: - self.numbering_text = None - try: # Only present if there are comments - self.comment_text = f.read('word/comments.xml') - except KeyError: - self.comment_text = None - self.relationship_text = f.read('word/_rels/document.xml.rels') - zipped_image_files = [ - e for e in f.infolist() - if e.filename.startswith('word/media/') - ] - for e in zipped_image_files: - self._image_data[e.filename] = f.read(e.filename) + @property + def row_index(self): + return self.attrib.get('row_index') - self.root = ElementTree.fromstring( - remove_namespaces(self.document_text), # remove the namespaces - ) - self.numbering_root = None - if self.numbering_text: - self.numbering_root = ElementTree.fromstring( - remove_namespaces(self.numbering_text), - ) - self.comment_root = None - if self.comment_text: - self.comment_root = ElementTree.fromstring( - remove_namespaces(self.comment_text), - ) + @property + def column_index(self): + return self.attrib.get('column_index') - def _parse_styles(self): - tree = ElementTree.fromstring( - remove_namespaces(self.styles_text), - ) - result = {} - for style in tree.find_all('style'): - style_val = style.find_first('name').attrib['val'] - result[style.attrib['styleId']] = style_val - return result + @property + def vmerge_continue(self): + return self.attrib.get('vmerge_continue') - def _parse_rels_root(self): - tree = ElementTree.fromstring(self.relationship_text) - rels_dict = {} - for el in tree: - rId = el.get('Id') - target = el.get('Target') - rels_dict[rId] = target - return rels_dict + @property + def next(self): + if self not in next_previous_states: + return + return next_previous_states[self].get('next') - def __init__( - self, - path, - convert_root_level_upper_roman=False, - *args, - **kwargs): - self._parsed = '' - self.block_text = '' - self.page_width = 0 - self.convert_root_level_upper_roman = 
convert_root_level_upper_roman - self._image_data = {} - self._build_data(path, *args, **kwargs) + @property + def previous(self): + if self not in next_previous_states: + return + return next_previous_states[self].get('previous') - def add_parent(el): # if a parent, make that an attribute - for child in el.getchildren(): - setattr(child, 'parent', el) - add_parent(child) + def find_first(self, tag): + """ + Find the first occurrence of a tag beneath the current element. + """ + return self.find('.//' + tag) - #divide by 20 to get to pt (Office works in 20th's of a point) + def find_all(self, tag): """ - see http://msdn.microsoft.com/en-us/library/documentformat - .openxml.wordprocessing.indentation.aspx + Find all occurrences of a tag """ - if self.root.find_first('pgSz') is not None: - self.page_width = int(self.root. - find_first('pgSz').attrib['w']) / 20 + return self.findall('.//' + tag) - add_parent(self.root) # create the parent attributes + def find_ancestor_with_tag(self, tag): + """ + Find the first ancestor with that is a `tag`. + """ + el = self + while el.getparent() is not None: + el = el.getparent() + if el.tag == tag: + return el + return None + + def has_descendant_with_tag(self, tag): + """ + Determine if there is a child ahead in the element tree. + """ + # Get child. stop at first child. + return True if self.find('.//' + tag) is not None else False - #all blank when we init - self.comment_store = None - self.visited = [] - self.list_depth = 0 - self.rels_dict = self._parse_rels_root() - self.styles_dict = self._parse_styles() - self.parse_begin(self.root) # begin to parse + def has_child(self, tag): + """ + Determine if current element has a child. Stop at first child. 
+ """ + return True if self.find(tag) is not None else False def _filter_children(self, element, tags): return [ @@ -241,11 +172,13 @@ def _set_list_attributes(self, el): for li in list_elements: parent = li.find_ancestor_with_tag('p') # Deleted text in a list will have a numId but no ilvl. + if parent is None: + continue if parent.find_first('ilvl') is None: continue - parent.is_list_item = True - parent.num_id = self._generate_num_id(parent) - parent.ilvl = parent.find_first('ilvl').attrib['val'] + parent.attrib['is_list_item'] = str(True) + parent.attrib['num_id'] = str(self._generate_num_id(parent)) + parent.attrib['ilvl'] = parent.find_first('ilvl').attrib['val'] def _generate_num_id(self, el): ''' @@ -260,39 +193,15 @@ def _generate_num_id(self, el): # First, go up the parent until we get None and count the number of # tables there are. num_tables = 0 - while el.parent is not None: + while el.getparent() is not None: if el.tag == 'tbl': num_tables += 1 - el = el.parent + el = el.getparent() return NamespacedNumId( num_id=num_id, num_tables=num_tables, ) - def _set_table_attributes(self, el): - tables = el.find_all('tbl') - for table in tables: - rows = self._filter_children(table, ['tr']) - if rows is None: - continue - for i, row in enumerate(rows): - tcs = self._filter_children(row, ['tc']) - for j, child in enumerate(tcs): - child.row_index = i - child.column_index = j - v_merge = child.find_first('vMerge') - if ( - v_merge is not None and - 'continue' == v_merge.get('val', '') - ): - child.vmerge_continue = True - - def _set_is_in_table(self, el): - paragraph_elements = el.find_all('p') - for p in paragraph_elements: - if p.find_ancestor_with_tag('tc') is not None: - p.is_in_table = True - def _set_first_list_item(self, num_ids, ilvls, list_elements): # Lists are grouped by having the same `num_id` and `ilvl`. 
The first # list item is the first list item found for each `num_id` and `ilvl` @@ -309,7 +218,7 @@ def _set_first_list_item(self, num_ids, ilvls, list_elements): if not filtered_list_elements: continue first_el = filtered_list_elements[0] - first_el.is_first_list_item = True + first_el.attrib['is_first_list_item'] = str(True) def _set_last_list_item(self, num_ids, list_elements): # Find last list elements. Only mark list tags as the last list tag if @@ -324,7 +233,31 @@ def _set_last_list_item(self, num_ids, list_elements): if not filtered_list_elements: continue last_el = filtered_list_elements[-1] - last_el.is_last_list_item_in_root = True + last_el.attrib['is_last_list_item_in_root'] = str(True) + + def _set_table_attributes(self, el): + tables = el.find_all('tbl') + for table in tables: + rows = self._filter_children(table, ['tr']) + if rows is None: + continue + for i, row in enumerate(rows): + tcs = self._filter_children(row, ['tc']) + for j, child in enumerate(tcs): + child.attrib['row_index'] = str(i) + child.attrib['column_index'] = str(j) + v_merge = child.find_first('vMerge') + if ( + v_merge is not None and + 'continue' == v_merge.get('val', '') + ): + child.attrib['vmerge_continue'] = str(True) + + def _set_is_in_table(self, el): + paragraph_elements = el.find_all('p') + for p in paragraph_elements: + if p.find_ancestor_with_tag('tc') is not None: + p.attrib['is_in_table'] = str(True) def _set_headers(self, elements): # These are the styles for headers and what the html tag should be if @@ -351,11 +284,11 @@ def _set_headers(self, elements): # Check to see if this element is actually a header. if style and style.lower() in headers: # Set all the list item variables to false. 
- element.is_list_item = False - element.is_first_list_item = False - element.is_last_list_item = False + element.attrib['is_list_item'] = str(False) + element.attrib['is_first_list_item'] = str(False) + element.attrib['is_last_list_item'] = str(False) # Prime the heading_level - element.heading_level = headers[style.lower()] + element.attrib['heading_level'] = headers[style.lower()] def _convert_upper_roman(self, body): if not self.convert_root_level_upper_roman: @@ -371,7 +304,8 @@ def _convert_upper_roman(self, body): if root_list_item.num_id in visited_num_ids: continue visited_num_ids.append(root_list_item.num_id) - lst_style = self.get_list_style( + lst_style = get_list_style( + self.numbering_root, root_list_item.num_id.num_id, root_list_item.ilvl, ) @@ -387,11 +321,11 @@ def _convert_upper_roman(self, body): el.ilvl == ilvl ] for list_item in root_upper_roman_list_items: - list_item.is_list_item = False - list_item.is_first_list_item = False - list_item.is_last_list_item = False + list_item.attrib['is_list_item'] = str(False) + list_item.attrib['is_first_list_item'] = str(False) + list_item.attrib['is_last_list_item'] = str(False) - list_item.heading_level = UPPER_ROMAN_TO_HEADING_VALUE + list_item.attrib['heading_level'] = UPPER_ROMAN_TO_HEADING_VALUE # noqa def _set_next(self, body): def _get_children_with_content(el): @@ -409,16 +343,18 @@ def _get_children_with_content(el): def _assign_next(children): # Populate the `next` attribute for all the child elements. for i in range(len(children)): + child_state = {} try: if children[i + 1] is not None: - children[i].next = children[i + 1] + child_state['next'] = children[i + 1] except IndexError: pass try: if children[i - 1] is not None: - children[i].previous = children[i - 1] + child_state['previous'] = children[i - 1] except IndexError: pass + next_previous_states[children[i]] = child_state # Assign next for everything in the root. 
_assign_next(_get_children_with_content(body)) @@ -426,29 +362,186 @@ def _assign_next(children): for tc in body.find_all('tc'): _assign_next(_get_children_with_content(tc)) - def parse_begin(self, el): - self._set_list_attributes(el) - self._set_table_attributes(el) - self._set_is_in_table(el) - - # Find the first and last li elements - body = el.find_first('body') - list_elements = [ - child for child in body.find_all('p') - if child.is_list_item - ] - num_ids = set([i.num_id for i in list_elements]) - ilvls = set([i.ilvl for i in list_elements]) + def _init( + self, + add_attributes=False, + convert_root_level_upper_roman=False, + styles_dict=None, + numbering_root=None, + *args, + **kwargs): + super(PydocxLXMLParser, self)._init(*args, **kwargs) + if add_attributes: + self.convert_root_level_upper_roman = convert_root_level_upper_roman # noqa + self.styles_dict = styles_dict + self.numbering_root = numbering_root + self._set_list_attributes(self) + self._set_table_attributes(self) + self._set_is_in_table(self) + + list_elements = [ + child for child in self.find_all('p') + if child.is_list_item + ] + num_ids = set([i.num_id for i in list_elements]) + ilvls = set([i.ilvl for i in list_elements]) + self._set_first_list_item(num_ids, ilvls, list_elements) + self._set_last_list_item(num_ids, list_elements) + + # Find the first and last li elements + body = self.find_first('body') + p_elements = [ + child for child in body.find_all('p') + ] + self._set_headers(p_elements) + self._convert_upper_roman(body) + self._set_next(body) - self._set_first_list_item(num_ids, ilvls, list_elements) - self._set_last_list_item(num_ids, list_elements) - p_elements = [ - child for child in body.find_all('p') - ] - self._set_headers(p_elements) - self._convert_upper_roman(body) - self._set_next(body) +try: + from lxml import etree + parser_lookup = etree.ElementDefaultClassLookup(element=PydocxLXMLParser) + parser = etree.XMLParser() + parser.set_element_class_lookup(parser_lookup) 
+except ImportError: + pass + + +def remove_namespaces(document): + # I can't really find a good way to do this with lxml. Se just do it with + # xml. + import xml.etree.ElementTree as xml_etree + root = xml_etree.fromstring(document) + for child in el_iter(root): + child.tag = child.tag.split("}")[1] + child.attrib = dict( + (k.split("}")[-1], v) + for k, v in child.attrib.items() + ) + return xml_etree.tostring(root) + +# Add some helper functions to Element to make it slightly more readable + + +def el_iter(el): + """ + Go through all elements + """ + try: + return el.iter() + except AttributeError: + return el.findall('.//*') + + +@contextmanager +def ZipFile(path): # This is not needed in python 3.2+ + f = zipfile.ZipFile(path) + yield f + f.close() + + +class DocxParser: + __metaclass__ = ABCMeta + + def _build_data(self, path, *args, **kwargs): + with ZipFile(path) as f: + self.document_text = f.read('word/document.xml') + self.styles_text = f.read('word/styles.xml') + try: + self.fonts = f.read('/word/fontTable.xml') + except KeyError: + self.fonts = None + try: # Only present if there are lists + self.numbering_text = f.read('word/numbering.xml') + except KeyError: + self.numbering_text = None + try: # Only present if there are comments + self.comment_text = f.read('word/comments.xml') + except KeyError: + self.comment_text = None + self.relationship_text = f.read('word/_rels/document.xml.rels') + zipped_image_files = [ + e for e in f.infolist() + if e.filename.startswith('word/media/') + ] + for e in zipped_image_files: + self._image_data[e.filename] = f.read(e.filename) + + self.root = fromstring( + remove_namespaces(self.document_text), # remove the namespaces + parser, + ) + self.numbering_root = None + if self.numbering_text: + self.numbering_root = fromstring( + remove_namespaces(self.numbering_text), + parser, + ) + self.comment_root = None + if self.comment_text: + self.comment_root = fromstring( + remove_namespaces(self.comment_text), + parser, + 
) + + def _parse_styles(self): + tree = fromstring( + remove_namespaces(self.styles_text), + parser, + ) + result = {} + for style in tree.find_all('style'): + style_val = style.find_first('name').attrib['val'] + result[style.attrib['styleId']] = style_val + return result + + def _parse_rels_root(self): + tree = fromstring(self.relationship_text) + rels_dict = {} + for el in tree: + rId = el.get('Id') + target = el.get('Target') + rels_dict[rId] = target + return rels_dict + + def __init__( + self, + path, + convert_root_level_upper_roman=False, + *args, + **kwargs): + self._parsed = '' + self.block_text = '' + self.page_width = 0 + self.convert_root_level_upper_roman = convert_root_level_upper_roman + self._image_data = {} + self._build_data(path, *args, **kwargs) + + #divide by 20 to get to pt (Office works in 20th's of a point) + """ + see http://msdn.microsoft.com/en-us/library/documentformat + .openxml.wordprocessing.indentation.aspx + """ + if self.root.find_first('pgSz') is not None: + self.page_width = int( + self.root.find_first('pgSz').attrib['w'] + ) / 20 + + #all blank when we init + self.comment_store = None + self.visited = [] + self.list_depth = 0 + self.rels_dict = self._parse_rels_root() + self.styles_dict = self._parse_styles() + self.parse_begin(self.root) # begin to parse + + def parse_begin(self, el): + el._init( + add_attributes=True, + convert_root_level_upper_roman=self.convert_root_level_upper_roman, + styles_dict=self.styles_dict, + numbering_root=self.numbering_root, + ) self._parsed += self.parse(el) def parse(self, el): @@ -520,6 +613,9 @@ def parse_list(self, el, text): return self.parse_table_cell_contents(el, parsed) return parsed + def get_list_style(self, num_id, ilvl): + return get_list_style(self.numbering_root, num_id, ilvl) + def _build_list(self, el, text): # Get the list style for the pending list. 
lst_style = self.get_list_style( @@ -747,6 +843,8 @@ def _get_rowspan(self, el, v_merge): tbl = el.find_ancestor_with_tag('tbl') # We only want table cells that have a higher row_index that is greater # than the current_row and that are on the current_col + if tbl is None: + return '' tcs = [ tc for tc in tbl.find_all('tc') if tc.row_index >= current_row and @@ -913,24 +1011,6 @@ def parse_r(self, el, parsed): text = fn(text) return text - def get_list_style(self, num_id, ilvl): - ids = self.numbering_root.find_all('num') - for _id in ids: - if _id.attrib['numId'] != num_id: - continue - abstractid = _id.find('abstractNumId') - abstractid = abstractid.attrib['val'] - style_information = self.numbering_root.find_all( - 'abstractNum', - ) - for info in style_information: - if info.attrib['abstractNumId'] == abstractid: - for i in el_iter(info): - if 'ilvl' in i.attrib and i.attrib['ilvl'] != ilvl: - continue - if i.find('numFmt') is not None: - return i.find('numFmt').attrib['val'] - @property def parsed(self): return self._parsed diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index 7600c20c..5895086d 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -7,7 +7,8 @@ remove_namespaces, # We are only importing this from DocxParse since we have added methods to # it there. 
- ElementTree, + fromstring, + PydocxLXMLParser, ) from unittest import TestCase @@ -78,6 +79,14 @@ def smart_space(match): ) return html.strip() +try: + from lxml import etree + parser_lookup = etree.ElementDefaultClassLookup(element=PydocxLXMLParser) + PARSER = etree.XMLParser() + PARSER.set_element_class_lookup(parser_lookup) +except ImportError: + pass + class XMLDocx2Html(Docx2Html): """ @@ -102,11 +111,13 @@ def _build_data( self._image_data['word/%s' % value] = 'word/%s' % value if numbering_dict is None: numbering_dict = {} + self.numbering_root = None self.numbering_dict = numbering_dict # Intentionally not calling super if document_xml is not None: - self.root = ElementTree.fromstring( + self.root = fromstring( remove_namespaces(document_xml), + PARSER, ) self.zip_path = '' diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 47099c0b..bd8a0db8 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -6,11 +6,20 @@ from pydocx.tests.document_builder import DocxBuilder as DXB from pydocx.tests import ( - ElementTree, + fromstring, XMLDocx2Html, _TranslationTestCase, remove_namespaces, ) +from pydocx.DocxParser import PydocxLXMLParser + +try: + from lxml import etree + parser_lookup = etree.ElementDefaultClassLookup(element=PydocxLXMLParser) + PARSER = etree.XMLParser() + PARSER.set_element_class_lookup(parser_lookup) +except ImportError: + pass class BoldTestCase(_TranslationTestCase): @@ -176,8 +185,9 @@ def test_get_image_id(self): document_xml=self.get_xml(), rels_dict=self.relationship_dict, ) - tree = ElementTree.fromstring( + tree = fromstring( remove_namespaces(self.get_xml()), + PARSER, ) els = [] els.extend(tree.find_all('drawing')) @@ -199,8 +209,9 @@ def test_get_image_sizes(self): document_xml=self.get_xml(), rels_dict=self.relationship_dict, ) - tree = ElementTree.fromstring( + tree = fromstring( remove_namespaces(self.get_xml()), + PARSER, ) els = [] els.extend(tree.find_all('drawing')) From 
9db7b51e33dfab2c2f00d286be639597a1f0805e Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Mon, 3 Jun 2013 17:31:01 -0400 Subject: [PATCH 310/404] refs #43: udpated the reqs --- .travis.yml | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7253ee0c..4251ba15 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ python: - "2.7" script: ./run_tests.sh install: + - python setup.py -q install - pip install -r requirements.txt env: - TRAVIS_EXECUTE_PERFORMANCE=1 diff --git a/setup.py b/setup.py index 74be1e3a..16948016 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ def get_description(): }, scripts=[], zip_safe=False, - install_requires=[], + install_requires=['lxml'], cmdclass={}, classifiers=[ "Development Status :: 3 - Alpha", From 18213fbf12197858b93bdfc795e1f5a691e3caa6 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 3 Jun 2013 18:02:14 -0400 Subject: [PATCH 311/404] fixed init --- pydocx/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index 26057884..62bce556 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -9,9 +9,7 @@ def docx2markdown(path): return Docx2Markdown(path).parsed -<<<<<<< HEAD def docx2latex(path): return Docx2LaTex(path).parsed -======= + VERSION = '0.2.1' ->>>>>>> a0f1daa2821285ba65a44bd30e2f2bcd19d17791 From f817bd0c2dce971c4405025f00c253771f6c0d4c Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 3 Jun 2013 23:48:05 -0400 Subject: [PATCH 312/404] benchmark_test --- main.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 main.py diff --git a/main.py b/main.py new file mode 100644 index 00000000..b20b93a0 --- /dev/null +++ b/main.py @@ -0,0 +1,31 @@ +from pydocx import docx2html, Docx2Html, docx2latex +from bs4 import BeautifulSoup +import xml.etree.ElementTree as ElementTree +import benchmark + +class Benchmark_Sqrt(benchmark.Benchmark): + 
+ each = 10 + + def test_docx(self): + with open('test.html', 'w') as f: + f.write(docx2html('/Users/samportnow/Documents/bigger_doc.docx').encode('utf-8')) + +if __name__ == '__main__': + benchmark.main(format="markdown", numberFormat="%.4g") + +#with open('test.tex', 'w') as f: +# f.write(docx2latex('/Users/samportnow/Documents/Nosek Articles/HN2012.docx').encode('utf-8')) +# +# +#with open('test.html', 'w') as f: +# f.write(docx2html('/Users/samportnow/Documents/Nosek Articles/HN2012.docx').encode('utf-8')) +##with open('test.html', 'w') as f: +## f.write(docx2html('/Users/samportnow/Documents/Nosek Articles/Letal2013.docx').encode('utf-8')) +##print docx2html('/Users/samportnow/Documents/policy.docx') +## +#print BeautifulSoup( +# ElementTree.tostring( +# Docx2Html('/Users/samportnow/Documents/landscape.docx').root, +# ), +#).prettify() \ No newline at end of file From c9a13041732ce3cab8ce919aa4fed5324299f696 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 3 Jun 2013 23:48:35 -0400 Subject: [PATCH 313/404] removed unncessary --- main.py | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 main.py diff --git a/main.py b/main.py deleted file mode 100644 index b20b93a0..00000000 --- a/main.py +++ /dev/null @@ -1,31 +0,0 @@ -from pydocx import docx2html, Docx2Html, docx2latex -from bs4 import BeautifulSoup -import xml.etree.ElementTree as ElementTree -import benchmark - -class Benchmark_Sqrt(benchmark.Benchmark): - - each = 10 - - def test_docx(self): - with open('test.html', 'w') as f: - f.write(docx2html('/Users/samportnow/Documents/bigger_doc.docx').encode('utf-8')) - -if __name__ == '__main__': - benchmark.main(format="markdown", numberFormat="%.4g") - -#with open('test.tex', 'w') as f: -# f.write(docx2latex('/Users/samportnow/Documents/Nosek Articles/HN2012.docx').encode('utf-8')) -# -# -#with open('test.html', 'w') as f: -# f.write(docx2html('/Users/samportnow/Documents/Nosek Articles/HN2012.docx').encode('utf-8')) 
-##with open('test.html', 'w') as f: -## f.write(docx2html('/Users/samportnow/Documents/Nosek Articles/Letal2013.docx').encode('utf-8')) -##print docx2html('/Users/samportnow/Documents/policy.docx') -## -#print BeautifulSoup( -# ElementTree.tostring( -# Docx2Html('/Users/samportnow/Documents/landscape.docx').root, -# ), -#).prettify() \ No newline at end of file From 20eb9bca24aedb2f03d997bb0401ac5740585938 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 3 Jun 2013 23:49:53 -0400 Subject: [PATCH 314/404] updating --- pydocx/DocxParser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 79e8e86f..52f5fe8d 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -287,7 +287,8 @@ def _set_table_attributes(self, el): continue for i, row in enumerate(rows): tcs = self._filter_children(row, ['tc']) - tcs[-1].is_last_row_item = True + if tcs: + tcs[-1].is_last_row_item = True for j, child in enumerate(tcs): child.row_index = i child.column_index = j From 7f24073550727399a7cbe9b24573de419ddba47c Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 3 Jun 2013 23:50:31 -0400 Subject: [PATCH 315/404] updating --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 81a14d38..33954f41 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,2 +1,2 @@ -Sam Protnow +Sam Portnow Jason Ward From b2d70b6e8db1585a02b67e8c5f5c1a570107c1d4 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 10:56:31 -0400 Subject: [PATCH 316/404] refs #43: got the last of the failing unit tests passing --- pydocx/tests/__init__.py | 8 ++++++-- pydocx/tests/document_builder.py | 12 ++++++++++++ pydocx/tests/templates/numbering.xml | 23 +++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 pydocx/tests/templates/numbering.xml diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index 5895086d..5ce6acc5 100644 --- 
a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -10,6 +10,7 @@ fromstring, PydocxLXMLParser, ) +from pydocx.tests.document_builder import DocxBuilder as DXB from unittest import TestCase STYLE = ( @@ -109,9 +110,12 @@ def _build_data( if rels_dict: for value in rels_dict.values(): self._image_data['word/%s' % value] = 'word/%s' % value - if numbering_dict is None: - numbering_dict = {} self.numbering_root = None + if numbering_dict is not None: + self.numbering_root = fromstring( + remove_namespaces(DXB.numbering(numbering_dict)), + PARSER, + ) self.numbering_dict = numbering_dict # Intentionally not calling super if document_xml is not None: diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index f71e6f06..73fd333c 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -8,6 +8,7 @@ 'insert': 'insert.xml', 'linebreak': 'linebreak.xml', 'main': 'base.xml', + 'numbering': 'numbering.xml', 'p': 'p.xml', 'pict': 'pict.xml', 'r': 'r.xml', @@ -240,3 +241,14 @@ def style(self, style_id, value): } return template.render(**kwargs) + + @classmethod + def numbering(self, numbering_dict): + print numbering_dict + template = env.get_template(templates['numbering']) + + kwargs = { + 'numbering_dict': numbering_dict, + } + + return template.render(**kwargs) diff --git a/pydocx/tests/templates/numbering.xml b/pydocx/tests/templates/numbering.xml new file mode 100644 index 00000000..4eaac3cc --- /dev/null +++ b/pydocx/tests/templates/numbering.xml @@ -0,0 +1,23 @@ + + + {% for num_id, ilvl_data in numbering_dict.items() %} + + {% for ilvl, format in ilvl_data.items() %} + + + + + + + + + + {% endfor %} + + {% endfor %} + {% for num_id in numbering_dict %} + + + + {% endfor %} + From cb25ea06062c465afe215920964fee66ef301b8c Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 11:22:02 -0400 Subject: [PATCH 317/404] refs #43: Big refactor. Moved all the new lxml parser to its own file. 
--- pydocx/DocxParser.py | 432 +-------------------------------------- pydocx/tests/__init__.py | 26 +-- pydocx/tests/test_xml.py | 22 +- pydocx/utils.py | 411 +++++++++++++++++++++++++++++++++++++ 4 files changed, 426 insertions(+), 465 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index ba66c21a..cf1e6215 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -1,16 +1,12 @@ import logging import os -try: - from lxml.etree import fromstring - from lxml.etree import ElementBase -except ImportError: - from xml.etree.ElementTree import fromstring import zipfile from abc import abstractmethod, ABCMeta from contextlib import contextmanager -from pydocx.utils import NamespacedNumId +from pydocx.utils import parse_xml_from_string, get_list_style + logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger("NewParser") @@ -18,18 +14,6 @@ # http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx EMUS_PER_PIXEL = 9525 USE_ALIGNMENTS = True -TAGS_CONTAINING_CONTENT = ( - 't', - 'pict', - 'drawing', - 'delText', - 'ins', -) -TAGS_HOLDING_CONTENT_TAGS = ( - 'p', - 'tbl', - 'sdt', -) UPPER_ROMAN_TO_HEADING_VALUE = 'h2' JUSTIFY_CENTER = 'center' @@ -40,399 +24,9 @@ INDENTATION_LEFT = 'left' INDENTATION_FIRST_LINE = 'firstLine' - -# Since I can't actually set attribute (reliably) on each element. I have to -# keep track of the next and previous elements here. -next_previous_states = {} - - -def get_list_style(numbering_root, num_id, ilvl): - # This is needed on both the custom lxml parser and the pydocx parser. So - # make it a function. 
- ids = numbering_root.find_all('num') - for _id in ids: - if _id.attrib['numId'] != num_id: - continue - abstractid = _id.find('abstractNumId') - abstractid = abstractid.attrib['val'] - style_information = numbering_root.find_all( - 'abstractNum', - ) - for info in style_information: - if info.attrib['abstractNumId'] == abstractid: - for i in el_iter(info): - if ( - 'ilvl' in i.attrib and - i.attrib['ilvl'] != ilvl): - continue - if i.find('numFmt') is not None: - return i.find('numFmt').attrib['val'] - - -class PydocxLXMLParser(ElementBase): - - @property - def is_first_list_item(self): - return self.attrib.get('is_first_list_item', '') == str(True) - - @property - def is_last_list_item_in_root(self): - return self.attrib.get('is_last_list_item_in_root', '') == str(True) - - @property - def is_list_item(self): - return self.attrib.get('is_list_item', '') == str(True) - - @property - def num_id(self): - if not self.is_list_item: - return None - return self._generate_num_id(self) - - @property - def ilvl(self): - return self.attrib.get('ilvl') - - @property - def heading_level(self): - return self.attrib.get('heading_level') - - @property - def is_in_table(self): - return self.attrib.get('is_in_table') - - @property - def row_index(self): - return self.attrib.get('row_index') - - @property - def column_index(self): - return self.attrib.get('column_index') - - @property - def vmerge_continue(self): - return self.attrib.get('vmerge_continue') - - @property - def next(self): - if self not in next_previous_states: - return - return next_previous_states[self].get('next') - - @property - def previous(self): - if self not in next_previous_states: - return - return next_previous_states[self].get('previous') - - def find_first(self, tag): - """ - Find the first occurrence of a tag beneath the current element. 
- """ - return self.find('.//' + tag) - - def find_all(self, tag): - """ - Find all occurrences of a tag - """ - return self.findall('.//' + tag) - - def find_ancestor_with_tag(self, tag): - """ - Find the first ancestor with that is a `tag`. - """ - el = self - while el.getparent() is not None: - el = el.getparent() - if el.tag == tag: - return el - return None - - def has_descendant_with_tag(self, tag): - """ - Determine if there is a child ahead in the element tree. - """ - # Get child. stop at first child. - return True if self.find('.//' + tag) is not None else False - - def has_child(self, tag): - """ - Determine if current element has a child. Stop at first child. - """ - return True if self.find(tag) is not None else False - - def _filter_children(self, element, tags): - return [ - el for el in element.getchildren() - if el.tag in tags - ] - - def _set_list_attributes(self, el): - list_elements = el.find_all('numId') - for li in list_elements: - parent = li.find_ancestor_with_tag('p') - # Deleted text in a list will have a numId but no ilvl. - if parent is None: - continue - if parent.find_first('ilvl') is None: - continue - parent.attrib['is_list_item'] = str(True) - parent.attrib['num_id'] = str(self._generate_num_id(parent)) - parent.attrib['ilvl'] = parent.find_first('ilvl').attrib['val'] - - def _generate_num_id(self, el): - ''' - Fun fact: It is possible to have a list in the root, that holds a table - that holds a list and for both lists to have the same numId. When this - happens we should namespace the nested list with the number of tables - it is in to ensure it is considered a new list. Otherwise all sorts of - terrible html gets generated. - ''' - num_id = el.find_first('numId').attrib['val'] - - # First, go up the parent until we get None and count the number of - # tables there are. 
- num_tables = 0 - while el.getparent() is not None: - if el.tag == 'tbl': - num_tables += 1 - el = el.getparent() - return NamespacedNumId( - num_id=num_id, - num_tables=num_tables, - ) - - def _set_first_list_item(self, num_ids, ilvls, list_elements): - # Lists are grouped by having the same `num_id` and `ilvl`. The first - # list item is the first list item found for each `num_id` and `ilvl` - # combination. - for num_id in num_ids: - for ilvl in ilvls: - filtered_list_elements = [ - i for i in list_elements - if ( - i.num_id == num_id and - i.ilvl == ilvl - ) - ] - if not filtered_list_elements: - continue - first_el = filtered_list_elements[0] - first_el.attrib['is_first_list_item'] = str(True) - - def _set_last_list_item(self, num_ids, list_elements): - # Find last list elements. Only mark list tags as the last list tag if - # it is in the root of the document. This is only used to ensure that - # once a root level list is finished we do not roll in the rest of the - # non list elements into the first root level list. 
- for num_id in num_ids: - filtered_list_elements = [ - i for i in list_elements - if i.num_id == num_id - ] - if not filtered_list_elements: - continue - last_el = filtered_list_elements[-1] - last_el.attrib['is_last_list_item_in_root'] = str(True) - - def _set_table_attributes(self, el): - tables = el.find_all('tbl') - for table in tables: - rows = self._filter_children(table, ['tr']) - if rows is None: - continue - for i, row in enumerate(rows): - tcs = self._filter_children(row, ['tc']) - for j, child in enumerate(tcs): - child.attrib['row_index'] = str(i) - child.attrib['column_index'] = str(j) - v_merge = child.find_first('vMerge') - if ( - v_merge is not None and - 'continue' == v_merge.get('val', '') - ): - child.attrib['vmerge_continue'] = str(True) - - def _set_is_in_table(self, el): - paragraph_elements = el.find_all('p') - for p in paragraph_elements: - if p.find_ancestor_with_tag('tc') is not None: - p.attrib['is_in_table'] = str(True) - - def _set_headers(self, elements): - # These are the styles for headers and what the html tag should be if - # we have one. - headers = { - 'heading 1': 'h1', - 'heading 2': 'h2', - 'heading 3': 'h3', - 'heading 4': 'h4', - 'heading 5': 'h5', - 'heading 6': 'h6', - 'heading 7': 'h6', - 'heading 8': 'h6', - 'heading 9': 'h6', - 'heading 10': 'h6', - } - for element in elements: - # This element is using the default style which is not a heading. - if element.find_first('pStyle') is None: - continue - style = element.find_first('pStyle').attrib.get('val', '') - style = self.styles_dict.get(style) - - # Check to see if this element is actually a header. - if style and style.lower() in headers: - # Set all the list item variables to false. 
- element.attrib['is_list_item'] = str(False) - element.attrib['is_first_list_item'] = str(False) - element.attrib['is_last_list_item'] = str(False) - # Prime the heading_level - element.attrib['heading_level'] = headers[style.lower()] - - def _convert_upper_roman(self, body): - if not self.convert_root_level_upper_roman: - return - first_root_list_items = [ - # Only root level elements. - el for el in body.getchildren() - # And only first_list_items - if el.is_first_list_item - ] - visited_num_ids = [] - for root_list_item in first_root_list_items: - if root_list_item.num_id in visited_num_ids: - continue - visited_num_ids.append(root_list_item.num_id) - lst_style = get_list_style( - self.numbering_root, - root_list_item.num_id.num_id, - root_list_item.ilvl, - ) - if lst_style != 'upperRoman': - continue - ilvl = min( - el.ilvl for el in body.find_all('p') - if el.num_id == root_list_item.num_id - ) - root_upper_roman_list_items = [ - el for el in body.find_all('p') - if el.num_id == root_list_item.num_id and - el.ilvl == ilvl - ] - for list_item in root_upper_roman_list_items: - list_item.attrib['is_list_item'] = str(False) - list_item.attrib['is_first_list_item'] = str(False) - list_item.attrib['is_last_list_item'] = str(False) - - list_item.attrib['heading_level'] = UPPER_ROMAN_TO_HEADING_VALUE # noqa - - def _set_next(self, body): - def _get_children_with_content(el): - # We only care about children if they have text in them. - children = [] - for child in self._filter_children(el, TAGS_HOLDING_CONTENT_TAGS): - has_descendant_with_tag = any( - child.has_descendant_with_tag(tag) for - tag in TAGS_CONTAINING_CONTENT - ) - if has_descendant_with_tag: - children.append(child) - return children - - def _assign_next(children): - # Populate the `next` attribute for all the child elements. 
- for i in range(len(children)): - child_state = {} - try: - if children[i + 1] is not None: - child_state['next'] = children[i + 1] - except IndexError: - pass - try: - if children[i - 1] is not None: - child_state['previous'] = children[i - 1] - except IndexError: - pass - next_previous_states[children[i]] = child_state - # Assign next for everything in the root. - _assign_next(_get_children_with_content(body)) - - # In addition set next for everything in table cells. - for tc in body.find_all('tc'): - _assign_next(_get_children_with_content(tc)) - - def _init( - self, - add_attributes=False, - convert_root_level_upper_roman=False, - styles_dict=None, - numbering_root=None, - *args, - **kwargs): - super(PydocxLXMLParser, self)._init(*args, **kwargs) - if add_attributes: - self.convert_root_level_upper_roman = convert_root_level_upper_roman # noqa - self.styles_dict = styles_dict - self.numbering_root = numbering_root - self._set_list_attributes(self) - self._set_table_attributes(self) - self._set_is_in_table(self) - - list_elements = [ - child for child in self.find_all('p') - if child.is_list_item - ] - num_ids = set([i.num_id for i in list_elements]) - ilvls = set([i.ilvl for i in list_elements]) - self._set_first_list_item(num_ids, ilvls, list_elements) - self._set_last_list_item(num_ids, list_elements) - - # Find the first and last li elements - body = self.find_first('body') - p_elements = [ - child for child in body.find_all('p') - ] - self._set_headers(p_elements) - self._convert_upper_roman(body) - self._set_next(body) - - -try: - from lxml import etree - parser_lookup = etree.ElementDefaultClassLookup(element=PydocxLXMLParser) - parser = etree.XMLParser() - parser.set_element_class_lookup(parser_lookup) -except ImportError: - pass - - -def remove_namespaces(document): - # I can't really find a good way to do this with lxml. Se just do it with - # xml. 
- import xml.etree.ElementTree as xml_etree - root = xml_etree.fromstring(document) - for child in el_iter(root): - child.tag = child.tag.split("}")[1] - child.attrib = dict( - (k.split("}")[-1], v) - for k, v in child.attrib.items() - ) - return xml_etree.tostring(root) - # Add some helper functions to Element to make it slightly more readable -def el_iter(el): - """ - Go through all elements - """ - try: - return el.iter() - except AttributeError: - return el.findall('.//*') - - @contextmanager def ZipFile(path): # This is not needed in python 3.2+ f = zipfile.ZipFile(path) @@ -467,28 +61,16 @@ def _build_data(self, path, *args, **kwargs): for e in zipped_image_files: self._image_data[e.filename] = f.read(e.filename) - self.root = fromstring( - remove_namespaces(self.document_text), # remove the namespaces - parser, - ) + self.root = parse_xml_from_string(self.document_text) self.numbering_root = None if self.numbering_text: - self.numbering_root = fromstring( - remove_namespaces(self.numbering_text), - parser, - ) + self.numbering_root = parse_xml_from_string(self.numbering_text) self.comment_root = None if self.comment_text: - self.comment_root = fromstring( - remove_namespaces(self.comment_text), - parser, - ) + self.comment_root = parse_xml_from_string(self.comment_text) def _parse_styles(self): - tree = fromstring( - remove_namespaces(self.styles_text), - parser, - ) + tree = parse_xml_from_string(self.styles_text) result = {} for style in tree.find_all('style'): style_val = style.find_first('name').attrib['val'] @@ -496,7 +78,7 @@ def _parse_styles(self): return result def _parse_rels_root(self): - tree = fromstring(self.relationship_text) + tree = parse_xml_from_string(self.relationship_text) rels_dict = {} for el in tree: rId = el.get('Id') diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index 5ce6acc5..67a79b63 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -3,12 +3,8 @@ from contextlib import contextmanager from 
pydocx.parsers.Docx2Html import Docx2Html -from pydocx.DocxParser import ( - remove_namespaces, - # We are only importing this from DocxParse since we have added methods to - # it there. - fromstring, - PydocxLXMLParser, +from pydocx.utils import ( + parse_xml_from_string, ) from pydocx.tests.document_builder import DocxBuilder as DXB from unittest import TestCase @@ -80,14 +76,6 @@ def smart_space(match): ) return html.strip() -try: - from lxml import etree - parser_lookup = etree.ElementDefaultClassLookup(element=PydocxLXMLParser) - PARSER = etree.XMLParser() - PARSER.set_element_class_lookup(parser_lookup) -except ImportError: - pass - class XMLDocx2Html(Docx2Html): """ @@ -112,17 +100,13 @@ def _build_data( self._image_data['word/%s' % value] = 'word/%s' % value self.numbering_root = None if numbering_dict is not None: - self.numbering_root = fromstring( - remove_namespaces(DXB.numbering(numbering_dict)), - PARSER, + self.numbering_root = parse_xml_from_string( + DXB.numbering(numbering_dict), ) self.numbering_dict = numbering_dict # Intentionally not calling super if document_xml is not None: - self.root = fromstring( - remove_namespaces(document_xml), - PARSER, - ) + self.root = parse_xml_from_string(document_xml) self.zip_path = '' # This is the standard page width for a word document, Also the page diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index bd8a0db8..7f3320bc 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -6,20 +6,10 @@ from pydocx.tests.document_builder import DocxBuilder as DXB from pydocx.tests import ( - fromstring, XMLDocx2Html, _TranslationTestCase, - remove_namespaces, ) -from pydocx.DocxParser import PydocxLXMLParser - -try: - from lxml import etree - parser_lookup = etree.ElementDefaultClassLookup(element=PydocxLXMLParser) - PARSER = etree.XMLParser() - PARSER.set_element_class_lookup(parser_lookup) -except ImportError: - pass +from pydocx.utils import parse_xml_from_string class 
BoldTestCase(_TranslationTestCase): @@ -185,10 +175,7 @@ def test_get_image_id(self): document_xml=self.get_xml(), rels_dict=self.relationship_dict, ) - tree = fromstring( - remove_namespaces(self.get_xml()), - PARSER, - ) + tree = parse_xml_from_string(self.get_xml()) els = [] els.extend(tree.find_all('drawing')) els.extend(tree.find_all('pict')) @@ -209,10 +196,7 @@ def test_get_image_sizes(self): document_xml=self.get_xml(), rels_dict=self.relationship_dict, ) - tree = fromstring( - remove_namespaces(self.get_xml()), - PARSER, - ) + tree = parse_xml_from_string(self.get_xml()) els = [] els.extend(tree.find_all('drawing')) els.extend(tree.find_all('pict')) diff --git a/pydocx/utils.py b/pydocx/utils.py index c7e0063c..af3d7701 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -1,3 +1,68 @@ +from lxml import etree + + +UPPER_ROMAN_TO_HEADING_VALUE = 'h2' +TAGS_CONTAINING_CONTENT = ( + 't', + 'pict', + 'drawing', + 'delText', + 'ins', +) +TAGS_HOLDING_CONTENT_TAGS = ( + 'p', + 'tbl', + 'sdt', +) + + +def el_iter(el): + """ + Go through all elements + """ + try: + return el.iter() + except AttributeError: + return el.findall('.//*') + + +def remove_namespaces(document): + # I can't really find a good way to do this with lxml. Se just do it with + # xml. + import xml.etree.ElementTree as xml_etree + root = xml_etree.fromstring(document) + for child in el_iter(root): + child.tag = child.tag.split("}")[1] + child.attrib = dict( + (k.split("}")[-1], v) + for k, v in child.attrib.items() + ) + return xml_etree.tostring(root) + + +def get_list_style(numbering_root, num_id, ilvl): + # This is needed on both the custom lxml parser and the pydocx parser. So + # make it a function. 
+ ids = numbering_root.find_all('num') + for _id in ids: + if _id.attrib['numId'] != num_id: + continue + abstractid = _id.find('abstractNumId') + abstractid = abstractid.attrib['val'] + style_information = numbering_root.find_all( + 'abstractNum', + ) + for info in style_information: + if info.attrib['abstractNumId'] == abstractid: + for i in el_iter(info): + if ( + 'ilvl' in i.attrib and + i.attrib['ilvl'] != ilvl): + continue + if i.find('numFmt') is not None: + return i.find('numFmt').attrib['val'] + + class NamespacedNumId(object): def __init__(self, num_id, num_tables, *args, **kwargs): self._num_id = num_id @@ -25,3 +90,349 @@ def __ne__(self, other): @property def num_id(self): return self._num_id + + +# Since I can't actually set attribute (reliably) on each element. I have to +# keep track of the next and previous elements here. +next_previous_states = {} + + +class PydocxLXMLParser(etree.ElementBase): + + @property + def is_first_list_item(self): + return self.attrib.get('is_first_list_item', '') == str(True) + + @property + def is_last_list_item_in_root(self): + return self.attrib.get('is_last_list_item_in_root', '') == str(True) + + @property + def is_list_item(self): + return self.attrib.get('is_list_item', '') == str(True) + + @property + def num_id(self): + if not self.is_list_item: + return None + return self._generate_num_id(self) + + @property + def ilvl(self): + return self.attrib.get('ilvl') + + @property + def heading_level(self): + return self.attrib.get('heading_level') + + @property + def is_in_table(self): + return self.attrib.get('is_in_table') + + @property + def row_index(self): + return self.attrib.get('row_index') + + @property + def column_index(self): + return self.attrib.get('column_index') + + @property + def vmerge_continue(self): + return self.attrib.get('vmerge_continue') + + @property + def next(self): + if self not in next_previous_states: + return + return next_previous_states[self].get('next') + + @property + def 
previous(self): + if self not in next_previous_states: + return + return next_previous_states[self].get('previous') + + def find_first(self, tag): + """ + Find the first occurrence of a tag beneath the current element. + """ + return self.find('.//' + tag) + + def find_all(self, tag): + """ + Find all occurrences of a tag + """ + return self.findall('.//' + tag) + + def find_ancestor_with_tag(self, tag): + """ + Find the first ancestor with that is a `tag`. + """ + el = self + while el.getparent() is not None: + el = el.getparent() + if el.tag == tag: + return el + return None + + def has_descendant_with_tag(self, tag): + """ + Determine if there is a child ahead in the element tree. + """ + # Get child. stop at first child. + return True if self.find('.//' + tag) is not None else False + + def has_child(self, tag): + """ + Determine if current element has a child. Stop at first child. + """ + return True if self.find(tag) is not None else False + + def _filter_children(self, element, tags): + return [ + el for el in element.getchildren() + if el.tag in tags + ] + + def _set_list_attributes(self, el): + list_elements = el.find_all('numId') + for li in list_elements: + parent = li.find_ancestor_with_tag('p') + # Deleted text in a list will have a numId but no ilvl. + if parent is None: + continue + if parent.find_first('ilvl') is None: + continue + parent.attrib['is_list_item'] = str(True) + parent.attrib['num_id'] = str(self._generate_num_id(parent)) + parent.attrib['ilvl'] = parent.find_first('ilvl').attrib['val'] + + def _generate_num_id(self, el): + ''' + Fun fact: It is possible to have a list in the root, that holds a table + that holds a list and for both lists to have the same numId. When this + happens we should namespace the nested list with the number of tables + it is in to ensure it is considered a new list. Otherwise all sorts of + terrible html gets generated. 
+ ''' + num_id = el.find_first('numId').attrib['val'] + + # First, go up the parent until we get None and count the number of + # tables there are. + num_tables = 0 + while el.getparent() is not None: + if el.tag == 'tbl': + num_tables += 1 + el = el.getparent() + return NamespacedNumId( + num_id=num_id, + num_tables=num_tables, + ) + + def _set_first_list_item(self, num_ids, ilvls, list_elements): + # Lists are grouped by having the same `num_id` and `ilvl`. The first + # list item is the first list item found for each `num_id` and `ilvl` + # combination. + for num_id in num_ids: + for ilvl in ilvls: + filtered_list_elements = [ + i for i in list_elements + if ( + i.num_id == num_id and + i.ilvl == ilvl + ) + ] + if not filtered_list_elements: + continue + first_el = filtered_list_elements[0] + first_el.attrib['is_first_list_item'] = str(True) + + def _set_last_list_item(self, num_ids, list_elements): + # Find last list elements. Only mark list tags as the last list tag if + # it is in the root of the document. This is only used to ensure that + # once a root level list is finished we do not roll in the rest of the + # non list elements into the first root level list. 
+ for num_id in num_ids: + filtered_list_elements = [ + i for i in list_elements + if i.num_id == num_id + ] + if not filtered_list_elements: + continue + last_el = filtered_list_elements[-1] + last_el.attrib['is_last_list_item_in_root'] = str(True) + + def _set_table_attributes(self, el): + tables = el.find_all('tbl') + for table in tables: + rows = self._filter_children(table, ['tr']) + if rows is None: + continue + for i, row in enumerate(rows): + tcs = self._filter_children(row, ['tc']) + for j, child in enumerate(tcs): + child.attrib['row_index'] = str(i) + child.attrib['column_index'] = str(j) + v_merge = child.find_first('vMerge') + if ( + v_merge is not None and + 'continue' == v_merge.get('val', '') + ): + child.attrib['vmerge_continue'] = str(True) + + def _set_is_in_table(self, el): + paragraph_elements = el.find_all('p') + for p in paragraph_elements: + if p.find_ancestor_with_tag('tc') is not None: + p.attrib['is_in_table'] = str(True) + + def _set_headers(self, elements): + # These are the styles for headers and what the html tag should be if + # we have one. + headers = { + 'heading 1': 'h1', + 'heading 2': 'h2', + 'heading 3': 'h3', + 'heading 4': 'h4', + 'heading 5': 'h5', + 'heading 6': 'h6', + 'heading 7': 'h6', + 'heading 8': 'h6', + 'heading 9': 'h6', + 'heading 10': 'h6', + } + for element in elements: + # This element is using the default style which is not a heading. + if element.find_first('pStyle') is None: + continue + style = element.find_first('pStyle').attrib.get('val', '') + style = self.styles_dict.get(style) + + # Check to see if this element is actually a header. + if style and style.lower() in headers: + # Set all the list item variables to false. 
+ element.attrib['is_list_item'] = str(False) + element.attrib['is_first_list_item'] = str(False) + element.attrib['is_last_list_item'] = str(False) + # Prime the heading_level + element.attrib['heading_level'] = headers[style.lower()] + + def _convert_upper_roman(self, body): + if not self.convert_root_level_upper_roman: + return + first_root_list_items = [ + # Only root level elements. + el for el in body.getchildren() + # And only first_list_items + if el.is_first_list_item + ] + visited_num_ids = [] + for root_list_item in first_root_list_items: + if root_list_item.num_id in visited_num_ids: + continue + visited_num_ids.append(root_list_item.num_id) + lst_style = get_list_style( + self.numbering_root, + root_list_item.num_id.num_id, + root_list_item.ilvl, + ) + if lst_style != 'upperRoman': + continue + ilvl = min( + el.ilvl for el in body.find_all('p') + if el.num_id == root_list_item.num_id + ) + root_upper_roman_list_items = [ + el for el in body.find_all('p') + if el.num_id == root_list_item.num_id and + el.ilvl == ilvl + ] + for list_item in root_upper_roman_list_items: + list_item.attrib['is_list_item'] = str(False) + list_item.attrib['is_first_list_item'] = str(False) + list_item.attrib['is_last_list_item'] = str(False) + + list_item.attrib['heading_level'] = UPPER_ROMAN_TO_HEADING_VALUE # noqa + + def _set_next(self, body): + def _get_children_with_content(el): + # We only care about children if they have text in them. + children = [] + for child in self._filter_children(el, TAGS_HOLDING_CONTENT_TAGS): + has_descendant_with_tag = any( + child.has_descendant_with_tag(tag) for + tag in TAGS_CONTAINING_CONTENT + ) + if has_descendant_with_tag: + children.append(child) + return children + + def _assign_next(children): + # Populate the `next` attribute for all the child elements. 
+ for i in range(len(children)): + child_state = {} + try: + if children[i + 1] is not None: + child_state['next'] = children[i + 1] + except IndexError: + pass + try: + if children[i - 1] is not None: + child_state['previous'] = children[i - 1] + except IndexError: + pass + next_previous_states[children[i]] = child_state + # Assign next for everything in the root. + _assign_next(_get_children_with_content(body)) + + # In addition set next for everything in table cells. + for tc in body.find_all('tc'): + _assign_next(_get_children_with_content(tc)) + + def _init( + self, + add_attributes=False, + convert_root_level_upper_roman=False, + styles_dict=None, + numbering_root=None, + *args, + **kwargs): + super(PydocxLXMLParser, self)._init(*args, **kwargs) + if add_attributes: + self.convert_root_level_upper_roman = convert_root_level_upper_roman # noqa + self.styles_dict = styles_dict + self.numbering_root = numbering_root + self._set_list_attributes(self) + self._set_table_attributes(self) + self._set_is_in_table(self) + + list_elements = [ + child for child in self.find_all('p') + if child.is_list_item + ] + num_ids = set([i.num_id for i in list_elements]) + ilvls = set([i.ilvl for i in list_elements]) + self._set_first_list_item(num_ids, ilvls, list_elements) + self._set_last_list_item(num_ids, list_elements) + + # Find the first and last li elements + body = self.find_first('body') + p_elements = [ + child for child in body.find_all('p') + ] + self._set_headers(p_elements) + self._convert_upper_roman(body) + self._set_next(body) + + +parser_lookup = etree.ElementDefaultClassLookup(element=PydocxLXMLParser) +parser = etree.XMLParser() +parser.set_element_class_lookup(parser_lookup) + + +def parse_xml_from_string(xml): + return etree.fromstring( + remove_namespaces(xml), + parser, + ) From f55427bc7b566811e47f82b72399fd5b3ae310c3 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 12:13:30 -0400 Subject: [PATCH 318/404] refs #43: No longer storing on 
attrib, storing on a global dictionary now --- pydocx/DocxParser.py | 1 - pydocx/utils.py | 75 +++++++++++++++++++++++--------------------- 2 files changed, 39 insertions(+), 37 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index cf1e6215..baccdaf2 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -14,7 +14,6 @@ # http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx EMUS_PER_PIXEL = 9525 USE_ALIGNMENTS = True -UPPER_ROMAN_TO_HEADING_VALUE = 'h2' JUSTIFY_CENTER = 'center' JUSTIFY_LEFT = 'left' diff --git a/pydocx/utils.py b/pydocx/utils.py index af3d7701..a927359a 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -1,3 +1,4 @@ +from collections import defaultdict from lxml import etree @@ -94,64 +95,66 @@ def num_id(self): # Since I can't actually set attribute (reliably) on each element. I have to # keep track of the next and previous elements here. -next_previous_states = {} +element_meta_data = defaultdict(dict) class PydocxLXMLParser(etree.ElementBase): @property def is_first_list_item(self): - return self.attrib.get('is_first_list_item', '') == str(True) + return element_meta_data[self].get('is_first_list_item', False) @property def is_last_list_item_in_root(self): - return self.attrib.get('is_last_list_item_in_root', '') == str(True) + return element_meta_data[self].get('is_last_list_item_in_root', False) @property def is_list_item(self): - return self.attrib.get('is_list_item', '') == str(True) + return element_meta_data[self].get('is_list_item', False) @property def num_id(self): if not self.is_list_item: return None - return self._generate_num_id(self) + return element_meta_data[self].get('num_id') @property def ilvl(self): - return self.attrib.get('ilvl') + if not self.is_list_item: + return None + return element_meta_data[self].get('ilvl') @property def heading_level(self): - return self.attrib.get('heading_level') + return element_meta_data[self].get('heading_level') @property def is_in_table(self): 
- return self.attrib.get('is_in_table') + return element_meta_data[self].get('is_in_table') @property def row_index(self): - return self.attrib.get('row_index') + return element_meta_data[self].get('row_index') @property def column_index(self): - return self.attrib.get('column_index') + return element_meta_data[self].get('column_index') @property def vmerge_continue(self): - return self.attrib.get('vmerge_continue') + return element_meta_data[self].get('vmerge_continue') @property def next(self): - if self not in next_previous_states: + if self not in element_meta_data: return - return next_previous_states[self].get('next') + return element_meta_data[self].get('next') @property def previous(self): - if self not in next_previous_states: + if self not in element_meta_data: return - return next_previous_states[self].get('previous') + return element_meta_data[self].get('previous') def find_first(self, tag): """ @@ -204,9 +207,11 @@ def _set_list_attributes(self, el): continue if parent.find_first('ilvl') is None: continue - parent.attrib['is_list_item'] = str(True) - parent.attrib['num_id'] = str(self._generate_num_id(parent)) - parent.attrib['ilvl'] = parent.find_first('ilvl').attrib['val'] + element_meta_data[parent]['is_list_item'] = True + element_meta_data[parent]['num_id'] = self._generate_num_id(parent) + element_meta_data[parent]['ilvl'] = parent.find_first( + 'ilvl', + ).attrib['val'] def _generate_num_id(self, el): ''' @@ -246,7 +251,7 @@ def _set_first_list_item(self, num_ids, ilvls, list_elements): if not filtered_list_elements: continue first_el = filtered_list_elements[0] - first_el.attrib['is_first_list_item'] = str(True) + element_meta_data[first_el]['is_first_list_item'] = True def _set_last_list_item(self, num_ids, list_elements): # Find last list elements. 
Only mark list tags as the last list tag if @@ -261,7 +266,7 @@ def _set_last_list_item(self, num_ids, list_elements): if not filtered_list_elements: continue last_el = filtered_list_elements[-1] - last_el.attrib['is_last_list_item_in_root'] = str(True) + element_meta_data[last_el]['is_last_list_item_in_root'] = True def _set_table_attributes(self, el): tables = el.find_all('tbl') @@ -272,20 +277,20 @@ def _set_table_attributes(self, el): for i, row in enumerate(rows): tcs = self._filter_children(row, ['tc']) for j, child in enumerate(tcs): - child.attrib['row_index'] = str(i) - child.attrib['column_index'] = str(j) + element_meta_data[child]['row_index'] = i + element_meta_data[child]['column_index'] = j v_merge = child.find_first('vMerge') if ( v_merge is not None and 'continue' == v_merge.get('val', '') ): - child.attrib['vmerge_continue'] = str(True) + element_meta_data[child]['vmerge_continue'] = True def _set_is_in_table(self, el): paragraph_elements = el.find_all('p') for p in paragraph_elements: if p.find_ancestor_with_tag('tc') is not None: - p.attrib['is_in_table'] = str(True) + element_meta_data[p]['is_in_table'] = True def _set_headers(self, elements): # These are the styles for headers and what the html tag should be if @@ -312,11 +317,11 @@ def _set_headers(self, elements): # Check to see if this element is actually a header. if style and style.lower() in headers: # Set all the list item variables to false. 
- element.attrib['is_list_item'] = str(False) - element.attrib['is_first_list_item'] = str(False) - element.attrib['is_last_list_item'] = str(False) + element_meta_data[element]['is_list_item'] = False + element_meta_data[element]['is_first_list_item'] = False + element_meta_data[element]['is_last_list_item_in_root'] = False # Prime the heading_level - element.attrib['heading_level'] = headers[style.lower()] + element_meta_data[element]['heading_level'] = headers[style.lower()] # noqa def _convert_upper_roman(self, body): if not self.convert_root_level_upper_roman: @@ -349,11 +354,11 @@ def _convert_upper_roman(self, body): el.ilvl == ilvl ] for list_item in root_upper_roman_list_items: - list_item.attrib['is_list_item'] = str(False) - list_item.attrib['is_first_list_item'] = str(False) - list_item.attrib['is_last_list_item'] = str(False) + element_meta_data[list_item]['is_list_item'] = False + element_meta_data[list_item]['is_first_list_item'] = False + element_meta_data[list_item]['is_last_list_item_in_root'] = False # noqa - list_item.attrib['heading_level'] = UPPER_ROMAN_TO_HEADING_VALUE # noqa + element_meta_data[list_item]['heading_level'] = UPPER_ROMAN_TO_HEADING_VALUE # noqa def _set_next(self, body): def _get_children_with_content(el): @@ -371,18 +376,16 @@ def _get_children_with_content(el): def _assign_next(children): # Populate the `next` attribute for all the child elements. for i in range(len(children)): - child_state = {} try: if children[i + 1] is not None: - child_state['next'] = children[i + 1] + element_meta_data[children[i]]['next'] = children[i + 1] # noqa except IndexError: pass try: if children[i - 1] is not None: - child_state['previous'] = children[i - 1] + element_meta_data[children[i]]['previous'] = children[i - 1] # noqa except IndexError: pass - next_previous_states[children[i]] = child_state # Assign next for everything in the root. 
_assign_next(_get_children_with_content(body)) From b48e8e3b319855bb6819b3dd5045e6b1e81d25cd Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 14:34:04 -0400 Subject: [PATCH 319/404] refs #43: Refactor to no longer need the subclassed parser --- pydocx/DocxParser.py | 143 +++++++++------- pydocx/tests/test_xml.py | 10 +- pydocx/utils.py | 345 ++++++++++++++++++--------------------- 3 files changed, 249 insertions(+), 249 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index baccdaf2..e09d090d 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -5,7 +5,16 @@ from abc import abstractmethod, ABCMeta from contextlib import contextmanager -from pydocx.utils import parse_xml_from_string, get_list_style +from pydocx.utils import ( + PydocxPrePorcessor, + get_list_style, + parse_xml_from_string, + find_first, + find_all, + find_ancestor_with_tag, + has_child, + has_descendant_with_tag, +) logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger("NewParser") @@ -35,6 +44,7 @@ def ZipFile(path): # This is not needed in python 3.2+ class DocxParser: __metaclass__ = ABCMeta + pre_processor_class = PydocxPrePorcessor def _build_data(self, path, *args, **kwargs): with ZipFile(path) as f: @@ -71,8 +81,8 @@ def _build_data(self, path, *args, **kwargs): def _parse_styles(self): tree = parse_xml_from_string(self.styles_text) result = {} - for style in tree.find_all('style'): - style_val = style.find_first('name').attrib['val'] + for style in find_all(tree, 'style'): + style_val = find_first(style, 'name').attrib['val'] result[style.attrib['styleId']] = style_val return result @@ -97,15 +107,16 @@ def __init__( self.convert_root_level_upper_roman = convert_root_level_upper_roman self._image_data = {} self._build_data(path, *args, **kwargs) + self.pre_processor = None #divide by 20 to get to pt (Office works in 20th's of a point) """ see http://msdn.microsoft.com/en-us/library/documentformat 
.openxml.wordprocessing.indentation.aspx """ - if self.root.find_first('pgSz') is not None: + if find_first(self.root, 'pgSz') is not None: self.page_width = int( - self.root.find_first('pgSz').attrib['w'] + find_first(self.root, 'pgSz').attrib['w'] ) / 20 #all blank when we init @@ -117,12 +128,12 @@ def __init__( self.parse_begin(self.root) # begin to parse def parse_begin(self, el): - el._init( - add_attributes=True, + self.pre_processor = self.pre_processor_class( convert_root_level_upper_roman=self.convert_root_level_upper_roman, styles_dict=self.styles_dict, numbering_root=self.numbering_root, ) + self.pre_processor.perform_pre_processing(el) self._parsed += self.parse(el) def parse(self, el): @@ -172,7 +183,7 @@ def parse_table_row(self, el, text): return self.table_row(text) def parse_table_cell(self, el, text): - v_merge = el.find_first('vMerge') + v_merge = find_first(el, 'vMerge') if v_merge is not None and 'continue' == v_merge.get('val', ''): return '' colspan = self.get_colspan(el) @@ -190,7 +201,7 @@ def parse_list(self, el, text): self.list_depth += 1 parsed = self._parse_list(el, text) self.list_depth -= 1 - if el.is_in_table: + if self.pre_processor.is_in_table(el): return self.parse_table_cell_contents(el, parsed) return parsed @@ -200,8 +211,8 @@ def get_list_style(self, num_id, ilvl): def _build_list(self, el, text): # Get the list style for the pending list. lst_style = self.get_list_style( - el.num_id.num_id, - el.ilvl, + self.pre_processor.num_id(el).num_id, + self.pre_processor.ilvl(el), ) parsed = text @@ -216,29 +227,29 @@ def _build_list(self, el, text): def _parse_list(self, el, text): parsed = self.parse_list_item(el, text) - num_id = el.num_id - ilvl = el.ilvl + num_id = self.pre_processor.num_id(el) + ilvl = self.pre_processor.ilvl(el) # Everything after this point assumes the first element is not also the # last. If the first element is also the last then early return by # building and returning the completed list. 
- if el.is_last_list_item_in_root: + if self.pre_processor.is_last_list_item_in_root(el): return self._build_list(el, parsed) - next_el = el.next + next_el = self.pre_processor.next(el) def is_same_list(next_el, num_id, ilvl): # Bail if next_el is not an element if next_el is None: return False - if next_el.is_last_list_item_in_root: + if self.pre_processor.is_last_list_item_in_root(next_el): return False # If next_el is not a list item then roll it into the list by # returning True. - if not next_el.is_list_item: + if not self.pre_processor.is_list_item(next_el): return True - if next_el.num_id != num_id: + if self.pre_processor.num_id(next_el) != num_id: # The next element is a new list entirely return False - if next_el.ilvl < ilvl: + if self.pre_processor.ilvl(next_el) < ilvl: # The next element is de-indented, so this is really the last # element in the list return False @@ -247,31 +258,35 @@ def is_same_list(next_el, num_id, ilvl): while is_same_list(next_el, num_id, ilvl): if next_el in self.visited: # Early continue for elements we have already visited. - next_el = next_el.next + next_el = self.pre_processor.next(next_el) continue - if next_el.is_list_item: + if self.pre_processor.is_list_item(next_el): # Reset the ilvl - ilvl = next_el.ilvl + ilvl = self.pre_processor.ilvl(next_el) parsed += self.parse(next_el) - next_el = next_el.next + next_el = self.pre_processor.next(next_el) def should_parse_last_el(last_el, first_el): if last_el is None: return False # Different list - if last_el.num_id != first_el.num_id: + if ( + self.pre_processor.num_id(last_el) != + self.pre_processor.num_id(first_el)): return False # Will be handled when the ilvls do match (nesting issue) - if last_el.ilvl != first_el.ilvl: + if ( + self.pre_processor.ilvl(last_el) != + self.pre_processor.ilvl(first_el)): return False # We only care about last items that have not been parsed before # (first list items are always parsed at the beginning of this # method.) 
return ( - not last_el.is_first_list_item and - last_el.is_last_list_item_in_root + not self.pre_processor.is_first_list_item(last_el) and + self.pre_processor.is_last_list_item_in_root(last_el) ) if should_parse_last_el(next_el, el): parsed += self.parse(next_el) @@ -330,13 +345,13 @@ def parse_p(self, el, text): # apply the classes/styles on p, td, li and h tags instead of inline, # but that is for another ticket. text = self.justification(el, text) - if el.is_first_list_item: + if self.pre_processor.is_first_list_item(el): return self.parse_list(el, text) - if el.heading_level: + if self.pre_processor.heading_level(el): return self.parse_heading(el, text) - if el.is_list_item: + if self.pre_processor.is_list_item(el): return self.parse_list_item(el, text) - if el.is_in_table: + if self.pre_processor.is_in_table(el): return self.parse_table_cell_contents(el, text) parsed = text # No p tags in li tags @@ -353,26 +368,28 @@ def _should_append_break_tag(self, next_el): 'ins', 'delText', ] - if next_el.is_list_item: + if self.pre_processor.is_list_item(next_el): return False - if next_el.previous is None: + if self.pre_processor.previous(next_el) is None: return False tag_is_inline_like = any( - next_el.has_descendant_with_tag(tag) for + has_descendant_with_tag(next_el, tag) for tag in inline_like_tags ) if tag_is_inline_like: return False - if next_el.previous.is_last_list_item_in_root: + if ( + self.pre_processor.is_last_list_item_in_root( + self.pre_processor.previous(next_el))): return False - if next_el.previous.tag not in paragraph_like_tags: + if self.pre_processor.previous(next_el).tag not in paragraph_like_tags: return False if next_el.tag not in paragraph_like_tags: return False return True def parse_heading(self, el, parsed): - return self.heading(parsed, el.heading_level) + return self.heading(parsed, self.pre_processor.heading_level(el)) def parse_list_item(self, el, text): # If for whatever reason we are not currently in a list, then start @@ -388,22 
+405,24 @@ def _should_parse_next_as_content(el): contents of the current el (that way things like tables are actually in the li tag instead of in the ol/ul tag). """ - next_el = el.next + next_el = self.pre_processor.next(el) if next_el is None: return False if ( - not next_el.is_list_item and - not el.is_last_list_item_in_root + not self.pre_processor.is_list_item(next_el) and + not self.pre_processor.is_last_list_item_in_root(el) ): return True - if next_el.is_first_list_item: - if next_el.num_id == el.num_id: + if self.pre_processor.is_first_list_item(next_el): + if ( + self.pre_processor.num_id(next_el) == + self.pre_processor.num_id(el)): return True return False while el is not None: if _should_parse_next_as_content(el): - el = el.next + el = self.pre_processor.next(el) next_elements_content = self.parse(el) if not next_elements_content: continue @@ -416,20 +435,20 @@ def _should_parse_next_as_content(el): return self.list_element(parsed) def _get_rowspan(self, el, v_merge): - current_row = el.row_index - current_col = el.column_index + current_row = self.pre_processor.row_index(el) + current_col = self.pre_processor.column_index(el) rowspan = 1 result = '' - tbl = el.find_ancestor_with_tag('tbl') + tbl = find_ancestor_with_tag(el, 'tbl') # We only want table cells that have a higher row_index that is greater # than the current_row and that are on the current_col if tbl is None: return '' tcs = [ - tc for tc in tbl.find_all('tc') - if tc.row_index >= current_row and - tc.column_index == current_col + tc for tc in find_all(tbl, 'tc') + if self.pre_processor.row_index(tc) >= current_row and + self.pre_processor.column_index(tc) == current_col ] restart_in_v_merge = False if v_merge is not None and 'val' in v_merge.attrib: @@ -438,7 +457,7 @@ def _get_rowspan(self, el, v_merge): def increment_rowspan(tc): if not restart_in_v_merge: return False - if not tc.vmerge_continue: + if not self.pre_processor.vmerge_continue(tc): return False return True @@ -452,23 
+471,23 @@ def increment_rowspan(tc): return str(result) def get_colspan(self, el): - grid_span = el.find_first('gridSpan') + grid_span = find_first(el, 'gridSpan') if grid_span is None: return '' - return el.find_first('gridSpan').attrib['val'] + return find_first(el, 'gridSpan').attrib['val'] def parse_table_cell_contents(self, el, text): parsed = text def _should_parse_next_as_content(el): - next_el = el.next + next_el = self.pre_processor.next(el) if next_el is None: return False - if next_el.is_in_table: + if self.pre_processor.is_in_table(next_el): return True while el is not None: if _should_parse_next_as_content(el): - el = el.next + el = self.pre_processor.next(el) next_elements_content = self.parse(el) if not next_elements_content: continue @@ -489,13 +508,13 @@ def parse_hyperlink(self, el, text): def _get_image_id(self, el): # Drawings - blip = el.find_first('blip') + blip = find_first(el, 'blip') if blip is not None: # On drawing tags the id is actually whatever is returned from the # embed attribute on the blip tag. Thanks a lot Microsoft. return blip.get('embed') # Picts - imagedata = el.find_first('imagedata') + imagedata = find_first(el, 'imagedata') if imagedata is not None: return imagedata.get('id') @@ -508,7 +527,7 @@ def _get_image_size(self, el): found, then rely on the `image` handler to strip those attributes. This functionality can change once we integrate PIL. """ - sizes = el.find_first('ext') + sizes = find_first(el, 'ext') if sizes is not None and sizes.get('cx'): if sizes.get('cx'): x = self._convert_image_size(int(sizes.get('cx'))) @@ -518,7 +537,7 @@ def _get_image_size(self, el): '%dpx' % x, '%dpx' % y, ) - shape = el.find_first('shape') + shape = find_first(el, 'shape') if shape is not None and shape.get('style') is not None: # If either of these are not set, rely on the method `image` to not # use either of them. 
@@ -579,13 +598,13 @@ def parse_r(self, el, parsed): run_tag_property = el.find('rPr') if run_tag_property is not None: fns = [] - if run_tag_property.has_child('b'): # text styling + if has_child(run_tag_property, 'b'): # text styling if self._is_style_on(run_tag_property.find('b')): fns.append(self.bold) - if run_tag_property.has_child('i'): + if has_child(run_tag_property, 'i'): if self._is_style_on(run_tag_property.find('i')): fns.append(self.italics) - if run_tag_property.has_child('u'): + if has_child(run_tag_property, 'u'): if self._is_style_on(run_tag_property.find('u')): fns.append(self.underline) for fn in fns: diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 7f3320bc..6257cafe 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -9,7 +9,7 @@ XMLDocx2Html, _TranslationTestCase, ) -from pydocx.utils import parse_xml_from_string +from pydocx.utils import parse_xml_from_string, find_all class BoldTestCase(_TranslationTestCase): @@ -177,8 +177,8 @@ def test_get_image_id(self): ) tree = parse_xml_from_string(self.get_xml()) els = [] - els.extend(tree.find_all('drawing')) - els.extend(tree.find_all('pict')) + els.extend(find_all(tree, 'drawing')) + els.extend(find_all(tree, 'pict')) image_ids = [] for el in els: image_ids.append(parser._get_image_id(el)) @@ -198,8 +198,8 @@ def test_get_image_sizes(self): ) tree = parse_xml_from_string(self.get_xml()) els = [] - els.extend(tree.find_all('drawing')) - els.extend(tree.find_all('pict')) + els.extend(find_all(tree, 'drawing')) + els.extend(find_all(tree, 'pict')) image_ids = [] for el in els: image_ids.append(parser._get_image_size(el)) diff --git a/pydocx/utils.py b/pydocx/utils.py index a927359a..a7ef1be6 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -27,6 +27,54 @@ def el_iter(el): return el.findall('.//*') +def find_first(self, tag): + """ + Find the first occurrence of a tag beneath the current element. 
+ """ + return self.find('.//' + tag) + + +def find_all(self, tag): + """ + Find all occurrences of a tag + """ + return self.findall('.//' + tag) + + +def find_ancestor_with_tag(self, tag): + """ + Find the first ancestor with that is a `tag`. + """ + el = self + while el.getparent() is not None: + el = el.getparent() + if el.tag == tag: + return el + return None + + +def has_descendant_with_tag(self, tag): + """ + Determine if there is a child ahead in the element tree. + """ + # Get child. stop at first child. + return True if self.find('.//' + tag) is not None else False + + +def has_child(self, tag): + """ + Determine if current element has a child. Stop at first child. + """ + return True if self.find(tag) is not None else False + + +def _filter_children(element, tags): + return [ + el for el in element.getchildren() + if el.tag in tags + ] + + def remove_namespaces(document): # I can't really find a good way to do this with lxml. Se just do it with # xml. @@ -44,13 +92,14 @@ def remove_namespaces(document): def get_list_style(numbering_root, num_id, ilvl): # This is needed on both the custom lxml parser and the pydocx parser. So # make it a function. - ids = numbering_root.find_all('num') + ids = find_all(numbering_root, 'num') for _id in ids: if _id.attrib['numId'] != num_id: continue abstractid = _id.find('abstractNumId') abstractid = abstractid.attrib['val'] - style_information = numbering_root.find_all( + style_information = find_all( + numbering_root, 'abstractNum', ) for info in style_information: @@ -93,123 +142,98 @@ def num_id(self): return self._num_id -# Since I can't actually set attribute (reliably) on each element. I have to -# keep track of the next and previous elements here. 
-element_meta_data = defaultdict(dict) - +class PydocxPrePorcessor(object): + def __init__( + self, + convert_root_level_upper_roman=False, + styles_dict=None, + numbering_root=None, + *args, **kwargs): + self.meta_data = defaultdict(dict) + self.convert_root_level_upper_roman = convert_root_level_upper_roman + self.styles_dict = styles_dict + self.numbering_root = numbering_root + + def perform_pre_processing(self, root, *args, **kwargs): + self._set_list_attributes(root) + self._set_table_attributes(root) + self._set_is_in_table(root) + + body = find_first(root, 'body') + p_elements = [ + child for child in find_all(body, 'p') + ] + list_elements = [ + child for child in p_elements + if self.is_list_item(child) + ] + # Find the first and last li elements + num_ids = set([self.num_id(i) for i in list_elements]) + ilvls = set([self.ilvl(i) for i in list_elements]) + self._set_first_list_item(num_ids, ilvls, list_elements) + self._set_last_list_item(num_ids, list_elements) -class PydocxLXMLParser(etree.ElementBase): + self._set_headers(p_elements) + self._convert_upper_roman(body) + self._set_next(body) - @property - def is_first_list_item(self): - return element_meta_data[self].get('is_first_list_item', False) + def is_first_list_item(self, el): + return self.meta_data[el].get('is_first_list_item', False) - @property - def is_last_list_item_in_root(self): - return element_meta_data[self].get('is_last_list_item_in_root', False) + def is_last_list_item_in_root(self, el): + return self.meta_data[el].get('is_last_list_item_in_root', False) - @property - def is_list_item(self): - return element_meta_data[self].get('is_list_item', False) + def is_list_item(self, el): + return self.meta_data[el].get('is_list_item', False) - @property - def num_id(self): - if not self.is_list_item: + def num_id(self, el): + if not self.is_list_item(el): return None - return element_meta_data[self].get('num_id') + return self.meta_data[el].get('num_id') - @property - def ilvl(self): - if 
not self.is_list_item: + def ilvl(self, el): + if not self.is_list_item(el): return None - return element_meta_data[self].get('ilvl') + return self.meta_data[el].get('ilvl') - @property - def heading_level(self): - return element_meta_data[self].get('heading_level') + def heading_level(self, el): + return self.meta_data[el].get('heading_level') - @property - def is_in_table(self): - return element_meta_data[self].get('is_in_table') + def is_in_table(self, el): + return self.meta_data[el].get('is_in_table') - @property - def row_index(self): - return element_meta_data[self].get('row_index') + def row_index(self, el): + return self.meta_data[el].get('row_index') - @property - def column_index(self): - return element_meta_data[self].get('column_index') + def column_index(self, el): + return self.meta_data[el].get('column_index') - @property - def vmerge_continue(self): - return element_meta_data[self].get('vmerge_continue') + def vmerge_continue(self, el): + return self.meta_data[el].get('vmerge_continue') - @property - def next(self): - if self not in element_meta_data: + def next(self, el): + if el not in self.meta_data: return - return element_meta_data[self].get('next') + return self.meta_data[el].get('next') - @property - def previous(self): - if self not in element_meta_data: + def previous(self, el): + if el not in self.meta_data: return - return element_meta_data[self].get('previous') - - def find_first(self, tag): - """ - Find the first occurrence of a tag beneath the current element. - """ - return self.find('.//' + tag) - - def find_all(self, tag): - """ - Find all occurrences of a tag - """ - return self.findall('.//' + tag) - - def find_ancestor_with_tag(self, tag): - """ - Find the first ancestor with that is a `tag`. - """ - el = self - while el.getparent() is not None: - el = el.getparent() - if el.tag == tag: - return el - return None - - def has_descendant_with_tag(self, tag): - """ - Determine if there is a child ahead in the element tree. 
- """ - # Get child. stop at first child. - return True if self.find('.//' + tag) is not None else False - - def has_child(self, tag): - """ - Determine if current element has a child. Stop at first child. - """ - return True if self.find(tag) is not None else False - - def _filter_children(self, element, tags): - return [ - el for el in element.getchildren() - if el.tag in tags - ] + return self.meta_data[el].get('previous') def _set_list_attributes(self, el): - list_elements = el.find_all('numId') + list_elements = find_all(el, 'numId') for li in list_elements: - parent = li.find_ancestor_with_tag('p') + parent = find_ancestor_with_tag(li, 'p') # Deleted text in a list will have a numId but no ilvl. if parent is None: continue - if parent.find_first('ilvl') is None: + if find_first(parent, 'ilvl') is None: continue - element_meta_data[parent]['is_list_item'] = True - element_meta_data[parent]['num_id'] = self._generate_num_id(parent) - element_meta_data[parent]['ilvl'] = parent.find_first( + self.meta_data[parent]['is_list_item'] = True + self.meta_data[parent]['num_id'] = self._generate_num_id(parent) + self.meta_data[parent]['ilvl'] = find_first( + parent, 'ilvl', ).attrib['val'] @@ -221,7 +245,7 @@ def _generate_num_id(self, el): it is in to ensure it is considered a new list. Otherwise all sorts of terrible html gets generated. ''' - num_id = el.find_first('numId').attrib['val'] + num_id = find_first(el, 'numId').attrib['val'] # First, go up the parent until we get None and count the number of # tables there are. 
@@ -244,14 +268,14 @@ def _set_first_list_item(self, num_ids, ilvls, list_elements): filtered_list_elements = [ i for i in list_elements if ( - i.num_id == num_id and - i.ilvl == ilvl + self.num_id(i) == num_id and + self.ilvl(i) == ilvl ) ] if not filtered_list_elements: continue first_el = filtered_list_elements[0] - element_meta_data[first_el]['is_first_list_item'] = True + self.meta_data[first_el]['is_first_list_item'] = True def _set_last_list_item(self, num_ids, list_elements): # Find last list elements. Only mark list tags as the last list tag if @@ -261,36 +285,36 @@ def _set_last_list_item(self, num_ids, list_elements): for num_id in num_ids: filtered_list_elements = [ i for i in list_elements - if i.num_id == num_id + if self.num_id(i) == num_id ] if not filtered_list_elements: continue last_el = filtered_list_elements[-1] - element_meta_data[last_el]['is_last_list_item_in_root'] = True + self.meta_data[last_el]['is_last_list_item_in_root'] = True def _set_table_attributes(self, el): - tables = el.find_all('tbl') + tables = find_all(el, 'tbl') for table in tables: - rows = self._filter_children(table, ['tr']) + rows = _filter_children(table, ['tr']) if rows is None: continue for i, row in enumerate(rows): - tcs = self._filter_children(row, ['tc']) + tcs = _filter_children(row, ['tc']) for j, child in enumerate(tcs): - element_meta_data[child]['row_index'] = i - element_meta_data[child]['column_index'] = j - v_merge = child.find_first('vMerge') + self.meta_data[child]['row_index'] = i + self.meta_data[child]['column_index'] = j + v_merge = find_first(child, 'vMerge') if ( v_merge is not None and 'continue' == v_merge.get('val', '') ): - element_meta_data[child]['vmerge_continue'] = True + self.meta_data[child]['vmerge_continue'] = True def _set_is_in_table(self, el): - paragraph_elements = el.find_all('p') + paragraph_elements = find_all(el, 'p') for p in paragraph_elements: - if p.find_ancestor_with_tag('tc') is not None: - 
element_meta_data[p]['is_in_table'] = True + if find_ancestor_with_tag(p, 'tc') is not None: + self.meta_data[p]['is_in_table'] = True def _set_headers(self, elements): # These are the styles for headers and what the html tag should be if @@ -309,19 +333,19 @@ def _set_headers(self, elements): } for element in elements: # This element is using the default style which is not a heading. - if element.find_first('pStyle') is None: + if find_first(element, 'pStyle') is None: continue - style = element.find_first('pStyle').attrib.get('val', '') + style = find_first(element, 'pStyle').attrib.get('val', '') style = self.styles_dict.get(style) # Check to see if this element is actually a header. if style and style.lower() in headers: # Set all the list item variables to false. - element_meta_data[element]['is_list_item'] = False - element_meta_data[element]['is_first_list_item'] = False - element_meta_data[element]['is_last_list_item_in_root'] = False + self.meta_data[element]['is_list_item'] = False + self.meta_data[element]['is_first_list_item'] = False + self.meta_data[element]['is_last_list_item_in_root'] = False # Prime the heading_level - element_meta_data[element]['heading_level'] = headers[style.lower()] # noqa + self.meta_data[element]['heading_level'] = headers[style.lower()] # noqa def _convert_upper_roman(self, body): if not self.convert_root_level_upper_roman: @@ -330,46 +354,46 @@ def _convert_upper_roman(self, body): # Only root level elements. 
el for el in body.getchildren() # And only first_list_items - if el.is_first_list_item + if self.is_first_list_item(el) ] visited_num_ids = [] for root_list_item in first_root_list_items: - if root_list_item.num_id in visited_num_ids: + if self.num_id(root_list_item) in visited_num_ids: continue - visited_num_ids.append(root_list_item.num_id) + visited_num_ids.append(self.num_id(root_list_item)) lst_style = get_list_style( self.numbering_root, - root_list_item.num_id.num_id, - root_list_item.ilvl, + self.num_id(root_list_item).num_id, + self.ilvl(root_list_item), ) if lst_style != 'upperRoman': continue ilvl = min( - el.ilvl for el in body.find_all('p') - if el.num_id == root_list_item.num_id + self.ilvl(el) for el in find_all(body, 'p') + if self.num_id(el) == self.num_id(root_list_item) ) root_upper_roman_list_items = [ - el for el in body.find_all('p') - if el.num_id == root_list_item.num_id and - el.ilvl == ilvl + el for el in find_all(body, 'p') + if self.num_id(el) == self.num_id(root_list_item) and + self.ilvl(el) == ilvl ] for list_item in root_upper_roman_list_items: - element_meta_data[list_item]['is_list_item'] = False - element_meta_data[list_item]['is_first_list_item'] = False - element_meta_data[list_item]['is_last_list_item_in_root'] = False # noqa + self.meta_data[list_item]['is_list_item'] = False + self.meta_data[list_item]['is_first_list_item'] = False + self.meta_data[list_item]['is_last_list_item_in_root'] = False # noqa - element_meta_data[list_item]['heading_level'] = UPPER_ROMAN_TO_HEADING_VALUE # noqa + self.meta_data[list_item]['heading_level'] = UPPER_ROMAN_TO_HEADING_VALUE # noqa def _set_next(self, body): def _get_children_with_content(el): # We only care about children if they have text in them. 
children = [] - for child in self._filter_children(el, TAGS_HOLDING_CONTENT_TAGS): - has_descendant_with_tag = any( - child.has_descendant_with_tag(tag) for + for child in _filter_children(el, TAGS_HOLDING_CONTENT_TAGS): + _has_descendant_with_tag = any( + has_descendant_with_tag(child, tag) for tag in TAGS_CONTAINING_CONTENT ) - if has_descendant_with_tag: + if _has_descendant_with_tag: children.append(child) return children @@ -378,64 +402,21 @@ def _assign_next(children): for i in range(len(children)): try: if children[i + 1] is not None: - element_meta_data[children[i]]['next'] = children[i + 1] # noqa + self.meta_data[children[i]]['next'] = children[i + 1] # noqa except IndexError: pass try: if children[i - 1] is not None: - element_meta_data[children[i]]['previous'] = children[i - 1] # noqa + self.meta_data[children[i]]['previous'] = children[i - 1] # noqa except IndexError: pass # Assign next for everything in the root. _assign_next(_get_children_with_content(body)) # In addition set next for everything in table cells. 
- for tc in body.find_all('tc'): + for tc in find_all(body, 'tc'): _assign_next(_get_children_with_content(tc)) - def _init( - self, - add_attributes=False, - convert_root_level_upper_roman=False, - styles_dict=None, - numbering_root=None, - *args, - **kwargs): - super(PydocxLXMLParser, self)._init(*args, **kwargs) - if add_attributes: - self.convert_root_level_upper_roman = convert_root_level_upper_roman # noqa - self.styles_dict = styles_dict - self.numbering_root = numbering_root - self._set_list_attributes(self) - self._set_table_attributes(self) - self._set_is_in_table(self) - - list_elements = [ - child for child in self.find_all('p') - if child.is_list_item - ] - num_ids = set([i.num_id for i in list_elements]) - ilvls = set([i.ilvl for i in list_elements]) - self._set_first_list_item(num_ids, ilvls, list_elements) - self._set_last_list_item(num_ids, list_elements) - - # Find the first and last li elements - body = self.find_first('body') - p_elements = [ - child for child in body.find_all('p') - ] - self._set_headers(p_elements) - self._convert_upper_roman(body) - self._set_next(body) - - -parser_lookup = etree.ElementDefaultClassLookup(element=PydocxLXMLParser) -parser = etree.XMLParser() -parser.set_element_class_lookup(parser_lookup) - def parse_xml_from_string(xml): - return etree.fromstring( - remove_namespaces(xml), - parser, - ) + return etree.fromstring(remove_namespaces(xml)) From ceabf6506ff5c09ccd5d0079d1454f583028db83 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 14:52:59 -0400 Subject: [PATCH 320/404] refs #43: switched to using cElementTree --- pydocx/DocxParser.py | 2 +- pydocx/utils.py | 33 ++++++++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index e09d090d..89a28743 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -440,7 +440,7 @@ def _get_rowspan(self, el, v_merge): rowspan = 1 result = '' - tbl = 
find_ancestor_with_tag(el, 'tbl') + tbl = find_ancestor_with_tag(self.pre_processor, el, 'tbl') # We only want table cells that have a higher row_index that is greater # than the current_row and that are on the current_col if tbl is None: diff --git a/pydocx/utils.py b/pydocx/utils.py index a7ef1be6..e33e1f1d 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -1,5 +1,5 @@ from collections import defaultdict -from lxml import etree +from xml.etree import cElementTree UPPER_ROMAN_TO_HEADING_VALUE = 'h2' @@ -41,13 +41,12 @@ def find_all(self, tag): return self.findall('.//' + tag) -def find_ancestor_with_tag(self, tag): +def find_ancestor_with_tag(pre_processor, el, tag): """ Find the first ancestor with that is a `tag`. """ - el = self - while el.getparent() is not None: - el = el.getparent() + while pre_processor.parent(el) is not None: + el = pre_processor.parent(el) if el.tag == tag: return el return None @@ -78,15 +77,14 @@ def _filter_children(element, tags): def remove_namespaces(document): # I can't really find a good way to do this with lxml. Se just do it with # xml. 
- import xml.etree.ElementTree as xml_etree - root = xml_etree.fromstring(document) + root = cElementTree.fromstring(document) for child in el_iter(root): child.tag = child.tag.split("}")[1] child.attrib = dict( (k.split("}")[-1], v) for k, v in child.attrib.items() ) - return xml_etree.tostring(root) + return cElementTree.tostring(root) def get_list_style(numbering_root, num_id, ilvl): @@ -155,6 +153,7 @@ def __init__( self.numbering_root = numbering_root def perform_pre_processing(self, root, *args, **kwargs): + self._add_parent(root) self._set_list_attributes(root) self._set_table_attributes(root) self._set_is_in_table(root) @@ -221,10 +220,18 @@ def previous(self, el): return return self.meta_data[el].get('previous') + def parent(self, el): + return self.meta_data[el].get('parent') + + def _add_parent(self, el): # if a parent, make that an attribute + for child in el.getchildren(): + self.meta_data[child]['parent'] = el + self._add_parent(child) + def _set_list_attributes(self, el): list_elements = find_all(el, 'numId') for li in list_elements: - parent = find_ancestor_with_tag(li, 'p') + parent = find_ancestor_with_tag(self, li, 'p') # Deleted text in a list will have a numId but no ilvl. if parent is None: continue @@ -250,10 +257,10 @@ def _generate_num_id(self, el): # First, go up the parent until we get None and count the number of # tables there are. 
num_tables = 0 - while el.getparent() is not None: + while self.parent(el) is not None: if el.tag == 'tbl': num_tables += 1 - el = el.getparent() + el = self.parent(el) return NamespacedNumId( num_id=num_id, num_tables=num_tables, @@ -313,7 +320,7 @@ def _set_table_attributes(self, el): def _set_is_in_table(self, el): paragraph_elements = find_all(el, 'p') for p in paragraph_elements: - if find_ancestor_with_tag(p, 'tc') is not None: + if find_ancestor_with_tag(self, p, 'tc') is not None: self.meta_data[p]['is_in_table'] = True def _set_headers(self, elements): @@ -419,4 +426,4 @@ def _assign_next(children): def parse_xml_from_string(xml): - return etree.fromstring(remove_namespaces(xml)) + return cElementTree.fromstring(remove_namespaces(xml)) From f78e8572251de031aac1cda9bbe8cbf7130b0c8a Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 14:54:23 -0400 Subject: [PATCH 321/404] refs #43: no longer need lxml --- pydocx/utils.py | 2 -- setup.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pydocx/utils.py b/pydocx/utils.py index e33e1f1d..f899dcb3 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -75,8 +75,6 @@ def _filter_children(element, tags): def remove_namespaces(document): - # I can't really find a good way to do this with lxml. Se just do it with - # xml. 
root = cElementTree.fromstring(document) for child in el_iter(root): child.tag = child.tag.split("}")[1] diff --git a/setup.py b/setup.py index 16948016..74be1e3a 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ def get_description(): }, scripts=[], zip_safe=False, - install_requires=['lxml'], + install_requires=[], cmdclass={}, classifiers=[ "Development Status :: 3 - Alpha", From 738912752ef6d8b281a39464acb71bf06bd665b3 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 15:24:27 -0400 Subject: [PATCH 322/404] refs #43: small refactor, no longer skipping the test that use to take so long --- pydocx/tests/test_xml.py | 4 +--- pydocx/utils.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 6257cafe..7b4266a3 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -645,8 +645,6 @@ def get_xml(self): return xml def test_performance(self): - if not os.environ.get('TRAVIS_EXECUTE_PERFORMANCE', False): - raise SkipTest('TRAVIS_EXECUTE_PERFORMANCE is false') with self.toggle_run_expected_output(): start_time = time.time() try: @@ -656,7 +654,7 @@ def test_performance(self): end_time = time.time() total_time = end_time - start_time # This finishes in under a second on python 2.7 - assert total_time < 5, total_time + assert total_time < 3, total_time class NonStandardTextTagsTestCase(_TranslationTestCase): diff --git a/pydocx/utils.py b/pydocx/utils.py index f899dcb3..ce3c2417 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -27,18 +27,18 @@ def el_iter(el): return el.findall('.//*') -def find_first(self, tag): +def find_first(el, tag): """ Find the first occurrence of a tag beneath the current element. 
""" - return self.find('.//' + tag) + return el.find('.//' + tag) -def find_all(self, tag): +def find_all(el, tag): """ Find all occurrences of a tag """ - return self.findall('.//' + tag) + return el.findall('.//' + tag) def find_ancestor_with_tag(pre_processor, el, tag): @@ -52,19 +52,19 @@ def find_ancestor_with_tag(pre_processor, el, tag): return None -def has_descendant_with_tag(self, tag): +def has_descendant_with_tag(el, tag): """ Determine if there is a child ahead in the element tree. """ # Get child. stop at first child. - return True if self.find('.//' + tag) is not None else False + return True if el.find('.//' + tag) is not None else False -def has_child(self, tag): +def has_child(el, tag): """ Determine if current element has a child. Stop at first child. """ - return True if self.find(tag) is not None else False + return True if el.find(tag) is not None else False def _filter_children(element, tags): From 39ccc37c1c3e248298398725c119396840492f28 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 15:34:31 -0400 Subject: [PATCH 323/404] refs #43: change log and updated readme --- CHANGELOG | 6 ++++++ README.rst | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index a47af6a6..46aee53a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,12 @@ Changelog ========= +* 0.3.0 + * We switched from using stock *xml.etree.ElementTree* to using + *xml.etree.cElementTree*. This has resulted in a fairly significant speed + increase for python 2.6 + * It is now possible to create your own pre processor to do additional pre + processing. * 0.2.1 * Added a changelog * Added the version in pydocx.__init__ diff --git a/README.rst b/README.rst index 68e90b59..3f1675ec 100644 --- a/README.rst +++ b/README.rst @@ -169,6 +169,33 @@ OR, let's say FOO is your new favorite markup language. Simply customize your ow def linebreak(self): return '!!!!!!!!!!!!' 
+Custom Pre-Processor +#################### + +When creating your own Parser (as described above) you can now add in your own custom Pre Processor. To do so you will need to set the `pre_processor` field on the custom parser, like so: + +:: + + class Docx2Foo(DocxParser): + pre_processor_class = FooPrePorcessor + + +The `FooPrePorcessor` will need a few things to get you going: + +:: + + class FooPrePorcessor(PydocxPrePorcessor): + def perform_pre_processing(self, root, *args, **kwargs): + super(FooPrePorcessor, self).perform_pre_processing(root, *args, **kwargs) + self._set_foo(root) + + def _set_foo(self, root): + pass + +If you want `_set_foo` to be called you must add it to `perform_pre_processing` which is called in the base parser for pydocx. + +Everything done during pre-processing is executed prior to `parse` being called for the first time. + Styles ###### From 7ce47bd130b0d7a43c995c6f215c3ef5b4fd4f42 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 15:43:04 -0400 Subject: [PATCH 324/404] refs #43: removed a dead print statement --- pydocx/tests/document_builder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 73fd333c..24180cb6 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -244,7 +244,6 @@ def style(self, style_id, value): @classmethod def numbering(self, numbering_dict): - print numbering_dict template = env.get_template(templates['numbering']) kwargs = { From 514fb5a30bd8aff0aa54f172521b9849d49a1501 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 17:07:17 -0400 Subject: [PATCH 325/404] refs #42: Added two tests for how sub/super scripts are supposed to work --- pydocx/tests/document_builder.py | 2 ++ pydocx/tests/templates/r.xml | 1 + pydocx/tests/test_docx.py | 14 ++++++++++++++ pydocx/tests/test_xml.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+) diff --git 
a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 24180cb6..0373f86d 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -93,6 +93,7 @@ def r_tag( is_bold=False, is_underline=False, is_italics=False, + vert_align=None, val=None, ): template = env.get_template(templates['r']) @@ -101,6 +102,7 @@ def r_tag( 'is_bold': is_bold, 'is_underline': is_underline, 'is_italics': is_italics, + 'vert_align': vert_align, 'val': val, } return template.render(**kwargs) diff --git a/pydocx/tests/templates/r.xml b/pydocx/tests/templates/r.xml index ed97150e..2cb40f19 100644 --- a/pydocx/tests/templates/r.xml +++ b/pydocx/tests/templates/r.xml @@ -3,6 +3,7 @@ {% if is_bold %}{% endif %} {% if is_underline %}{% endif %} {% if is_italics %}{% endif %} + {% if vert_align %}{% endif %} {% for element in elements %} {{ element }} diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index 6aaf5365..58129548 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -119,6 +119,20 @@ def test_inline_tags(): )) +def test_super_and_subscript(): + file_path = path.join( + path.abspath(path.dirname(__file__)), + '..', + 'fixtures', + 'super_and_subscript.docx', + ) + actual_html = convert(file_path) + assert_html_equal(actual_html, BASE_HTML % ''' +

          AAABBB

          +

          CCCDDD

          + ''') + + def test_unicode(): file_path = path.join( path.abspath(path.dirname(__file__)), diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 7b4266a3..cc54e9dd 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -1017,3 +1017,32 @@ def get_xml(self): xml = DXB.xml(body) return xml + + +class SuperAndSubScripts(_TranslationTestCase): + expected_output = ''' +

          AAABBB

          +

          CCCDDD

          + ''' + + def get_xml(self): + p_tags = [ + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('AAA')]), + DXB.r_tag([DXB.t_tag('BBB')], vert_align='superscript'), + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('CCC')], vert_align='subscript'), + DXB.r_tag([DXB.t_tag('DDD')]), + ], + ), + ] + body = '' + for p_tag in p_tags: + body += p_tag + + xml = DXB.xml(body) + return xml From e07b683b6492d01890572b635da25b25f41a424e Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 17:07:45 -0400 Subject: [PATCH 326/404] refs #42: sub and super scripts are now working --- pydocx/DocxParser.py | 25 +++++++++++++++++++------ pydocx/parsers/Docx2Html.py | 10 ++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 89a28743..d38c8c94 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -597,18 +597,23 @@ def parse_r(self, el, parsed): return '' run_tag_property = el.find('rPr') if run_tag_property is not None: - fns = [] if has_child(run_tag_property, 'b'): # text styling if self._is_style_on(run_tag_property.find('b')): - fns.append(self.bold) + text = self.bold(text) if has_child(run_tag_property, 'i'): if self._is_style_on(run_tag_property.find('i')): - fns.append(self.italics) + text = self.italics(text) if has_child(run_tag_property, 'u'): if self._is_style_on(run_tag_property.find('u')): - fns.append(self.underline) - for fn in fns: - text = fn(text) + text = self.underline(text) + + # This could be a superscript or a subscript + if has_child(run_tag_property, 'vertAlign'): + vert_align = run_tag_property.find('vertAlign') + if vert_align.attrib['val'] == 'superscript': + text = self.superscript(text) + if vert_align.attrib['val'] == 'subscript': + text = self.subscript(text) return text @property @@ -663,6 +668,14 @@ def italics(self, text): def underline(self, text): return text + @abstractmethod + def superscript(self, text): + return text + + @abstractmethod + def subscript(self, 
text): + return text + @abstractmethod def tab(self): return True diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 9b9983eb..8abbcfa0 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -126,6 +126,16 @@ def italics(self, text): def underline(self, text): return '' + text + '' + def superscript(self, text): + return '%(text)s' % { + 'text': text, + } + + def subscript(self, text): + return '%(text)s' % { + 'text': text, + } + def tab(self): # Insert before the text right?? So got the text and just do an insert # at the beginning! From 23533d6446a55d7e67e93729c900eabc627630d4 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 17:08:08 -0400 Subject: [PATCH 327/404] refs #42: it would help to add the fixture for the docx test --- pydocx/fixtures/super_and_subscript.docx | Bin 0 -> 3437 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pydocx/fixtures/super_and_subscript.docx diff --git a/pydocx/fixtures/super_and_subscript.docx b/pydocx/fixtures/super_and_subscript.docx new file mode 100644 index 0000000000000000000000000000000000000000..06ea2d7ab645d25dd58cfb7d2211fcf3f3396ad6 GIT binary patch literal 3437 zcmai0XIN9&7L^*1mH;YJLRZQQO}gL^ni>>o3Irm>5NRP0kS1Nm31t*8ln6*fY+#fk zh>;*8AUz_z3Io!lqZo>TNt|(Dd~baFe&7A>kGt+Yd!2RGURD-t?81AvxVZKN+^sX- zv#WR*&n{RG-#}Fr=AD#w+JX(rdv%e%&tzfPR~gqx{VZdQky!=A6Xq7$semJ1r0sXm zy=1FIVnbEf$0hL&GEh}1sn5W*|Rpy(^c7L8HRl8dME5*z`D0)I&Ub#lYaTOE-!V-R`d89=;N?uM_q@CXXy%e zoycP_!>8^`&2}6!10e7>7HCc(AZ>?=^Bzp@!xRTh6kc`PlV(Y1@~P2|}u* z`xRnCv|-xOYxLhc9kt!%gsR5Qh&3Gymw=C}CPwGDNQUHCvyQ~^f%oO5tlf_4n=0fv z22!5CRrn}0ufvKPNYj2+Gt@*9{>`h{<*Mqb66Y{~ zjzDex1PS#r+UO{%-O%(^(M9&6O1yJx6jjil1F#W<(c&Z>4K{$(d3k7K$(zvXPaH|j6;|F)Mj;kGxEht)h|)R4IU;?(CM4#UOp2Xg6cOuyJkcb5zwmKk0c&zBT!k881V{GYB%k55<2mG|&-r); zD>d)Ce4t`x^W-QMmt-taZE7WnN;oF?Tue)?o(1UxEj&2LH6&_Yfg z5m7;AV={oF>7O^2MAJd~Ds?kHlspi86kQkw|rJXfNBQ8A<%T<_Ie5I9|DhtlTtcocQi!M%0(fh}<}>sfM0gZAWjh 
z%OQf1GKW=S@a+?E*FbdHs!q=ryqQJaO0IKfFyjUX9h|w}JxE=1{qhQ*A(lKR+Zj%& zcFQ-3-FBQntkNUr+fX0P4wxSWXP}6B7QuwgBjT--^roNUE>9ZybL6ci=2s3DT@k#{ z+LRorv{nQ0As$d~yA1TBr*Cd)&@vzMB6>+&8hZTuoM zu!hGo_N*+>YFxf)i6Qu}CX!l0D%!EtcjKH! z7P_5(9-bdBxC(7|bDEiWQu5{NaTuBigiskm2xtG17zDnFAuuS+*CX(&gV>=l3@L0? zyMWe@@)3X4(XzvF$Hw=`4M_T*daaCG9ZVoENu~1!L4Jv7c8}2oM zX|%>J;LT^K+Rg7>hkJZ$jR|&Q=~Ai6c}c~CezaHOmB?|js}(6p(|saI?14!Cm0*gN zNS|oCBSA%))~_lhQ?Y_z=V{ShKubJP!Uxbh*Y1u!RJsR)-!aXtV1b6VmI7_d$+od6 z`3YG;eROfSb=}6ibP;CnkVCZkm&hBza4|qYX_-?+(%}0XJ5q2tOi0sg6YyZCA@=r$|Ua;AFYNQw-@B?5k=G9W``47ahuV*CSD-TeKT zGRu9V&T~Z#$T3QJX^?f}v|<-!eEheD%Y_I!9s+PHi){|B(=)f#^H4EE%55yBL#+0%SUfB(bR zU7v)jCWULI9xmp_DibmR$S|drviNisNMFQO?3nU7p6w*KMqcD{KRR$lv>P=0Fc5sx zrpZv@`H1H`W`i@egzI-0%jfyOp89u#7ckhbqZplvu&7lNW@Z$58agdlm>V5(d(K#x z{jkuYF?KGl4RF%=Su|!4ed)&V#QVqoj_IkM!)X;wwGAmVRzF<$1&l0|0?bSu}##Lu^b zs`zz&S0pq=S{>=8MfMyJ()n~|xuNCbpr&_W>)KgXE8El&^0>CGYkA{6eIU4{02YU~pOq3!VExT8# zH(i+I-sJPW2xbtw{X=qv81y@tFhUMvPP^EJ`FjK+m;$2QXtS*Sb5b|j<8}Rwxa4P8 z5@bICsf~TvbuwG2z4*`bjygbTBVky5ecA+J9VPSv?$??{k|qXMpzfPBX%u%TqBn02 zm-nu7IB6#p4u{tyKi%)6T{o?&cU&(9=_ zwng+r2Rc`cg$i&(47)w{OZ30>G$~E zA%z*ncIpJfN510!C7ONzrQH*b8ESS4#%TEKmv%>-@8P?xi#Y~&%AVsd_|8!L9=zKY znC`Yy6owQ2Z3TaM-S_C-y8q|OZ*lEHe>0Nr;k);hc{p}zlly-j6)Ou4PUcHojEm0L JCHU7J{{!ahijn{T literal 0 HcmV?d00001 From 9543d4e7920766e39a320d1ed0244b1e063bcb3b Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 17:09:34 -0400 Subject: [PATCH 328/404] refs #42: added an update note and updated the readme --- CHANGELOG | 2 ++ README.rst | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 46aee53a..0932e690 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,8 @@ Changelog ========= +* 0.3.1 + * Superscripts and subscripts are now extracted correctly. * 0.3.0 * We switched from using stock *xml.etree.ElementTree* to using *xml.etree.cElementTree*. 
This has resulted in a fairly significant speed diff --git a/README.rst b/README.rst index 3f1675ec..82cbc71e 100644 --- a/README.rst +++ b/README.rst @@ -94,6 +94,14 @@ DocxParser includes abstracts methods that each parser overwrites to satsify its def underline(self, text): return text + @abstractmethod + def superscript(self, text): + return text + + @abstractmethod + def subscript(self, text): + return text + @abstractmethod def tab(self): return True From 898cfd0ac2094dda56b28ec4d6e3c9240d13d33f Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Tue, 4 Jun 2013 17:11:04 -0400 Subject: [PATCH 329/404] refs #42: added a comment --- pydocx/tests/document_builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 0373f86d..4bed0384 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -99,6 +99,8 @@ def r_tag( template = env.get_template(templates['r']) kwargs = { 'elements': elements, + # TODO Pass in an `rPr` instead. That is what all of this is for + # anyway. 'is_bold': is_bold, 'is_underline': is_underline, 'is_italics': is_italics, From bc87a890806169434fac91776e34a369a0a8e6fa Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 5 Jun 2013 11:12:50 -0400 Subject: [PATCH 330/404] Revert "refs #41: refactor and started using spans inline instead of divs" This reverts commit 4ff701dd18ccc31ea452e03e6af2ab59bfc210c7. 
--- pydocx/DocxParser.py | 24 ++++++++++-------------- pydocx/parsers/Docx2Html.py | 10 +++++----- pydocx/tests/test_docx.py | 26 +++++++++++++------------- 3 files changed, 28 insertions(+), 32 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 89a28743..200faae6 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -316,24 +316,20 @@ def justification(self, el, text): if value in [JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT]: alignment = value - def _scale_indent_to_pixels(value): - if value is None: - return None - return (float(value) / 20.0) * (4.0 / 3.0) - if indentation is not None: if INDENTATION_RIGHT in indentation.attrib: - right = _scale_indent_to_pixels( - indentation.attrib.get(INDENTATION_RIGHT), - ) + right = indentation.attrib[INDENTATION_RIGHT] + # divide by 20 to get to pt. multiply by (4/3) to get to px + right = (int(right) / 20) * float(4) / float(3) + right = str(right) if INDENTATION_LEFT in indentation.attrib: - left = _scale_indent_to_pixels( - indentation.attrib.get(INDENTATION_LEFT), - ) + left = indentation.attrib[INDENTATION_LEFT] + left = (int(left) / 20) * float(4) / float(3) + left = str(left) if INDENTATION_FIRST_LINE in indentation.attrib: - firstLine = _scale_indent_to_pixels( - indentation.attrib.get(INDENTATION_FIRST_LINE), - ) + firstLine = indentation.attrib[INDENTATION_FIRST_LINE] + firstLine = (int(firstLine) / 20) * float(4) / float(3) + firstLine = str(firstLine) if any([alignment, firstLine, left, right]): return self.indent(text, alignment, firstLine, left, right) return text diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 9b9983eb..73b3c9b6 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -154,19 +154,19 @@ def page_break(self): return '
          ' def indent(self, text, just='', firstLine='', left='', right=''): - slug = '
          +
          EEE +
          +
          GGG +
          - EEE - FFF
          - GGG -
          @@ -605,7 +605,7 @@ def test_has_title(): actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

          Title

          -

          Text

          +

          Text

          ''') @@ -661,28 +661,28 @@ def test_justification(): actual_html = convert(file_path) assert_html_equal(actual_html, BASE_HTML % '''

          - Center Justified +

          Center Justified

          - Right justified +

          Right justified

          - +

          Right justified and pushed in from right - +

          - Center justified and pushed in from left and it is great and it is the coolest thing of all time and I like it and I think it is cool - +

          - +

          Left justified and pushed in from left - +

          ''') From 155dc346822b86edbaeb26528a18e41951368f20 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 5 Jun 2013 12:27:22 -0400 Subject: [PATCH 331/404] refs #42: changed conditional to elif --- pydocx/DocxParser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 046eb17d..b10485c2 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -608,7 +608,7 @@ def parse_r(self, el, parsed): vert_align = run_tag_property.find('vertAlign') if vert_align.attrib['val'] == 'superscript': text = self.superscript(text) - if vert_align.attrib['val'] == 'subscript': + elif vert_align.attrib['val'] == 'subscript': text = self.subscript(text) return text From 07d1da6c1b3da96d8d0acdb7421c2da035bd955e Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 5 Jun 2013 12:48:19 -0400 Subject: [PATCH 332/404] bumped to version 0.3.0 --- CHANGELOG | 3 +-- pydocx/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 0932e690..86786a81 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,14 +2,13 @@ Changelog ========= -* 0.3.1 - * Superscripts and subscripts are now extracted correctly. * 0.3.0 * We switched from using stock *xml.etree.ElementTree* to using *xml.etree.cElementTree*. This has resulted in a fairly significant speed increase for python 2.6 * It is now possible to create your own pre processor to do additional pre processing. + * Superscripts and subscripts are now extracted correctly. 
* 0.2.1 * Added a changelog * Added the version in pydocx.__init__ diff --git a/pydocx/__init__.py b/pydocx/__init__.py index c68a472e..4beb371e 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -9,4 +9,4 @@ def docx2markdown(path): return Docx2Markdown(path).parsed -VERSION = '0.2.1' +VERSION = '0.3.0' diff --git a/setup.py b/setup.py index 74be1e3a..c85d4ad5 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def get_description(): setup( name="PyDocX", # Edit here and pydocx.__init__ - version="0.2.1", + version="0.3.0", description="docx (OOXML) to html converter", author="Jason Ward, Sam Portnow", author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", From 7b088abcf52724d38009e8fbece69fedfb41252b Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 6 Jun 2013 11:51:38 -0400 Subject: [PATCH 333/404] simple lists and simple tables working --- pydocx/DocxParser.py | 94 ++------------------------ pydocx/HtmlConverter.py | 145 +++++++++++++++++++++++++++++++++------- 2 files changed, 125 insertions(+), 114 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 0f9b5c7f..243bf11e 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -122,13 +122,10 @@ def find_ancestor_with_tag(self, tag): setattr(_ElementInterface, 'vmerge_continue', None) setattr(_ElementInterface, 'row_index', None) setattr(_ElementInterface, 'column_index', None) -<<<<<<< HEAD setattr(_ElementInterface, 'is_last_text', False) setattr(_ElementInterface, 'lst_style', None) setattr(_ElementInterface, 'is_last_tc', False) -======= ->>>>>>> a0f1daa2821285ba65a44bd30e2f2bcd19d17791 # End helpers @@ -209,15 +206,11 @@ def __init__( self._parsed = '' self.block_text = '' self.page_width = 0 -<<<<<<< HEAD self.col = 0 self.row = 0 - self._build_data(*args, **kwargs) -======= self.convert_root_level_upper_roman = convert_root_level_upper_roman self._image_data = {} self._build_data(path, *args, **kwargs) ->>>>>>> 
a0f1daa2821285ba65a44bd30e2f2bcd19d17791 def add_parent(el): # if a parent, make that an attribute for child in el.getchildren(): @@ -300,7 +293,7 @@ def _set_table_attributes(self, el): 'continue' == v_merge.get('val', '') ): child.vmerge_continue = True -<<<<<<< HEAD + def _set_text_attributes(self, el): # find the ppr. look thru all the elements within and find the text #if it's the last item in the list, it's the last text @@ -309,8 +302,7 @@ def _set_text_attributes(self, el): for i, t in enumerate(el.parent.find_all('t')): if i == (len(el.parent.find_all('t')) - 1): t.is_last_text = True -======= ->>>>>>> a0f1daa2821285ba65a44bd30e2f2bcd19d17791 + def _set_is_in_table(self, el): paragraph_elements = el.find_all('p') @@ -527,6 +519,8 @@ def parse_table_cell(self, el, text): return '' colspan = self.get_colspan(el) rowspan = self._get_rowspan(el, v_merge) + print 'YOU ALREADY KNOW!' + print rowspan return self.table_cell(text, el.is_last_tc, el.column_index, el.row_index, colspan, rowspan) def parse_list(self, el, text): @@ -565,18 +559,11 @@ def _parse_list(self, el, text): parsed = self.parse_list_item(el, text) num_id = el.num_id ilvl = el.ilvl -<<<<<<< HEAD - el.lst_style = self.get_list_style( - el.num_id, - el.ilvl, - ) -======= # Everything after this point assumes the first element is not also the # last. If the first element is also the last then early return by # building and returning the completed list. if el.is_last_list_item_in_root: return self._build_list(el, parsed) ->>>>>>> a0f1daa2821285ba65a44bd30e2f2bcd19d17791 next_el = el.next def is_same_list(next_el, num_id, ilvl): @@ -635,21 +622,6 @@ def should_parse_last_el(last_el, first_el): if parsed == '': return parsed -<<<<<<< HEAD - # Get the list style for the pending list. - lst_style = self.get_list_style( - el.num_id, - el.ilvl, - ) - # Create the actual list and return it. 
- if lst_style == 'bullet': - return self.unordered_list(parsed) - else: - return self.ordered_list( - parsed, - lst_style, - ) -======= return self._build_list(el, parsed) def justification(self, el, text): @@ -687,7 +659,6 @@ def justification(self, el, text): if any([alignment, firstLine, left, right]): return self.indent(text, alignment, firstLine, left, right) return text ->>>>>>> a0f1daa2821285ba65a44bd30e2f2bcd19d17791 def parse_p(self, el, text): if text == '': @@ -960,64 +931,7 @@ def parse_r(self, el, parsed): fns.append(self.underline) for fn in fns: text = fn(text) -<<<<<<< HEAD - paragraph_tag_property = el.parent.find('pPr') - just = '' - if paragraph_tag_property is not None: - jc = paragraph_tag_property.find('jc') - if jc is not None: # text alignments - if jc.attrib['val'] == 'right': - just = 'right' - elif jc.attrib['val'] == 'center': - just = 'center' - elif jc.attrib['val'] == 'left': - just = 'left' - ind = paragraph_tag_property.find('ind') - right = '' - left = '' - firstLine = '' - if ind is not None: - right = None - left = None - firstLine = None - if 'right' in ind.attrib: - right = ind.attrib['right'] - # divide by 20 to get to pt. 
multiply by (4/3) to get to px - right = (int(right) / 20) * float(4) / float(3) - right = str(right) - if 'left' in ind.attrib: - left = ind.attrib['left'] - left = (int(left) / 20) * float(4) / float(3) - left = str(left) - if 'firstLine' in ind.attrib: - firstLine = ind.attrib['firstLine'] - firstLine = (int(firstLine) / 20) * float(4) / float(3) - firstLine = str(firstLine) - if jc is not None or ind is not None: - t_els = el.find_all('t') - for el in t_els: - if el.is_last_text: - block = False - is_table = False - self.block_text += text - column = 0 - #might need to write column to justify appropriately - if el.find_ancestor_with_tag('tc') is not None: - column = el.find_ancestor_with_tag('tc').column_index - is_table = True - text = self.indent(self.block_text, - just, firstLine, left, right) - self.block_text = '' - else: - block = True - self.block_text += text - if block is False: - return text - else: - return '' -======= return text ->>>>>>> a0f1daa2821285ba65a44bd30e2f2bcd19d17791 def get_list_style(self, num_id, ilvl): ids = self.numbering_root.find_all('num') diff --git a/pydocx/HtmlConverter.py b/pydocx/HtmlConverter.py index 1ff1d7c3..6d8059eb 100644 --- a/pydocx/HtmlConverter.py +++ b/pydocx/HtmlConverter.py @@ -1,13 +1,26 @@ __author__ = 'samportnow' - -from .tests import document_builder +from bs4 import BeautifulSoup +import zipfile from pydocx.DocxParser import ElementTree +from pydocx.py_docx.docx import * +import py_docx.docx as docx +import os - -class converter(): +class Html2Docx(): def __init__(self, html): + # set up what is parsed + self.parsed = '' + with open(html, 'r') as f: + html = f.read() + # set up the html self.html = ElementTree.fromstring(html) + # get the relationship list + self.relationships = relationshiplist() + # make a new document + self.document = newdocument() + #make the document + self.body = self.document.xpath('/w:document/w:body', namespaces=nsprefixes)[0] self.build() def build(self): @@ -36,39 +49,123 @@ 
def check_for_lst_parent(self, el): return lst_parent def set_list_attributes(self): - ilvl = 0 - numId = -1 + ilvl = -1 + numId = 0 lsts = self.find_all_by_tags(self.html, 'ol', 'ul') for lst in lsts: lst.getchildren()[0].is_first_list_item = True lst.getchildren()[-1].is_last_list_item = True - for el in self.html.find_first('body').iter(): + for el in self.html.find('body').iter(): if el.tag == 'li': if self.check_for_lst_parent(el.parent) \ is False and el.is_first_list_item is True: numId += 1 - ilvl = 0 + ilvl = -1 if el.is_first_list_item is True: ilvl += 1 el.ilvl = ilvl el.num_id = numId + el.is_list_item = True + + def justificaton(self, el): + pass def parse(self, el): for child in el.getchildren(): - parsed = '' - self.parse(child) - if child.tag == 'b': - self.bold = True + if child.tag == 'p': + text_and_style = self.parse_r(child) + self.body.append(paragraph(text_and_style)) + if child.tag == 'ul' or child.tag == 'ol': + lst_type = child.tag + self.parse_list(child, lst_type) + if child.tag == 'table': + self.body.append(self.parse_table(child)) + self.parse(child) + self.save() + + def parse_r(self, el): + par_block = [] + breaks = [] + style = '' + for child in el.iter(): + text = '' + if child.tag == 'em': + style += 'i' + if child.tag == 'strong': + style += 'b' + if child.tag == 'underline': + style += 'u' + if child.text: + text = child.text + if child.tag == 'br': + text = child.tail + breaks.append('br') + if text: + par_block.append([text, style, breaks]) + style = '' + if child.parent.tag == 'li': + return par_block + return par_block + + + def parse_list(self, lst, lst_type = ''): + for child in lst.getchildren(): + if child.tag == 'li': + text_and_style = self.parse_r(child) + self.body.append( + paragraph( + text_and_style, is_list=True, ilvl=str(child.ilvl), numId=str(child.num_id), style=lst_type)) + + def table_look_ahead(self, tbl): + columns = 0 + trs = tbl.find_all('tr') + tcs = trs[0].find_all('td') + for tc in tcs: + if 
'colspan' in tc.attrib: + columns += int(tc.attrib['colspan']) + else: + columns += 1 + return columns + + def parse_table(self, el): + columns = self.table_look_ahead(el) + tbl = createtblproperties(columns) + for tr in el.getchildren(): + table_row = createtablerow() + tcs = tr.find_all('td') + for tc in tcs: + if "colspan" in tc.attrib: + cell = createtablecell(gridspan=tc.attrib["colspan"]) + if "rowspan" in tc.attrib: + print "ya buddy" + print tc.attrib["rowspan"] else: - self.bold = False - if child.tag == 'i': - pass - if child.tag == 'u': - pass - if child.tag == 'li': - parsed = document_builder.DocxBuilder.li( - child.text, child.ilvl, child.num_id, self.bold) - if child.tag == 'p': - parsed = document_builder.DocxBuilder.p_tag( - child.text, self.bold) - return parsed + cell = createtablecell() + if tc.text: + text_and_style = self.parse_r(tc) + par_run=paragraph(text_and_style) + cell.append(par_run) + table_row.append(cell) + tbl.append(table_row) + return tbl + + + def save(self): + title = 'Python docx demo' + subject = 'A practical example of making docx from Python' + creator = 'Mike MacCana' + keywords = ['python', 'Office Open XML', 'Word'] +# print BeautifulSoup( +# ElementTree.tostring( +# self.document, +# ), +# ).prettify() + coreprops = coreproperties(title=title, subject=subject, creator=creator, + keywords=keywords) + appprops = appproperties() + contenttypes = docx.contenttypes() + websettings = docx.websettings() + wordrelationships = docx.wordrelationships(self.relationships) + # Save our document + savedocx(self.document, coreprops, appprops, contenttypes, websettings, + wordrelationships, 'Testing.docx') From 11f196aa94aee4e36a2dbe0358bdff8daa9a9115 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 6 Jun 2013 11:55:22 -0400 Subject: [PATCH 334/404] merged with master --- pydocx/DocxParser.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 243bf11e..b1646c46 
100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -206,8 +206,12 @@ def __init__( self._parsed = '' self.block_text = '' self.page_width = 0 +<<<<<<< .merge_file_Q0i6Oc self.col = 0 self.row = 0 +======= + self.bookmark = False +>>>>>>> .merge_file_7ASwsQ self.convert_root_level_upper_roman = convert_root_level_upper_roman self._image_data = {} self._build_data(path, *args, **kwargs) @@ -489,6 +493,7 @@ def parse(self, el): elif el.tag == 't': return self.parse_t(el, parsed) elif el.tag == 'br': + print 'ummm wheres the break' return self.parse_break_tag(el, parsed) elif el.tag == 'delText': return self.parse_deletion(el, parsed) @@ -502,7 +507,6 @@ def parse(self, el): return self.parse_image(el) else: return parsed - def parse_page_break(self, el, text): #TODO figure out what parsed is getting overwritten return self.page_break() @@ -728,6 +732,7 @@ def _should_parse_next_as_content(el): next_el = el.next if next_el is None: return False + return False if ( not next_el.is_list_item and not el.is_last_list_item_in_root From 2758f4a58a2346c4da0718e0459b0db45afa7b91 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 6 Jun 2013 11:56:43 -0400 Subject: [PATCH 335/404] merged with master --- pydocx/DocxParser.py | 8 +------- pydocx/parsers/Docx2Html.py | 5 ----- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index b1646c46..43c72051 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -139,7 +139,7 @@ def ZipFile(path): # This is not needed in python 3.2+ class DocxParser: __metaclass__ = ABCMeta - + def _build_data(self, path, *args, **kwargs): with ZipFile(path) as f: self.document_text = f.read('word/document.xml') @@ -206,12 +206,6 @@ def __init__( self._parsed = '' self.block_text = '' self.page_width = 0 -<<<<<<< .merge_file_Q0i6Oc - self.col = 0 - self.row = 0 -======= - self.bookmark = False ->>>>>>> .merge_file_7ASwsQ self.convert_root_level_upper_roman = 
convert_root_level_upper_roman self._image_data = {} self._build_data(path, *args, **kwargs) diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 1836bf0b..698c2e27 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -101,15 +101,10 @@ def deletion(self, text, author, date): 'text': text, } -<<<<<<< HEAD - def list_element(self, text, lst_style = None): - return "
        1. {text}
        2. ".format(text=text) -======= def list_element(self, text): return "
        3. %(text)s
        4. " % { 'text': text, } ->>>>>>> a0f1daa2821285ba65a44bd30e2f2bcd19d17791 def ordered_list(self, text, list_style): return '
            %(text)s
          ' % { From 76dc1e7c0ae1721bda1d25ecfe760ef2b7dc030a Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 6 Jun 2013 11:57:06 -0400 Subject: [PATCH 336/404] merged with master --- pydocx/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index c68a472e..1bcbcb32 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -1,5 +1,5 @@ from .parsers import Docx2Html, Docx2Markdown - +from HtmlConverter import Html2Docx def docx2html(path): return Docx2Html(path).parsed @@ -8,5 +8,8 @@ def docx2html(path): def docx2markdown(path): return Docx2Markdown(path).parsed +def html2docx(path): + return Html2Docx(path).parsed + VERSION = '0.2.1' From 191a99c23ba13eb9b85459519cabb3287c2af30a Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 14:41:33 -0400 Subject: [PATCH 337/404] table issue --- pydocx/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/utils.py b/pydocx/utils.py index ff7f276b..85095bac 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -312,7 +312,7 @@ def _set_table_attributes(self, el): if ( v_merge is not None and ('continue' == v_merge.get('val', '') or - v_merge.attrib == {}) + v_merge.attrib == {}) ): self.meta_data[child]['vmerge_continue'] = True From bc28d22ac7d4c4709150089ce82efa539520279b Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 14:46:31 -0400 Subject: [PATCH 338/404] removed uncessary import --- pydocx/DocxParser.py | 3 --- pydocx/__init__.py | 5 ----- 2 files changed, 8 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index b129d739..50d0b186 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -15,8 +15,6 @@ has_child, has_descendant_with_tag, ) -from bs4 import BeautifulSoup - logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger("NewParser") @@ -58,7 +56,6 @@ def _build_data(self, path, *args, **kwargs): self.fonts = None try: # Only present if 
there are lists self.numbering_text = f.read('word/numbering.xml') - print BeautifulSoup(self.numbering_text).prettify() except KeyError: self.numbering_text = None try: # Only present if there are comments diff --git a/pydocx/__init__.py b/pydocx/__init__.py index b1a9e4ee..cdb7ace3 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -1,5 +1,4 @@ from .parsers import Docx2Html, Docx2Markdown -from HtmlConverter import Html2Docx def docx2html(path): return Docx2Html(path).parsed @@ -8,8 +7,4 @@ def docx2html(path): def docx2markdown(path): return Docx2Markdown(path).parsed -def html2docx(path): - return Html2Docx(path).parsed - - VERSION = '0.3.0' From 52286ae6c995cfe93beb42eab3bfdaa538fa185f Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 14:55:12 -0400 Subject: [PATCH 339/404] removed uncessary file --- pydocx/HtmlConverter.py | 171 ---------------------------------------- 1 file changed, 171 deletions(-) delete mode 100644 pydocx/HtmlConverter.py diff --git a/pydocx/HtmlConverter.py b/pydocx/HtmlConverter.py deleted file mode 100644 index 6d8059eb..00000000 --- a/pydocx/HtmlConverter.py +++ /dev/null @@ -1,171 +0,0 @@ -__author__ = 'samportnow' -from bs4 import BeautifulSoup -import zipfile -from pydocx.DocxParser import ElementTree -from pydocx.py_docx.docx import * -import py_docx.docx as docx -import os - -class Html2Docx(): - - def __init__(self, html): - # set up what is parsed - self.parsed = '' - with open(html, 'r') as f: - html = f.read() - # set up the html - self.html = ElementTree.fromstring(html) - # get the relationship list - self.relationships = relationshiplist() - # make a new document - self.document = newdocument() - #make the document - self.body = self.document.xpath('/w:document/w:body', namespaces=nsprefixes)[0] - self.build() - - def build(self): - def add_parent(el): - for child in el.getchildren(): - setattr(child, 'parent', el) - add_parent(child) - add_parent(self.html) - self.set_list_attributes() - 
self.parse(self.html.find_first('body')) - - def find_all_by_tags(self, html, *args): - list_elements = [] - for el in html.iter(): - if el.tag in args: - list_elements.append(el) - return list_elements - - def check_for_lst_parent(self, el): - lst_parent = False - if el.parent.tag != 'body': - if el.parent.tag == 'ol' or el.parent.tag == 'ul': - lst_parent = True - self.check_for_lst_parent(el.parent) - else: - return lst_parent - - def set_list_attributes(self): - ilvl = -1 - numId = 0 - lsts = self.find_all_by_tags(self.html, 'ol', 'ul') - for lst in lsts: - lst.getchildren()[0].is_first_list_item = True - lst.getchildren()[-1].is_last_list_item = True - for el in self.html.find('body').iter(): - if el.tag == 'li': - if self.check_for_lst_parent(el.parent) \ - is False and el.is_first_list_item is True: - numId += 1 - ilvl = -1 - if el.is_first_list_item is True: - ilvl += 1 - el.ilvl = ilvl - el.num_id = numId - el.is_list_item = True - - def justificaton(self, el): - pass - - def parse(self, el): - for child in el.getchildren(): - if child.tag == 'p': - text_and_style = self.parse_r(child) - self.body.append(paragraph(text_and_style)) - if child.tag == 'ul' or child.tag == 'ol': - lst_type = child.tag - self.parse_list(child, lst_type) - if child.tag == 'table': - self.body.append(self.parse_table(child)) - self.parse(child) - self.save() - - def parse_r(self, el): - par_block = [] - breaks = [] - style = '' - for child in el.iter(): - text = '' - if child.tag == 'em': - style += 'i' - if child.tag == 'strong': - style += 'b' - if child.tag == 'underline': - style += 'u' - if child.text: - text = child.text - if child.tag == 'br': - text = child.tail - breaks.append('br') - if text: - par_block.append([text, style, breaks]) - style = '' - if child.parent.tag == 'li': - return par_block - return par_block - - - def parse_list(self, lst, lst_type = ''): - for child in lst.getchildren(): - if child.tag == 'li': - text_and_style = self.parse_r(child) - 
self.body.append( - paragraph( - text_and_style, is_list=True, ilvl=str(child.ilvl), numId=str(child.num_id), style=lst_type)) - - def table_look_ahead(self, tbl): - columns = 0 - trs = tbl.find_all('tr') - tcs = trs[0].find_all('td') - for tc in tcs: - if 'colspan' in tc.attrib: - columns += int(tc.attrib['colspan']) - else: - columns += 1 - return columns - - def parse_table(self, el): - columns = self.table_look_ahead(el) - tbl = createtblproperties(columns) - for tr in el.getchildren(): - table_row = createtablerow() - tcs = tr.find_all('td') - for tc in tcs: - if "colspan" in tc.attrib: - cell = createtablecell(gridspan=tc.attrib["colspan"]) - if "rowspan" in tc.attrib: - print "ya buddy" - print tc.attrib["rowspan"] - else: - cell = createtablecell() - if tc.text: - text_and_style = self.parse_r(tc) - par_run=paragraph(text_and_style) - cell.append(par_run) - table_row.append(cell) - tbl.append(table_row) - return tbl - - - def save(self): - title = 'Python docx demo' - subject = 'A practical example of making docx from Python' - creator = 'Mike MacCana' - keywords = ['python', 'Office Open XML', 'Word'] -# print BeautifulSoup( -# ElementTree.tostring( -# self.document, -# ), -# ).prettify() - coreprops = coreproperties(title=title, subject=subject, creator=creator, - keywords=keywords) - appprops = appproperties() - contenttypes = docx.contenttypes() - websettings = docx.websettings() - wordrelationships = docx.wordrelationships(self.relationships) - # Save our document - savedocx(self.document, coreprops, appprops, contenttypes, websettings, - wordrelationships, 'Testing.docx') From 2531459386c31bf65ed0653964f7dab14067d31d Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 14:58:40 -0400 Subject: [PATCH 340/404] updated the test --- pydocx/tests/test_docx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index cc7368f6..99caa80b 100644 --- a/pydocx/tests/test_docx.py 
+++ b/pydocx/tests/test_docx.py @@ -650,7 +650,8 @@ def test_simple_table(): assert_html_equal(actual_html, BASE_HTML % '''
          - ' + text + '' - def table_cell(self, text, col='', row=''): + def table_cell(self, text, col='', row='', *args): slug = '' def change_orientation(self, parsed, orient): diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index aa75ddbf..41267d10 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -37,9 +37,11 @@ ''' % STYLE BASE_LATEX = r'''\documentclass{article}\usepackage{hyperref} -\usepackage{graphicx}\usepackage{changes}\usepackage{changepage} -\usepackage[paperwidth=612pt]{geometry}\usepackage{hanging} -\usepackage{multirow}\begin{document}''' + "%s" + r'''\end{document} +\usepackage{graphicx}\usepackage{changes} +\usepackage{changepage} +\usepackage{hanging}\usepackage{multirow} +\usepackage{pbox}\usepackage{pdflscape} +\usepackage{ulem}\usepackage{comment}\begin{document}''' + "%s" + r'''\end{document} ''' @@ -116,54 +118,72 @@ def smart_space(match): return html.strip() -#class XMLDocx2Latex(Docx2LaTex): -# -# def _build_data( -# self, -# document_xml=None, -# rels_dict=None, -# numbering_dict=None, -# *args, **kwargs): -# self._test_rels_dict = rels_dict -# if numbering_dict is None: -# numbering_dict = {} -# self.numbering_dict = numbering_dict -# # Intentionally not calling super -# if document_xml is not None: -# self.root = ElementTree.fromstring( -# remove_namespaces(document_xml), -# ) -# -# # This is the standard page width for a word document, Also the page -# # width that we are looking for in the test. 
-# self.page_width = 612 -# -# def _parse_rels_root(self, *args, **kwargs): -# if self._test_rels_dict is None: -# return {} -# return self._test_rels_dict -# -# def get_list_style(self, num_id, ilvl): -# try: -# return self.numbering_dict[num_id][ilvl] -# except KeyError: -# return 'decimal' -# -# def _parse_styles(self): -# return {} -# -# -#DEFAULT_NUMBERING_DICT = { -# '1': { -# '0': 'decimal', -# '1': 'decimal', -# }, -# '2': { -# '0': 'lowerLetter', -# '1': 'lowerLetter', -# }, -#} +class XMLDocx2Latex(Docx2LaTex): + """ + Create the object without passing in a path to the document, set them + manually. + """ + def __init__(self, *args, **kwargs): + # Pass in nothing for the path + super(XMLDocx2Latex, self).__init__(path=None, *args, **kwargs) + + def _build_data( + self, + path, + document_xml=None, + rels_dict=None, + numbering_dict=None, + styles_dict=None, + *args, **kwargs): + self._test_rels_dict = rels_dict + if rels_dict: + for value in rels_dict.values(): + self._image_data['word/%s' % value] = 'word/%s' % value + self.numbering_root = None + if numbering_dict is not None: + self.numbering_root = parse_xml_from_string( + DXB.numbering(numbering_dict), + ) + self.numbering_dict = numbering_dict + # Intentionally not calling super + if document_xml is not None: + self.root = parse_xml_from_string(document_xml) + self.zip_path = '' + + # This is the standard page width for a word document, Also the page + # width that we are looking for in the test. 
+ self.page_width = 612 + + self.styles_dict = styles_dict + + def _parse_rels_root(self, *args, **kwargs): + if self._test_rels_dict is None: + return {} + return self._test_rels_dict + + def get_list_style(self, num_id, ilvl): + try: + return self.numbering_dict[num_id][ilvl] + except KeyError: + return 'decimal' + + def _parse_styles(self): + if self.styles_dict is None: + return {} + return self.styles_dict + + +DEFAULT_NUMBERING_DICT = { + '1': { + '0': 'decimal', + '1': 'decimal', + }, + '2': { + '0': 'lowerLetter', + '1': 'lowerLetter', + }, + } class XMLDocx2Html(Docx2Html): """ @@ -234,11 +254,14 @@ def _parse_styles(self): class _TranslationTestCase(TestCase): expected_output = None + latex_expected_output = None relationship_dict = None styles_dict = None numbering_dict = DEFAULT_NUMBERING_DICT run_expected_output = True parser = XMLDocx2Html + latex_parser = XMLDocx2Latex + latex_expected_output = None use_base_html = True convert_root_level_upper_roman = False @@ -262,6 +285,7 @@ def test_expected_output(self): # Verify the final output. 
parser = self.parser + latex_parser = self.latex_parser def image_handler(self, src, *args, **kwargs): return src @@ -275,35 +299,12 @@ def image_handler(self, src, *args, **kwargs): ).parsed assert_html_equal(html, BASE_HTML % self.expected_output) -# -#class _LatexTranslationTestCase(TestCase): -# expected_output = None -# relationship_dict = None -# numbering_dict = DEFAULT_NUMBERING_DICT -# run_expected_output = True -# -# def get_xml(self): -# raise NotImplementedError() -# -# @contextmanager -# def toggle_run_expected_output(self): -# self.run_expected_output = not self.run_expected_output -# yield -# self.run_expected_output = not self.run_expected_output -# -# def test_expected_output(self): -# if self.expected_output is None: -# raise NotImplementedError('expected_output is not defined') -# if not self.run_expected_output: -# return -# -# # Create the xml -# tree = self.get_xml() -# -# # Verify the final output. -# latex = XMLDocx2Latex( -# document_xml=tree, -# rels_dict=self.relationship_dict, -# numbering_dict=self.numbering_dict, -# ).parsed -# assert_latex_equal(latex, BASE_LATEX % self.expected_output) + latex_parser.image_handler = image_handler + latex = latex_parser( + convert_root_level_upper_roman=self.convert_root_level_upper_roman, + document_xml=tree, + rels_dict=self.relationship_dict, + numbering_dict=self.numbering_dict, + styles_dict=self.styles_dict, + ).parsed + assert_latex_equal(latex, BASE_LATEX % self.latex_expected_output) diff --git a/pydocx/tests/test_latex.py b/pydocx/tests/test_latex.py deleted file mode 100644 index a39b4e5f..00000000 --- a/pydocx/tests/test_latex.py +++ /dev/null @@ -1,616 +0,0 @@ -from itertools import chain - -from pydocx.tests.document_builder import DocxBuilder as DXB -from pydocx.tests import ( - _LatexTranslationTestCase, -) - - -class ParagraphTestCase(_LatexTranslationTestCase): - expected_output = ''' - AAA \n\n - BBB \n\n -''' - - def get_xml(self): - tags = [ - DXB.p_tag(text='AAA', bold=False), - 
DXB.p_tag(text='BBB', bold=False), - ] - - body = '' - for tag in tags: - body += tag - xml = DXB.xml(body) - return xml - - -class BoldTestCase(_LatexTranslationTestCase): - expected_output = r''' - \textbf{AAA}''' \ - + "\n" + '''BBB''' + "\n" - - def get_xml(self): - tags = [ - DXB.p_tag(text='AAA', bold=True), - DXB.p_tag(text='BBB', bold=True, val='false'), - ] - - body = '' - for tag in tags: - body += tag - xml = DXB.xml(body) - return xml - - -class HyperlinkVanillaTestCase(_LatexTranslationTestCase): - relationship_dict = { - 'rId0': 'www.google.com', - } - - expected_output = r''' - \href{www.google.com}{link}. - ''' - - def get_xml(self): - run_tags = [] - run_tags.append(DXB.r_tag('link', is_bold=False)) - run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag('.', is_bold=False)) - body = DXB.p_tag(run_tags) - xml = DXB.xml(body) - return xml - - -class HyperlinkWithMultipleRunsTestCase(_LatexTranslationTestCase): - relationship_dict = { - 'rId0': 'www.google.com', - } - - expected_output = r''' - \href{www.google.com}{link}. - ''' - - def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'link'] - run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag('.', is_bold=False)) - body = DXB.p_tag(run_tags) - xml = DXB.xml(body) - return xml - - -class HyperlinkNoTextTestCase(_LatexTranslationTestCase): - relationship_dict = { - 'rId0': 'www.google.com', - } - - expected_output = '' - - def get_xml(self): - run_tags = [] - run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - body = DXB.p_tag(run_tags) - xml = DXB.xml(body) - return xml - - -class HyperlinkNotInRelsDictTestCase(_LatexTranslationTestCase): - relationship_dict = { - # 'rId0': 'www.google.com', missing - } - - expected_output = r''' - link. 
- ''' - - def get_xml(self): - run_tags = [] - run_tags.append(DXB.r_tag('link', is_bold=False)) - run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag('.', is_bold=False)) - body = DXB.p_tag(run_tags) - xml = DXB.xml(body) - return xml - - -class HyperlinkWithBreakTestCase(_LatexTranslationTestCase): - relationship_dict = { - 'rId0': 'www.google.com', - } - - expected_output = r''' - \href{www.google.com}{link\\} - ''' - - def get_xml(self): - run_tags = [] - run_tags.append(DXB.r_tag('link')) - run_tags.append(DXB.r_tag(None, include_linebreak=True)) - run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - body = DXB.p_tag(run_tags) - xml = DXB.xml(body) - return xml - - -class ImageNotInRelsDictTestCase(_LatexTranslationTestCase): - relationship_dict = { - # 'rId0': 'media/image1.jpeg', - } - expected_output = '' - - def get_xml(self): - drawing = DXB.drawing(height=20, width=40, r_id='rId0') - body = drawing - - xml = DXB.xml(body) - return xml - - -class TableTag(_LatexTranslationTestCase): - expected_output = r''' - \begin{tabular}{ l l } - AAA & BBB \\ - CCC & DDD \\ - \end{tabular} - ''' - - def get_xml(self): - table = DXB.table(num_rows=2, num_columns=2, text=chain( - [DXB.p_tag('AAA')], - [DXB.p_tag('BBB')], - [DXB.p_tag('CCC')], - [DXB.p_tag('DDD')], - )) - body = table - xml = DXB.xml(body) - return xml - - -class TableWithInvalidTag(_LatexTranslationTestCase): - expected_output = r''' - \begin{tabular}{ l l } - AAA & BBB \\ - & DDD \\ - \end{tabular} - ''' - - def get_xml(self): - table = DXB.table(num_rows=2, num_columns=2, text=chain( - [DXB.p_tag('AAA')], - [DXB.p_tag('BBB')], - # This tag may have CCC in it, however this tag has no meaning - # pertaining to content. 
- ['CCC'], - [DXB.p_tag('DDD')], - )) - body = table - xml = DXB.xml(body) - return xml - - -class TableWithListAndParagraph(_LatexTranslationTestCase): - expected_output = r''' - \begin{tabular}{ l } - \pbox{20cm} - {CCC \\ DDD} \\ - \end{tabular} - ''' - - def get_xml(self): - els = [ - DXB.p_tag('CCC'), - DXB.p_tag('DDD'), - ] - td = '' - for el in els: - td += el - table = DXB.table(num_rows=1, num_columns=1, text=chain( - [td], - )) - body = table - xml = DXB.xml(body) - return xml - - -class SimpleListTestCase(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA - \item BBB - \item CCC - \end {enumerate} - ''' - - # Ensure its not failing somewhere and falling back to decimal - numbering_dict = { - '1': { - '0': 'lowerLetter', - } - } - - def get_xml(self): - li_text = [ - ('AAA', 0, 1), - ('BBB', 0, 1), - ('CCC', 0, 1), - ] - lis = '' - for text, ilvl, numId in li_text: - lis += DXB.li(text=text, ilvl=ilvl, numId=numId) - - xml = DXB.xml(lis) - return xml - - -class SingleListItemTestCase(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA - \end {enumerate} - ''' - - # Ensure its not failing somewhere and falling back to decimal - numbering_dict = { - '1': { - '0': 'lowerLetter', - } - } - - def get_xml(self): - li_text = [ - ('AAA', 0, 1), - ] - lis = '' - for text, ilvl, numId in li_text: - lis += DXB.li(text=text, ilvl=ilvl, numId=numId) - - xml = DXB.xml(lis) - return xml - - -class ListWithContinuationTestCase(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA \\ BBB - \item CCC - \begin{tabular} {ll} - DDD & EEE \\ - FFF & GGG \\ - \end{tabular} - \item HHH - \end{enumerate} - ''' - - def get_xml(self): - table = DXB.table(num_rows=2, num_columns=2, text=chain( - [DXB.p_tag('DDD')], - [DXB.p_tag('EEE')], - [DXB.p_tag('FFF')], - [DXB.p_tag('GGG')], - )) - tags = [ - DXB.li(text='AAA', ilvl=0, numId=1), - DXB.p_tag('BBB'), - DXB.li(text='CCC', ilvl=0, 
numId=1), - table, - DXB.li(text='HHH', ilvl=0, numId=1), - ] - body = '' - for el in tags: - body += el - - xml = DXB.xml(body) - return xml - - -class ListWithMultipleContinuationTestCase(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA - \begin{tabular} {l} - BBB\\ - \end{tabular} - \begin{tabular} {l} - CCC\\ - \end{tabular} - \item DDD - \end{enumerate} - ''' - - def get_xml(self): - table1 = DXB.table(num_rows=1, num_columns=1, text=chain( - [DXB.p_tag('BBB')], - )) - table2 = DXB.table(num_rows=1, num_columns=1, text=chain( - [DXB.p_tag('CCC')], - )) - tags = [ - DXB.li(text='AAA', ilvl=0, numId=1), - table1, - table2, - DXB.li(text='DDD', ilvl=0, numId=1), - ] - body = '' - for el in tags: - body += el - - xml = DXB.xml(body) - return xml - - -class MangledIlvlTestCase(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA - \end{enumerate} - \begin{enumerate} - \item BBB - \begin{enumerate} - \item CCC - \end{enumerate} - \end{enumerate} - ''' - - def get_xml(self): - li_text = [ - ('AAA', 0, 2), - ('BBB', 1, 1), - ('CCC', 0, 1), - ] - lis = '' - for text, ilvl, numId in li_text: - lis += DXB.li(text=text, ilvl=ilvl, numId=numId) - - xml = DXB.xml(lis) - return xml - - -class SeperateListsTestCase(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA - \end{enumerate} - \begin{enumerate} - \item BBB - \end{enumerate} - \begin{enumerate} - \item CCC - \end{enumerate} - ''' - - def get_xml(self): - li_text = [ - ('AAA', 0, 2), - # Because AAA and CCC are part of the same list (same list id) - # and BBB is different, these need to be split into three - # lists (or lose everything from BBB and after. 
- ('BBB', 0, 1), - ('CCC', 0, 2), - ] - lis = '' - for text, ilvl, numId in li_text: - lis += DXB.li(text=text, ilvl=ilvl, numId=numId) - - xml = DXB.xml(lis) - return xml - - -class InvalidIlvlOrderTestCase(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA - \begin{enumerate} - \item BBB - \begin{enumerate} - \item CCC - \end {enumerate} - \end{enumerate} - \end{enumerate} - ''' - - def get_xml(self): - tags = [ - DXB.li(text='AAA', ilvl=1, numId=1), - DXB.li(text='BBB', ilvl=3, numId=1), - DXB.li(text='CCC', ilvl=2, numId=1), - ] - body = '' - for el in tags: - body += el - - xml = DXB.xml(body) - return xml - - -class NonStandardTextTagsTestCase(_LatexTranslationTestCase): - expected_output = r''' - \added[id=, remark=]{insert} smarttag - ''' - - def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'insert '] - insert_tag = DXB.insert_tag(run_tags) - run_tags = [DXB.r_tag(i) for i in 'smarttag'] - smart_tag = DXB.smart_tag(run_tags) - - run_tags = [insert_tag, smart_tag] - body = DXB.p_tag(run_tags) - xml = DXB.xml(body) - return xml - - -class RTagWithNoText(_LatexTranslationTestCase): - expected_output = '' - - def get_xml(self): - p_tag = DXB.p_tag(None) # No text - run_tags = [p_tag] - # The bug is only present in a hyperlink - run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - body = DXB.p_tag(run_tags) - - xml = DXB.xml(body) - return xml - - -class DeleteTagInList(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA \deleted[id=, remark=]{BBB} - \item CCC - \end{enumerate} - ''' - - def get_xml(self): - delete_tags = DXB.delete_tag(['BBB']) - p_tag = DXB.p_tag([delete_tags]) - - body = DXB.li(text='AAA', ilvl=0, numId=0) - body += p_tag - body += DXB.li(text='CCC', ilvl=0, numId=0) - - xml = DXB.xml(body) - return xml - - -class InsertTagInList(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA\added[id=,remark=]{BBB} - \item CCC - 
\end{enumerate} - ''' - - def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'BBB'] - insert_tags = DXB.insert_tag(run_tags) - p_tag = DXB.p_tag([insert_tags]) - - body = DXB.li(text='AAA', ilvl=0, numId=0) - body += p_tag - body += DXB.li(text='CCC', ilvl=0, numId=0) - - xml = DXB.xml(body) - return xml - - -class SmartTagInList(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAABBB - \item CCC - \end{enumerate} - ''' - - def get_xml(self): - run_tags = [DXB.r_tag(i) for i in 'BBB'] - smart_tag = DXB.smart_tag(run_tags) - p_tag = DXB.p_tag([smart_tag]) - - body = DXB.li(text='AAA', ilvl=0, numId=0) - body += p_tag - body += DXB.li(text='CCC', ilvl=0, numId=0) - - xml = DXB.xml(body) - return xml - - -class SingleListItem(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA - \end{enumerate}''' + '\n' + 'BBB' - - numbering_dict = { - '1': { - '0': 'lowerLetter', - } - } - - def get_xml(self): - li = DXB.li(text='AAA', ilvl=0, numId=1) - p_tags = [ - DXB.p_tag('BBB'), - ] - body = li - for p_tag in p_tags: - body += p_tag - xml = DXB.xml(body) - return xml - - -class SimpleTableTest(_LatexTranslationTestCase): - expected_output = r''' - \begin{tabular} { lll } - Blank & - Column 1 & - Column 2 \\ - Row 1 & - First & - Second \\ - Row 2 & - Third & - Fourth \\ - \end{tabular}''' - - def get_xml(self): - table = DXB.table(num_rows=3, num_columns=3, text=chain( - [DXB.p_tag('Blank')], - [DXB.p_tag('Column 1')], - [DXB.p_tag('Column 2')], - [DXB.p_tag('Row 1')], - [DXB.p_tag('First')], - [DXB.p_tag('Second')], - [DXB.p_tag('Row 2')], - [DXB.p_tag('Third')], - [DXB.p_tag('Fourth')], - ), merge=True) - body = table - - xml = DXB.xml(body) - return xml - - -class MissingIlvl(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAA \\ - BBB - \item CCC - \end{enumerate} - ''' - - def get_xml(self): - li_text = [ - ('AAA', 0, 1), - ('BBB', None, 1), # Because why not. 
- ('CCC', 0, 1), - ] - lis = '' - for text, ilvl, numId in li_text: - lis += DXB.li(text=text, ilvl=ilvl, numId=numId) - body = lis - xml = DXB.xml(body) - return xml - - -class SDTTestCase(_LatexTranslationTestCase): - expected_output = r''' - \begin{enumerate} - \item AAABBB - \item CCC - \end{enumerate} - ''' - - def get_xml(self): - body = '' - body += DXB.li(text='AAA', ilvl=0, numId=0) - body += DXB.sdt_tag(p_tag=DXB.p_tag(text='BBB')) - body += DXB.li(text='CCC', ilvl=0, numId=0) - - xml = DXB.xml(body) - return xml - -#TODO: WORKOUT IMAGE CONVERSIONS -#TODO: IMAGE NOSIZE TESTCASE diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 00c231db..31f699cf 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -17,6 +17,9 @@ class BoldTestCase(_TranslationTestCase):

          AAA

          BBB

          """ + latex_expected_output = r''' + \textbf{AAA}'''\ + + "\n" + '''BBB''' + "\n" def get_xml(self): tags = [ @@ -54,6 +57,10 @@ class HyperlinkVanillaTestCase(_TranslationTestCase):

          link.

          ''' + latex_expected_output = r''' + \href{www.google.com}{link}. + ''' + def get_xml(self): run_tags = [] run_tags.append(DXB.r_tag([DXB.t_tag('link')])) @@ -73,6 +80,10 @@ class HyperlinkWithMultipleRunsTestCase(_TranslationTestCase):

          link.

          ''' + latex_expected_output = r''' + \href{www.google.com}{link}. + ''' + def get_xml(self): run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'link'] run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] @@ -89,6 +100,8 @@ class HyperlinkNoTextTestCase(_TranslationTestCase): expected_output = '' + latex_expected_output = '' + def get_xml(self): run_tags = [] run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] @@ -104,6 +117,10 @@ class HyperlinkNotInRelsDictTestCase(_TranslationTestCase): expected_output = '

          link.

          ' + latex_expected_output = r''' + link. + ''' + def get_xml(self): run_tags = [] run_tags.append(DXB.r_tag([DXB.t_tag('link')])) @@ -121,6 +138,10 @@ class HyperlinkWithBreakTestCase(_TranslationTestCase): expected_output = '

          link

          ' + latex_expected_output = r''' + \href{www.google.com}{link\\} + ''' + def get_xml(self): run_tags = [] run_tags.append(DXB.r_tag([DXB.t_tag('link')])) @@ -141,19 +162,26 @@ class ImageLocal(_TranslationTestCase):

          ''' + latex_expected_output = r''' + \includegraphics {word/media/image1.jpeg} + ''' + '\n' + ''' + \includegraphics {word/media/image2.jpeg} + ''' + + def get_xml(self): - drawing = DXB.drawing(height=None, width=None, r_id='rId0') - pict = DXB.pict(height=None, width=None, r_id='rId1') - tags = [ - drawing, - pict, - ] - body = '' - for el in tags: - body += el + drawing = DXB.drawing(height=None, width=None, r_id='rId0') + pict = DXB.pict(height=None, width=None, r_id='rId1') + tags = [ + drawing, + pict, + ] + body = '' + for el in tags: + body += el - xml = DXB.xml(body) - return xml + xml = DXB.xml(body) + return xml class ImageTestCase(_TranslationTestCase): @@ -170,6 +198,11 @@ class ImageTestCase(_TranslationTestCase):

          ''' + latex_expected_output = r''' + \includegraphics[height=20pxpt, width=30.0pt]{word/media/image1.jpeg} + ''' + '\n' + ''' + \includegraphics[height=21ptpt, width=41pt]{word/media/image2.jpeg} + ''' def get_xml(self): drawing = DXB.drawing(height=20, width=40, r_id='rId0') pict = DXB.pict(height=21, width=41, r_id='rId1') @@ -233,6 +266,8 @@ class ImageNotInRelsDictTestCase(_TranslationTestCase): } expected_output = '' + latex_expected_output = '' + def get_xml(self): drawing = DXB.drawing(height=20, width=40, r_id='rId0') body = drawing @@ -261,6 +296,8 @@ class ImageNoSizeTestCase(_TranslationTestCase): ''' % relationship_dict['rId0'] + latex_expected_output = r'\includegraphics{%s}' % relationship_dict['rId0'] + @staticmethod def image_handler(image_id, relationship_dict): return relationship_dict.get(image_id) @@ -295,6 +332,14 @@ class TableTag(_TranslationTestCase):
          Cell1
          +
          + Cell1
          Cell3
          Cell2
          From 468502ead8d714f0e7eccee818aa5f3080efdf6e Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 15:04:53 -0400 Subject: [PATCH 341/404] flake8 compliant --- pydocx/DocxParser.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 07ace3a1..11f7f193 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -172,6 +172,7 @@ def parse(self, el): return self.parse_image(el) else: return parsed + def parse_page_break(self, el, text): #TODO figure out what parsed is getting overwritten return self.page_break() @@ -185,7 +186,7 @@ def parse_table_row(self, el, text): def parse_table_cell(self, el, text): v_merge = find_first(el, 'vMerge') if v_merge is not None and ( - 'continue' == v_merge.get('val', '')): + 'continue' == v_merge.get('val', '')): return '' colspan = self.get_colspan(el) rowspan = self._get_rowspan(el, v_merge) @@ -447,9 +448,9 @@ def _get_rowspan(self, el, v_merge): if tbl is None: return '' tcs = [ - tc for tc in find_all(tbl, 'tc') - if self.pre_processor.row_index(tc) >= current_row and - self.pre_processor.column_index(tc) == current_col + tc for tc in find_all(tbl, 'tc') + if self.pre_processor.row_index(tc) >= current_row and + self.pre_processor.column_index(tc) == current_col ] restart_in_v_merge = False if v_merge is not None and 'val' in v_merge.attrib: From 4145aa765a31cc5d0840265374d38cd931921e3d Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 15:08:01 -0400 Subject: [PATCH 342/404] flake8 compliant --- pydocx/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index cdb7ace3..8a4973b6 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -1,5 +1,6 @@ from .parsers import Docx2Html, Docx2Markdown + def docx2html(path): return Docx2Html(path).parsed From b4edee2feaa01ed9b049a0546fb30604cf45ca0b Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 16:25:47 
-0400 Subject: [PATCH 343/404] minor changes --- pydocx/DocxParser.py | 3 --- pydocx/parsers/Docx2XML.py | 16 ---------------- pydocx/tests/document_builder.py | 10 ++++++++-- pydocx/tests/templates/tc.xml | 4 ++++ pydocx/tests/test_xml.py | 19 ++++++++++++++++++- 5 files changed, 30 insertions(+), 22 deletions(-) delete mode 100644 pydocx/parsers/Docx2XML.py diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 11f7f193..c13d574d 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -104,7 +104,6 @@ def __init__( self._parsed = '' self.block_text = '' self.page_width = 0 - self.bookmark = False self.convert_root_level_upper_roman = convert_root_level_upper_roman self._image_data = {} self._build_data(path, *args, **kwargs) @@ -158,7 +157,6 @@ def parse(self, el): elif el.tag == 't': return self.parse_t(el, parsed) elif el.tag == 'br': - print 'ummm wheres the break' return self.parse_break_tag(el, parsed) elif el.tag == 'delText': return self.parse_deletion(el, parsed) @@ -410,7 +408,6 @@ def _should_parse_next_as_content(el): next_el = self.pre_processor.next(el) if next_el is None: return False - return False if ( not self.pre_processor.is_list_item(next_el) and not self.pre_processor.is_last_list_item_in_root(el) diff --git a/pydocx/parsers/Docx2XML.py b/pydocx/parsers/Docx2XML.py deleted file mode 100644 index be0a4953..00000000 --- a/pydocx/parsers/Docx2XML.py +++ /dev/null @@ -1,16 +0,0 @@ -__author__ = 'samportnow' - -from pydocx.parsers.Docx2Html import Docx2Html - - -class Docx2XML(Docx2Html): - - def insertion(self, text, author, date): - return ("{text}" - ).format(author=author, date=date, text=text) - - def deletion(self, text, author, date): - return ("{text}" - ).format(author=author, date=date, text=text) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 4bed0384..757ed94e 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -178,11 +178,11 @@ def li(self, 
text, ilvl, numId, bold=False): return template.render(**kwargs) @classmethod - def table(self, num_rows, num_columns, text, merge=False): + def table(self, num_rows, num_columns, text, merge=False, merge_continue=False): def _tc(cell_value): template = env.get_template(templates['tc']) - return template.render(p_tag=cell_value, merge=merge) + return template.render(p_tag=cell_value, merge=merge, merge_continue= merge_continue) def _tr(rows, text): tcs = [_tc(text.next()) for _ in range(rows)] @@ -193,6 +193,12 @@ def _tr(rows, text): template = env.get_template(templates['table']) return template.render(table_rows=trs) + @classmethod + def tc(cell_value, merge, merge_continue): + template = env.get_template(templates['tc']) + return template.render(p_tag=cell_value, merge=merge, merge_continue= merge_continue) + + @classmethod def drawing(self, r_id, height=None, width=None): template = env.get_template(templates['drawing']) diff --git a/pydocx/tests/templates/tc.xml b/pydocx/tests/templates/tc.xml index 70a318d7..255ce08b 100644 --- a/pydocx/tests/templates/tc.xml +++ b/pydocx/tests/templates/tc.xml @@ -1,6 +1,10 @@ + {% if merge_continue %} + + + {% endif %} {% if merge %} diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index cc54e9dd..c0b9453a 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -30,7 +30,6 @@ def get_xml(self): xml = DXB.xml(body) return xml - class HyperlinkVanillaTestCase(_TranslationTestCase): relationship_dict = { 'rId0': 'www.google.com', @@ -292,6 +291,24 @@ def get_xml(self): xml = DXB.xml(body) return xml +class RowSpanTestCase(_TranslationTestCase): + expected_output = ''' + + + + + + + + +
          AAA +
          + BBB
          CCCDDD
          + ''' + def get_xml(self): + Cell1 = DXB.tc(DXB.p_tag('AAA'), merge=True) + Cell2 = DXB.tc(DXB.p_tag('BBB'), merge_continue=True) + print Cell1 class NestedTableTag(_TranslationTestCase): expected_output = ''' From ddc560020edd5b4cf05df73654bf49121f554033 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 12 Jun 2013 17:57:03 -0400 Subject: [PATCH 344/404] refs #44: Added tests showing what the different supported r styles should look like --- pydocx/fixtures/all_configured_styles.docx | Bin 0 -> 3486 bytes pydocx/tests/__init__.py | 4 ++++ pydocx/tests/test_docx.py | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 pydocx/fixtures/all_configured_styles.docx diff --git a/pydocx/fixtures/all_configured_styles.docx b/pydocx/fixtures/all_configured_styles.docx new file mode 100644 index 0000000000000000000000000000000000000000..8f51437226a389ee627361bfc80fa6037199249c GIT binary patch literal 3486 zcmai0c|4SB8@7*qmqA9hkQi#LHOh!AU-o3ll4UZO8BE!-Hz&Ig9lJ>)qtQr|u~ibI zOxc$b@s%dKtl7Sq&gpRazSDQVzu)_N|9G$WeeUbIp6k9W&6rt)8Q9s`84@dA!5H=w z7wy?K(8~`4R;Aww8E4Fx^|`OjZHXAnj`%6Zw^LSRU}%|T05)fO_6-Ga6hYW|ug^)c zOvJS{gnpgh-;Jbfk%HE|Qv0$Xh5_%B=@XI;WS`YnpTZ5dJ;3_=2Fx z_4w9*O_xK|1McQY!!4}aj=RfLm!$`c)UIYn9l;f;_x1a24Acs8& zcPTx8zgk;W#5~lq%k6e(dBD04B#S%5*?a5CEp2Yfyba{^YyHa51qW^W+DBQ6cbtrZ zcMuSC_{)Yizb=>Yq|V}&A&I3TOEUob=QoV9L$rROObiT`|Hz0S?Tuh`peGoPMqRSK z;O^(88iMpAq&K5l4)DKN)nj!>d5M`)U6La4w~+ALJi{<5@fM3NyiCrVpP7XRB*l((`YOYNTeuj?N;RAB%Vu8KzXJR ze=nq{+mb_{bQ%ak>L>kmf$j6JUOicP%UKC(`iej+V{#&ZZx%ac-A+AHzRT1xah2D%ziXj zp>Y!OOV;-PVC@evd7?coBfU_U=+yloth7$fXKMV|xrjt!a9%Ay05P1z-e?*TosEi8 zi;}aNek)|og9u%=N^0PN8NUV5 zTIz<$7$rUlCfRh#QVO1+Cs-$;rWW!U5*QtM9lR|zDk13@nX!hN7l)e3$vcPFl9=fX z#BXJ~Kc@AU#`r7U2>f7RZ|{v+uTzKB_+QZ4ZV%Rs>Gly3SRvkp*1%%*qN~~x&h8{k zb2xlkx;!rDH+1&5-n*iCH8j~9Qw;pJoON$2WOyCDS#5W_vN3MhvCa3Q zScsG=tfIZoHkX?16%_YjHov^MDejdd(p7rL&r|eAnDvmRCHMn0*;#|khS`O z9!GBg325p3uQ}2#Hg^g~GZ@@Zg1YNMC^cAoj{2svxx>w)7)r0P>xdg3|4j<0^syeu 
z#|`MbQ^?Qe;&1VCyIBDH@x!@x+Y2&LVdvv=-UVTN>l_KyR`V&Ol02q(ySU!P36c$m zufQ|68M6+y`}(Yz7BNgQ12PIPznqP)1;sKhH+HoR;@{}E#gON(CJ&*4W(r!n;&2^& z2i5F{O+=@!IY2*U9V>L#Z8*`SOE!o-+bikQ;Wlk9K4|9obo)^59173OHLSS5DQ*GO z6R@vzAh>%)i!L5)m($cXO*n&WzY-7M5Ak3wh5C76zB`UB5>1oNc9W~X`Y|3{ik60L zJVy+6P;OYt--Oq_uS%r|foqNoGA8L?;F?*KHm6M1UvbIUh>K8-6@@&{O17V@>}1MJ zFs*xMG))cY7biv^ZWqR80IQeC`-&X;NdwAy~Iq$6h}Z1b!7`rdi3w)<-gywbOeXlb84p+<23?k=M6G z4DXGC9(*f#lClhF8C5`()dcV>9IN;QD7aOk`!-k7Zm17Z>Ig?2C}PX>2UW>m?~1;h z%i;?OpV*MET(W*~Bsq3D&J&B8`PBqWl zQM`RWNVR_~*8Zx5&YZ@F%`S{s#nL>Yf5&i1J{Ntm2OSG1lU)GTe48MX9X`2%0{_ZY zWFU7gLw4*+=~^`Ks*O7S#6X7G`I*3rnm$V!YXi-ZJfhl)Mw=l;))P?_o(7)0`Pww4 z*%MEOiHyEQkW*WyCS2(<+bp>wc!=iE#eiSQjGrbmnmt(tqWv-83;zCeofW)R=enxK z&pOuF413_~7iTT^#acI*IARd!^W>dLpvczh8KvIFiHWCeSMbhTnJWCnviDl66b;$D z+jaN*EPO_-8+xW|I6?(rDw^@dNZv0X?`uzZ-EXoa*8p=SO*_1e=5_a#zvx{OZulhJ za-e*!AVxXocYtT;p_kRzEJl^V@a>p!Wjn5&B*!B~5eq{|%%XUoz*IQ~lxt0fD%Op9 zzo$1yZOOTQo3?ze|7*g3HRyp3{64#p>CR>?YQpqTBfpBw*oV)ot+x!4=8Vls2$t3f zdiv?AT;fv{C1F!Plm#UIW={e+y=b=Q z(^e>X&dDTnSnW86O+B#7L}{(~;V3GbHP!=oGKLq_lRWNfNA@TBFTc=fz%groy#EaI zAngiR)MtO2-(k4wip9dl*Pf}(UfVC!3T<4iUb*sYG%LS}ux;B53c2)g)aU-Yog6CG z)}@fS&q%|UkB`9gd^RoAb*^8T7(V?e#a#1MLV{CjS(Wx68Ih|}zTl;EF6v_cHZ2JO z*xz-%+yv2x-T5WCLNxlF3}~r{HrQQlL;bxl&U67a-sm)M`D3)VpUc$tKgljnWlWG= z;-|C^<~^6$P9MlznQ_qKmp&x4r zbyg?MB>YI&ljMi{d^Mkc2J4*Gi59Te=DTUGE1DetkO#U7(@O-cIXDzzsD3OGU$c+c z+l&aRdb19hVnbxPdy_{2z;scB*HE9Q@vB^pxzj3!C6VFCK7ix4sVSv^4KTBSWl4@Y z6BeYukjhR70p2q1aAkBJKyY-B6%205w62fI9q3j#>Y;vmPDQ=*xI+5o=%*+?=' ) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index cc7368f6..f06e14e7 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -119,6 +119,27 @@ def test_inline_tags(): )) +def test_all_configured_styles(): + file_path = path.join( + path.abspath(path.dirname(__file__)), + '..', + 'fixtures', + 'all_configured_styles.docx', + ) + actual_html = convert(file_path) + assert_html_equal(actual_html, BASE_HTML % ''' +

          aaa

          +

          bbb

          +

          ccc

          +

          ddd

          +

          eee

          +

          fff

          +

          ggg

          +

          hhh

          +

          iii

          + ''') + + def test_super_and_subscript(): file_path = path.join( path.abspath(path.dirname(__file__)), From cef0f9e70a91f76cfc2cf98df3ac53e665de2691 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 12 Jun 2013 17:57:49 -0400 Subject: [PATCH 345/404] refs #44: Got several more r styles working correctly --- pydocx/DocxParser.py | 34 ++++++++++++++++++++++++++++++++++ pydocx/parsers/Docx2Html.py | 16 ++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index b10485c2..c2f84132 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -602,6 +602,24 @@ def parse_r(self, el, parsed): if has_child(run_tag_property, 'u'): if self._is_style_on(run_tag_property.find('u')): text = self.underline(text) + if has_child(run_tag_property, 'caps'): + if self._is_style_on(run_tag_property.find('caps')): + text = self.caps(text) + if has_child(run_tag_property, 'smallCaps'): + if self._is_style_on(run_tag_property.find('smallCaps')): + text = self.small_caps(text) + if has_child(run_tag_property, 'strike'): + if self._is_style_on(run_tag_property.find('strike')): + text = self.strike(text) + if has_child(run_tag_property, 'dstrike'): + if self._is_style_on(run_tag_property.find('dstrike')): + text = self.strike(text) + if has_child(run_tag_property, 'vanish'): + if self._is_style_on(run_tag_property.find('vanish')): + text = self.hide(text) + if has_child(run_tag_property, 'webHidden'): + if self._is_style_on(run_tag_property.find('webHidden')): + text = self.hide(text) # This could be a superscript or a subscript if has_child(run_tag_property, 'vertAlign'): @@ -664,6 +682,22 @@ def italics(self, text): def underline(self, text): return text + @abstractmethod + def caps(self, text): + return text + + @abstractmethod + def small_caps(self, text): + return text + + @abstractmethod + def strike(self, text): + return text + + @abstractmethod + def hide(self, text): + return text + @abstractmethod def 
superscript(self, text): return text diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 3d45d695..71a2ebc6 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -30,6 +30,10 @@ def style(self): '.pydocx-left {text-align:left;}' '.pydocx-comment {color:blue;}' '.pydocx-underline {text-decoration: underline;}' + '.pydocx-caps {text-transform:uppercase;}' + '.pydocx-small-caps {font-variant: small-caps;}' + '.pydocx-strike {text-decoration: line-through;}' + '.pydocx-hidden {visibility: hidden;}' 'body {width:%(width)spx;margin:0px auto;}' '' ) % { @@ -126,6 +130,18 @@ def italics(self, text): def underline(self, text): return '' + text + '' + def caps(self, text): + return '' + text + '' + + def small_caps(self, text): + return '' + text + '' + + def strike(self, text): + return '' + text + '' + + def hide(self, text): + return '' + text + '' + def superscript(self, text): return '%(text)s' % { 'text': text, From 9741e6611c474899f1df00f8543aa7369d4ed6a4 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Wed, 12 Jun 2013 18:06:01 -0400 Subject: [PATCH 346/404] refs #44: update note and updated readme --- CHANGELOG | 9 +++++++++ README.rst | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 86786a81..7a244824 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,15 @@ Changelog ========= +* 0.3.1 + * Added support for several more OOXML tags including: + * caps + * smallCaps + * strike + * dstrike + * vanish + * webHidden + More details in the README. * 0.3.0 * We switched from using stock *xml.etree.ElementTree* to using *xml.etree.cElementTree*. This has resulted in a fairly significant speed diff --git a/README.rst b/README.rst index 82cbc71e..2f750299 100644 --- a/README.rst +++ b/README.rst @@ -217,6 +217,10 @@ The base parser `Docx2Html` relies on certain css class being set for certain be * class `pydocx-left` -> Aligns the text to the left. 
* class `pydocx-comment` -> Turns the text blue. * class `pydocx-underline` -> Underlines the text. +* class `pydocx-caps` -> Makes all text uppercase. +* class `pydocx-small-caps` -> Makes all text uppercase, however truly lowercase letters will be small than their uppercase counterparts. +* class `pydocx-strike` -> Strike a line through. +* class `pydocx-hidden` -> Hide the text. Optional Arguments ################## From 137fbc049cf04b10538703e0fe83ea0e3d4fbbe5 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 20:45:43 -0400 Subject: [PATCH 347/404] made changes based on comments; added test case --- pydocx/DocxParser.py | 4 ++-- pydocx/tests/document_builder.py | 12 +++++++++++- pydocx/tests/templates/tc.xml | 8 ++++---- pydocx/tests/test_docx.py | 2 +- pydocx/tests/test_xml.py | 18 +++++++++++------- 5 files changed, 29 insertions(+), 15 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index c13d574d..53a27cc4 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -183,8 +183,8 @@ def parse_table_row(self, el, text): def parse_table_cell(self, el, text): v_merge = find_first(el, 'vMerge') - if v_merge is not None and ( - 'continue' == v_merge.get('val', '')): + if v_merge is not None and (not + 'restart' == v_merge.get('val', '')): return '' colspan = self.get_colspan(el) rowspan = self._get_rowspan(el, v_merge) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 757ed94e..77ac9603 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -194,10 +194,20 @@ def _tr(rows, text): return template.render(table_rows=trs) @classmethod - def tc(cell_value, merge, merge_continue): + def table_cell(self, cell_value, merge=False, merge_continue=False): template = env.get_template(templates['tc']) return template.render(p_tag=cell_value, merge=merge, merge_continue= merge_continue) + @classmethod + def table_row(self, tcs): + template = 
env.get_template(templates['tr']) + return template.render(table_cells=tcs) + + @classmethod + def tableSingle(self, trs): + template = env.get_template(templates['table']) + return template.render(table_rows=trs) + @classmethod def drawing(self, r_id, height=None, width=None): diff --git a/pydocx/tests/templates/tc.xml b/pydocx/tests/templates/tc.xml index 255ce08b..9d725b1b 100644 --- a/pydocx/tests/templates/tc.xml +++ b/pydocx/tests/templates/tc.xml @@ -2,12 +2,12 @@ {% if merge_continue %} - - + + {% endif %} {% if merge %} - - + + {% endif %} diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index 99caa80b..f4771d6b 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -647,6 +647,7 @@ def test_simple_table(): 'simple_table.docx', ) actual_html = convert(file_path) + print actual_html assert_html_equal(actual_html, BASE_HTML % ''' @@ -659,7 +660,6 @@ def test_simple_table(): -
          Cell4
          diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index c0b9453a..ff6cd3b5 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -295,20 +295,24 @@ class RowSpanTestCase(_TranslationTestCase): expected_output = ''' - + + -
          AAA -
          - BBB
          AAABBB
          CCCDDD
          ''' def get_xml(self): - Cell1 = DXB.tc(DXB.p_tag('AAA'), merge=True) - Cell2 = DXB.tc(DXB.p_tag('BBB'), merge_continue=True) - print Cell1 + cell1 = DXB.table_cell(DXB.p_tag('AAA'), True, False) + cell2 = DXB.table_cell(None, False, True) + cell3 = DXB.table_cell(DXB.p_tag('BBB'), False, False) + cell4 = DXB.table_cell(DXB.p_tag('CCC'), False, False) + rows = [DXB.table_row([cell1, cell3]),DXB.table_row([cell2, cell4])] + table = DXB.tableSingle(rows) + body = table + xml = DXB.xml(body) + return xml class NestedTableTag(_TranslationTestCase): expected_output = ''' From 1fb36a2fddead9b79389605d507dfa5bebc72e13 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 20:48:18 -0400 Subject: [PATCH 348/404] made changes based on comments; added test case --- pydocx/DocxParser.py | 4 ++-- pydocx/tests/test_xml.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 53a27cc4..0532fc6b 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -183,8 +183,8 @@ def parse_table_row(self, el, text): def parse_table_cell(self, el, text): v_merge = find_first(el, 'vMerge') - if v_merge is not None and (not - 'restart' == v_merge.get('val', '')): + if v_merge is not None and ( + not 'restart' == v_merge.get('val', '')): return '' colspan = self.get_colspan(el) rowspan = self._get_rowspan(el, v_merge) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index ff6cd3b5..7aa62d80 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -30,7 +30,9 @@ def get_xml(self): xml = DXB.xml(body) return xml + class HyperlinkVanillaTestCase(_TranslationTestCase): + relationship_dict = { 'rId0': 'www.google.com', } @@ -291,7 +293,9 @@ def get_xml(self): xml = DXB.xml(body) return xml + class RowSpanTestCase(_TranslationTestCase): + expected_output = ''' @@ -303,17 +307,19 @@ class RowSpanTestCase(_TranslationTestCase):
          ''' + def get_xml(self): cell1 = DXB.table_cell(DXB.p_tag('AAA'), True, False) cell2 = DXB.table_cell(None, False, True) - cell3 = DXB.table_cell(DXB.p_tag('BBB'), False, False) - cell4 = DXB.table_cell(DXB.p_tag('CCC'), False, False) - rows = [DXB.table_row([cell1, cell3]),DXB.table_row([cell2, cell4])] + cell3 = DXB.table_cell(DXB.p_tag('BBB'), False, False) + cell4 = DXB.table_cell(DXB.p_tag('CCC'), False, False) + rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])] table = DXB.tableSingle(rows) body = table xml = DXB.xml(body) return xml + class NestedTableTag(_TranslationTestCase): expected_output = ''' From 7774916886b902d7dd0358ed512e1526338e6250 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Wed, 12 Jun 2013 20:55:51 -0400 Subject: [PATCH 349/404] made changes based on comments; added test case --- pydocx/tests/document_builder.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 77ac9603..931a7829 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -178,11 +178,13 @@ def li(self, text, ilvl, numId, bold=False): return template.render(**kwargs) @classmethod - def table(self, num_rows, num_columns, text, merge=False, merge_continue=False): + def table(self, num_rows, num_columns, + text, merge=False, merge_continue=False): def _tc(cell_value): template = env.get_template(templates['tc']) - return template.render(p_tag=cell_value, merge=merge, merge_continue= merge_continue) + return template.render( + p_tag=cell_value, merge=merge, merge_continue=merge_continue) def _tr(rows, text): tcs = [_tc(text.next()) for _ in range(rows)] @@ -196,7 +198,8 @@ def _tr(rows, text): @classmethod def table_cell(self, cell_value, merge=False, merge_continue=False): template = env.get_template(templates['tc']) - return template.render(p_tag=cell_value, merge=merge, merge_continue= merge_continue) + return 
template.render( + p_tag=cell_value, merge=merge, merge_continue=merge_continue) @classmethod def table_row(self, tcs): @@ -208,7 +211,6 @@ def tableSingle(self, trs): template = env.get_template(templates['table']) return template.render(table_rows=trs) - @classmethod def drawing(self, r_id, height=None, width=None): template = env.get_template(templates['drawing']) From 251a2258ebc4065c7e5c3a0da1dcd25126e72c8b Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 13 Jun 2013 10:48:22 -0400 Subject: [PATCH 350/404] refs #44: First step at passing in an rPr instead --- pydocx/tests/document_builder.py | 40 ++++++++++++----- pydocx/tests/templates/r.xml | 7 +-- pydocx/tests/templates/rpr.xml | 5 +++ pydocx/tests/test_xml.py | 75 ++++++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+), 18 deletions(-) create mode 100644 pydocx/tests/templates/rpr.xml diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 4bed0384..d382b7f7 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -12,6 +12,7 @@ 'p': 'p.xml', 'pict': 'pict.xml', 'r': 'r.xml', + 'rpr': 'rpr.xml', 'sdt': 'sdt.xml', 'sectPr': 'sectPr.xml', 'smartTag': 'smart_tag.xml', @@ -90,22 +91,37 @@ def t_tag(self, text): def r_tag( self, elements, - is_bold=False, - is_underline=False, - is_italics=False, - vert_align=None, - val=None, + rpr=None, ): template = env.get_template(templates['r']) + if rpr is None: + rpr = DocxBuilder.rpr_tag() kwargs = { 'elements': elements, - # TODO Pass in an `rPr` instead. That is what all of this is for - # anyway. 
- 'is_bold': is_bold, - 'is_underline': is_underline, - 'is_italics': is_italics, - 'vert_align': vert_align, - 'val': val, + 'rpr': rpr, + } + return template.render(**kwargs) + + @classmethod + def rpr_tag(self, *args, **rkwargs): + valid_kwargs = ( + 'b', + 'i', + 'u', + 'caps', + 'smallCaps', + 'strike', + 'dstrike', + 'vanish', + 'webHidden', + 'vertAlign', + ) + for key in rkwargs: + if key not in valid_kwargs: + raise AssertionError('%s is not a valid kwarg' % key) + template = env.get_template(templates['rpr']) + kwargs = { + 'tags': rkwargs, } return template.render(**kwargs) diff --git a/pydocx/tests/templates/r.xml b/pydocx/tests/templates/r.xml index 2cb40f19..2f28a66b 100644 --- a/pydocx/tests/templates/r.xml +++ b/pydocx/tests/templates/r.xml @@ -1,10 +1,5 @@ - - {% if is_bold %}{% endif %} - {% if is_underline %}{% endif %} - {% if is_italics %}{% endif %} - {% if vert_align %}{% endif %} - + {{ rpr }} {% for element in elements %} {{ element }} {% endfor %} diff --git a/pydocx/tests/templates/rpr.xml b/pydocx/tests/templates/rpr.xml new file mode 100644 index 00000000..f49eb08b --- /dev/null +++ b/pydocx/tests/templates/rpr.xml @@ -0,0 +1,5 @@ + + {% for tag, value in tags.items() %} + + {% endfor %} + diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index cc54e9dd..5a4b0492 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -1046,3 +1046,78 @@ def get_xml(self): xml = DXB.xml(body) return xml + + +class AvaliableInlineTags(_TranslationTestCase): + expected_output = ''' +

          aaa

          +

          bbb

          +

          ccc

          +

          ddd

          +

          eee

          +

          fff

          +

          ggg

          +

          hhh

          +

          iii

          +

          jjj

          + ''' + + def get_xml(self): + p_tags = [ + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('aaa')], rpr=DXB.rpr_tag(b=None)), + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('bbb')], rpr=DXB.rpr_tag(u=None)), + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('ccc')], rpr=DXB.rpr_tag(i=None)), + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('ddd')], rpr=DXB.rpr_tag(caps=None)), + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('eee')], rpr=DXB.rpr_tag(smallCaps=None)), # noqa + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('fff')], rpr=DXB.rpr_tag(strike=None)), # noqa + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('ggg')], rpr=DXB.rpr_tag(dstrike=None)), # noqa + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('hhh')], rpr=DXB.rpr_tag(vanish=None)), # noqa + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('iii')], rpr=DXB.rpr_tag(webHidden=None)), # noqa + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('jjj')], rpr=DXB.rpr_tag(vertAlign='superscript')), # noqa + ], + ), + ] + body = '' + for p_tag in p_tags: + body += p_tag + + xml = DXB.xml(body) + return xml From 3fcebc6fd4ebbae923abad57fc4c9328fd1dcdea Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 13 Jun 2013 10:52:50 -0400 Subject: [PATCH 351/404] refs #44: updated the tests to use the rpr instead --- pydocx/tests/document_builder.py | 8 -------- pydocx/tests/test_xml.py | 26 +++++++++++++++++--------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index d382b7f7..96243195 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -43,21 +43,13 @@ def xml(self, body): def p_tag( self, text, - bold=False, - underline=False, - italics=False, style='style0', - val=None, jc=None, ): if isinstance(text, str): # Use create a single r tag based on the text and the bold run_tag = DocxBuilder.r_tag( [DocxBuilder.t_tag(text)], - is_bold=bold, - is_underline=underline, - is_italics=italics, - 
val=val, ) run_tags = [run_tag] elif isinstance(text, list): diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 5a4b0492..f281435c 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -20,8 +20,16 @@ class BoldTestCase(_TranslationTestCase): def get_xml(self): tags = [ - DXB.p_tag(text='AAA', bold=True), - DXB.p_tag(text='BBB', bold=True, val='false'), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('AAA')], rpr=DXB.rpr_tag(b=None)), + ], + ), + DXB.p_tag( + [ + DXB.r_tag([DXB.t_tag('BBB')], rpr=DXB.rpr_tag(b='false')), + ], + ), ] body = '' @@ -42,9 +50,9 @@ class HyperlinkVanillaTestCase(_TranslationTestCase): def get_xml(self): run_tags = [] - run_tags.append(DXB.r_tag([DXB.t_tag('link')], is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('link')])) run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag([DXB.t_tag('.')], is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('.')])) body = DXB.p_tag(run_tags) xml = DXB.xml(body) return xml @@ -62,7 +70,7 @@ class HyperlinkWithMultipleRunsTestCase(_TranslationTestCase): def get_xml(self): run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'link'] run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag([DXB.t_tag('.')], is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('.')])) body = DXB.p_tag(run_tags) xml = DXB.xml(body) return xml @@ -92,9 +100,9 @@ class HyperlinkNotInRelsDictTestCase(_TranslationTestCase): def get_xml(self): run_tags = [] - run_tags.append(DXB.r_tag([DXB.t_tag('link')], is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('link')])) run_tags = [DXB.hyperlink_tag(r_id='rId0', run_tags=run_tags)] - run_tags.append(DXB.r_tag([DXB.t_tag('.')], is_bold=False)) + run_tags.append(DXB.r_tag([DXB.t_tag('.')])) body = DXB.p_tag(run_tags) xml = DXB.xml(body) return xml @@ -1030,12 +1038,12 @@ def get_xml(self): DXB.p_tag( [ DXB.r_tag([DXB.t_tag('AAA')]), - DXB.r_tag([DXB.t_tag('BBB')], 
vert_align='superscript'), + DXB.r_tag([DXB.t_tag('BBB')], rpr=DXB.rpr_tag(vertAlign='superscript')), # noqa ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('CCC')], vert_align='subscript'), + DXB.r_tag([DXB.t_tag('CCC')], rpr=DXB.rpr_tag(vertAlign='subscript')), # noqa DXB.r_tag([DXB.t_tag('DDD')]), ], ), From 97ab97af3555c0c8aff9a00cdfcd93fc4b3cfa9a Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 13 Jun 2013 12:13:47 -0400 Subject: [PATCH 352/404] removed unnecessary line --- pydocx/tests/test_docx.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pydocx/tests/test_docx.py b/pydocx/tests/test_docx.py index f4771d6b..862ac945 100644 --- a/pydocx/tests/test_docx.py +++ b/pydocx/tests/test_docx.py @@ -647,7 +647,6 @@ def test_simple_table(): 'simple_table.docx', ) actual_html = convert(file_path) - print actual_html assert_html_equal(actual_html, BASE_HTML % '''
          From 0fcfc7cea37d19562404c7e316765f713fa16240 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 13 Jun 2013 16:25:03 -0400 Subject: [PATCH 353/404] changed table tests --- pydocx/tests/document_builder.py | 30 ++----- pydocx/tests/templates/tc.xml | 4 +- pydocx/tests/test_xml.py | 133 +++++++++++++++---------------- 3 files changed, 77 insertions(+), 90 deletions(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 931a7829..21c7b561 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -178,28 +178,14 @@ def li(self, text, ilvl, numId, bold=False): return template.render(**kwargs) @classmethod - def table(self, num_rows, num_columns, - text, merge=False, merge_continue=False): - - def _tc(cell_value): - template = env.get_template(templates['tc']) - return template.render( - p_tag=cell_value, merge=merge, merge_continue=merge_continue) - - def _tr(rows, text): - tcs = [_tc(text.next()) for _ in range(rows)] - template = env.get_template(templates['tr']) - return template.render(table_cells=tcs) - - trs = [_tr(num_rows, text) for _ in range(num_rows)] - template = env.get_template(templates['table']) - return template.render(table_rows=trs) - - @classmethod - def table_cell(self, cell_value, merge=False, merge_continue=False): + def table_cell(self, paragraph, merge=False, merge_continue=False): + kwargs = { + 'paragraph': paragraph, + 'merge': merge, + 'merge_continue': merge_continue + } template = env.get_template(templates['tc']) - return template.render( - p_tag=cell_value, merge=merge, merge_continue=merge_continue) + return template.render(**kwargs) @classmethod def table_row(self, tcs): @@ -207,7 +193,7 @@ def table_row(self, tcs): return template.render(table_cells=tcs) @classmethod - def tableSingle(self, trs): + def table(self, trs): template = env.get_template(templates['table']) return template.render(table_rows=trs) diff --git a/pydocx/tests/templates/tc.xml 
b/pydocx/tests/templates/tc.xml index 9d725b1b..eff9ce0d 100644 --- a/pydocx/tests/templates/tc.xml +++ b/pydocx/tests/templates/tc.xml @@ -22,5 +22,7 @@ - {{ p_tag }} + {% if paragraph %} + {{ paragraph }} + {% endif %} diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 7aa62d80..76db3755 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -1,6 +1,5 @@ import os import time -from itertools import chain from nose.plugins.skip import SkipTest @@ -283,12 +282,12 @@ class TableTag(_TranslationTestCase): ''' def get_xml(self): - table = DXB.table(num_rows=2, num_columns=2, text=chain( - [DXB.p_tag('AAA')], - [DXB.p_tag('BBB')], - [DXB.p_tag('CCC')], - [DXB.p_tag('DDD')], - )) + cell1 = DXB.table_cell(paragraph=DXB.p_tag('AAA')) + cell2 = DXB.table_cell(paragraph=DXB.p_tag('CCC')) + cell3 = DXB.table_cell(paragraph=DXB.p_tag('BBB')) + cell4 = DXB.table_cell(paragraph=DXB.p_tag('DDD')) + rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])] + table = DXB.table(rows) body = table xml = DXB.xml(body) return xml @@ -309,12 +308,14 @@ class RowSpanTestCase(_TranslationTestCase): ''' def get_xml(self): - cell1 = DXB.table_cell(DXB.p_tag('AAA'), True, False) - cell2 = DXB.table_cell(None, False, True) - cell3 = DXB.table_cell(DXB.p_tag('BBB'), False, False) - cell4 = DXB.table_cell(DXB.p_tag('CCC'), False, False) + cell1 = DXB.table_cell( + paragraph=DXB.p_tag('AAA'), merge=True, merge_continue=False) + cell2 = DXB.table_cell( + paragraph=DXB.p_tag(None), merge=False, merge_continue=True) + cell3 = DXB.table_cell(paragraph=DXB.p_tag('BBB')) + cell4 = DXB.table_cell(paragraph=DXB.p_tag('CCC')) rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])] - table = DXB.tableSingle(rows) + table = DXB.table(rows) body = table xml = DXB.xml(body) return xml @@ -346,18 +347,18 @@ class NestedTableTag(_TranslationTestCase): ''' def get_xml(self): - nested_table = DXB.table(num_rows=2, num_columns=2, text=chain( - 
[DXB.p_tag('DDD')], - [DXB.p_tag('EEE')], - [DXB.p_tag('FFF')], - [DXB.p_tag('GGG')], - )) - table = DXB.table(num_rows=2, num_columns=2, text=chain( - [DXB.p_tag('AAA')], - [DXB.p_tag('BBB')], - [DXB.p_tag('CCC')], - [nested_table], - )) + cell1 = DXB.table_cell(paragraph=DXB.p_tag('DDD')) + cell2 = DXB.table_cell(paragraph=DXB.p_tag('FFF')) + cell3 = DXB.table_cell(paragraph=DXB.p_tag('EEE')) + cell4 = DXB.table_cell(paragraph=DXB.p_tag('GGG')) + rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])] + nested_table = DXB.table(rows) + cell1 = DXB.table_cell(paragraph=DXB.p_tag('AAA')) + cell2 = DXB.table_cell(paragraph=DXB.p_tag('CCC')) + cell3 = DXB.table_cell(paragraph=DXB.p_tag('BBB')) + cell4 = DXB.table_cell(nested_table) + rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])] + table = DXB.table(rows) body = table xml = DXB.xml(body) return xml @@ -378,14 +379,12 @@ class TableWithInvalidTag(_TranslationTestCase): ''' def get_xml(self): - table = DXB.table(num_rows=2, num_columns=2, text=chain( - [DXB.p_tag('AAA')], - [DXB.p_tag('BBB')], - # This tag may have CCC in it, however this tag has no meaning - # pertaining to content. 
- ['CCC'], - [DXB.p_tag('DDD')], - )) + cell1 = DXB.table_cell(paragraph=DXB.p_tag('AAA')) + cell2 = DXB.table_cell('CCC') + cell3 = DXB.table_cell(paragraph=DXB.p_tag('BBB')) + cell4 = DXB.table_cell(paragraph=DXB.p_tag('DDD')) + rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])] + table = DXB.table(rows) body = table xml = DXB.xml(body) return xml @@ -423,9 +422,9 @@ def get_xml(self): td = '' for el in els: td += el - table = DXB.table(num_rows=1, num_columns=1, text=chain( - [td], - )) + cell1 = DXB.table_cell(td) + row = DXB.table_row([cell1]) + table = DXB.table([row]) body = table xml = DXB.xml(body) return xml @@ -508,12 +507,12 @@ class ListWithContinuationTestCase(_TranslationTestCase): ''' def get_xml(self): - table = DXB.table(num_rows=2, num_columns=2, text=chain( - [DXB.p_tag('DDD')], - [DXB.p_tag('EEE')], - [DXB.p_tag('FFF')], - [DXB.p_tag('GGG')], - )) + cell1 = DXB.table_cell(paragraph=DXB.p_tag('DDD')) + cell2 = DXB.table_cell(paragraph=DXB.p_tag('FFF')) + cell3 = DXB.table_cell(paragraph=DXB.p_tag('EEE')) + cell4 = DXB.table_cell(paragraph=DXB.p_tag('GGG')) + rows = [DXB.table_row([cell1, cell3]), DXB.table_row([cell2, cell4])] + table = DXB.table(rows) tags = [ DXB.li(text='AAA', ilvl=0, numId=1), DXB.p_tag('BBB'), @@ -549,12 +548,12 @@ class ListWithMultipleContinuationTestCase(_TranslationTestCase): ''' def get_xml(self): - table1 = DXB.table(num_rows=1, num_columns=1, text=chain( - [DXB.p_tag('BBB')], - )) - table2 = DXB.table(num_rows=1, num_columns=1, text=chain( - [DXB.p_tag('CCC')], - )) + cell = DXB.table_cell(paragraph=DXB.p_tag('BBB')) + row = DXB.table_row([cell]) + table1 = DXB.table([row]) + cell = DXB.table_cell(paragraph=DXB.p_tag('CCC')) + row = DXB.table_row([cell]) + table2 = DXB.table([row]) tags = [ DXB.li(text='AAA', ilvl=0, numId=1), table1, @@ -661,12 +660,12 @@ class DeeplyNestedTableTestCase(_TranslationTestCase): run_expected_output = False def get_xml(self): - table = DXB.p_tag('AAA') + paragraph = 
DXB.p_tag('AAA') for _ in range(50): - table = DXB.table(num_rows=1, num_columns=1, text=chain( - [table], - )) + cell = DXB.table_cell(paragraph) + row = DXB.table_cell([cell]) + table = DXB.table([row]) body = table xml = DXB.xml(body) return xml @@ -829,19 +828,20 @@ class SimpleTableTest(_TranslationTestCase):
          ''' def get_xml(self): - table = DXB.table(num_rows=3, num_columns=3, text=chain( - [DXB.p_tag('Blank')], - [DXB.p_tag('Column 1')], - [DXB.p_tag('Column 2')], - [DXB.p_tag('Row 1')], - [DXB.p_tag('First')], - [DXB.p_tag('Second')], - [DXB.p_tag('Row 2')], - [DXB.p_tag('Third')], - [DXB.p_tag('Fourth')], - ), merge=True) + cell1 = DXB.table_cell(paragraph=DXB.p_tag('Blank')) + cell2 = DXB.table_cell(paragraph=DXB.p_tag('Row 1')) + cell3 = DXB.table_cell(paragraph=DXB.p_tag('Row 2')) + cell4 = DXB.table_cell(paragraph=DXB.p_tag('Column 1')) + cell5 = DXB.table_cell(paragraph=DXB.p_tag('First')) + cell6 = DXB.table_cell(paragraph=DXB.p_tag('Third')) + cell7 = DXB.table_cell(paragraph=DXB.p_tag('Column 2')) + cell8 = DXB.table_cell(paragraph=DXB.p_tag('Second')) + cell9 = DXB.table_cell(paragraph=DXB.p_tag('Fourth')) + rows = [DXB.table_row([cell1, cell4, cell7]), + DXB.table_row([cell2, cell5, cell8]), + DXB.table_row([cell3, cell6, cell9])] + table = DXB.table(rows) body = table - xml = DXB.xml(body) return xml @@ -902,15 +902,14 @@ def get_xml(self): lis = '' for text, ilvl, numId in li_text: lis += DXB.li(text=text, ilvl=ilvl, numId=numId) - table = DXB.table(num_rows=1, num_columns=1, text=chain( - [lis], - )) + cell1 = DXB.table_cell(lis) + rows = DXB.table_row([cell1]) + table = DXB.table([rows]) lis = '' lis += DXB.li(text='AAA', ilvl=0, numId=1) lis += table lis += DXB.li(text='CCC', ilvl=0, numId=1) body = lis - xml = DXB.xml(body) return xml From d50cdfdb4a7e584a8b56a8511a1147dcb3e158f7 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 13 Jun 2013 16:56:45 -0400 Subject: [PATCH 354/404] refs #44: small refactor, not calling find twice per inline call anymore --- pydocx/DocxParser.py | 53 +++++++++++++++++++------------------------- pydocx/utils.py | 7 ------ 2 files changed, 23 insertions(+), 37 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index c2f84132..f01fd843 100644 --- a/pydocx/DocxParser.py +++ 
b/pydocx/DocxParser.py @@ -12,7 +12,6 @@ find_first, find_all, find_ancestor_with_tag, - has_child, has_descendant_with_tag, ) @@ -592,38 +591,32 @@ def parse_r(self, el, parsed): if not text: return '' run_tag_property = el.find('rPr') + + def _has_style_on(run_tag_property, tag): + el = run_tag_property.find(tag) + if el is not None: + return self._is_style_on(el) + inline_tags = ( + ('b', self.bold), + ('i', self.italics), + ('u', self.underline), + ('caps', self.caps), + ('smallCaps', self.small_caps), + ('strike', self.strike), + ('dstrike', self.strike), + ('vanish', self.hide), + ('webHidden', self.hide), + ) if run_tag_property is not None: - if has_child(run_tag_property, 'b'): # text styling - if self._is_style_on(run_tag_property.find('b')): - text = self.bold(text) - if has_child(run_tag_property, 'i'): - if self._is_style_on(run_tag_property.find('i')): - text = self.italics(text) - if has_child(run_tag_property, 'u'): - if self._is_style_on(run_tag_property.find('u')): - text = self.underline(text) - if has_child(run_tag_property, 'caps'): - if self._is_style_on(run_tag_property.find('caps')): - text = self.caps(text) - if has_child(run_tag_property, 'smallCaps'): - if self._is_style_on(run_tag_property.find('smallCaps')): - text = self.small_caps(text) - if has_child(run_tag_property, 'strike'): - if self._is_style_on(run_tag_property.find('strike')): - text = self.strike(text) - if has_child(run_tag_property, 'dstrike'): - if self._is_style_on(run_tag_property.find('dstrike')): - text = self.strike(text) - if has_child(run_tag_property, 'vanish'): - if self._is_style_on(run_tag_property.find('vanish')): - text = self.hide(text) - if has_child(run_tag_property, 'webHidden'): - if self._is_style_on(run_tag_property.find('webHidden')): - text = self.hide(text) + for tag, formatter in inline_tags: + if _has_style_on(run_tag_property, tag): + text = formatter(text) + # These tags are a little different, handle them separately from + # the rest. 
# This could be a superscript or a subscript - if has_child(run_tag_property, 'vertAlign'): - vert_align = run_tag_property.find('vertAlign') + vert_align = run_tag_property.find('vertAlign') + if vert_align is not None: if vert_align.attrib['val'] == 'superscript': text = self.superscript(text) elif vert_align.attrib['val'] == 'subscript': diff --git a/pydocx/utils.py b/pydocx/utils.py index ce3c2417..4e1e0e9b 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -60,13 +60,6 @@ def has_descendant_with_tag(el, tag): return True if el.find('.//' + tag) is not None else False -def has_child(el, tag): - """ - Determine if current element has a child. Stop at first child. - """ - return True if el.find(tag) is not None else False - - def _filter_children(element, tags): return [ el for el in element.getchildren() From e06802f6d7271e9e4582de6fc28bf7b87573d6b1 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 13 Jun 2013 17:05:48 -0400 Subject: [PATCH 355/404] refs #44: even better performance --- pydocx/DocxParser.py | 47 ++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index f01fd843..b2da90f9 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -596,31 +596,30 @@ def _has_style_on(run_tag_property, tag): el = run_tag_property.find(tag) if el is not None: return self._is_style_on(el) - inline_tags = ( - ('b', self.bold), - ('i', self.italics), - ('u', self.underline), - ('caps', self.caps), - ('smallCaps', self.small_caps), - ('strike', self.strike), - ('dstrike', self.strike), - ('vanish', self.hide), - ('webHidden', self.hide), - ) + inline_tags = { + 'b': self.bold, + 'i': self.italics, + 'u': self.underline, + 'caps': self.caps, + 'smallCaps': self.small_caps, + 'strike': self.strike, + 'dstrike': self.strike, + 'vanish': self.hide, + 'webHidden': self.hide, + } if run_tag_property is not None: - for tag, formatter in inline_tags: - if 
_has_style_on(run_tag_property, tag): - text = formatter(text) - - # These tags are a little different, handle them separately from - # the rest. - # This could be a superscript or a subscript - vert_align = run_tag_property.find('vertAlign') - if vert_align is not None: - if vert_align.attrib['val'] == 'superscript': - text = self.superscript(text) - elif vert_align.attrib['val'] == 'subscript': - text = self.subscript(text) + for child in run_tag_property: + # These tags are a little different, handle them separately + # from the rest. + # This could be a superscript or a subscript + if child.tag == 'vertAlign': + if child.attrib['val'] == 'superscript': + text = self.superscript(text) + elif child.attrib['val'] == 'subscript': + text = self.subscript(text) + elif child.tag in inline_tags and self._is_style_on(child): + text = inline_tags[child.tag](text) + return text @property From a6807b84a657c536dabd926be8207f2db83c44e9 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 13 Jun 2013 17:22:56 -0400 Subject: [PATCH 356/404] refs #44: no more kwargs abusing --- pydocx/tests/document_builder.py | 14 ++++--- pydocx/tests/test_xml.py | 70 +++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/pydocx/tests/document_builder.py b/pydocx/tests/document_builder.py index 96243195..a73adedd 100644 --- a/pydocx/tests/document_builder.py +++ b/pydocx/tests/document_builder.py @@ -95,8 +95,10 @@ def r_tag( return template.render(**kwargs) @classmethod - def rpr_tag(self, *args, **rkwargs): - valid_kwargs = ( + def rpr_tag(self, inline_styles=None, *args, **kwargs): + if inline_styles is None: + inline_styles = {} + valid_styles = ( 'b', 'i', 'u', @@ -108,12 +110,12 @@ def rpr_tag(self, *args, **rkwargs): 'webHidden', 'vertAlign', ) - for key in rkwargs: - if key not in valid_kwargs: - raise AssertionError('%s is not a valid kwarg' % key) + for key in inline_styles: + if key not in valid_styles: + raise AssertionError('%s is not a 
valid style' % key) template = env.get_template(templates['rpr']) kwargs = { - 'tags': rkwargs, + 'tags': inline_styles, } return template.render(**kwargs) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index f281435c..00c231db 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -22,12 +22,18 @@ def get_xml(self): tags = [ DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('AAA')], rpr=DXB.rpr_tag(b=None)), + DXB.r_tag( + [DXB.t_tag('AAA')], + rpr=DXB.rpr_tag({'b': None}), + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('BBB')], rpr=DXB.rpr_tag(b='false')), + DXB.r_tag( + [DXB.t_tag('BBB')], + rpr=DXB.rpr_tag({'b': 'false'}), + ), ], ), ] @@ -1038,12 +1044,18 @@ def get_xml(self): DXB.p_tag( [ DXB.r_tag([DXB.t_tag('AAA')]), - DXB.r_tag([DXB.t_tag('BBB')], rpr=DXB.rpr_tag(vertAlign='superscript')), # noqa + DXB.r_tag( + [DXB.t_tag('BBB')], + rpr=DXB.rpr_tag({'vertAlign': 'superscript'}), + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('CCC')], rpr=DXB.rpr_tag(vertAlign='subscript')), # noqa + DXB.r_tag( + [DXB.t_tag('CCC')], + rpr=DXB.rpr_tag({'vertAlign': 'subscript'}), + ), DXB.r_tag([DXB.t_tag('DDD')]), ], ), @@ -1074,52 +1086,82 @@ def get_xml(self): p_tags = [ DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('aaa')], rpr=DXB.rpr_tag(b=None)), + DXB.r_tag( + [DXB.t_tag('aaa')], + rpr=DXB.rpr_tag({'b': None}), + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('bbb')], rpr=DXB.rpr_tag(u=None)), + DXB.r_tag( + [DXB.t_tag('bbb')], + rpr=DXB.rpr_tag({'u': None}), + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('ccc')], rpr=DXB.rpr_tag(i=None)), + DXB.r_tag( + [DXB.t_tag('ccc')], + rpr=DXB.rpr_tag({'i': None}), + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('ddd')], rpr=DXB.rpr_tag(caps=None)), + DXB.r_tag( + [DXB.t_tag('ddd')], + rpr=DXB.rpr_tag({'caps': None}), + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('eee')], rpr=DXB.rpr_tag(smallCaps=None)), # noqa + DXB.r_tag( + [DXB.t_tag('eee')], + rpr=DXB.rpr_tag({'smallCaps': None}), + ), ], ), DXB.p_tag( [ - 
DXB.r_tag([DXB.t_tag('fff')], rpr=DXB.rpr_tag(strike=None)), # noqa + DXB.r_tag( + [DXB.t_tag('fff')], + rpr=DXB.rpr_tag({'strike': None}) + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('ggg')], rpr=DXB.rpr_tag(dstrike=None)), # noqa + DXB.r_tag( + [DXB.t_tag('ggg')], + rpr=DXB.rpr_tag({'dstrike': None}), + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('hhh')], rpr=DXB.rpr_tag(vanish=None)), # noqa + DXB.r_tag( + [DXB.t_tag('hhh')], + rpr=DXB.rpr_tag({'vanish': None}) + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('iii')], rpr=DXB.rpr_tag(webHidden=None)), # noqa + DXB.r_tag( + [DXB.t_tag('iii')], + rpr=DXB.rpr_tag({'webHidden': None}), + ), ], ), DXB.p_tag( [ - DXB.r_tag([DXB.t_tag('jjj')], rpr=DXB.rpr_tag(vertAlign='superscript')), # noqa + DXB.r_tag( + [DXB.t_tag('jjj')], + rpr=DXB.rpr_tag({'vertAlign': 'superscript'}), + ), ], ), ] From c67efbf13c1bc16e4ef9179f49497aa5839b4467 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Thu, 13 Jun 2013 17:25:55 -0400 Subject: [PATCH 357/404] bumped to version 0.3.1 --- pydocx/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index 4beb371e..adcaa0ad 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -9,4 +9,4 @@ def docx2markdown(path): return Docx2Markdown(path).parsed -VERSION = '0.3.0' +VERSION = '0.3.1' diff --git a/setup.py b/setup.py index c85d4ad5..c47dbe66 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def get_description(): setup( name="PyDocX", # Edit here and pydocx.__init__ - version="0.3.0", + version="0.3.1", description="docx (OOXML) to html converter", author="Jason Ward, Sam Portnow", author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", From 1a57dd15a413618a8491f5e91a6916507cf3da0a Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 17 Jun 2013 10:04:33 -0400 Subject: [PATCH 358/404] updating --- pydocx/DocxParser.py | 106 ------------------------------------------- 1 file changed, 106 
deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index e1ae9f4f..0fcba960 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -12,22 +12,6 @@ logger = logging.getLogger("NewParser") -# http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx -EMUS_PER_PIXEL = 9525 -USE_ALIGNMENTS = True -TAGS_CONTAINING_CONTENT = ( - 't', - 'pict', - 'drawing', - 'delText', - 'ins', -) -TAGS_HOLDING_CONTENT_TAGS = ( - 'p', - 'tbl', - 'sdt', -) -UPPER_ROMAN_TO_HEADING_VALUE = 'h2' JUSTIFY_CENTER = 'center' JUSTIFY_LEFT = 'left' @@ -39,96 +23,6 @@ INDENTATION_HANGING = 'hanging' -def remove_namespaces(document): # remove namespaces - - root = ElementTree.fromstring(document) - for child in el_iter(root): - child.tag = child.tag.split("}")[1] - child.attrib = dict( - (k.split("}")[-1], v) - for k, v in child.attrib.items() - ) - return ElementTree.tostring(root) - -# Add some helper functions to Element to make it slightly more readable - - -def has_child(self, tag): - """ - Determine if current element has a child. Stop at first child. - """ - return True if self.find(tag) is not None else False - - -def has_descendant_with_tag(self, tag): - """ -Determine if there is a child ahead in the element tree. -""" - # Get child. stop at first child. - return True if self.find('.//' + tag) is not None else False - - -def find_first(self, tag): - """ - Find the first occurrence of a tag beneath the current element. - """ - return self.find('.//' + tag) - - -def find_all(self, tag): - """ -Find all occurrences of a tag - """ - return self.findall('.//' + tag) - - -def el_iter(el): - """ - Go through all elements - """ - try: - return el.iter() - except AttributeError: - return el.findall('.//*') - - -def find_ancestor_with_tag(self, tag): - """ - Find the first ancestor with that is a `tag`. 
- """ - el = self - while el.parent is not None: - el = el.parent - if el.tag == tag: - return el - return None - - -#make all of these attributes of _ElementInterface -setattr(_ElementInterface, 'has_child', has_child) -setattr(_ElementInterface, 'has_descendant_with_tag', has_descendant_with_tag) -setattr(_ElementInterface, 'find_first', find_first) -setattr(_ElementInterface, 'find_all', find_all) -setattr(_ElementInterface, 'find_ancestor_with_tag', find_ancestor_with_tag) -setattr(_ElementInterface, 'parent', None) -setattr(_ElementInterface, 'is_first_list_item', False) -setattr(_ElementInterface, 'is_last_list_item_in_root', False) -setattr(_ElementInterface, 'is_list_item', False) -setattr(_ElementInterface, 'ilvl', None) -setattr(_ElementInterface, 'num_id', None) -setattr(_ElementInterface, 'heading_level', None) -setattr(_ElementInterface, 'is_in_table', False) -setattr(_ElementInterface, 'previous', None) -setattr(_ElementInterface, 'next', None) -setattr(_ElementInterface, 'vmerge_continue', None) -setattr(_ElementInterface, 'row_index', None) -setattr(_ElementInterface, 'column_index', None) -setattr(_ElementInterface, 'is_last_text', False) -setattr(_ElementInterface, 'is_last_row_item', False) - -# End helpers - - @contextmanager def ZipFile(path): # This is not needed in python 3.2+ f = zipfile.ZipFile(path) From 7493338fb570de1bd3c8b4cde3832ef00713aecd Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 17 Jun 2013 16:29:52 -0400 Subject: [PATCH 359/404] updated tests --- pydocx/DocxParser.py | 131 ++++---- pydocx/parsers/Docx2Html.py | 4 +- pydocx/tests/__init__.py | 165 +++++----- pydocx/tests/test_latex.py | 616 ------------------------------------ pydocx/tests/test_xml.py | 265 +++++++++++++++- pydocx/utils.py | 4 + 6 files changed, 406 insertions(+), 779 deletions(-) delete mode 100644 pydocx/tests/test_latex.py diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 5ec1d468..eb9d9961 100644 --- a/pydocx/DocxParser.py +++ 
b/pydocx/DocxParser.py @@ -13,11 +13,13 @@ find_all, find_ancestor_with_tag, has_descendant_with_tag, -) + ) logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger("NewParser") + +# http://openxmldeveloper.org/discussions/formats/f/15/p/396/933.aspx EMUS_PER_PIXEL = 9525 USE_ALIGNMENTS = True @@ -28,11 +30,12 @@ INDENTATION_RIGHT = 'right' INDENTATION_LEFT = 'left' INDENTATION_FIRST_LINE = 'firstLine' -INDENTATION_HANGING = 'hanging' + +# Add some helper functions to Element to make it slightly more readable @contextmanager -def ZipFile(path): # This is not needed in python 3.2+ +def ZipFile(path): # This is not needed in python 3.2+ f = zipfile.ZipFile(path) yield f f.close() @@ -50,18 +53,18 @@ def _build_data(self, path, *args, **kwargs): self.fonts = f.read('/word/fontTable.xml') except KeyError: self.fonts = None - try: # Only present if there are lists + try: # Only present if there are lists self.numbering_text = f.read('word/numbering.xml') except KeyError: self.numbering_text = None - try: # Only present if there are comments + try: # Only present if there are comments self.comment_text = f.read('word/comments.xml') except KeyError: self.comment_text = None self.relationship_text = f.read('word/_rels/document.xml.rels') zipped_image_files = [ - e for e in f.infolist() - if e.filename.startswith('word/media/') + e for e in f.infolist() + if e.filename.startswith('word/media/') ] for e in zipped_image_files: self._image_data[e.filename] = f.read(e.filename) @@ -97,14 +100,31 @@ def __init__( convert_root_level_upper_roman=False, *args, **kwargs): + self._parsed = '' + self.block_text = '' + self.page_width = 0 + self.convert_root_level_upper_roman = convert_root_level_upper_roman + self._image_data = {} + self._build_data(path, *args, **kwargs) + self.pre_processor = None + + #divide by 20 to get to pt (Office works in 20th's of a point) + """ + see http://msdn.microsoft.com/en-us/library/documentformat + .openxml.wordprocessing.indentation.aspx 
+ """ + if find_first(self.root, 'pgSz') is not None: + self.page_width = int( + find_first(self.root, 'pgSz').attrib['w'] + ) / 20 + #all blank when we init self.comment_store = None self.visited = [] self.list_depth = 0 - self.track_pages = 0 self.rels_dict = self._parse_rels_root() self.styles_dict = self._parse_styles() - self.parse_begin(self.root) # begin to parse + self.parse_begin(self.root) # begin to parse def parse_begin(self, el): self.pre_processor = self.pre_processor_class( @@ -123,6 +143,7 @@ def parse(self, el): for child in el: # recursive. So you can get all the way to the bottom parsed += self.parse(child) + if el.tag == 'br' and el.attrib.get('type') == 'page': return self.parse_page_break(el, parsed) elif el.tag == 'tbl': @@ -166,11 +187,7 @@ def parse_table_cell(self, el, text): return '' colspan = self.get_colspan(el) rowspan = self._get_rowspan(el, v_merge) - if el.is_last_row_item: - self.last_row_item = True - else: - self.last_row_item = False - return self.table_cell(text, colspan, rowspan) + return self.table_cell(text, colspan, rowspan, self.pre_processor.is_last_row_item(el)) def parse_list(self, el, text): """ @@ -253,23 +270,23 @@ def is_same_list(next_el, num_id, ilvl): def should_parse_last_el(last_el, first_el): if last_el is None: return False - # Different list + # Different list if ( - self.pre_processor.num_id(last_el) != - self.pre_processor.num_id(first_el)): + self.pre_processor.num_id(last_el) != + self.pre_processor.num_id(first_el)): return False - # Will be handled when the ilvls do match (nesting issue) + # Will be handled when the ilvls do match (nesting issue) if ( - self.pre_processor.ilvl(last_el) != - self.pre_processor.ilvl(first_el)): + self.pre_processor.ilvl(last_el) != + self.pre_processor.ilvl(first_el)): return False - # We only care about last items that have not been parsed before + # We only care about last items that have not been parsed before # (first list items are always parsed at the beginning of 
this # method.) return ( not self.pre_processor.is_first_list_item(last_el) and self.pre_processor.is_last_list_item_in_root(last_el) - ) + ) if should_parse_last_el(next_el, el): parsed += self.parse(next_el) @@ -284,10 +301,7 @@ def justification(self, el, text): paragraph_tag_property = el.find('pPr') if paragraph_tag_property is None: return text - if el.find_ancestor_with_tag('tc') is not None: - self.is_table = True - else: - self.is_table = False + _justification = paragraph_tag_property.find('jc') indentation = paragraph_tag_property.find('ind') if _justification is None and indentation is None: @@ -296,10 +310,7 @@ def justification(self, el, text): right = None left = None firstLine = None - hanging = None - if _justification is not None: # text alignments - if el.find_ancestor_with_tag('tc') is not None: - self.column_index = el.find_ancestor_with_tag('tc').column_index + if _justification is not None: # text alignments value = _justification.attrib['val'] if value in [JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT]: alignment = value @@ -318,18 +329,14 @@ def justification(self, el, text): firstLine = indentation.attrib[INDENTATION_FIRST_LINE] firstLine = (int(firstLine) / 20) * float(4) / float(3) firstLine = str(firstLine) - if INDENTATION_HANGING in indentation.attrib: - hanging = indentation.attrib[INDENTATION_HANGING] - hanging = (int(hanging) / 20) * float(4) / float(3) - hanging = str(hanging) - if any([alignment, firstLine, left, right, hanging]): - return self.indent(text, alignment, firstLine, left, right, hanging) + if any([alignment, firstLine, left, right]): + return self.indent(text, alignment, firstLine, left, right, self.pre_processor.is_in_table(el)) return text def parse_p(self, el, text): if text == '': return '' - # TODO This is still not correct, however it fixes the bug. We need to + # TODO This is still not correct, however it fixes the bug. 
We need to # apply the classes/styles on p, td, li and h tags instead of inline, # but that is for another ticket. text = self.justification(el, text) @@ -345,37 +352,30 @@ def parse_p(self, el, text): # No p tags in li tags if self.list_depth == 0: parsed = self.paragraph(parsed) - if el.find_first('pgSz') is not None: - if 'orient' in el.find_first('pgSz').attrib: - orient = 'landscape' - else: - orient = 'portrait' - parsed = self.change_orientation(parsed, orient) - print parsed return parsed def _should_append_break_tag(self, next_el): paragraph_like_tags = [ 'p', - ] + ] inline_like_tags = [ 'smartTag', 'ins', 'delText', - ] + ] if self.pre_processor.is_list_item(next_el): return False if self.pre_processor.previous(next_el) is None: return False tag_is_inline_like = any( has_descendant_with_tag(next_el, tag) for - tag in inline_like_tags + tag in inline_like_tags ) if tag_is_inline_like: return False if ( - self.pre_processor.is_last_list_item_in_root( - self.pre_processor.previous(next_el))): + self.pre_processor.is_last_list_item_in_root( + self.pre_processor.previous(next_el))): return False if self.pre_processor.previous(next_el).tag not in paragraph_like_tags: return False @@ -403,16 +403,15 @@ def _should_parse_next_as_content(el): next_el = self.pre_processor.next(el) if next_el is None: return False - return False if ( - not self.pre_processor.is_list_item(next_el) and - not self.pre_processor.is_last_list_item_in_root(el) - ): + not self.pre_processor.is_list_item(next_el) and + not self.pre_processor.is_last_list_item_in_root(el) + ): return True if self.pre_processor.is_first_list_item(next_el): if ( - self.pre_processor.num_id(next_el) == - self.pre_processor.num_id(el)): + self.pre_processor.num_id(next_el) == + self.pre_processor.num_id(el)): return True return False @@ -423,7 +422,7 @@ def _should_parse_next_as_content(el): if not next_elements_content: continue if self._should_append_break_tag(el): - parsed += self.break_tag() + parsed += 
self.break_tag(self.pre_processor.is_in_table(el)) parsed += next_elements_content else: break @@ -442,9 +441,9 @@ def _get_rowspan(self, el, v_merge): if tbl is None: return '' tcs = [ - tc for tc in find_all(tbl, 'tc') - if self.pre_processor.row_index(tc) >= current_row and - self.pre_processor.column_index(tc) == current_col + tc for tc in find_all(tbl, 'tc') + if self.pre_processor.row_index(tc) >= current_row and + self.pre_processor.column_index(tc) == current_col ] restart_in_v_merge = False if v_merge is not None and 'val' in v_merge.attrib: @@ -488,7 +487,7 @@ def _should_parse_next_as_content(el): if not next_elements_content: continue if self._should_append_break_tag(el): - parsed += self.break_tag() + parsed += self.break_tag(self.pre_processor.is_in_table(el)) parsed += next_elements_content else: break @@ -532,7 +531,7 @@ def _get_image_size(self, el): return ( '%dpx' % x, '%dpx' % y, - ) + ) shape = find_first(el, 'shape') if shape is not None and shape.get('style') is not None: # If either of these are not set, rely on the method `image` to not @@ -576,7 +575,7 @@ def parse_t(self, el, parsed): return self.escape(el.text) def parse_break_tag(self, el, parsed): - return self.break_tag() + return self.break_tag(self.pre_processor.is_in_table(el)) def parse_deletion(self, el, parsed): return self.deletion(el.text, '', '') @@ -607,7 +606,7 @@ def _has_style_on(run_tag_property, tag): 'dstrike': self.strike, 'vanish': self.hide, 'webHidden': self.hide, - } + } if run_tag_property is not None: for child in run_tag_property: # These tags are a little different, handle them separately @@ -733,8 +732,4 @@ def page_break(self): @abstractmethod def indent(self, text, left='', right='', firstLine=''): - return text - - @abstractmethod - def change_orientation(self, parsed, orientation): - return True + return text # TODO JUSTIFIED JUSTIFIED TEXT \ No newline at end of file diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index 
4aeb3e87..d4e95cb0 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -163,7 +163,7 @@ def table(self, text): def table_row(self, text): return '
          ''' + latex_expected_output = r''' + \begin{tabular}{ll} + {AAA} & {BBB} \\ + {CCC} & {DDD} \\ + \end{tabular} + ''' + + def get_xml(self): table = DXB.table(num_rows=2, num_columns=2, text=chain( [DXB.p_tag('AAA')], @@ -332,6 +377,16 @@ class NestedTableTag(_TranslationTestCase): ''' + latex_expected_output = r'''\begin{tabular}{ll} + {AAA} & {BBB} \\ + {CCC} & { + \begin{tabular}{ll} + {DDD} & {EEE} \\ + {FFF} & {GGG} \\ + \end{tabular} + } \\ + \end{tabular}''' + def get_xml(self): nested_table = DXB.table(num_rows=2, num_columns=2, text=chain( [DXB.p_tag('DDD')], @@ -364,6 +419,13 @@ class TableWithInvalidTag(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{tabular}{ l l } + {AAA} & {BBB} \\ + {} & {DDD} \\ + \end{tabular} + ''' + def get_xml(self): table = DXB.table(num_rows=2, num_columns=2, text=chain( [DXB.p_tag('AAA')], @@ -394,6 +456,14 @@ class TableWithListAndParagraph(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{tabular}{p{3cm}} + \parbox{20cm}{\begin{enumerate} \item AAA + \item BBB + \end{enumerate}CCC\\DDD} \\ + \end{tabular}''' + + def get_xml(self): li_text = [ ('AAA', 0, 1), @@ -427,6 +497,13 @@ class SimpleListTestCase(_TranslationTestCase):
        ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA + \item BBB + \item CCC + \end {enumerate} + ''' # Ensure its not failing somewhere and falling back to decimal numbering_dict = { '1': { @@ -454,6 +531,11 @@ class SingleListItemTestCase(_TranslationTestCase):
      2. AAA
      ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA + \end {enumerate} + ''' # Ensure its not failing somewhere and falling back to decimal numbering_dict = { @@ -494,6 +576,18 @@ class ListWithContinuationTestCase(_TranslationTestCase):
    ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA \\ BBB + \item CCC + \begin{tabular} {ll} + {DDD} & {EEE} \\ + {FFF} & {GGG} \\ + \end{tabular} + \item HHH + \end{enumerate} + ''' + def get_xml(self): table = DXB.table(num_rows=2, num_columns=2, text=chain( [DXB.p_tag('DDD')], @@ -535,6 +629,20 @@ class ListWithMultipleContinuationTestCase(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA + \begin{tabular} {l} + {BBB}\\ + \end{tabular} + \begin{tabular} {l} + {CCC}\\ + \end{tabular} + \item DDD + \end{enumerate} + ''' + + def get_xml(self): table1 = DXB.table(num_rows=1, num_columns=1, text=chain( [DXB.p_tag('BBB')], @@ -570,6 +678,18 @@ class MangledIlvlTestCase(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA + \end{enumerate} + \begin{enumerate} + \item BBB + \begin{enumerate} + \item CCC + \end{enumerate} + \end{enumerate} + ''' + def get_xml(self): li_text = [ ('AAA', 0, 2), @@ -597,6 +717,18 @@ class SeperateListsTestCase(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA + \end{enumerate} + \begin{enumerate} + \item BBB + \end{enumerate} + \begin{enumerate} + \item CCC + \end{enumerate} + ''' + def get_xml(self): li_text = [ ('AAA', 0, 2), @@ -629,6 +761,18 @@ class InvalidIlvlOrderTestCase(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA + \begin{enumerate} + \item BBB + \begin{enumerate} + \item CCC + \end {enumerate} + \end{enumerate} + \end{enumerate} + ''' + def get_xml(self): tags = [ DXB.li(text='AAA', ilvl=1, numId=1), @@ -677,6 +821,10 @@ class NonStandardTextTagsTestCase(_TranslationTestCase): smarttag

    ''' + latex_expected_output = r''' + \added[id=, remark=]{insert} smarttag + ''' + def get_xml(self): run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'insert '] insert_tag = DXB.insert_tag(run_tags) @@ -691,6 +839,7 @@ def get_xml(self): class RTagWithNoText(_TranslationTestCase): expected_output = '' + latex_expected_output = '' def get_xml(self): p_tag = DXB.p_tag(None) # No text @@ -713,6 +862,13 @@ class DeleteTagInList(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA \deleted[id=, remark=]{BBB} + \item CCC + \end{enumerate} + ''' + def get_xml(self): delete_tags = DXB.delete_tag(['BBB']) p_tag = DXB.p_tag([delete_tags]) @@ -733,6 +889,12 @@ class InsertTagInList(_TranslationTestCase):
  • CCC
  • ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA\added[id=,remark=]{BBB} + \item CCC + \end{enumerate} + ''' def get_xml(self): run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'BBB'] @@ -756,6 +918,13 @@ class SmartTagInList(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAABBB + \item CCC + \end{enumerate} + ''' + def get_xml(self): run_tags = [DXB.r_tag([DXB.t_tag(i)]) for i in 'BBB'] smart_tag = DXB.smart_tag(run_tags) @@ -777,6 +946,11 @@ class SingleListItem(_TranslationTestCase):

    BBB

    ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA + \end{enumerate}''' + '\n' + 'BBB' + numbering_dict = { '1': { '0': 'lowerLetter', @@ -815,6 +989,13 @@ class SimpleTableTest(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{tabular} { lll } + {Blank} & {Column 1} & {Column 2} \\ + {Row 1} & {First} & {Second} \\ + {Row 2} & {Third} & {Fourth} \\ + \end{tabular}''' + def get_xml(self): table = DXB.table(num_rows=3, num_columns=3, text=chain( [DXB.p_tag('Blank')], @@ -842,6 +1023,13 @@ class MissingIlvl(_TranslationTestCase):
  • CCC
  • ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAA \\ + BBB + \item CCC + \end{enumerate} + ''' def get_xml(self): li_text = [ @@ -874,6 +1062,15 @@ class SameNumIdInTable(_TranslationTestCase):
  • CCC
  • ''' + latex_expected_output = r''' + \begin{enumerate} \item AAA + \begin{tabular}{p{3cm}} + {\begin{enumerate} \item BBB + \end{enumerate}} \\ + \end{tabular} + \item CCC + \end{enumerate} + ''' # Ensure its not failing somewhere and falling back to decimal numbering_dict = { @@ -910,6 +1107,12 @@ class SDTTestCase(_TranslationTestCase):
  • CCC
  • ''' + latex_expected_output = r''' + \begin{enumerate} + \item AAABBB + \item CCC + \end{enumerate} + ''' def get_xml(self): body = '' @@ -931,6 +1134,22 @@ class HeadingTestCase(_TranslationTestCase):
    GGG

    HHH

    ''' + + latex_expected_output = r'''\section{AAA} + ''' + '\n' + ''' + \subsection{BBB} + ''' + '\n' + ''' + \paragraph{CCC} + ''' + '\n' + ''' + \subparagraph{DDD} + ''' + '\n' + ''' + EEE + ''' + '\n' + ''' + GGG + ''' + '\n' + ''' + HHH + ''' + styles_dict = { 'style0': 'heading 1', 'style1': 'heading 2', @@ -996,6 +1215,13 @@ class RomanNumeralToHeadingTestCase(_TranslationTestCase): ''' + latex_expected_output = r''' + \subsection{AAA}\begin{enumerate} \item BBB + \end{enumerate}\subsection{CCC}\begin{enumerate} \item DDD + \end{enumerate}\subsection{EEE}\begin{enumerate} \item FFF\begin{enumerate} \item GGG + \end{enumerate} + \end{enumerate}''' + def get_xml(self): li_text = [ ('AAA', 0, 1), @@ -1018,6 +1244,7 @@ class MultipleTTagsInRTag(_TranslationTestCase): expected_output = '''

    ABC

    ''' + latex_expected_output = 'ABC' def get_xml(self): r_tag = DXB.r_tag( @@ -1039,6 +1266,10 @@ class SuperAndSubScripts(_TranslationTestCase):

    CCCDDD

    ''' + latex_expected_output = r''' + AAA \textsuperscript{BBB} + ''' + '\n' + r'\textsubscript{CCC} DDD' + def get_xml(self): p_tags = [ DXB.p_tag( @@ -1082,6 +1313,18 @@ class AvaliableInlineTags(_TranslationTestCase):

    jjj

    ''' + latex_expected_output = r'''\textbf {aaa} + \underline {bbb} + \emph {ccc} + \MakeUppercase{ddd} + \textsx{eee} + \sout{fff} + \sout{ggg} + \begin{comment}hhh\end{comment} + \begin{comment}iii\end{comment} + \textsuperscript{jjj} + ''' + def get_xml(self): p_tags = [ DXB.p_tag( diff --git a/pydocx/utils.py b/pydocx/utils.py index 4e1e0e9b..d62975a6 100644 --- a/pydocx/utils.py +++ b/pydocx/utils.py @@ -192,6 +192,9 @@ def heading_level(self, el): def is_in_table(self, el): return self.meta_data[el].get('is_in_table') + def is_last_row_item(self, el): + return self.meta_data[el].get('is_last_row_item') + def row_index(self, el): return self.meta_data[el].get('row_index') @@ -298,6 +301,7 @@ def _set_table_attributes(self, el): continue for i, row in enumerate(rows): tcs = _filter_children(row, ['tc']) + self.meta_data[tcs[-1]]['is_last_row_item'] = True for j, child in enumerate(tcs): self.meta_data[child]['row_index'] = i self.meta_data[child]['column_index'] = j From 721492ee3bfc75e5663a9055c05f5b9d1b9c36a4 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 17 Jun 2013 16:30:47 -0400 Subject: [PATCH 360/404] updated tests --- pydocx/parsers/Docx2LaTex.py | 110 ++++++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 35 deletions(-) diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index 5eff9a2d..9a40283a 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -8,6 +8,9 @@ def __init__(self, *args, **kwargs): self.table_info = [] self.counted_columns = False self.previous_orient = '' + self.col_count = 0 + self.hit_list = False + self.line_break_in_table = False super(Docx2LaTex, self).__init__(*args, **kwargs) @property @@ -40,9 +43,11 @@ def list_element(self, text): return r'\item %s' % text + '\n' def ordered_list(self, text, list_style): + self.hit_list = True return r'\begin{enumerate} %s \end{enumerate}' % text def unordered_list(self, text): + self.hit_list = True return 
r'\begin{itemize} %s \end{itemize}' % text def head(self): @@ -50,15 +55,25 @@ def head(self): \usepackage{graphicx}\usepackage{changes} \usepackage{changepage} \usepackage{hanging}\usepackage{multirow} - \usepackage{pbox}\usepackage{pdflscape}''' + \usepackage{pbox}\usepackage{pdflscape} + \usepackage{ulem}\usepackage{comment}''' + + def heading(self, text, heading_value): + if heading_value == 'h1': + return r'\section{%s}' % text + '\n\n' + elif heading_value == 'h2': + return r'\subsection{%s}' % text + '\n\n' + elif heading_value == 'h3': + return r'\paragraph{%s}' % text + '\n\n' + elif heading_value == 'h4': + return r'\subparagraph{%s}' % text + '\n\n' + else: + return text + '\n\n' def paragraph(self, text, pre=None): + self.hit_list = False return text + '\n\n' - def heading(self, text, heading_value): - #TODO figure out what to do for headings - return text - def insertion(self, text, author, date): return r'\added[id='+author+',remark='+date+']{%s}' % text @@ -85,13 +100,17 @@ def image(self, image_data, filename, x, y): if not src: return '' if all([x, y]): - x = x.replace('px', '') - y = y.replace('px', '') - x = float(x) - y = float(y) - x = x * float(3) / float(4) - y = y * float(3) / float(4) - return r'\includegraphics[height=%spt, width=%spt]{%s}' % ( + if x.find('px') != -1: + x = x.replace('px', '') + x = float(x) + x = x * float(3) / float(4) + x = str(x) + 'pt' + elif y.find('px') != -1: + y = y.replace('px', '') + y = float(y) + y = y * float(3) / float(4) + y = str(y) + 'pt' + return r'\includegraphics[height=%spt, width=%s]{%s}' % ( y, x, src) @@ -104,24 +123,30 @@ def tab(self): def table(self, text): center = False right = False + pcm = False setup_cols = '' - for i in range(self.col_count + 1): + for i in range(0, self.col_count): for column in self.table_info: - if column['Column'] == i and column['justify'] == 'center': - center = True - elif column['Column'] == i and column['justify'] == 'right': - right = True + if 'Column' in 
column: + if column['Column'] == i: + if 'justify' in column: + if column['justify']== 'center': + center = True + elif column['justify'] == 'right': + right = True + elif column['list']: + pcm = True if center is True: setup_cols += 'c' center = False elif right is True: setup_cols += 'r' right = False + elif pcm is True: + setup_cols += 'p{3cm}' else: setup_cols += 'l' self.table_info = [] - self.col_count = 0 - self.counted_columns = False return '\n' + r'\begin{tabular}{%s}' % setup_cols\ + '\n' + r'%s\end{tabular}'\ % text + '\n\n' @@ -130,7 +155,12 @@ def table_row(self, text): self.counted_columns = True return text - def table_cell(self, text, col='', row=''): + def table_cell(self, text, col='', row='', is_last_row_item = False): + if self.hit_list: + self.columns = {} + self.columns['Column'] = self.col_count + self.columns['list'] = True + self.table_info.append(self.columns) if col: col = int(col) if not self.counted_columns and col: @@ -144,13 +174,13 @@ def table_cell(self, text, col='', row=''): slug += r'\multicolumn{%s}{c}' % col if row: slug += r'\multirow{%s}{*}' % row -# if self.line_break_in_table: -# slug += r'\pbox{20cm}{' + text + '}' + if self.line_break_in_table: + slug += r'\parbox{20cm}' if text == '': slug += '{}' else: slug += '{' + text + '}' - if self.last_row_item: + if is_last_row_item: slug += r' \\' + '\n' return slug self.line_break_in_table = False @@ -160,8 +190,8 @@ def page_break(self): return r'\newpage ' def indent(self, text, just='', firstLine='', - left='', right='', hanging=''): - if not self.is_table: + left='', right='', hanging='', is_in_table = False): + if not is_in_table: raggedright = False raggedleft = False center = False @@ -209,18 +239,16 @@ def indent(self, text, just='', firstLine='', return slug else: self.columns = {} - self.columns['Column'] = self.column_index + self.columns['Column'] = self.col_count self.columns['justify'] = just if self.columns not in self.table_info: 
self.table_info.append(self.columns) return text - def break_tag(self): - if self.is_table: - self.is_table = False + def break_tag(self, is_in_table): + if is_in_table: self.line_break_in_table = True - return '' - return r'\vspace{1cm}' + return r'\\' def change_orientation(self, parsed, orient): if orient == 'portrait': @@ -231,8 +259,20 @@ def change_orientation(self, parsed, orient): def deletion(self, text, author, date): return r'\deleted[id='+author+',remark='+date+']{%s}' % text + def caps(self, text): + return r'\MakeUppercase{%s}' %text + + def small_caps(self, text): + return r'\textsx{%s}' % text + + def strike(self, text): + return r'\sout{%s}' % text + + def hide(self, text): + return r'\begin{comment}%s\end{comment}' % text + + def superscript(self, text): + return r'\textsuperscript{%s}' % text -#with open('test.jpg', 'wb+') as f: -# f.write(data) -#gotcha -#The default handler takes that raw image data and base64 encodes it and embeds it into the html \ No newline at end of file + def subscript(self, text): + return r'\textsubscript{%s}' % text \ No newline at end of file From bd6e8e712214bebcc51859a4fffa8bddf213e43c Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 17 Jun 2013 16:33:25 -0400 Subject: [PATCH 361/404] change to vmerge --- pydocx/DocxParser.py | 2 +- pydocx/tests/test_xml.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 0532fc6b..e3ee3c24 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -184,7 +184,7 @@ def parse_table_row(self, el, text): def parse_table_cell(self, el, text): v_merge = find_first(el, 'vMerge') if v_merge is not None and ( - not 'restart' == v_merge.get('val', '')): + 'restart' != v_merge.get('val', '')): return '' colspan = self.get_colspan(el) rowspan = self._get_rowspan(el, v_merge) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index 76db3755..b26281e7 100644 --- a/pydocx/tests/test_xml.py +++ 
b/pydocx/tests/test_xml.py @@ -943,6 +943,7 @@ class HeadingTestCase(_TranslationTestCase):
    GGG

    HHH

    ''' + styles_dict = { 'style0': 'heading 1', 'style1': 'heading 2', From 83505df8432832fddac4c77aee07bbe4ac034532 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 17 Jun 2013 16:48:29 -0400 Subject: [PATCH 362/404] updated changelog --- CHANGELOG | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 7a244824..d01f144f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,11 @@ Changelog ========= - +* 0.3.2 + * We were not taking into account that vertical merges should have a + continue attribute, but sometimes they do not, and in those cases word + assumes the continue attribute. We updated the parser to handle the + cases in which the continue attribute is not there. * 0.3.1 * Added support for several more OOXML tags including: * caps From 4e32fd5e5496d1cc736fd6e6d24d56c7159cd6c4 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 17 Jun 2013 16:53:09 -0400 Subject: [PATCH 363/404] updated changelog --- CHANGELOG | 2 +- pydocx/DocxParser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index d01f144f..a3c57d6f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,7 +5,7 @@ Changelog * We were not taking into account that vertical merges should have a continue attribute, but sometimes they do not, and in those cases word assumes the continue attribute. We updated the parser to handle the - cases in which the continue attribute is not there. + cases in which the continue attribute is not there. 
* 0.3.1 * Added support for several more OOXML tags including: * caps diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index b80c391a..ecfc4dcd 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -183,7 +183,7 @@ def parse_table_row(self, el, text): def parse_table_cell(self, el, text): v_merge = find_first(el, 'vMerge') if v_merge is not None and ( - 'restart' != v_merge.get('val', '')): + 'restart' != v_merge.get('val', '')): return '' colspan = self.get_colspan(el) rowspan = self._get_rowspan(el, v_merge) From 7c1603a8efc758ccdc2b6d8fa5dd54556f523efa Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Mon, 17 Jun 2013 17:54:35 -0400 Subject: [PATCH 364/404] changes based on comments --- pydocx/DocxParser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index ecfc4dcd..0f734b5e 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -714,7 +714,7 @@ def unordered_list(self, text): return text @abstractmethod - def list_element(self, text, lst_style): + def list_element(self, text): return text @abstractmethod @@ -726,7 +726,7 @@ def table_row(self, text): return text @abstractmethod - def table_cell(self, text, last, col, row, col_index, row_index): + def table_cell(self, text): return text @abstractmethod From 7b0517ce9f51e0eed02dff0383b4e29efaeded49 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 18 Jun 2013 11:55:35 -0400 Subject: [PATCH 365/404] merged with master; added latex parser --- pydocx/DocxParser.py | 96 +++++++++++++++++++----------------- pydocx/__init__.py | 3 -- pydocx/parsers/Docx2Html.py | 7 ++- pydocx/parsers/Docx2LaTex.py | 21 ++++---- pydocx/tests/test_xml.py | 19 ++++--- 5 files changed, 77 insertions(+), 69 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index ca049d21..273c5998 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -13,7 +13,7 @@ find_all, find_ancestor_with_tag, has_descendant_with_tag, - ) +) 
logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger("NewParser") @@ -35,7 +35,7 @@ @contextmanager -def ZipFile(path): # This is not needed in python 3.2+ +def ZipFile(path): # This is not needed in python 3.2+ f = zipfile.ZipFile(path) yield f f.close() @@ -53,18 +53,18 @@ def _build_data(self, path, *args, **kwargs): self.fonts = f.read('/word/fontTable.xml') except KeyError: self.fonts = None - try: # Only present if there are lists + try: # Only present if there are lists self.numbering_text = f.read('word/numbering.xml') except KeyError: self.numbering_text = None - try: # Only present if there are comments + try: # Only present if there are comments self.comment_text = f.read('word/comments.xml') except KeyError: self.comment_text = None self.relationship_text = f.read('word/_rels/document.xml.rels') zipped_image_files = [ - e for e in f.infolist() - if e.filename.startswith('word/media/') + e for e in f.infolist() + if e.filename.startswith('word/media/') ] for e in zipped_image_files: self._image_data[e.filename] = f.read(e.filename) @@ -124,7 +124,7 @@ def __init__( self.list_depth = 0 self.rels_dict = self._parse_rels_root() self.styles_dict = self._parse_styles() - self.parse_begin(self.root) # begin to parse + self.parse_begin(self.root) # begin to parse def parse_begin(self, el): self.pre_processor = self.pre_processor_class( @@ -185,18 +185,15 @@ def parse_table_cell(self, el, text): v_merge = find_first(el, 'vMerge') if v_merge is not None and ( 'restart' != v_merge.get('val', '')): - return '' + return self.empty_cell() colspan = self.get_colspan(el) rowspan = self._get_rowspan(el, v_merge) -<<<<<<< HEAD - return self.table_cell(text, colspan, rowspan, self.pre_processor.is_last_row_item(el)) -======= if rowspan > 1: rowspan = str(rowspan) else: rowspan = '' - return self.table_cell(text, colspan, rowspan) ->>>>>>> table_fix + return self.table_cell( + text, colspan, rowspan, self.pre_processor.is_last_row_item(el)) def 
parse_list(self, el, text): """ @@ -281,21 +278,21 @@ def should_parse_last_el(last_el, first_el): return False # Different list if ( - self.pre_processor.num_id(last_el) != - self.pre_processor.num_id(first_el)): - return False + self.pre_processor.num_id(last_el) != + self.pre_processor.num_id(first_el)): + return False # Will be handled when the ilvls do match (nesting issue) if ( - self.pre_processor.ilvl(last_el) != - self.pre_processor.ilvl(first_el)): - return False - # We only care about last items that have not been parsed before - # (first list items are always parsed at the beginning of this - # method.) + self.pre_processor.ilvl(last_el) != + self.pre_processor.ilvl(first_el)): + return False + # We only care about last items that have not been + # parsed before (first list items are + # always parsed at the beginning of this method.) return ( not self.pre_processor.is_first_list_item(last_el) and self.pre_processor.is_last_list_item_in_root(last_el) - ) + ) if should_parse_last_el(next_el, el): parsed += self.parse(next_el) @@ -319,7 +316,7 @@ def justification(self, el, text): right = None left = None firstLine = None - if _justification is not None: # text alignments + if _justification is not None: # text alignments value = _justification.attrib['val'] if value in [JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT]: alignment = value @@ -339,15 +336,18 @@ def justification(self, el, text): firstLine = (int(firstLine) / 20) * float(4) / float(3) firstLine = str(firstLine) if any([alignment, firstLine, left, right]): - return self.indent(text, alignment, firstLine, left, right, self.pre_processor.is_in_table(el)) + return self.indent( + text, alignment, firstLine, + left, right, self.pre_processor.is_in_table(el)) return text def parse_p(self, el, text): if text == '': return '' - # TODO This is still not correct, however it fixes the bug. We need to - # apply the classes/styles on p, td, li and h tags instead of inline, - # but that is for another ticket. 
+ # TODO This is still not correct, however it fixes the bug. + # We need to apply the classes/styles on p, td, + # li and h tags instead of inline, + # but that is for another ticket. text = self.justification(el, text) if self.pre_processor.is_first_list_item(el): return self.parse_list(el, text) @@ -366,19 +366,19 @@ def parse_p(self, el, text): def _should_append_break_tag(self, next_el): paragraph_like_tags = [ 'p', - ] + ] inline_like_tags = [ 'smartTag', 'ins', 'delText', - ] + ] if self.pre_processor.is_list_item(next_el): return False if self.pre_processor.previous(next_el) is None: return False tag_is_inline_like = any( has_descendant_with_tag(next_el, tag) for - tag in inline_like_tags + tag in inline_like_tags ) if tag_is_inline_like: return False @@ -413,15 +413,15 @@ def _should_parse_next_as_content(el): if next_el is None: return False if ( - not self.pre_processor.is_list_item(next_el) and - not self.pre_processor.is_last_list_item_in_root(el) - ): + not self.pre_processor.is_list_item(next_el) and + not self.pre_processor.is_last_list_item_in_root(el) + ): return True if self.pre_processor.is_first_list_item(next_el): if ( - self.pre_processor.num_id(next_el) == - self.pre_processor.num_id(el)): - return True + self.pre_processor.num_id(next_el) == + self.pre_processor.num_id(el)): + return True return False while el is not None: @@ -431,7 +431,8 @@ def _should_parse_next_as_content(el): if not next_elements_content: continue if self._should_append_break_tag(el): - parsed += self.break_tag(self.pre_processor.is_in_table(el)) + parsed += self.break_tag( + self.pre_processor.is_in_table(el)) parsed += next_elements_content else: break @@ -449,9 +450,9 @@ def _get_rowspan(self, el, v_merge): if tbl is None: return '' tcs = [ - tc for tc in find_all(tbl, 'tc') - if self.pre_processor.row_index(tc) >= current_row and - self.pre_processor.column_index(tc) == current_col + tc for tc in find_all(tbl, 'tc') + if self.pre_processor.row_index(tc) >= 
current_row and + self.pre_processor.column_index(tc) == current_col ] restart_in_v_merge = False if v_merge is not None and 'val' in v_merge.attrib: @@ -495,7 +496,8 @@ def _should_parse_next_as_content(el): if not next_elements_content: continue if self._should_append_break_tag(el): - parsed += self.break_tag(self.pre_processor.is_in_table(el)) + parsed += self.break_tag( + self.pre_processor.is_in_table(el)) parsed += next_elements_content else: break @@ -539,7 +541,7 @@ def _get_image_size(self, el): return ( '%dpx' % x, '%dpx' % y, - ) + ) shape = find_first(el, 'shape') if shape is not None and shape.get('style') is not None: # If either of these are not set, rely on the method `image` to not @@ -614,7 +616,7 @@ def _has_style_on(run_tag_property, tag): 'dstrike': self.strike, 'vanish': self.hide, 'webHidden': self.hide, - } + } if run_tag_property is not None: for child in run_tag_property: # These tags are a little different, handle them separately @@ -740,4 +742,8 @@ def page_break(self): @abstractmethod def indent(self, text, left='', right='', firstLine=''): - return text # TODO JUSTIFIED JUSTIFIED TEXT + return text + + @abstractmethod + def empty_cell(self): + return '' diff --git a/pydocx/__init__.py b/pydocx/__init__.py index fd55f897..ea84cb6b 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -8,11 +8,8 @@ def docx2html(path): def docx2markdown(path): return Docx2Markdown(path).parsed -<<<<<<< HEAD def docx2latex(path): return Docx2LaTex(path).parsed -======= ->>>>>>> table_fix VERSION = '0.3.1' diff --git a/pydocx/parsers/Docx2Html.py b/pydocx/parsers/Docx2Html.py index ca88c2d5..c829e33d 100644 --- a/pydocx/parsers/Docx2Html.py +++ b/pydocx/parsers/Docx2Html.py @@ -163,11 +163,7 @@ def table(self, text): def table_row(self, text): return '' + text + '' -<<<<<<< HEAD def table_cell(self, text, col='', row='', *args): -======= - def table_cell(self, text, col='', row=''): ->>>>>>> table_fix slug = '' + + def empty_cell(self): + return '' 
diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index 9a40283a..a86bd159 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -65,7 +65,7 @@ def heading(self, text, heading_value): return r'\subsection{%s}' % text + '\n\n' elif heading_value == 'h3': return r'\paragraph{%s}' % text + '\n\n' - elif heading_value == 'h4': + elif heading_value == 'h4': return r'\subparagraph{%s}' % text + '\n\n' else: return text + '\n\n' @@ -85,13 +85,12 @@ def hyperlink(self, text, href): 'text': text, } - def image_handler(self, image_data, filename): extension = filename.split('.')[-1].lower() b64_encoded_src = 'data:image/%s;base64,%s' % ( extension, base64.b64encode(image_data), - ) + ) b64_encoded_src = self.escape(b64_encoded_src) return b64_encoded_src @@ -130,7 +129,7 @@ def table(self, text): if 'Column' in column: if column['Column'] == i: if 'justify' in column: - if column['justify']== 'center': + if column['justify'] == 'center': center = True elif column['justify'] == 'right': right = True @@ -155,7 +154,7 @@ def table_row(self, text): self.counted_columns = True return text - def table_cell(self, text, col='', row='', is_last_row_item = False): + def table_cell(self, text, col='', row='', is_last_row_item=False): if self.hit_list: self.columns = {} self.columns['Column'] = self.col_count @@ -190,7 +189,7 @@ def page_break(self): return r'\newpage ' def indent(self, text, just='', firstLine='', - left='', right='', hanging='', is_in_table = False): + left='', right='', hanging='', is_in_table=False): if not is_in_table: raggedright = False raggedleft = False @@ -254,13 +253,14 @@ def change_orientation(self, parsed, orient): if orient == 'portrait': return parsed if orient == 'landscape': - return r'\begin{landscape}' + '\n' + parsed + '\end{landscape}' + '\n' + return r'\begin{landscape}' + '\n' \ + + parsed + '\end{landscape}' + '\n' def deletion(self, text, author, date): return 
r'\deleted[id='+author+',remark='+date+']{%s}' % text def caps(self, text): - return r'\MakeUppercase{%s}' %text + return r'\MakeUppercase{%s}' % text def small_caps(self, text): return r'\textsx{%s}' % text @@ -275,4 +275,7 @@ def superscript(self, text): return r'\textsuperscript{%s}' % text def subscript(self, text): - return r'\textsubscript{%s}' % text \ No newline at end of file + return r'\textsubscript{%s}' % text + + def empty_cell(self): + return ' & ' diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index ccb80521..ba83bdc2 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py @@ -168,7 +168,6 @@ class ImageLocal(_TranslationTestCase): \includegraphics {word/media/image2.jpeg} ''' - def get_xml(self): drawing = DXB.drawing(height=None, width=None, r_id='rId0') pict = DXB.pict(height=None, width=None, r_id='rId1') @@ -203,7 +202,9 @@ class ImageTestCase(_TranslationTestCase): ''' + '\n' + ''' \includegraphics[height=21ptpt, width=41pt]{word/media/image2.jpeg} ''' + def get_xml(self): + drawing = DXB.drawing(height=20, width=40, r_id='rId0') pict = DXB.pict(height=21, width=41, r_id='rId1') tags = [ @@ -339,7 +340,6 @@ class TableTag(_TranslationTestCase): \end{tabular} ''' - def get_xml(self): cell1 = DXB.table_cell(paragraph=DXB.p_tag('AAA')) cell2 = DXB.table_cell(paragraph=DXB.p_tag('CCC')) @@ -366,6 +366,13 @@ class RowSpanTestCase(_TranslationTestCase): ''' + latex_expected_output = r''' + \begin{tabular}{ll} + \multirow{2}{*}{AAA} & {BBB} \\ + & {CCC} \\ + \end{tabular} + ''' + def get_xml(self): cell1 = DXB.table_cell( paragraph=DXB.p_tag('AAA'), merge=True, merge_continue=False) @@ -489,7 +496,6 @@ class TableWithListAndParagraph(_TranslationTestCase): \end{enumerate}CCC\\DDD} \\ \end{tabular}''' - def get_xml(self): li_text = [ ('AAA', 0, 1), @@ -668,7 +674,6 @@ class ListWithMultipleContinuationTestCase(_TranslationTestCase): \end{enumerate} ''' - def get_xml(self): cell = DXB.table_cell(paragraph=DXB.p_tag('BBB')) 
row = DXB.table_row([cell]) @@ -1161,7 +1166,6 @@ class HeadingTestCase(_TranslationTestCase):

    HHH

    ''' -<<<<<<< HEAD latex_expected_output = r'''\section{AAA} ''' + '\n' + ''' \subsection{BBB} @@ -1177,8 +1181,6 @@ class HeadingTestCase(_TranslationTestCase): HHH ''' -======= ->>>>>>> table_fix styles_dict = { 'style0': 'heading 1', 'style1': 'heading 2', @@ -1247,7 +1249,8 @@ class RomanNumeralToHeadingTestCase(_TranslationTestCase): latex_expected_output = r''' \subsection{AAA}\begin{enumerate} \item BBB \end{enumerate}\subsection{CCC}\begin{enumerate} \item DDD - \end{enumerate}\subsection{EEE}\begin{enumerate} \item FFF\begin{enumerate} \item GGG + \end{enumerate}\subsection{EEE}\begin{enumerate} + \item FFF\begin{enumerate} \item GGG \end{enumerate} \end{enumerate}''' From 227342a3819e3684cef2b166ef87c3865db3a2a7 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 18 Jun 2013 13:50:23 -0400 Subject: [PATCH 366/404] changes based on comments --- pydocx/DocxParser.py | 26 ++++++++++++-------------- pydocx/parsers/Docx2LaTex.py | 5 +++-- pydocx/tests/__init__.py | 3 ++- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 273c5998..3c6d17e3 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -143,7 +143,6 @@ def parse(self, el): for child in el: # recursive. 
So you can get all the way to the bottom parsed += self.parse(child) - if el.tag == 'br' and el.attrib.get('type') == 'page': return self.parse_page_break(el, parsed) elif el.tag == 'tbl': @@ -193,7 +192,8 @@ def parse_table_cell(self, el, text): else: rowspan = '' return self.table_cell( - text, colspan, rowspan, self.pre_processor.is_last_row_item(el)) + text, colspan, rowspan, self.pre_processor.is_last_row_item(el), + has_descendant_with_tag(el, 'ilvl')) def parse_list(self, el, text): """ @@ -247,7 +247,7 @@ def is_same_list(next_el, num_id, ilvl): return False if self.pre_processor.is_last_list_item_in_root(next_el): return False - # If next_el is not a list item then roll it into the list by + # If next_el is not a list item then roll it into the list by # returning True. if not self.pre_processor.is_list_item(next_el): return True @@ -276,19 +276,19 @@ def is_same_list(next_el, num_id, ilvl): def should_parse_last_el(last_el, first_el): if last_el is None: return False - # Different list + # Different list if ( self.pre_processor.num_id(last_el) != self.pre_processor.num_id(first_el)): return False - # Will be handled when the ilvls do match (nesting issue) + # Will be handled when the ilvls do match (nesting issue) if ( self.pre_processor.ilvl(last_el) != self.pre_processor.ilvl(first_el)): return False - # We only care about last items that have not been - # parsed before (first list items are - # always parsed at the beginning of this method.) + # We only care about last items that have not been + # parsed before (first list items are + # always parsed at the beginning of this method.) 
return ( not self.pre_processor.is_first_list_item(last_el) and self.pre_processor.is_last_list_item_in_root(last_el) @@ -320,7 +320,6 @@ def justification(self, el, text): value = _justification.attrib['val'] if value in [JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT]: alignment = value - if indentation is not None: if INDENTATION_RIGHT in indentation.attrib: right = indentation.attrib[INDENTATION_RIGHT] @@ -344,9 +343,8 @@ def justification(self, el, text): def parse_p(self, el, text): if text == '': return '' - # TODO This is still not correct, however it fixes the bug. - # We need to apply the classes/styles on p, td, - # li and h tags instead of inline, + # TODO This is still not correct, however it fixes the bug. We need to + # apply the classes/styles on p, td, li and h tags instead of inline, # but that is for another ticket. text = self.justification(el, text) if self.pre_processor.is_first_list_item(el): @@ -436,7 +434,7 @@ def _should_parse_next_as_content(el): parsed += next_elements_content else: break - # Create the actual li element + # Create the actual li element return self.list_element(parsed) def _get_rowspan(self, el, v_merge): @@ -518,7 +516,7 @@ def _get_image_id(self, el): # On drawing tags the id is actually whatever is returned from the # embed attribute on the blip tag. Thanks a lot Microsoft. 
return blip.get('embed') - # Picts + # Picts imagedata = find_first(el, 'imagedata') if imagedata is not None: return imagedata.get('id') diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index a86bd159..d53f1ba0 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -154,8 +154,9 @@ def table_row(self, text): self.counted_columns = True return text - def table_cell(self, text, col='', row='', is_last_row_item=False): - if self.hit_list: + def table_cell(self, text, col='', row='', is_last_row_item=False, is_list_item=False): + print text, is_list_item + if is_list_item: self.columns = {} self.columns['Column'] = self.col_count self.columns['list'] = True diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index 41267d10..c56c65d2 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -297,7 +297,8 @@ def image_handler(self, src, *args, **kwargs): numbering_dict=self.numbering_dict, styles_dict=self.styles_dict, ).parsed - assert_html_equal(html, BASE_HTML % self.expected_output) + if self.use_base_html: + assert_html_equal(html, BASE_HTML % self.expected_output) latex_parser.image_handler = image_handler latex = latex_parser( From be11f108d55371330691ae2878f91bc834dc74ee Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 18 Jun 2013 13:52:48 -0400 Subject: [PATCH 367/404] fixed some more indentation stuff --- pydocx/tests/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index c56c65d2..acf82471 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -41,7 +41,8 @@ \usepackage{changepage} \usepackage{hanging}\usepackage{multirow} \usepackage{pbox}\usepackage{pdflscape} -\usepackage{ulem}\usepackage{comment}\begin{document}''' + "%s" + r'''\end{document} +\usepackage{ulem}\usepackage{comment} +\begin{document}''' + "%s" + r'''\end{document} ''' @@ -178,12 +179,13 @@ def 
_parse_styles(self): '1': { '0': 'decimal', '1': 'decimal', - }, + }, '2': { '0': 'lowerLetter', '1': 'lowerLetter', - }, - } + }, +} + class XMLDocx2Html(Docx2Html): """ From 331efe9eece998894f241cd2478e5cd2169a9db6 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 18 Jun 2013 14:12:26 -0400 Subject: [PATCH 368/404] comments based on changes; fixed more spacing errors --- pydocx/DocxParser.py | 12 ++++++------ pydocx/tests/__init__.py | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pydocx/DocxParser.py b/pydocx/DocxParser.py index 3c6d17e3..092248f0 100644 --- a/pydocx/DocxParser.py +++ b/pydocx/DocxParser.py @@ -280,12 +280,12 @@ def should_parse_last_el(last_el, first_el): if ( self.pre_processor.num_id(last_el) != self.pre_processor.num_id(first_el)): - return False + return False # Will be handled when the ilvls do match (nesting issue) if ( self.pre_processor.ilvl(last_el) != self.pre_processor.ilvl(first_el)): - return False + return False # We only care about last items that have not been # parsed before (first list items are # always parsed at the beginning of this method.) @@ -343,9 +343,9 @@ def justification(self, el, text): def parse_p(self, el, text): if text == '': return '' - # TODO This is still not correct, however it fixes the bug. We need to - # apply the classes/styles on p, td, li and h tags instead of inline, - # but that is for another ticket. + # TODO This is still not correct, however it fixes the bug. We need to + # apply the classes/styles on p, td, li and h tags instead of inline, + # but that is for another ticket. 
text = self.justification(el, text) if self.pre_processor.is_first_list_item(el): return self.parse_list(el, text) @@ -419,7 +419,7 @@ def _should_parse_next_as_content(el): if ( self.pre_processor.num_id(next_el) == self.pre_processor.num_id(el)): - return True + return True return False while el is not None: diff --git a/pydocx/tests/__init__.py b/pydocx/tests/__init__.py index acf82471..1e23ec96 100644 --- a/pydocx/tests/__init__.py +++ b/pydocx/tests/__init__.py @@ -301,6 +301,8 @@ def image_handler(self, src, *args, **kwargs): ).parsed if self.use_base_html: assert_html_equal(html, BASE_HTML % self.expected_output) + else: + assert_html_equal(html, self.expected_output) latex_parser.image_handler = image_handler latex = latex_parser( From 8b90ce18963a2cd77091e433583bad1b98c44f59 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 18 Jun 2013 14:16:54 -0400 Subject: [PATCH 369/404] flake8 fix --- pydocx/parsers/Docx2LaTex.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index d53f1ba0..3c806246 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -154,8 +154,9 @@ def table_row(self, text): self.counted_columns = True return text - def table_cell(self, text, col='', row='', is_last_row_item=False, is_list_item=False): - print text, is_list_item + def table_cell( + self, text, col='', row='', + is_last_row_item=False, is_list_item=False): if is_list_item: self.columns = {} self.columns['Column'] = self.col_count From 1ce235dbb0e6a5c1786f68f0605c697c758795c5 Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Tue, 18 Jun 2013 15:09:07 -0400 Subject: [PATCH 370/404] fixed another spacing issue --- pydocx/tests/test_xml.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pydocx/tests/test_xml.py b/pydocx/tests/test_xml.py index ba83bdc2..adfd05cb 100644 --- a/pydocx/tests/test_xml.py +++ b/pydocx/tests/test_xml.py 
@@ -169,18 +169,18 @@ class ImageLocal(_TranslationTestCase): ''' def get_xml(self): - drawing = DXB.drawing(height=None, width=None, r_id='rId0') - pict = DXB.pict(height=None, width=None, r_id='rId1') - tags = [ - drawing, - pict, - ] - body = '' - for el in tags: - body += el + drawing = DXB.drawing(height=None, width=None, r_id='rId0') + pict = DXB.pict(height=None, width=None, r_id='rId1') + tags = [ + drawing, + pict, + ] + body = '' + for el in tags: + body += el - xml = DXB.xml(body) - return xml + xml = DXB.xml(body) + return xml class ImageTestCase(_TranslationTestCase): @@ -204,7 +204,6 @@ class ImageTestCase(_TranslationTestCase): ''' def get_xml(self): - drawing = DXB.drawing(height=20, width=40, r_id='rId0') pict = DXB.pict(height=21, width=41, r_id='rId1') tags = [ From 8b43476a0493afa71d56638753e8acbf47aa5abe Mon Sep 17 00:00:00 2001 From: Sam Portnow Date: Thu, 20 Jun 2013 22:14:55 -0400 Subject: [PATCH 371/404] fixed up the table method --- pydocx/parsers/Docx2LaTex.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pydocx/parsers/Docx2LaTex.py b/pydocx/parsers/Docx2LaTex.py index 3c806246..5a59ed3c 100644 --- a/pydocx/parsers/Docx2LaTex.py +++ b/pydocx/parsers/Docx2LaTex.py @@ -30,6 +30,9 @@ def escape(self, text): def linebreak(self): return '\n\n' + def paragraph(self, text, pre=None): + return text + '\n\n' + def bold(self, text): return r'\textbf {%s}' % text @@ -70,10 +73,6 @@ def heading(self, text, heading_value): else: return text + '\n\n' - def paragraph(self, text, pre=None): - self.hit_list = False - return text + '\n\n' - def insertion(self, text, author, date): return r'\added[id='+author+',remark='+date+']{%s}' % text @@ -125,16 +124,17 @@ def table(self, text): pcm = False setup_cols = '' for i in range(0, self.col_count): - for column in self.table_info: - if 'Column' in column: - if column['Column'] == i: - if 'justify' in column: - if column['justify'] == 'center': - center = True 
- elif column['justify'] == 'right': - right = True - elif column['list']: - pcm = True + match = next(( + column for column in self.table_info + if 'Column' in column and column['Column'] == i), None) + if match: + if 'justify' in match: + if match['justify'] == 'center': + center = True + elif match['justify'] == 'right': + right = True + elif match['list']: + pcm = True if center is True: setup_cols += 'c' center = False From 1b83a0f5e591f8399c5c784af137fc072766d272 Mon Sep 17 00:00:00 2001 From: SamPortnow Date: Wed, 26 Jun 2013 16:02:03 -0400 Subject: [PATCH 372/404] updated init function to include docx2html --- pydocx/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index ea84cb6b..07833131 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -1,4 +1,5 @@ from .parsers import Docx2LaTex, Docx2Html, Docx2Markdown +from HtmlConversion import Html2Docx def docx2html(path): @@ -12,4 +13,7 @@ def docx2markdown(path): def docx2latex(path): return Docx2LaTex(path).parsed +def html2docx(path): + return Html2Docx(path).parsed + VERSION = '0.3.1' From b0236f45bab68f014ac8273138d470882e80ea17 Mon Sep 17 00:00:00 2001 From: SamPortnow Date: Wed, 26 Jun 2013 16:09:30 -0400 Subject: [PATCH 373/404] added the py_docx module and the htmlconversion parser --- pydocx/HtmlConversion.py | 394 +++++ pydocx/py_docx/.gitignore | 8 + pydocx/py_docx/HACKING.markdown | 104 ++ pydocx/py_docx/LICENSE | 22 + pydocx/py_docx/MANIFEST.in | 5 + pydocx/py_docx/Makefile | 28 + pydocx/py_docx/README.markdown | 81 ++ pydocx/py_docx/SERVING_SUGGESTIONS.markdown | 12 + pydocx/py_docx/__init__.py | 0 pydocx/py_docx/docx.py | 1270 +++++++++++++++++ pydocx/py_docx/example-extracttext.py | 30 + pydocx/py_docx/example-makedocument.py | 114 ++ pydocx/py_docx/image1.png | Bin 0 -> 45187 bytes pydocx/py_docx/numbering.xml | 509 +++++++ pydocx/py_docx/screenshot.png | Bin 0 -> 88434 bytes pydocx/py_docx/setup.py | 27 + 
pydocx/py_docx/template/_rels/.rels | 14 + .../py_docx/template/docProps/thumbnail.jpeg | Bin 0 -> 45868 bytes pydocx/py_docx/template/word/fontTable.xml | 52 + pydocx/py_docx/template/word/settings.xml | 43 + pydocx/py_docx/template/word/styles.xml | 2 + pydocx/py_docx/template/word/theme/theme1.xml | 2 + pydocx/py_docx/tests/image1.png | Bin 0 -> 45187 bytes pydocx/py_docx/tests/test_docx.py | 150 ++ 24 files changed, 2867 insertions(+) create mode 100644 pydocx/HtmlConversion.py create mode 100644 pydocx/py_docx/.gitignore create mode 100644 pydocx/py_docx/HACKING.markdown create mode 100644 pydocx/py_docx/LICENSE create mode 100644 pydocx/py_docx/MANIFEST.in create mode 100644 pydocx/py_docx/Makefile create mode 100644 pydocx/py_docx/README.markdown create mode 100644 pydocx/py_docx/SERVING_SUGGESTIONS.markdown create mode 100644 pydocx/py_docx/__init__.py create mode 100755 pydocx/py_docx/docx.py create mode 100755 pydocx/py_docx/example-extracttext.py create mode 100755 pydocx/py_docx/example-makedocument.py create mode 100644 pydocx/py_docx/image1.png create mode 100644 pydocx/py_docx/numbering.xml create mode 100644 pydocx/py_docx/screenshot.png create mode 100644 pydocx/py_docx/setup.py create mode 100644 pydocx/py_docx/template/_rels/.rels create mode 100644 pydocx/py_docx/template/docProps/thumbnail.jpeg create mode 100644 pydocx/py_docx/template/word/fontTable.xml create mode 100644 pydocx/py_docx/template/word/settings.xml create mode 100644 pydocx/py_docx/template/word/styles.xml create mode 100644 pydocx/py_docx/template/word/theme/theme1.xml create mode 100644 pydocx/py_docx/tests/image1.png create mode 100644 pydocx/py_docx/tests/test_docx.py diff --git a/pydocx/HtmlConversion.py b/pydocx/HtmlConversion.py new file mode 100644 index 00000000..cab112f1 --- /dev/null +++ b/pydocx/HtmlConversion.py @@ -0,0 +1,394 @@ +import xml.etree.ElementTree as ElementTree +from xml.etree.ElementTree import _ElementInterface +from pydocx.py_docx.docx import * 
+import py_docx.docx as docx + + +def find_first(self, tag): + """ + Find the first occurrence of a tag beneath the current element. + """ + return self.find('.//' + tag) + + +def find_all(self, tag): + """ + Find all occurrences of a tag + """ + return self.findall('.//' + tag) + + +def has_descendant_with_tag(el, tag): + """ + Determine if there is a child ahead in the element tree. + """ + # Get child. stop at first child. + return True if el.find('.//' + tag) is not None else False + + +setattr(_ElementInterface, 'find_first', find_first) +setattr(_ElementInterface, 'find_all', find_all) +setattr(_ElementInterface, 'is_first_list_item', False) +setattr(_ElementInterface, 'is_last_list_item', False) +setattr(_ElementInterface, 'in_table', False) +setattr(_ElementInterface, 'has_descendant_with_tag', has_descendant_with_tag) +setattr(_ElementInterface, 'new_list', False) +setattr(_ElementInterface, 'new_ilvl', False) +setattr(_ElementInterface, 'is_first_list', False) +setattr(_ElementInterface, 'is_last_item_in_list', False) + + +class Html2Docx(): + + def __init__(self, html): + # set up what is parsed + self.parsed = '' + with open(html, 'r') as f: + html = f.read() + # need to keep track of elements + # that have been visited + self.visited = [] + self.stored_numId = 0 + # need to keep track of the + # ilvl in the document + self.stored_ilvl = 0 + #abstractId info for the numbering documents + self.abstractIdInfo = [] + #numIds for the numbering document. 
+ #these correspond to the abstractIdInfo + self.numIds = [] + #for the numbering document + self.abstract = None + # set up the html + self.html = ElementTree.fromstring(html) + # get the relationship list + self.relationships = relationshiplist() + # make a new document + self.document = newdocument() + #get the body + self.body = self.document.xpath( + '/w:document/w:body', namespaces=nsprefixes)[0] + #make a new numbering document + self.numbering = new_numbering() + #start bulding the document + self.build() + + def build(self): + #first step is to add parent attribute + #for the whole document + def add_parent(el): + for child in el.getchildren(): + setattr(child, 'parent', el) + add_parent(child) + add_parent(self.html) + #now set the list attributes + self.set_list_attributes() + #and begin parsing + self.parse(self.html.find_first('body')) + + def find_all_by_tags(self, html, *args): + #helper function to find all the elements + #with mutiple tags + list_elements = [] + for el in html.iter(): + if el.tag in args: + list_elements.append(el) + return list_elements + + def check_for_lst_parent(self, el): + #helper function to see if a list + #has an li as a parent. 
# NOTE(review): this chunk arrived with its newlines and indentation collapsed,
# so the statement nesting below is reconstructed from the code and its
# comments.  These are methods of the HTML-to-docx builder class whose header
# lies outside this chunk -- confirm the nesting against the original file
# before merging.


def check_for_lst_parent(self, el):
    """Return True when *el* has an ``li`` ancestor below ``body``.

    A list whose ancestor chain contains a list item is a nested list.  The
    walk stops at the ``body`` element or at an element that carries no
    ``parent`` attribute.

    The previous recursive version dropped the result of its recursive call
    and therefore returned ``None`` whenever the ``li`` was not the direct
    parent.
    """
    ancestor = getattr(el, 'parent', None)
    while ancestor is not None and ancestor.tag != 'body':
        if ancestor.tag == 'li':
            return True
        ancestor = getattr(ancestor, 'parent', None)
    return False


def set_list_attributes(self):
    """Annotate every list item with its numbering id and indent level.

    Sets ``is_first_list_item`` / ``is_last_list_item`` on the edge items of
    each ``ol``/``ul`` and ``ilvl``, ``num_id`` and ``is_list_item`` on every
    item; ``new_ilvl`` / ``new_list`` are set where a level or list starts.
    """
    ilvl = 0
    numId = 0
    for lst in self.find_all_by_tags(self.html, 'ol', 'ul'):
        items = lst.getchildren()
        if not items:
            # An empty <ol>/<ul> has no first or last item to flag.
            continue
        items[0].is_first_list_item = True
        items[-1].is_last_list_item = True
        for item in items:
            nested = self.check_for_lst_parent(item.parent)
            # The first/last flags only exist on the edge items, so read
            # them with a default instead of assuming the attribute exists.
            if not nested:
                if getattr(item, 'is_last_list_item', False):
                    # Last item of a top-level list: a new list starts, which
                    # always implies a new indentation level starting at 0.
                    # NOTE(review): these four statements are assumed to
                    # belong to this ``if`` -- confirm.
                    numId += 1
                    item.new_ilvl = True
                    item.new_list = True
                    ilvl = 0
            elif getattr(item, 'is_first_list_item', False):
                # First item of a nested list: one indentation level deeper.
                item.new_ilvl = True
                ilvl += 1
            item.ilvl = ilvl
            item.num_id = numId
            item.is_list_item = True


def _parse_children(self, el):
    """Recursively convert the children of *el* into docx body elements."""
    for child in el.getchildren():
        if child.tag in ('br', 'p'):
            # Breaks and paragraphs both become a docx paragraph built from
            # the text/style runs found under the element.
            text_and_style, just = self.parse_r(child)
            self.body.append(paragraph(text_and_style, jc=just))
        if child.tag in ('ul', 'ol'):
            self.parse_list(child, child.tag)
        if child.tag == 'table':
            # In Word the table elements are the parents of the paragraphs,
            # so tables get their own conversion routine.
            self.body.append(self.parse_table(child))
        self._parse_children(child)


def parse(self, el):
    """Convert the subtree under *el* and then write the document.

    The document is saved once, after the whole subtree has been walked,
    instead of once per recursion level.
    """
    self._parse_children(el)
    self.save()


def parse_r(self, el):
    """Collect the text runs and the justification found under *el*.

    Returns ``(par_block, just)`` where ``par_block`` is a list of
    ``[text, style, breaks]`` entries (``style`` is a string built from
    ``'i'``, ``'b'`` and ``'u'``) and ``just`` is ``'left'``, ``'center'`` or
    ``'right'``.  Every ``br`` that is consumed is recorded in
    ``self.visited`` so that it is not emitted twice.
    """
    par_block = []
    # NOTE: one list shared by every entry of ``par_block``, as before.
    breaks = []
    style = ''
    just = 'left'
    for child in el.iter():
        text = ''
        if child.tag == 'div':
            # A div without a class attribute used to raise KeyError here.
            css_class = child.attrib.get('class', '')
            if 'center' in css_class:
                just = 'center'
            elif 'right' in css_class:
                just = 'right'
        if child.tag == 'em':
            style += 'i'
        if child.tag == 'strong':
            style += 'b'
        if child.tag == 'underline':
            style += 'u'
        if child.text:
            text = child.text
        if child.tag == 'br' and child not in self.visited:
            # The text belonging to a break is stored in its tail.
            text = child.tail
            breaks.append('br')
            self.visited.append(child)
        if text:
            par_block.append([text, style, breaks])
            style = ''
        # NOTE(review): assumed to sit at loop level (not under ``if text``)
        # -- confirm against the original indentation.
        if child.parent and child.parent.tag == 'li':
            # Direct children of a list item end the run early.
            return par_block, just
    return par_block, just


def parse_list(self, lst, lst_type=''):
    """Convert an ``ol``/``ul`` element into list paragraphs.

    While walking the items this also builds up the numbering document,
    which has two sections: abstract numbering definitions holding the list
    formatting, and ``num`` entries that reference those definitions.

    ``lst_type`` is the list tag; ``'ol'`` yields a decimal list and anything
    else is treated as a bulleted list (previously any other value left the
    list type unbound).
    """
    type_lst = 'decimal' if lst_type == 'ol' else 'bullet'
    for child in lst.getchildren():
        if child in self.visited:
            continue
        self.visited.append(child)
        text_and_style, just = self.parse_r(child)
        if getattr(child, 'new_ilvl', False):
            # A new level: describe its formatting for the numbering part.
            ind = 720 * (child.ilvl + 1)
            num = create_list_attributes(
                ilvl=str(child.ilvl),
                type=type_lst, just=just, left=str(ind))
            self.numIds.append(num)
            self.stored_ilvl += 1
            # NOTE(review): the abstract-numbering block is assumed to be
            # nested under ``new_ilvl`` -- confirm.
            if not child.find('ol') and not child.find('ul'):
                # No further nested list below this item, so the abstract
                # definition can be completed.  Word pads the unused levels
                # with "tentative" entries.
                tentatives = fill_tentative(
                    self.stored_ilvl, type_lst=type_lst)
                self.abstract = create_list(child.num_id - 1)
                self.numbering.append(self.abstract)
                for num in self.numIds:
                    self.abstract.append(num)
                for tentative in tentatives:
                    self.abstract.append(tentative)
                self.abstractIdInfo.append(
                    create_abstract_IdInfo(str(child.num_id)))
                # Start collecting levels afresh for the next list.
                self.stored_ilvl = 0
                self.numIds = []
        self.body.append(
            paragraph(
                text_and_style, is_list=True,
                ilvl=str(child.ilvl), numId=str(child.num_id),
                style=lst_type, jc=just))
        # A nested list has to be emitted before the next item of this list.
        if child.find('ul'):
            nested = child.find('ul')
            self.parse_list(nested, nested.tag)
        if child.find('ol'):
            nested = child.find('ol')
            self.parse_list(nested, nested.tag)


def table_look_ahead(self, tbl):
    """Insert placeholder cells for vertically merged (rowspan) cells.

    HTML expresses a vertical merge with ``rowspan`` and simply omits the
    covered cells, whereas Word expects an empty cell carrying a vmerge
    marker in each covered row.  The placeholders are inserted into the HTML
    tree and *tbl* is returned.
    """
    trs = tbl.find_all('tr')
    for i, tr in enumerate(trs):
        for j, cell in enumerate(tr.find_all('td')):
            if 'rowspan' not in cell.attrib:
                continue
            # Never reach past the last row when the rowspan overshoots.
            last = min(i + int(cell.attrib['rowspan']), len(trs))
            for row in trs[i + 1:last]:
                tc = ElementTree.Element('td')
                setattr(tc, 'parent', row)
                tc.set('vmerge_continue', True)
                row.insert(j, tc)
    return tbl


def get_columns(self, tbl):
    """Return the column count of *tbl*, derived from its first row.

    Cells with a ``colspan`` count for that many columns.  Every cell of the
    first row is flagged with ``in_table``.  A table without rows has zero
    columns.
    """
    trs = tbl.find_all('tr')
    if not trs:
        return 0
    columns = 0
    for tc in trs[0].find_all('td'):
        tc.in_table = True
        columns += int(tc.attrib.get('colspan', 1))
    return columns


def parse_table(self, el):
    """Convert an HTML table element into a docx table element."""
    tbl = createtblproperties(self.get_columns(el))
    # The look-ahead adds the placeholder cells needed for vertical merges.
    for tr in self.table_look_ahead(el).getchildren():
        table_row = createtablerow()
        for tc in tr.find_all('td'):
            colspan = tc.attrib.get('colspan', '')
            vmerge = {}
            # A rowspan starts a vertical merge; placeholder cells added by
            # the look-ahead continue it.
            if 'rowspan' in tc.attrib:
                vmerge = {'val': 'restart'}
            if 'vmerge_continue' in tc.attrib:
                vmerge = {'val': 'continue'}
            cell = createtablecell(gridspan=colspan, vmerge=vmerge)
            text_and_style, just = self.parse_r(tc)
            cell.append(paragraph(text_and_style, jc=just))
            table_row.append(cell)
        tbl.append(table_row)
    return tbl


def save(self, filename='Testing.docx'):
    """Write the document that has been built so far to *filename*.

    The collected ``num`` entries are appended to the numbering document
    before the package is written.
    """
    title = 'Python docx demo'
    subject = 'A practical example of making docx from Python'
    creator = 'Mike MacCana'
    keywords = ['python', 'Office Open XML', 'Word']
    for abstract in self.abstractIdInfo:
        self.numbering.append(abstract)
    coreprops = coreproperties(
        title=title, subject=subject,
        creator=creator, keywords=keywords)
    appprops = appproperties()
    contenttypes = docx.contenttypes()
    websettings = docx.websettings()
    wordrelationships = docx.wordrelationships(self.relationships)
    savedocx(
        self.document, coreprops,
        appprops, contenttypes, websettings,
        wordrelationships, filename, self.numbering)
b/pydocx/py_docx/.gitignore new file mode 100644 index 00000000..a67f55a1 --- /dev/null +++ b/pydocx/py_docx/.gitignore @@ -0,0 +1,8 @@ +.coverage +*.pyc +*.docx +*.kpf +build +dist +template/word/media +MANIFEST diff --git a/pydocx/py_docx/HACKING.markdown b/pydocx/py_docx/HACKING.markdown new file mode 100644 index 00000000..9009eee2 --- /dev/null +++ b/pydocx/py_docx/HACKING.markdown @@ -0,0 +1,104 @@ +Adding Features +=============== + +# Recommended reading + +- The [LXML tutorial](http://codespeak.net/lxml/tutorial.html) covers the basics of XML etrees, which we create, append and insert to make XML documents. LXML also provides XPath, which we use to specify locations in the document. +- If you're stuck, check out the [OpenXML specs and videos](http://openxmldeveloper.org). In particular, the [OpenXML ECMA spec] [] is well worth a read. +- Learning about [XML namespaces](http://www.w3schools.com/XML/xml_namespaces.asp) +- The [Namespaces section of Dive into Python](http://diveintopython3.org/xml.html) +- Microsoft's [introduction to the Office (2007) Open XML File Formats](http://msdn.microsoft.com/en-us/library/aa338205.aspx) + +# How can I contribute? + +Fork the project on github, then send the main project a [pull request](http://github.com/guides/pull-requests). The project will then accept your pull (in most cases), which will show your changes as part of the changelog for the main project, along with your name and picture. + +# A note about namespaces and LXML + +LXML doesn't use namespace prefixes. It just uses the actual namespaces, and wants you to set a namespace on each tag. For example, rather than making an element with the 'w' namespace prefix, you'd make an element with the '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}' prefix.
+ +To make this easier: + +- The most common namespace, '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}' (prefix 'w') is automatically added by makeelement() +- You can specify other namespaces with 'nsprefix', which maps the prefixes Word files use to the actual namespaces, eg: + +
    makeelement('coreProperties',nsprefix='cp')
    + +will generate: + + + +which is the same as what Word generates: + + + +The namespace prefixes are different, but that's irrelevant as the namespaces themselves are the same. + +There's also a cool side effect - you can ignore setting 'xmlns' attributes that aren't used directly in the current element, since there's no need. Eg, you can make the equivalent of this from a Word file: + + + + +With the following code: + + docprops = makeelement('coreProperties',nsprefix='cp') + +We only need to specify the 'cp' prefix because that's what this element uses. The other 'xmlns' attributes are used to specify the prefixes for child elements. We don't need to specify them here because each child element will have its namespace specified when we make that child. + +# Coding Style + +Basically just look at what's there. But if you need something more specific: + +- Functional - every function should take some inputs, return something, and not use any globals. +- [Google Python Style Guide style](http://code.google.com/p/soc/wiki/PythonStyleGuide) + +# Unit Testing + +After adding code, open **tests/test_docx.py** and add a test that calls your function and checks its output. + +- Use **easy_install** to fetch the **nose** and **coverage** modules +- Run + +
    nosetests --with-coverage
    + +to run all the doctests. They should all pass. + +# Tips + +## If Word complains about files: + +First, determine whether Word can recover the files: +- If Word cannot recover the file, you most likely have a problem with your zip file +- If Word can recover the file, you most likely have a problem with your XML + +### Common Zipfile issues + +- Ensure the same file isn't included twice in your zip archive. Zip supports this, Word doesn't. +- Ensure that all media files have an entry for their file type in [Content_Types].xml +- Ensure that files in the zip file have leading '/'s removed. + +### Common XML issues + +- Ensure the _rels, docProps, word, etc directories are in the top level of your zip file. +- Check your namespaces - on both the tags, and the attributes +- Check capitalization of tag names +- Ensure you're not missing any attributes +- If images or other embedded content is shown with a large red X, your relationships file is missing data. + +#### One common debugging technique we've used before + +- Re-saving the document in Word will produce a fixed version of the file +- Unzip and grab the serialized XML out of the fixed file +- Use etree.fromstring() to turn it into an element, and include that in your code.
+- Check that a correct file is generated +- Remove an element from your string-created etree (including both opening and closing tags) +- Use element.append(makeelement()) to add that element to your tree +- Open the doc in Word and see if it still works +- Repeat the last three steps until you discover which element is causing the problem + +[OpenXML ECMA spec]: http://www.ecma-international.org/publications/files/ECMA-ST/Office%20Open%20XML%201st%20edition%20Part%204%20(DOCX).zip \ No newline at end of file diff --git a/pydocx/py_docx/LICENSE b/pydocx/py_docx/LICENSE new file mode 100644 index 00000000..c621d034 --- /dev/null +++ b/pydocx/py_docx/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2009-2010 Mike MacCana + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file diff --git a/pydocx/py_docx/MANIFEST.in b/pydocx/py_docx/MANIFEST.in new file mode 100644 index 00000000..da4ec342 --- /dev/null +++ b/pydocx/py_docx/MANIFEST.in @@ -0,0 +1,5 @@ +include template/* +include template/_rels/* +include template/docProps/* +include template/word/* +include template/word/theme/* diff --git a/pydocx/py_docx/Makefile b/pydocx/py_docx/Makefile new file mode 100644 index 00000000..52d1c96f --- /dev/null +++ b/pydocx/py_docx/Makefile @@ -0,0 +1,28 @@ +PYTHON = $(shell test -x bin/python && echo bin/python || echo `which python`) +SETUP = $(PYTHON) ./setup.py + +.PHONY: clean help coverage register sdist upload + +help: + @echo "Please use \`make ' where is one or more of" + @echo " clean delete intermediate work product and start fresh" + @echo " coverage run nosetests with coverage" + @echo " register update metadata (README.rst) on PyPI" + @echo " sdist generate a source distribution into dist/" + @echo " upload upload distribution tarball to PyPI" + +clean: + find . -type f -name \*.pyc -exec rm {} \; + rm -rf dist .coverage .DS_Store MANIFEST + +coverage: + nosetests --with-coverage --cover-package=docx --cover-erase + +register: + $(SETUP) register + +sdist: + $(SETUP) sdist + +upload: + $(SETUP) sdist upload diff --git a/pydocx/py_docx/README.markdown b/pydocx/py_docx/README.markdown new file mode 100644 index 00000000..cbccf12a --- /dev/null +++ b/pydocx/py_docx/README.markdown @@ -0,0 +1,81 @@ +Python docx +=========== + +## Introduction + +The docx module creates, reads and writes Microsoft Office Word 2007 docx files. + +These are referred to as 'WordML', 'Office Open XML' and 'Open XML' by Microsoft. + +These documents can be opened in Microsoft Office 2007 / 2010, Microsoft Mac Office 2008, Google Docs, OpenOffice.org 3, and Apple iWork 08. + +They also [validate as well formed XML](http://validator.w3.org/check). 
+ +The module was created when I was looking for a Python support for MS Word .doc files, but could only find various hacks involving COM automation, calling .net or Java, or automating OpenOffice or MS Office. + +The docx module has the following features: + +### Making documents + +Features for making documents include: + +- Paragraphs +- Bullets +- Numbered lists +- Document properties (author, company, etc) +- Multiple levels of headings +- Tables +- Section and page breaks +- Images + +
    + +### Editing documents + +Thanks to the awesomeness of the lxml module, we can: + +- Search and replace +- Extract plain text of document +- Add and delete items anywhere within the document +- Change document properties +- Run xpath queries against particular locations in the document - useful for retrieving data from user-completed templates. + +# Getting started + +## Making and Modifying Documents + +- Just [download python docx](http://github.com/mikemaccana/python-docx/tarball/master). +- Use **pip** or **easy_install** to fetch the **lxml** and **PIL** modules. +- Then run: + +
    example-makedocument.py
    + +Congratulations, you just made and then modified a Word document! + +## Extracting Text from a Document + +If you just want to extract the text from a Word file, run: + + example-extracttext.py 'Some word file.docx' 'new file.txt' + +### Ideas & To Do List + +- Further improvements to image handling +- Document health checks +- Egg +- Markdown conversion support + +### We love forks, changes and pull requests! + +- Check out the [HACKING](HACKING.markdown) to add your own changes! +- Fork this project on github +- Send a pull request via github and we'll add your changes! + +### Want to talk? Need help? + +Email <python.docx@librelist.com>. + +### License + +Licensed under the [MIT license](http://www.opensource.org/licenses/mit-license.php) +Short version: this code is copyrighted to me (Mike MacCana), I give you permission to do what you want with it except remove my name from the credits. See the LICENSE file for specific terms. diff --git a/pydocx/py_docx/SERVING_SUGGESTIONS.markdown b/pydocx/py_docx/SERVING_SUGGESTIONS.markdown new file mode 100644 index 00000000..86e51e48 --- /dev/null +++ b/pydocx/py_docx/SERVING_SUGGESTIONS.markdown @@ -0,0 +1,12 @@ +Serving Suggestions +=================== + +# Mashing docx with other modules + +This is a list of interesting things you could do with Python docx when mashed up with other modules. + +- [LinkedIn Python API](http://code.google.com/p/python-linkedin/) - Auto-build a Word doc whenever some old recruiting dude asks for one. +- [Python Natural Language Toolkit](http://www.nltk.org/) - can analyse text and extract meaning. +- [Lamson](http://lamsonproject.org/) - transparently parse or modify email attachments. + +Any other ideas? Doing something cool you want to tell the world about?
python.docx@librelist.com \ No newline at end of file diff --git a/pydocx/py_docx/__init__.py b/pydocx/py_docx/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pydocx/py_docx/docx.py b/pydocx/py_docx/docx.py new file mode 100755 index 00000000..934701c2 --- /dev/null +++ b/pydocx/py_docx/docx.py @@ -0,0 +1,1270 @@ +#!/usr/bin/env python2.6 +# -*- coding: utf-8 -*- +""" +Open and modify Microsoft Word 2007 docx files (called 'OpenXML' and +'Office OpenXML' by Microsoft) + +Part of Python's docx module - http://github.com/mikemaccana/python-docx +See LICENSE for licensing information. +""" + +import logging +from lxml import etree +from PIL import Image +import zipfile +import shutil +import re +import time +import os +from os.path import join + +log = logging.getLogger(__name__) + +# Record template directory's location which is just 'template' for a docx +# developer or 'site-packages/docx-template' if you have installed docx +template_dir = join(os.path.dirname(__file__), 'docx-template') # installed +if not os.path.isdir(template_dir): + template_dir = join(os.path.dirname(__file__), 'template') # dev + +# All Word prefixes / namespace matches used in document.xml & core.xml. +# LXML doesn't actually use prefixes (just the real namespace) , but these +# make it easier to copy Word output more easily. 
# All Word prefix -> namespace pairs used in document.xml and core.xml.
nsprefixes = {
    'mo': 'http://schemas.microsoft.com/'
          'office/mac/office/2008/main',
    'o': 'urn:schemas-microsoft-com:office:office',
    've': 'http://schemas.openxmlformats.org/'
          'markup-compatibility/2006',
    # Text Content
    'w': 'http://schemas.openxmlformats.org/'
         'wordprocessingml/2006/main',
    'w10': 'urn:schemas-microsoft-com:office:word',
    'wne': 'http://schemas.microsoft.com/office/word/2006/wordml',
    # Drawing
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
    'mv': 'urn:schemas-microsoft-com:mac:vml',
    'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
    'v': 'urn:schemas-microsoft-com:vml',
    'wp': 'http://schemas.openxmlformats.org/'
          'drawingml/2006/wordprocessingDrawing',
    # Properties (core and extended)
    'cp': 'http://schemas.openxmlformats.org/'
          'package/2006/metadata/core-properties',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'ep': 'http://schemas.openxmlformats.org/'
          'officeDocument/2006/extended-properties',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    # Content Types
    'ct': 'http://schemas.openxmlformats.org/'
          'package/2006/content-types',
    # Package Relationships
    'r': 'http://schemas.openxmlformats.org/'
         'officeDocument/2006/relationships',
    'pr': 'http://schemas.openxmlformats.org/'
          'package/2006/relationships',
    # Dublin Core document properties
    'dcmitype': 'http://purl.org/dc/dcmitype/',
    'dcterms': 'http://purl.org/dc/terms/'}


def opendocx(file):
    '''Open a docx file, return a document XML tree.

    The archive is closed again once ``word/document.xml`` has been read
    (it used to be left open).
    '''
    mydoc = zipfile.ZipFile(file)
    try:
        xmlcontent = mydoc.read('word/document.xml')
    finally:
        mydoc.close()
    return etree.fromstring(xmlcontent)


def newdocument():
    '''Return a new ``document`` element containing an empty ``body``.'''
    document = makeelement('document')
    document.append(makeelement('body'))
    return document


def new_numbering():
    '''Return a new, empty ``numbering`` element (needed for lists).'''
    return makeelement('numbering')


def create_list(abstractNum=0):
    '''Return the ``abstractNum`` element the numbering file needs per list.

    @param abstractNum: value of the ``abstractNumId`` attribute.
    '''
    return makeelement(
        'abstractNum', attributes={'abstractNumId': str(abstractNum)})


def create_list_attributes(
        ilvl='0', start='1', type='bullet',
        just='left', left='720', hanging='360'):
    '''Return the ``lvl`` element describing one level of a list.

    @param string ilvl: zero-based indentation level.
    @param string start: first number of the level.
    @param string type: ``'bullet'`` or ``'decimal'``; decimal lists cycle
        through decimal, lowerLetter and lowerRoman by level.
    @param string just: value of ``lvlJc``.
    @param string left: left indent.
    @param string hanging: hanging indent.
    '''
    if type == 'decimal':
        type = ('decimal', 'lowerLetter', 'lowerRoman')[int(ilvl) % 3]
    is_bullet = type == 'bullet'
    if is_bullet:
        lvl_text = u"\u2022"
        fonts = {'ascii': 'Symbol', 'hAnsi': 'Symbol', 'hint': 'default'}
    else:
        # Numbered levels show "<level number>." with a one-based level.
        lvl_text = '%' + str(int(ilvl) + 1) + '.'
        fonts = {'hint': 'default'}
    lvl = makeelement('lvl', attributes={'ilvl': ilvl})
    lvl.append(makeelement('start', attributes={'val': start}))
    lvl.append(makeelement('numFmt', attributes={'val': type}))
    lvl.append(makeelement('lvlText', attributes={'val': lvl_text}))
    lvl.append(makeelement('lvlJc', attributes={'val': just}))
    ppr = makeelement('pPr')
    ppr.append(makeelement(
        'ind', attributes={'left': left, 'hanging': hanging}))
    lvl.append(ppr)
    rpr = makeelement('rPr')
    rpr.append(makeelement('rFonts', attributes=fonts))
    lvl.append(rpr)
    return lvl


def fill_tentative(ilvl, type_lst, left='720'):
    '''Return "tentative" ``lvl`` elements for the levels not yet in use.

    One element is produced for every level from *ilvl* up to and including
    level 8.

    @param ilvl: first level to generate.
    @param string type_lst: ``'decimal'`` for numbered lists; any other value
        is written to ``numFmt`` unchanged.
    @param left: accepted for backwards compatibility only; the indent is
        always ``720 * (level + 1)``.
    '''
    numbers = type_lst == 'decimal'
    bullets = type_lst == 'bullet'
    tentatives = []
    for i in range(int(ilvl), 9):
        # Every third level (2, 5, 8) is right aligned with a shorter
        # hanging indent; for numbered lists those are the lowerRoman levels.
        third = i % 3 == 2
        lvl = makeelement(
            'lvl', attributes={'ilvl': str(i), 'tentative': '1'})
        lvl.append(makeelement('start', attributes={'val': '1'}))
        if numbers:
            num_fmt = ('decimal', 'lowerLetter', 'lowerRoman')[i % 3]
        else:
            num_fmt = type_lst
        lvl.append(makeelement('numFmt', attributes={'val': num_fmt}))
        if bullets:
            lvl_text = u"\u2022"
        else:
            lvl_text = '%' + str(i + 1) + '.'
        lvl.append(makeelement('lvlText', attributes={'val': lvl_text}))
        lvl.append(makeelement(
            'lvlJc', attributes={'val': 'right' if third else 'left'}))
        ppr = makeelement('pPr')
        ppr.append(makeelement('ind', attributes={
            'left': str(720 * (i + 1)),
            'hanging': '180' if third else '360'}))
        lvl.append(ppr)
        if bullets:
            rpr = makeelement('rPr')
            rpr.append(makeelement(
                'rFonts', attributes={'ascii': 'Symbol',
                                      'hAnsi': 'Symbol', 'hint': 'default'}))
            lvl.append(rpr)
        tentatives.append(lvl)
    return tentatives


def create_abstract_IdInfo(numId):
    '''Return the ``num`` element that goes at the end of the numbering file.

    Its ``abstractNumId`` child refers to the abstract list ``numId - 1``.

    @param string numId: one-based list id.
    '''
    num = makeelement('num', attributes={'numId': numId})
    num.append(makeelement(
        'abstractNumId', attributes={'val': str(int(numId) - 1)}))
    return num


def makeelement(
        tagname, tagtext=None, nsprefix='w',
        attributes=None, attrnsprefix=None):
    '''Create an element & return it.

    @param string tagname: local tag name.
    @param string tagtext: optional text content.
    @param nsprefix: key of ``nsprefixes`` for the tag, a list of keys (the
        first one is used for the tag, all are declared), or None for no
        namespace.
    @param dict attributes: attribute name -> value.
    @param string attrnsprefix: key of ``nsprefixes`` for the attributes.
    '''
    namespacemap = None
    if isinstance(nsprefix, list):
        namespacemap = dict(
            (prefix, nsprefixes[prefix]) for prefix in nsprefix)
        # FIXME: rest of code below expects a single prefix
        nsprefix = nsprefix[0]
    namespace = '{' + nsprefixes[nsprefix] + '}' if nsprefix else ''
    newelement = etree.Element(namespace + tagname, nsmap=namespacemap)
    if attributes:
        if attrnsprefix:
            attributenamespace = '{' + nsprefixes[attrnsprefix] + '}'
        elif nsprefix == 'w':
            # Quick hack: it seems every element that has a 'w' nsprefix
            # for its tag uses the same prefix for its attributes.
            attributenamespace = namespace
        else:
            attributenamespace = ''
        for tagattribute in attributes:
            newelement.set(
                attributenamespace + tagattribute, attributes[tagattribute])
    if tagtext:
        newelement.text = tagtext
    return newelement


def pagebreak(type='page', orient='portrait'):
    '''Insert a break, default 'page'.
    See http://openxmldeveloper.org/forums/thread/4075.aspx
    Return our page break element.

    @param string type: 'page' or 'section'.
    @param string orient: 'portrait' or 'landscape'; only used for section
        breaks.
    Raises ValueError for an unknown type and, for section breaks, for an
    unknown orientation (which used to fail with an unbound local).
    '''
    validtypes = ['page', 'section']
    if type not in validtypes:
        tmpl = 'Page break style "%s" not implemented. Valid styles: %s.'
        raise ValueError(tmpl % (type, validtypes))
    page_sizes = {
        'portrait': {'w': '12240', 'h': '15840'},
        'landscape': {'h': '12240', 'w': '15840', 'orient': 'landscape'}}
    if type == 'section' and orient not in page_sizes:
        tmpl = 'Page orientation "%s" not implemented. Valid values: %s.'
        raise ValueError(tmpl % (orient, sorted(page_sizes)))
    break_paragraph = makeelement('p')
    if type == 'page':
        run = makeelement('r')
        run.append(makeelement('br', attributes={'type': type}))
        break_paragraph.append(run)
    else:
        pPr = makeelement('pPr')
        sectPr = makeelement('sectPr')
        sectPr.append(makeelement('pgSz', attributes=page_sizes[orient]))
        pPr.append(sectPr)
        break_paragraph.append(pPr)
    return break_paragraph
+ raise ValueError(tmpl % (type, validtypes)) + pagebreak = makeelement('p') + if type == 'page': + run = makeelement('r') + br = makeelement('br', attributes={'type': type}) + run.append(br) + pagebreak.append(run) + elif type == 'section': + pPr = makeelement('pPr') + sectPr = makeelement('sectPr') + if orient == 'portrait': + pgSz = makeelement('pgSz', attributes={'w': '12240', 'h': '15840'}) + elif orient == 'landscape': + pgSz = makeelement('pgSz', attributes={'h': '12240', 'w': '15840', + 'orient': 'landscape'}) + sectPr.append(pgSz) + pPr.append(sectPr) + pagebreak.append(pPr) + return pagebreak + + +def paragraph(paratext, style='BodyText', + breakbefore=False, jc='left', + is_list=False, ilvl='0', numId='1'): + #added is_list, because justification is included in the numbering + #file for lists, so we need not include it. also ilvl and numId + #is included so that we can nest lists + + '''Make a new paragraph element, containing a run, and some text. + Return the paragraph element. + + @param string jc: Paragraph alignment, possible values: + left, center, right, both (justified), ... + see http://www.schemacentral.com/sc/ooxml/t-w_ST_Jc.html + for a full list + + If paratext is a list, spawn multiple run/text elements. + Support text styles (paratext must then be a list of lists in the form + /