1
+ /*
2
+ * Copyright (c) 2006-, IPD Boehm, Universitaet Karlsruhe (TH) / KIT, by Guido Sautter
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * * Redistributions of source code must retain the above copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ * * Redistributions in binary form must reproduce the above copyright
11
+ * notice, this list of conditions and the following disclaimer in the
12
+ * documentation and/or other materials provided with the distribution.
13
+ * * Neither the name of the Universität Karlsruhe (TH) / KIT nor the
14
+ * names of its contributors may be used to endorse or promote products
15
+ * derived from this software without specific prior written permission.
16
+ *
17
+ * THIS SOFTWARE IS PROVIDED BY UNIVERSITÄT KARLSRUHE (TH) / KIT AND CONTRIBUTORS
18
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
21
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+ */
28
+ package de .uka .ipd .idaho .im .imagine .plugins .basic ;
29
+
30
+ import java .awt .Point ;
31
+ import java .util .Arrays ;
32
+ import java .util .LinkedList ;
33
+
34
+ import de .uka .ipd .idaho .gamta .util .constants .LiteratureConstants ;
35
+ import de .uka .ipd .idaho .gamta .util .imaging .BoundingBox ;
36
+ import de .uka .ipd .idaho .gamta .util .imaging .PageImage ;
37
+ import de .uka .ipd .idaho .im .ImAnnotation ;
38
+ import de .uka .ipd .idaho .im .ImLayoutObject ;
39
+ import de .uka .ipd .idaho .im .ImPage ;
40
+ import de .uka .ipd .idaho .im .ImRegion ;
41
+ import de .uka .ipd .idaho .im .ImWord ;
42
+ import de .uka .ipd .idaho .im .analysis .Imaging ;
43
+ import de .uka .ipd .idaho .im .analysis .Imaging .AnalysisImage ;
44
+ import de .uka .ipd .idaho .im .analysis .Imaging .ImagePartRectangle ;
45
+ import de .uka .ipd .idaho .im .imagine .plugins .AbstractSelectionActionProvider ;
46
+ import de .uka .ipd .idaho .im .util .ImDocumentMarkupPanel ;
47
+ import de .uka .ipd .idaho .im .util .ImDocumentMarkupPanel .SelectionAction ;
48
+ import de .uka .ipd .idaho .im .util .ImUtils ;
49
+
50
+ /**
51
+ * This class provides basic actions for working with text blocks.
52
+ *
53
+ * @author sautter
54
+ */
55
+ public class TextBlockActionProvider extends AbstractSelectionActionProvider implements LiteratureConstants {
56
+
57
+ /** public zero-argument constructor for class loading */
58
+ public TextBlockActionProvider () {}
59
+
60
+ /* (non-Javadoc)
61
+ * @see de.uka.ipd.idaho.goldenGate.plugins.AbstractResourceManager#getPluginName()
62
+ */
63
+ public String getPluginName () {
64
+ return "IM Text Block Actions" ;
65
+ }
66
+
67
+ /* (non-Javadoc)
68
+ * @see de.uka.ipd.idaho.im.imagine.plugins.AbstractSelectionActionProvider#getActions(de.uka.ipd.idaho.im.ImWord, de.uka.ipd.idaho.im.ImWord, de.uka.ipd.idaho.im.util.ImDocumentMarkupPanel)
69
+ */
70
+ public SelectionAction [] getActions (final ImWord start , final ImWord end , ImDocumentMarkupPanel idmp ) {
71
+
72
+ // we strictly work on one page at a time
73
+ if (start .pageId != end .pageId )
74
+ return null ;
75
+
76
+ // we also work on individual text streams only
77
+ if (!start .getTextStreamId ().equals (end .getTextStreamId ()))
78
+ return null ;
79
+
80
+ // line up words
81
+ LinkedList words = new LinkedList ();
82
+ for (ImWord imw = start ; imw != null ; imw = imw .getNextWord ()) {
83
+ words .addLast (imw );
84
+ if (imw == end )
85
+ break ;
86
+ }
87
+
88
+ // return actions
89
+ return this .getActions (((ImWord []) words .toArray (new ImWord [words .size ()])), null , null );
90
+ }
91
+
92
+ /* (non-Javadoc)
93
+ * @see de.uka.ipd.idaho.im.imagine.plugins.AbstractSelectionActionProvider#getActions(java.awt.Point, java.awt.Point, de.uka.ipd.idaho.im.ImPage, de.uka.ipd.idaho.im.util.ImDocumentMarkupPanel)
94
+ */
95
+ public SelectionAction [] getActions (Point start , Point end , ImPage page , ImDocumentMarkupPanel idmp ) {
96
+
97
+ // mark selection
98
+ BoundingBox selectedBox = new BoundingBox (Math .min (start .x , end .x ), Math .max (start .x , end .x ), Math .min (start .y , end .y ), Math .max (start .y , end .y ));
99
+
100
+ // get selected words
101
+ ImWord [] selectedWords = page .getWordsInside (selectedBox );
102
+
103
+ // return actions if selection not empty
104
+ return ((selectedWords .length == 0 ) ? null : this .getActions (selectedWords , page , selectedBox ));
105
+ }
106
+
107
+ private SelectionAction [] getActions (final ImWord [] words , final ImPage page , final BoundingBox selectedBox ) {
108
+ LinkedList actions = new LinkedList ();
109
+
110
+ // mark selected words as a caption
111
+ actions .add (new SelectionAction ("markRegionCaption" , "Mark Caption" , "Mark selected words as a caption." ) {
112
+ public boolean performAction (ImDocumentMarkupPanel invoker ) {
113
+
114
+ // cut out caption
115
+ ImUtils .makeStream (words , ImWord .TEXT_STREAM_TYPE_CAPTION , null );
116
+ ImUtils .orderStream (words , ImUtils .leftRightTopDownOrder );
117
+
118
+ // annotate caption
119
+ ImAnnotation caption = words [0 ].getDocument ().addAnnotation (words [0 ], words [words .length -1 ], CAPTION_TYPE );
120
+ caption .getLastWord ().setNextRelation (ImWord .NEXT_RELATION_PARAGRAPH_END );
121
+ invoker .setAnnotationsPainted (CAPTION_TYPE , true );
122
+ BoundingBox captionBox = ImLayoutObject .getAggregateBox (words );
123
+
124
+ // do we have a table caption or a figure caption?
125
+ boolean isTableCaption = words [0 ].getString ().toLowerCase ().startsWith ("tab" ); // covers most Latin based languages
126
+
127
+ // find possible targets
128
+ ImRegion [] targets ;
129
+ if (isTableCaption )
130
+ targets = page .getRegions (ImRegion .TABLE_TYPE );
131
+ else {
132
+ targets = page .getRegions (ImRegion .IMAGE_TYPE );
133
+ if (targets .length == 0 )
134
+ targets = page .getRegions (ImRegion .BLOCK_ANNOTATION_TYPE );
135
+ }
136
+ if (targets .length == 0 )
137
+ return true ;
138
+
139
+ // assign target
140
+ Arrays .sort (targets , ImUtils .topDownOrder );
141
+ PageImage pi = page .getImage ();
142
+ for (int i = 0 ; i < targets .length ; i ++) {
143
+
144
+ // check vertical alignment
145
+ if (!isTableCaption && (captionBox .top < targets [i ].bounds .bottom ))
146
+ break ; // due to top-down sort order, we won't find any matches from here onward
147
+
148
+ // TODO also search left and right if no images at all above and below
149
+
150
+ // check general alignment
151
+ if (!ImUtils .isCaptionBelowTargetMatch (captionBox , targets [i ].bounds , pi .currentDpi ) && (!isTableCaption || !ImUtils .isCaptionAboveTargetMatch (captionBox , targets [i ].bounds , pi .currentDpi )))
152
+ continue ;
153
+
154
+ // check size and words if using block fallback
155
+ if (ImRegion .BLOCK_ANNOTATION_TYPE .equals (targets [i ].getType ())) {
156
+ if ((targets [i ].bounds .right - targets [i ].bounds .left ) < pi .currentDpi )
157
+ continue ;
158
+ if ((targets [i ].bounds .bottom - targets [i ].bounds .top ) < pi .currentDpi )
159
+ continue ;
160
+ ImWord [] imageWords = targets [i ].getWords ();
161
+ if (imageWords .length != 0 )
162
+ continue ;
163
+ }
164
+
165
+ // link image to caption
166
+ caption .setAttribute (ImAnnotation .CAPTION_TARGET_PAGE_ID_ATTRIBUTE , ("" + targets [i ].pageId ));
167
+ caption .setAttribute (ImAnnotation .CAPTION_TARGET_BOX_ATTRIBUTE , targets [i ].bounds .toString ());
168
+ if (isTableCaption )
169
+ caption .setAttribute ("targetIsTable" );
170
+ break ;
171
+ }
172
+
173
+ // finally ...
174
+ return true ;
175
+ }
176
+ });
177
+
178
+ // mark selected words as a footnote
179
+ actions .add (new SelectionAction ("markRegionFootnote" , "Mark Footnote" , "Mark selected words as a footnote." ) {
180
+ public boolean performAction (ImDocumentMarkupPanel invoker ) {
181
+ ImUtils .makeStream (words , ImWord .TEXT_STREAM_TYPE_FOOTNOTE , null );
182
+ ImUtils .orderStream (words , ImUtils .leftRightTopDownOrder );
183
+ ImAnnotation footnote = words [0 ].getDocument ().addAnnotation (words [0 ], words [words .length -1 ], FOOTNOTE_TYPE );
184
+ footnote .getLastWord ().setNextRelation (ImWord .NEXT_RELATION_PARAGRAPH_END );
185
+ invoker .setAnnotationsPainted (FOOTNOTE_TYPE , true );
186
+ return true ;
187
+ }
188
+ });
189
+
190
+ // mark selected words as a page header
191
+ actions .add (new SelectionAction ("markRegionPageHeader" , "Mark Page Header" , "Mark selected words as a page header or footer." ) {
192
+ public boolean performAction (ImDocumentMarkupPanel invoker ) {
193
+ ImUtils .makeStream (words , ImWord .TEXT_STREAM_TYPE_PAGE_TITLE , null );
194
+ ImUtils .orderStream (words , ImUtils .leftRightTopDownOrder );
195
+ ImAnnotation pageTitle = words [0 ].getDocument ().addAnnotation (words [0 ], words [words .length -1 ], PAGE_TITLE_TYPE );
196
+ pageTitle .getLastWord ().setNextRelation (ImWord .NEXT_RELATION_PARAGRAPH_END );
197
+ invoker .setAnnotationsPainted (PAGE_TITLE_TYPE , true );
198
+ return true ;
199
+ }
200
+ });
201
+
202
+ // mark selected words as a page header
203
+ actions .add (new SelectionAction ("markRegionParenthesis" , "Mark Parenthesis" , "Mark selected words as a parenthesis, e.g. a standalone text box or a note." ) {
204
+ public boolean performAction (ImDocumentMarkupPanel invoker ) {
205
+ ImUtils .makeStream (words , ImWord .TEXT_STREAM_TYPE_MAIN_TEXT , null );
206
+ ImUtils .orderStream (words , ImUtils .leftRightTopDownOrder );
207
+ ImAnnotation pageTitle = words [0 ].getDocument ().addAnnotation (words [0 ], words [words .length -1 ], PARENTHESIS_TYPE );
208
+ pageTitle .getLastWord ().setNextRelation (ImWord .NEXT_RELATION_PARAGRAPH_END );
209
+ invoker .setAnnotationsPainted (PARENTHESIS_TYPE , true );
210
+ return true ;
211
+ }
212
+ });
213
+
214
+ // mark selected words as an artifact
215
+ actions .add (new SelectionAction ("markRegionArtifact" , "Mark Artifact" , "Mark selected words as an OCR or layout artifact." ) {
216
+ public boolean performAction (ImDocumentMarkupPanel invoker ) {
217
+ ImUtils .makeStream (words , ImWord .TEXT_STREAM_TYPE_ARTIFACT , null );
218
+ ImUtils .orderStream (words , ImUtils .leftRightTopDownOrder );
219
+ for (ImWord imw = words [words .length -1 ]; imw != null ; imw = imw .getPreviousWord ())
220
+ imw .setNextWord (null );
221
+ return true ;
222
+ }
223
+ });
224
+
225
+ // mark selected non-white area as image (case without words comes from region actions)
226
+ if ((page != null ) && (selectedBox != null ))
227
+ actions .add (new SelectionAction ("markRegionImage" , "Mark Image" , "Mark selected region as an image." ) {
228
+ public boolean performAction (ImDocumentMarkupPanel invoker ) {
229
+
230
+ // remove words
231
+ ImUtils .makeStream (words , ImWord .TEXT_STREAM_TYPE_ARTIFACT , null );
232
+ ImUtils .orderStream (words , ImUtils .leftRightTopDownOrder );
233
+ for (ImWord imw = words [words .length -1 ]; imw != null ; imw = imw .getPreviousWord ())
234
+ imw .setNextWord (null );
235
+ for (int w = 0 ; w < words .length ; w ++)
236
+ page .removeWord (words [w ], true );
237
+
238
+ // shrink selection
239
+ PageImage pi = page .getImage ();
240
+ AnalysisImage ai = Imaging .wrapImage (pi .image , null );
241
+ ImagePartRectangle ipr = Imaging .getContentBox (ai );
242
+ ImagePartRectangle selectedIpr = ipr .getSubRectangle (Math .max (selectedBox .left , page .bounds .left ), Math .min (selectedBox .right , page .bounds .right ), Math .max (selectedBox .top , page .bounds .top ), Math .min (selectedBox .bottom , page .bounds .bottom ));
243
+ selectedIpr = Imaging .narrowLeftAndRight (selectedIpr );
244
+ selectedIpr = Imaging .narrowTopAndBottom (selectedIpr );
245
+ BoundingBox imageBox = new BoundingBox (selectedIpr .getLeftCol (), selectedIpr .getRightCol (), selectedIpr .getTopRow (), selectedIpr .getBottomRow ());
246
+
247
+ // clean up nested regions
248
+ ImRegion [] selectedRegions = page .getRegionsInside (selectedBox , true );
249
+ for (int r = 0 ; r < selectedRegions .length ; r ++) {
250
+ if (!imageBox .liesIn (selectedRegions [r ].bounds , false ))
251
+ page .removeRegion (selectedRegions [r ]);
252
+ }
253
+
254
+ // mark image
255
+ ImRegion image = new ImRegion (page , imageBox , ImRegion .IMAGE_TYPE );
256
+ invoker .setRegionsPainted (ImRegion .IMAGE_TYPE , true );
257
+
258
+ // get potential captions
259
+ ImAnnotation [] captionAnnots = ImUtils .findCaptions (image , false , true , true );
260
+
261
+ // try setting attributes in unassigned captions first
262
+ for (int a = 0 ; a < captionAnnots .length ; a ++) {
263
+ if (captionAnnots [a ].hasAttribute (ImAnnotation .CAPTION_TARGET_PAGE_ID_ATTRIBUTE ) || captionAnnots [a ].hasAttribute (ImAnnotation .CAPTION_TARGET_BOX_ATTRIBUTE ))
264
+ continue ;
265
+ captionAnnots [a ].setAttribute (ImAnnotation .CAPTION_TARGET_PAGE_ID_ATTRIBUTE , ("" + image .pageId ));
266
+ captionAnnots [a ].setAttribute (ImAnnotation .CAPTION_TARGET_BOX_ATTRIBUTE , image .bounds .toString ());
267
+ return true ;
268
+ }
269
+
270
+ // set attributes in any caption (happens if user corrects, for instance)
271
+ if (captionAnnots .length != 0 ) {
272
+ captionAnnots [0 ].setAttribute (ImAnnotation .CAPTION_TARGET_PAGE_ID_ATTRIBUTE , ("" + image .pageId ));
273
+ captionAnnots [0 ].setAttribute (ImAnnotation .CAPTION_TARGET_BOX_ATTRIBUTE , image .bounds .toString ());
274
+ }
275
+
276
+ // finally ...
277
+ return true ;
278
+ }
279
+ });
280
+
281
+ // finally ...
282
+ return ((SelectionAction []) actions .toArray (new SelectionAction [actions .size ()]));
283
+ }
284
+ }
0 commit comments