@@ -71,19 +71,19 @@ Merger::TryNextMerge() {
7171 return std::nullopt ;
7272}
7373
74- Status Merger::MoveSegmentsToInitFont () {
75- if (!strategy_.InitFontMergeThreshold ().has_value ()) {
76- return absl::FailedPreconditionError (
77- " Cannot be called when there is no merge threshold configured." );
78- }
74+ SegmentSet Merger::InitFontSegmentsToCheck (const SegmentSet& inscope) const {
75+ SegmentSet to_check = inscope;
7976
80- VLOG (0 ) << " Checking if there are any segments which should be moved into "
81- " the initial font." ;
77+ SegmentSet excluded = CutoffSegments ();
78+ // Shared segments aren't subject to optimization cutoff. So only exclude
79+ // those in inscope_segments_ (which is all of the non-shared segments)
80+ excluded.intersect (inscope_segments_);
81+ to_check.subtract (excluded);
8282
83- // TODO(garretrieger): This implementation can be further optimized:
84- // - Make ReassignInitSegment() an incremental update instead of a full
85- // reprocessing.
83+ return to_check;
84+ }
8685
86+ SegmentSet Merger::InitFontApplyProbabilityThreshold () const {
8787 SegmentSet below_threshold;
8888 if (strategy_.InitFontMergeProbabilityThreshold ().has_value ()) {
8989 for (segment_index_t s : inscope_segments_for_init_move_) {
@@ -96,60 +96,120 @@ Status Merger::MoveSegmentsToInitFont() {
9696
9797 SegmentSet inscope = inscope_segments_for_init_move_;
9898 inscope.subtract (below_threshold);
99- VLOG (0 ) << inscope.size () << " inscope segments, " << below_threshold.size () << " skipped for being below the probability threshold." ;
10099
101- do {
102- SegmentSet to_check = inscope;
100+ VLOG (0 ) << inscope.size () << " inscope segments, " << below_threshold.size ()
101+ << " skipped for being below the probability threshold." ;
102+ return inscope;
103+ }
104+
105+ btree_map<ActivationCondition, GlyphSet> Merger::InitFontConditionsToCheck (
106+ const SegmentSet& to_check, bool batch_mode) const {
107+ // We only want to check conditions that use at least one segment which is
108+ // inscope for moving to the init font.
109+ btree_map<ActivationCondition, GlyphSet> conditions;
110+ for (segment_index_t s : to_check) {
111+ for (const auto & c :
112+ Context ().glyph_groupings .TriggeringSegmentToConditions (s)) {
113+ if (conditions.contains (c)) {
114+ continue ;
115+ }
116+
117+ if (batch_mode) {
118+ SegmentSet triggering_segments = c.TriggeringSegments ();
119+ if (triggering_segments.size () != 1 ||
120+ !Context ().InertSegments ().contains (*triggering_segments.begin ())) {
121+ // Non-inert conditions are skipped during the batch processing.
122+ continue ;
123+ }
124+ }
125+
126+ GlyphSet glyphs = Context ().glyph_groupings .ConditionsAndGlyphs ().at (c);
127+ conditions.insert (std::make_pair (c, glyphs));
128+ }
129+ }
130+ return conditions;
131+ }
132+
133+ Status Merger::MoveSegmentsToInitFont () {
134+ if (!strategy_.InitFontMergeThreshold ().has_value ()) {
135+ return absl::FailedPreconditionError (
136+ " Cannot be called when there is no merge threshold configured." );
137+ }
103138
104- SegmentSet excluded = CutoffSegments ();
105- // Shared segments aren't subject to optimization cutoff. So only exclude
106- // those in inscope_segments_ (which is all of the non-shared segments)
107- excluded.intersect (inscope_segments_);
139+ VLOG (0 ) << " Checking if there are any segments which should be moved into "
140+ " the initial font." ;
108141
109- to_check.subtract (excluded);
142+ SegmentSet inscope = InitFontApplyProbabilityThreshold ();
143+
144+ // Init move processing works in two phases:
145+ //
146+ // First is batch mode. In batch mode only inert segments are checked
147+ // for move. Any segments that are below the threshold are moved to the
148+ // init font in a single operation. Because inert segments are not
149+ // expected to interact we don't need to reform the closure analysis
150+ // after each individual move to get an accurate cost delta.
151+ //
152+ // Once batch processing has no more moves left, the processing switches
153+ // to non-batch processing where all candidate conditions are checked
154+ // and moved one at a time.
155+
156+ bool batch_mode = true ;
157+ VLOG (0 ) << " batch checking inert segments for move to init font." ;
158+ do {
159+ SegmentSet to_check = InitFontSegmentsToCheck (inscope);
110160
111161 uint32_t init_font_size = TRY (Context ().patch_size_cache ->GetPatchSize (
112162 Context ().SegmentationInfo ().InitFontGlyphs ()));
113163
164+ double total_delta = 0.0 ;
114165 double lowest_delta = *strategy_.InitFontMergeThreshold ();
115166 std::optional<GlyphSet> glyphs_for_lowest = std::nullopt ;
116167
117- // We only want to check conditions that use at least one segment which is
118- // inscope for moving to the init font.
119- btree_map<ActivationCondition, GlyphSet> conditions;
120- for (segment_index_t s : to_check) {
121- for (const auto & c : Context ().glyph_groupings .TriggeringSegmentToConditions (s)) {
122- if (conditions.contains (c)) {
123- continue ;
124- }
125-
126- GlyphSet glyphs = Context ().glyph_groupings .ConditionsAndGlyphs ().at (c);
127- conditions.insert (std::make_pair (c, glyphs));
128- }
129- }
168+ btree_map<ActivationCondition, GlyphSet> conditions =
169+ InitFontConditionsToCheck (to_check, batch_mode);
130170
131171 for (const auto & [condition, glyphs] : conditions) {
132- double best_case_delta = TRY (CandidateMerge::ComputeInitFontCostDelta (
172+ auto [ best_case_delta, _] = TRY (CandidateMerge::ComputeInitFontCostDelta (
133173 *this , init_font_size, true , glyphs));
134174 if (best_case_delta >= lowest_delta) {
135175 // Filter by best case first which is much faster to compute.
136176 continue ;
137177 }
138178
139- double delta = TRY (CandidateMerge::ComputeInitFontCostDelta (
179+ auto [ delta, all_glyphs] = TRY (CandidateMerge::ComputeInitFontCostDelta (
140180 *this , init_font_size, false , glyphs));
141- if (delta < lowest_delta) {
181+ if (delta >= lowest_delta) {
182+ continue ;
183+ }
184+
185+ if (batch_mode) {
186+ // In batch mode we accept any merges under the threshold instead of
187+ // finding the lowest.
188+ if (!glyphs_for_lowest.has_value ()) {
189+ glyphs_for_lowest = GlyphSet{};
190+ }
191+ total_delta += delta;
192+ glyphs_for_lowest->union_set (all_glyphs);
193+ } else {
142194 lowest_delta = delta;
143- glyphs_for_lowest = glyphs;
195+ total_delta = delta;
196+ glyphs_for_lowest = all_glyphs;
144197 }
145198 }
146199
147200 if (!glyphs_for_lowest.has_value ()) {
148- // No more moves to make.
149- break ;
201+ if (batch_mode) {
202+ // Batch mode processing done, move on to non-batch processing.
203+ batch_mode = false ;
204+ VLOG (0 ) << " switching to checking individually." ;
205+ continue ;
206+ } else {
207+ // No more moves to make.
208+ break ;
209+ }
150210 }
151211
152- TRYV (ApplyInitFontMove (*glyphs_for_lowest, lowest_delta ));
212+ TRYV (ApplyInitFontMove (*glyphs_for_lowest, total_delta ));
153213 } while (true );
154214
155215 VLOG (0 ) << " Initial font now has "
@@ -179,10 +239,21 @@ SegmentSet Merger::ComputeCandidateSegments(
179239 SegmentationContext& context, const MergeStrategy& strategy,
180240 const common::SegmentSet& inscope_segments) {
181241 SegmentSet candidate_segments;
182- for (unsigned i = 0 ; i < context.SegmentationInfo ().Segments ().size (); i++) {
183- if (!context.SegmentationInfo ().Segments ()[i].Definition ().Empty () &&
184- inscope_segments.contains (i)) {
185- candidate_segments.insert (i);
242+
243+ if (inscope_segments.size () < context.SegmentationInfo ().Segments ().size ()) {
244+ for (segment_index_t s : inscope_segments) {
245+ if (s < context.SegmentationInfo ().Segments ().size () &&
246+ !context.SegmentationInfo ().Segments ()[s].Definition ().Empty ()) {
247+ candidate_segments.insert (s);
248+ }
249+ }
250+ } else {
251+ for (segment_index_t s = 0 ;
252+ s < context.SegmentationInfo ().Segments ().size (); s++) {
253+ if (inscope_segments.contains (s) &&
254+ !context.SegmentationInfo ().Segments ()[s].Definition ().Empty ()) {
255+ candidate_segments.insert (s);
256+ }
186257 }
187258 }
188259
0 commit comments