Add optimized path to except segment computation.

garretrieger · garretrieger · commit 2b47beb23d7c · 2025-10-22T21:06:56.000Z
The previous implementation could be slow in fonts with a large number of segments, since it is O(total number of segments - number of segment ids); the fast path is roughly O(number of segment ids). Also includes optimizations to the feature tag subtraction implementation.
diff --git a/ift/encoder/closure_glyph_segmenter.cc b/ift/encoder/closure_glyph_segmenter.cc
@@ -21,11 +21,9 @@
 #include "common/int_set.h"
 #include "common/try.h"
 #include "common/woff2.h"
-#include "ift/encoder/activation_condition.h"
 #include "ift/encoder/glyph_segmentation.h"
 #include "ift/encoder/merge_strategy.h"
 #include "ift/encoder/merger.h"
-#include "ift/encoder/patch_size_cache.h"
 #include "ift/encoder/segmentation_context.h"
 #include "ift/encoder/subset_definition.h"
 #include "ift/encoder/types.h"
diff --git a/ift/encoder/glyph_closure_cache.cc b/ift/encoder/glyph_closure_cache.cc
@@ -4,6 +4,7 @@
 #include "common/int_set.h"
 #include "common/try.h"
 #include "ift/encoder/requested_segmentation_information.h"
+#include "ift/encoder/subset_definition.h"
 #include "ift/encoder/types.h"
 
 using absl::Status;
@@ -63,6 +64,37 @@ StatusOr<GlyphSet> GlyphClosureCache::CodepointsToOrGids(
   return or_gids;
 }
 
+// This generates the subset definition that contains all segments except for
+// those listed in segment_ids.
+SubsetDefinition ComputExceptSegment(
+    const RequestedSegmentationInformation& segmentation_info,
+    const SegmentSet& segment_ids, const SubsetDefinition& combined) {
+  if (segmentation_info.SegmentsAreDisjoint() &&
+      (segment_ids.size() == 1 ||
+       segment_ids.size() < (segmentation_info.Segments().size() / 2))) {
+    // Approach that is optimized for the case where input segments are disjoint
+    // and the number of segment ids is smallish.
+    SubsetDefinition except_segment = segmentation_info.FullDefinition();
+    except_segment.Subtract(combined);
+    return except_segment;
+  }
+
+  // Otherwise this approach will always work even with non-disjoint segments
+  SegmentSet except_segment_ids = segment_ids;
+  except_segment_ids.invert();
+
+  uint32_t num_segments = segmentation_info.Segments().size();
+  SubsetDefinition except_segment = segmentation_info.InitFontSegment();
+  for (segment_index_t s : except_segment_ids) {
+    if (s >= num_segments) {
+      break;
+    }
+    except_segment.Union(segmentation_info.Segments()[s].Definition());
+  }
+
+  return except_segment;
+}
+
 Status GlyphClosureCache::AnalyzeSegment(
     const RequestedSegmentationInformation& segmentation_info,
     const SegmentSet& segment_ids, GlyphSet& and_gids, GlyphSet& or_gids,
@@ -95,20 +127,19 @@ Status GlyphClosureCache::AnalyzeSegment(
   // * I - D: the activation conditions for these glyphs is s_i OR …
   //          Where … is one or more additional segments.
   // * D intersection I: the activation conditions for these glyphs is only s_i
-  SubsetDefinition except_segment = segmentation_info.InitFontSegment();
-  for (uint32_t s = 0; s < segmentation_info.Segments().size(); s++) {
-    if (segment_ids.contains(s)) {
-      continue;
-    }
-    except_segment.Union(segmentation_info.Segments()[s].Definition());
+
+  SubsetDefinition
+      combined;  // This is the subset definition of the unions of segment_ids.
+  for (segment_index_t s_id : segment_ids) {
+    combined.Union(segmentation_info.Segments()[s_id].Definition());
   }
 
+  SubsetDefinition except_segment =
+      ComputExceptSegment(segmentation_info, segment_ids, combined);
   auto B_except_segment_closure = TRY(GlyphClosure(except_segment));
 
-  SubsetDefinition only_segment = segmentation_info.InitFontSegment();
-  for (segment_index_t s_id : segment_ids) {
-    only_segment.Union(segmentation_info.Segments()[s_id].Definition());
-  }
+  SubsetDefinition only_segment = combined;
+  only_segment.Union(segmentation_info.InitFontSegment());
 
   auto I_only_segment_closure = TRY(GlyphClosure(only_segment));
   I_only_segment_closure.subtract(segmentation_info.InitFontGlyphs());
diff --git a/ift/encoder/requested_segmentation_information.cc b/ift/encoder/requested_segmentation_information.cc
@@ -11,6 +11,24 @@ RequestedSegmentationInformation::RequestedSegmentationInformation(
     GlyphClosureCache& closure_cache)
     : segments_(std::move(segments)), init_font_segment_() {
   ReassignInitSubset(closure_cache, std::move(init_font_segment));
+
+  segments_disjoint_ = true;
+
+  full_definition_ = init_font_segment_;
+  for (const auto& s : segments_) {
+    const auto& def = s.Definition();
+    if (segments_disjoint_) {
+      for (hb_tag_t tag : def.feature_tags) {
+        if (full_definition_.feature_tags.contains(tag)) {
+          segments_disjoint_ = false;
+        }
+      }
+      segments_disjoint_ =
+          segments_disjoint_ &&
+          !full_definition_.codepoints.intersects(def.codepoints);
+    }
+    full_definition_.Union(s.Definition());
+  }
 }
 
 }  // namespace ift::encoder
diff --git a/ift/encoder/requested_segmentation_information.h b/ift/encoder/requested_segmentation_information.h
@@ -83,6 +83,10 @@ class RequestedSegmentationInformation {
 
   const common::GlyphSet& FullClosure() const { return full_closure_; }
 
+  const SubsetDefinition& FullDefinition() const { return full_definition_; }
+
+  bool SegmentsAreDisjoint() const { return segments_disjoint_; }
+
   const std::vector<Segment>& Segments() const { return segments_; }
 
   const std::vector<SubsetDefinition> SegmentSubsetDefinitions() const {
@@ -125,8 +129,10 @@ class RequestedSegmentationInformation {
 
   std::vector<Segment> segments_;
   SubsetDefinition init_font_segment_;
+  SubsetDefinition full_definition_;
   common::GlyphSet init_font_glyphs_;
   common::GlyphSet full_closure_;
+  bool segments_disjoint_;
 };
 
 }  // namespace ift::encoder
diff --git a/ift/encoder/segmentation_context.h b/ift/encoder/segmentation_context.h
@@ -176,14 +176,16 @@ class SegmentationContext {
   // too small to be worthwhile.
   absl::StatusOr<segment_index_t> ComputeSegmentCutoff() const;
 
-  static std::unique_ptr<PatchSizeCache> NewPatchSizeCache(hb_face_t* face, uint32_t brotli_quality) {
+  static std::unique_ptr<PatchSizeCache> NewPatchSizeCache(
+      hb_face_t* face, uint32_t brotli_quality) {
     if (brotli_quality == 0) {
       auto cache = EstimatedPatchSizeCache::New(face);
       if (cache.ok()) {
         return std::move(*cache);
       }
     }
-    return std::unique_ptr<PatchSizeCache>(new PatchSizeCacheImpl(face, brotli_quality));
+    return std::unique_ptr<PatchSizeCache>(
+        new PatchSizeCacheImpl(face, brotli_quality));
   }
 
  public:
diff --git a/ift/encoder/subset_definition.cc b/ift/encoder/subset_definition.cc
@@ -79,14 +79,23 @@ void PrintTo(const SubsetDefinition& def, std::ostream* os) {
 }
 
 template <typename S>
-S subtract(const S& a, const S& b) {
-  S c;
-  for (uint32_t v : a) {
-    if (!b.contains(v)) {
-      c.insert(v);
+void subtract_sets(S& a, const S& b) {
+  // Depending on which set is bigger use the implementation
+  // that iterates the fewest elements.
+  if (a.size() < b.size()) {
+    for (auto it = a.begin(); it != a.end();) {
+      if (b.contains(*it)) {
+        it = a.erase(it);
+      } else {
+        ++it;
+      }
     }
+    return;
+  }
+
+  for (uint32_t v : b) {
+    a.erase(v);
   }
-  return c;
 }
 
 std::optional<AxisRange> subtract(const AxisRange& a, const AxisRange& b) {
@@ -143,7 +152,7 @@ design_space_t subtract(const design_space_t& a, const design_space_t& b) {
 void SubsetDefinition::Subtract(const SubsetDefinition& other) {
   codepoints.subtract(other.codepoints);
   gids.subtract(other.gids);
-  feature_tags = subtract(feature_tags, other.feature_tags);
+  subtract_sets(feature_tags, other.feature_tags);
   design_space = subtract(design_space, other.design_space);
 }
 
diff --git a/ift/encoder/subset_definition_test.cc b/ift/encoder/subset_definition_test.cc
@@ -141,18 +141,28 @@ TEST_F(SubsetDefinitionTest, Subtraction) {
   {
     SubsetDefinition a{1, 2, 3, 4};
     a.gids = {7, 8, 9};
-    a.feature_tags = {HB_TAG('f', 'o', 'o', ' '), HB_TAG('b', 'a', 'r', ' ')};
+    a.feature_tags = {HB_TAG('f', 'o', 'o', ' '), HB_TAG('b', 'a', 'r', ' '),
+                      HB_TAG('b', 'a', 'z', ' ')};
 
     SubsetDefinition b{3, 5, 6};
     b.gids = {8, 10};
-    b.feature_tags = {HB_TAG('f', 'o', 'o', ' ')};
+    b.feature_tags = {HB_TAG('f', 'o', 'o', ' '), HB_TAG('a', 'b', 'c', 'd')};
 
     SubsetDefinition c{1, 2, 4};
     c.gids = {7, 9};
-    c.feature_tags = {HB_TAG('b', 'a', 'r', ' ')};
+    c.feature_tags = {HB_TAG('b', 'a', 'r', ' '), HB_TAG('b', 'a', 'z', ' ')};
 
-    a.Subtract(b);
-    ASSERT_EQ(a, c);
+    SubsetDefinition def = a;
+    def.Subtract(b);
+    ASSERT_EQ(def, c);
+
+    SubsetDefinition d{5, 6};
+    d.gids = {10};
+    d.feature_tags = {HB_TAG('a', 'b', 'c', 'd')};
+
+    def = b;
+    def.Subtract(a);
+    ASSERT_EQ(def, d);
   }
 }
 

Original file line number	Diff line number	Diff line change
`@@ -176,14 +176,16 @@ class SegmentationContext {`
`176`	`176`	`// too small to be worthwhile.`
`177`	`177`	`absl::StatusOr<segment_index_t> ComputeSegmentCutoff() const;`
`178`	`178`
`179`		`- static std::unique_ptr<PatchSizeCache> NewPatchSizeCache(hb_face_t* face, uint32_t brotli_quality) {`
	`179`	`+ static std::unique_ptr<PatchSizeCache> NewPatchSizeCache(`
	`180`	`+ hb_face_t* face, uint32_t brotli_quality) {`
`180`	`181`	`if (brotli_quality == 0) {`
`181`	`182`	`auto cache = EstimatedPatchSizeCache::New(face);`
`182`	`183`	`if (cache.ok()) {`
`183`	`184`	`return std::move(*cache);`
`184`	`185`	`}`
`185`	`186`	`}`
`186`		`- return std::unique_ptr<PatchSizeCache>(new PatchSizeCacheImpl(face, brotli_quality));`
	`187`	`+ return std::unique_ptr<PatchSizeCache>(`
	`188`	`+ new PatchSizeCacheImpl(face, brotli_quality));`
`187`	`189`	`}`
`188`	`190`
`189`	`191`	`public:`