Skip to content

Commit 2b47beb

Browse files
committed
Add optimzied path to except segment computation.
The previous implementation could be slow in fonts with a large number of segments since it is O(total number of segments - segments ids), the fast path is roughly O(segment ids). Also includes optimizations to feature tag subtraction implementation.
1 parent 65673fe commit 2b47beb

File tree

7 files changed

+100
-26
lines changed

7 files changed

+100
-26
lines changed

ift/encoder/closure_glyph_segmenter.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,9 @@
2121
#include "common/int_set.h"
2222
#include "common/try.h"
2323
#include "common/woff2.h"
24-
#include "ift/encoder/activation_condition.h"
2524
#include "ift/encoder/glyph_segmentation.h"
2625
#include "ift/encoder/merge_strategy.h"
2726
#include "ift/encoder/merger.h"
28-
#include "ift/encoder/patch_size_cache.h"
2927
#include "ift/encoder/segmentation_context.h"
3028
#include "ift/encoder/subset_definition.h"
3129
#include "ift/encoder/types.h"

ift/encoder/glyph_closure_cache.cc

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "common/int_set.h"
55
#include "common/try.h"
66
#include "ift/encoder/requested_segmentation_information.h"
7+
#include "ift/encoder/subset_definition.h"
78
#include "ift/encoder/types.h"
89

910
using absl::Status;
@@ -63,6 +64,37 @@ StatusOr<GlyphSet> GlyphClosureCache::CodepointsToOrGids(
6364
return or_gids;
6465
}
6566

67+
// This generates the subset definition that contains all segments except for
68+
// those listed in segment_ids.
69+
SubsetDefinition ComputExceptSegment(
70+
const RequestedSegmentationInformation& segmentation_info,
71+
const SegmentSet& segment_ids, const SubsetDefinition& combined) {
72+
if (segmentation_info.SegmentsAreDisjoint() &&
73+
(segment_ids.size() == 1 ||
74+
segment_ids.size() < (segmentation_info.Segments().size() / 2))) {
75+
// Approach that is optimzied for the case where input segments are disjoint
76+
// and the number of segment ids is smallish.
77+
SubsetDefinition except_segment = segmentation_info.FullDefinition();
78+
except_segment.Subtract(combined);
79+
return except_segment;
80+
}
81+
82+
// Otherwise this approach will always work even with non-disjoint segments
83+
SegmentSet except_segment_ids = segment_ids;
84+
except_segment_ids.invert();
85+
86+
uint32_t num_segments = segmentation_info.Segments().size();
87+
SubsetDefinition except_segment = segmentation_info.InitFontSegment();
88+
for (segment_index_t s : except_segment_ids) {
89+
if (s >= num_segments) {
90+
break;
91+
}
92+
except_segment.Union(segmentation_info.Segments()[s].Definition());
93+
}
94+
95+
return except_segment;
96+
}
97+
6698
Status GlyphClosureCache::AnalyzeSegment(
6799
const RequestedSegmentationInformation& segmentation_info,
68100
const SegmentSet& segment_ids, GlyphSet& and_gids, GlyphSet& or_gids,
@@ -95,20 +127,19 @@ Status GlyphClosureCache::AnalyzeSegment(
95127
// * I - D: the activation conditions for these glyphs is s_i OR …
96128
// Where … is one or more additional segments.
97129
// * D intersection I: the activation conditions for these glyphs is only s_i
98-
SubsetDefinition except_segment = segmentation_info.InitFontSegment();
99-
for (uint32_t s = 0; s < segmentation_info.Segments().size(); s++) {
100-
if (segment_ids.contains(s)) {
101-
continue;
102-
}
103-
except_segment.Union(segmentation_info.Segments()[s].Definition());
130+
131+
SubsetDefinition
132+
combined; // This is the subset definition of the unions of segment_ids.
133+
for (segment_index_t s_id : segment_ids) {
134+
combined.Union(segmentation_info.Segments()[s_id].Definition());
104135
}
105136

137+
SubsetDefinition except_segment =
138+
ComputExceptSegment(segmentation_info, segment_ids, combined);
106139
auto B_except_segment_closure = TRY(GlyphClosure(except_segment));
107140

108-
SubsetDefinition only_segment = segmentation_info.InitFontSegment();
109-
for (segment_index_t s_id : segment_ids) {
110-
only_segment.Union(segmentation_info.Segments()[s_id].Definition());
111-
}
141+
SubsetDefinition only_segment = combined;
142+
only_segment.Union(segmentation_info.InitFontSegment());
112143

113144
auto I_only_segment_closure = TRY(GlyphClosure(only_segment));
114145
I_only_segment_closure.subtract(segmentation_info.InitFontGlyphs());

ift/encoder/requested_segmentation_information.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,24 @@ RequestedSegmentationInformation::RequestedSegmentationInformation(
1111
GlyphClosureCache& closure_cache)
1212
: segments_(std::move(segments)), init_font_segment_() {
1313
ReassignInitSubset(closure_cache, std::move(init_font_segment));
14+
15+
segments_disjoint_ = true;
16+
17+
full_definition_ = init_font_segment_;
18+
for (const auto& s : segments_) {
19+
const auto& def = s.Definition();
20+
if (segments_disjoint_) {
21+
for (hb_tag_t tag : def.feature_tags) {
22+
if (full_definition_.feature_tags.contains(tag)) {
23+
segments_disjoint_ = false;
24+
}
25+
}
26+
segments_disjoint_ =
27+
segments_disjoint_ &&
28+
!full_definition_.codepoints.intersects(def.codepoints);
29+
}
30+
full_definition_.Union(s.Definition());
31+
}
1432
}
1533

1634
} // namespace ift::encoder

ift/encoder/requested_segmentation_information.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ class RequestedSegmentationInformation {
8383

8484
const common::GlyphSet& FullClosure() const { return full_closure_; }
8585

86+
const SubsetDefinition& FullDefinition() const { return full_definition_; }
87+
88+
bool SegmentsAreDisjoint() const { return segments_disjoint_; }
89+
8690
const std::vector<Segment>& Segments() const { return segments_; }
8791

8892
const std::vector<SubsetDefinition> SegmentSubsetDefinitions() const {
@@ -125,8 +129,10 @@ class RequestedSegmentationInformation {
125129

126130
std::vector<Segment> segments_;
127131
SubsetDefinition init_font_segment_;
132+
SubsetDefinition full_definition_;
128133
common::GlyphSet init_font_glyphs_;
129134
common::GlyphSet full_closure_;
135+
bool segments_disjoint_;
130136
};
131137

132138
} // namespace ift::encoder

ift/encoder/segmentation_context.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,14 +176,16 @@ class SegmentationContext {
176176
// too small to be worthwhile.
177177
absl::StatusOr<segment_index_t> ComputeSegmentCutoff() const;
178178

179-
static std::unique_ptr<PatchSizeCache> NewPatchSizeCache(hb_face_t* face, uint32_t brotli_quality) {
179+
static std::unique_ptr<PatchSizeCache> NewPatchSizeCache(
180+
hb_face_t* face, uint32_t brotli_quality) {
180181
if (brotli_quality == 0) {
181182
auto cache = EstimatedPatchSizeCache::New(face);
182183
if (cache.ok()) {
183184
return std::move(*cache);
184185
}
185186
}
186-
return std::unique_ptr<PatchSizeCache>(new PatchSizeCacheImpl(face, brotli_quality));
187+
return std::unique_ptr<PatchSizeCache>(
188+
new PatchSizeCacheImpl(face, brotli_quality));
187189
}
188190

189191
public:

ift/encoder/subset_definition.cc

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,23 @@ void PrintTo(const SubsetDefinition& def, std::ostream* os) {
7979
}
8080

8181
template <typename S>
82-
S subtract(const S& a, const S& b) {
83-
S c;
84-
for (uint32_t v : a) {
85-
if (!b.contains(v)) {
86-
c.insert(v);
82+
void subtract_sets(S& a, const S& b) {
83+
// Depending on which set is bigger use the implementation
84+
// that iterates the fewest elements.
85+
if (a.size() < b.size()) {
86+
for (auto it = a.begin(); it != a.end();) {
87+
if (b.contains(*it)) {
88+
it = a.erase(it);
89+
} else {
90+
++it;
91+
}
8792
}
93+
return;
94+
}
95+
96+
for (uint32_t v : b) {
97+
a.erase(v);
8898
}
89-
return c;
9099
}
91100

92101
std::optional<AxisRange> subtract(const AxisRange& a, const AxisRange& b) {
@@ -143,7 +152,7 @@ design_space_t subtract(const design_space_t& a, const design_space_t& b) {
143152
void SubsetDefinition::Subtract(const SubsetDefinition& other) {
144153
codepoints.subtract(other.codepoints);
145154
gids.subtract(other.gids);
146-
feature_tags = subtract(feature_tags, other.feature_tags);
155+
subtract_sets(feature_tags, other.feature_tags);
147156
design_space = subtract(design_space, other.design_space);
148157
}
149158

ift/encoder/subset_definition_test.cc

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,18 +141,28 @@ TEST_F(SubsetDefinitionTest, Subtraction) {
141141
{
142142
SubsetDefinition a{1, 2, 3, 4};
143143
a.gids = {7, 8, 9};
144-
a.feature_tags = {HB_TAG('f', 'o', 'o', ' '), HB_TAG('b', 'a', 'r', ' ')};
144+
a.feature_tags = {HB_TAG('f', 'o', 'o', ' '), HB_TAG('b', 'a', 'r', ' '),
145+
HB_TAG('b', 'a', 'z', ' ')};
145146

146147
SubsetDefinition b{3, 5, 6};
147148
b.gids = {8, 10};
148-
b.feature_tags = {HB_TAG('f', 'o', 'o', ' ')};
149+
b.feature_tags = {HB_TAG('f', 'o', 'o', ' '), HB_TAG('a', 'b', 'c', 'd')};
149150

150151
SubsetDefinition c{1, 2, 4};
151152
c.gids = {7, 9};
152-
c.feature_tags = {HB_TAG('b', 'a', 'r', ' ')};
153+
c.feature_tags = {HB_TAG('b', 'a', 'r', ' '), HB_TAG('b', 'a', 'z', ' ')};
153154

154-
a.Subtract(b);
155-
ASSERT_EQ(a, c);
155+
SubsetDefinition def = a;
156+
def.Subtract(b);
157+
ASSERT_EQ(def, c);
158+
159+
SubsetDefinition d{5, 6};
160+
d.gids = {10};
161+
d.feature_tags = {HB_TAG('a', 'b', 'c', 'd')};
162+
163+
def = b;
164+
def.Subtract(a);
165+
ASSERT_EQ(def, d);
156166
}
157167
}
158168

0 commit comments

Comments
 (0)