Skip to content

Commit 019fec9

Browse files
committed
Add closure segmenter option to populate the table keyed segments in its output.
One table keyed segment is added per merge group.
1 parent 47f7040 commit 019fec9

11 files changed

+114
-69
lines changed

ift/encoder/closure_glyph_segmenter.cc

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,10 @@ struct SegmentOrdering {
193193
};
194194

195195
static std::vector<Segment> PreGroupSegments(
196-
const btree_map<SegmentSet, MergeStrategy>& merge_groups,
197-
const std::vector<SegmentOrdering>& ordering,
198-
const std::vector<SubsetDefinition>& subset_definitions,
199-
std::vector<uint32_t>& segment_index_map
200-
) {
196+
const btree_map<SegmentSet, MergeStrategy>& merge_groups,
197+
const std::vector<SegmentOrdering>& ordering,
198+
const std::vector<SubsetDefinition>& subset_definitions,
199+
std::vector<uint32_t>& segment_index_map) {
201200
segment_index_map.resize(subset_definitions.size());
202201
std::vector<Segment> segments;
203202

@@ -206,9 +205,10 @@ static std::vector<Segment> PreGroupSegments(
206205
auto merge_group_it = merge_groups.begin();
207206
auto ordering_it = ordering.begin();
208207

209-
while (ordering_it != ordering.end()) {
208+
while (ordering_it != ordering.end()) {
210209
const auto& o = *ordering_it;
211-
if (o.group_index != last_group_index && merge_group_it != merge_groups.end()) {
210+
if (o.group_index != last_group_index &&
211+
merge_group_it != merge_groups.end()) {
212212
merge_group_it++;
213213
}
214214

@@ -217,11 +217,11 @@ static std::vector<Segment> PreGroupSegments(
217217
strategy = &(merge_group_it->second);
218218
}
219219

220-
Segment segment = Segment{subset_definitions[o.original_index], o.probability};
220+
Segment segment =
221+
Segment{subset_definitions[o.original_index], o.probability};
221222
ordering_it++;
222223

223-
if (strategy == nullptr ||
224-
strategy->PreClosureGroupSize() <= 1 ||
224+
if (strategy == nullptr || strategy->PreClosureGroupSize() <= 1 ||
225225
o.probability.Max() > strategy->PreClosureProbabilityThreshold()) {
226226
segment_index_map[o.original_index] = i;
227227
} else {
@@ -232,15 +232,18 @@ static std::vector<Segment> PreGroupSegments(
232232
break;
233233
}
234234

235-
segment.Definition().Union(subset_definitions[ordering_it->original_index]);
235+
segment.Definition().Union(
236+
subset_definitions[ordering_it->original_index]);
236237
segment_index_map[ordering_it->original_index] = i;
237238

238239
ordering_it++;
239240
remaining--;
240241
}
241242

242243
if (strategy->UseCosts()) {
243-
segment.SetProbability(strategy->ProbabilityCalculator()->ComputeProbability(segment.Definition()));
244+
segment.SetProbability(
245+
strategy->ProbabilityCalculator()->ComputeProbability(
246+
segment.Definition()));
244247
}
245248
}
246249

@@ -328,7 +331,8 @@ static StatusOr<std::vector<Segment>> ToOrderedSegments(
328331

329332
// maps from index in subset_definitions to the new ordering.
330333
std::vector<uint32_t> segment_index_map;
331-
std::vector<Segment> segments = PreGroupSegments(merge_groups, ordering, subset_definitions, segment_index_map);
334+
std::vector<Segment> segments = PreGroupSegments(
335+
merge_groups, ordering, subset_definitions, segment_index_map);
332336
VLOG(0) << segments.size() << " segments after pregrouping.";
333337

334338
btree_map<SegmentSet, MergeStrategy> new_merge_groups;
@@ -344,8 +348,8 @@ static StatusOr<std::vector<Segment>> ToOrderedSegments(
344348
remapped_full.insert(s_prime);
345349
}
346350

347-
348-
VLOG(0) << " Merge group " << group_index << " has " << remapped.size() << " segments.";
351+
VLOG(0) << " Merge group " << group_index << " has " << remapped.size()
352+
<< " segments.";
349353
group_index++;
350354

351355
if (!new_merge_groups.insert(std::make_pair(remapped, std::move(strategy)))

ift/encoder/closure_glyph_segmenter_test.cc

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,19 +1427,11 @@ if (s0 AND s2) then p2
14271427
)");
14281428
}
14291429

1430-
14311430
TEST_F(ClosureGlyphSegmenterTest, MultipleMergeGroups_PreGrouping) {
14321431
UnicodeFrequencies freq{
1433-
{{' ', ' '}, 100},
1434-
{{'d', 'd'}, 100},
1435-
{{'a', 'a'}, 60},
1436-
{{'e', 'e'}, 30},
1437-
{{'b', 'b'}, 29},
1438-
{{'f', 'f'}, 28},
1439-
{{'c', 'c'}, 10},
1440-
{{'g', 'g'}, 9},
1441-
{{'h', 'h'}, 5},
1442-
{{'i', 'i'}, 1}, // 8
1432+
{{' ', ' '}, 100}, {{'d', 'd'}, 100}, {{'a', 'a'}, 60}, {{'e', 'e'}, 30},
1433+
{{'b', 'b'}, 29}, {{'f', 'f'}, 28}, {{'c', 'c'}, 10}, {{'g', 'g'}, 9},
1434+
{{'h', 'h'}, 5}, {{'i', 'i'}, 1}, // 8
14431435
};
14441436

14451437
MergeStrategy costs = *MergeStrategy::CostBased(std::move(freq), 0, 1);
@@ -1473,8 +1465,8 @@ TEST_F(ClosureGlyphSegmenterTest, MultipleMergeGroups_PreGrouping) {
14731465
// Group 1
14741466
{'d'},
14751467
{'a'},
1476-
{'e', 'b', 'f'}, // pre merge
1477-
{'c', 'g'}, // pre merge
1468+
{'e', 'b', 'f'}, // pre merge
1469+
{'c', 'g'}, // pre merge
14781470
// Shared
14791471
{'h'},
14801472
{'i'},

ift/encoder/estimated_patch_size_cache.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
#include "common/int_set.h"
44
#include "common/try.h"
55

6-
using absl::StatusOr;
76
using absl::flat_hash_set;
7+
using absl::StatusOr;
88
using common::GlyphSet;
99

1010
namespace ift::encoder {

ift/encoder/estimated_patch_size_cache.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define IFT_ENCODER_ESTIMATED_PATCH_SIZE_CACHE_H_
33

44
#include <memory>
5+
56
#include "absl/status/statusor.h"
67
#include "common/font_data.h"
78
#include "common/int_set.h"
@@ -18,14 +19,13 @@ class EstimatedPatchSizeCache : public PatchSizeCache {
1819
public:
1920
static absl::StatusOr<std::unique_ptr<PatchSizeCache>> New(hb_face_t* face) {
2021
double compression_ratio = TRY(EstimateCompressionRatio(face));
21-
return std::unique_ptr<PatchSizeCache>(new EstimatedPatchSizeCache(face, compression_ratio));
22+
return std::unique_ptr<PatchSizeCache>(
23+
new EstimatedPatchSizeCache(face, compression_ratio));
2224
}
2325

2426
absl::StatusOr<uint32_t> GetPatchSize(const common::GlyphSet& gids) override;
2527

26-
double CompressionRatio() const {
27-
return compression_ratio_;
28-
}
28+
double CompressionRatio() const { return compression_ratio_; }
2929

3030
private:
3131
explicit EstimatedPatchSizeCache(hb_face_t* original_face,
Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
#include "ift/encoder/estimated_patch_size_cache.h"
22

3+
#include "common/font_data.h"
34
#include "common/font_helper.h"
45
#include "common/int_set.h"
56
#include "gtest/gtest.h"
67

7-
#include "common/font_data.h"
8-
9-
using common::hb_face_unique_ptr;
10-
using common::make_hb_face;
11-
using common::hb_blob_unique_ptr;
12-
using common::make_hb_blob;
138
using common::FontHelper;
149
using common::GlyphSet;
10+
using common::hb_blob_unique_ptr;
11+
using common::hb_face_unique_ptr;
12+
using common::make_hb_blob;
13+
using common::make_hb_face;
1514

1615
namespace ift::encoder {
1716

@@ -23,24 +22,24 @@ class EstimatedPatchSizeCacheTest : public ::testing::Test {
2322
roboto = make_hb_face(hb_face_create(blob.get(), 0));
2423
}
2524

26-
2725
double CompressionRatio(GlyphSet gids, double expected_compression_ratio) {
28-
uint32_t raw_outline_size =
29-
*FontHelper::TotalGlyphData(roboto.get(), gids);
30-
double fixed_size = 1 + 7 * 4; // header
31-
fixed_size += (double) (5 + gids.size() * 2 + 4 + (gids.size() + 1)*4) * expected_compression_ratio; // glyph patches header
26+
uint32_t raw_outline_size = *FontHelper::TotalGlyphData(roboto.get(), gids);
27+
double fixed_size = 1 + 7 * 4; // header
28+
fixed_size += (double)(5 + gids.size() * 2 + 4 + (gids.size() + 1) * 4) *
29+
expected_compression_ratio; // glyph patches header
3230
auto estimated = *EstimatedPatchSizeCache::New(roboto.get());
3331
uint32_t compressed_size = *estimated->GetPatchSize(gids);
34-
return (double) (compressed_size - fixed_size) / (double) raw_outline_size;
32+
return (double)(compressed_size - fixed_size) / (double)raw_outline_size;
3533
}
3634

3735
hb_face_unique_ptr roboto;
3836
};
3937

4038
TEST_F(EstimatedPatchSizeCacheTest, PatchSize) {
4139
// There should be a consistent compression ratio between patches.
42-
ASSERT_NEAR(this->CompressionRatio(GlyphSet {44, 47, 49}, 0.457), 0.46, 0.01);
43-
ASSERT_NEAR(CompressionRatio(GlyphSet {45, 48, 50, 51, 52, 53}, 0.457), 0.46, 0.01);
40+
ASSERT_NEAR(this->CompressionRatio(GlyphSet{44, 47, 49}, 0.457), 0.46, 0.01);
41+
ASSERT_NEAR(CompressionRatio(GlyphSet{45, 48, 50, 51, 52, 53}, 0.457), 0.46,
42+
0.01);
4443
}
4544

4645
} // namespace ift::encoder

ift/encoder/glyph_segmentation.cc

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,11 @@
22

33
#include <cstdint>
44
#include <cstdio>
5-
#include <optional>
65
#include <sstream>
76

87
#include "absl/container/btree_map.h"
98
#include "absl/container/btree_set.h"
109
#include "absl/container/flat_hash_map.h"
11-
#include "absl/container/flat_hash_set.h"
1210
#include "absl/status/statusor.h"
1311
#include "common/font_helper.h"
1412
#include "common/int_set.h"
@@ -144,17 +142,22 @@ ProtoType TagsToSetProto(const btree_set<hb_tag_t>& set) {
144142
return values;
145143
}
146144

145+
// Fills in segment_proto from a SubsetDefinition.
//
// The definition's codepoints are converted with ToSetProto<Codepoints> and
// assigned to the proto's codepoints field; its feature tags are converted
// with TagsToSetProto<Features> and assigned to the proto's features field.
// Both fields are overwritten by assignment. Only these two members of def
// are read here.
void GlyphSegmentation::SubsetDefinitionToSegment(const SubsetDefinition& def,
146+
SegmentProto& segment_proto) {
147+
(*segment_proto.mutable_codepoints()) =
148+
ToSetProto<Codepoints>(def.codepoints);
149+
(*segment_proto.mutable_features()) =
150+
TagsToSetProto<Features>(def.feature_tags);
151+
}
152+
147153
SegmentationPlan GlyphSegmentation::ToSegmentationPlanProto() const {
148154
SegmentationPlan config;
149155

150156
uint32_t set_index = 0;
151157
for (const auto& s : Segments()) {
152158
if (!s.Empty()) {
153159
SegmentProto segment_proto;
154-
(*segment_proto.mutable_codepoints()) =
155-
ToSetProto<Codepoints>(s.codepoints);
156-
(*segment_proto.mutable_features()) =
157-
TagsToSetProto<Features>(s.feature_tags);
160+
SubsetDefinitionToSegment(s, segment_proto);
158161
(*config.mutable_segments())[set_index++] = segment_proto;
159162
} else {
160163
set_index++;

ift/encoder/glyph_segmentation.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ class GlyphSegmentation {
8989
return init_font_segment_;
9090
};
9191

92+
// Writes the codepoints and feature tags of def into the corresponding
// fields (codepoints, features) of segment_proto, replacing their previous
// contents.
static void SubsetDefinitionToSegment(const SubsetDefinition& def,
93+
SegmentProto& segment_proto);
94+
9295
SegmentationPlan ToSegmentationPlanProto() const;
9396

9497
static absl::Status GroupsToSegmentation(

ift/encoder/merge_strategy.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,7 @@ class MergeStrategy {
145145
return init_font_merge_probability_threshold_;
146146
}
147147

148-
uint32_t PreClosureGroupSize() const {
149-
return pre_closure_group_size_;
150-
}
148+
uint32_t PreClosureGroupSize() const { return pre_closure_group_size_; }
151149

152150
double PreClosureProbabilityThreshold() const {
153151
return pre_closure_probability_threshold_;

util/closure_glyph_keyed_segmenter_util.cc

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,39 @@ static Status Analysis(hb_face_t* font,
127127
return absl::OkStatus();
128128
}
129129

130+
// Appends one table keyed segment per merge group to plan.
//
// For every entry in merge_groups, the subset definitions of all segments
// listed in that group's key are unioned together and init_segment is
// subtracted from the result. Each resulting definition is stored in the
// plan's segments map under a fresh id (starting one past the largest id
// already present) and a new non_glyph_segments entry holding just that id
// is added.
//
// plan:         segmentation plan to extend; modified in place.
// merge_groups: keys are sets of indices into segments; the MergeStrategy
//               values are not used here.
// segments:     subset definitions indexed by the ids found in the
//               merge_groups keys.
// init_segment: definition removed from every generated segment.
static void AddTableKeyedSegments(
131+
SegmentationPlan& plan,
132+
const btree_map<SegmentSet, MergeStrategy>& merge_groups,
133+
const std::vector<SubsetDefinition>& segments,
134+
const SubsetDefinition& init_segment) {
135+
// Step 1: compute the combined definition for each merge group.
std::vector<SubsetDefinition> table_keyed_segments;
136+
for (const auto& [segment_ids, _] : merge_groups) {
137+
SubsetDefinition new_segment;
138+
for (uint32_t s : segment_ids) {
139+
// at() is bounds checked: an id outside of segments raises
// std::out_of_range instead of reading past the end of the vector.
new_segment.Union(segments.at(s));
140+
}
141+
new_segment.Subtract(init_segment);
142+
// NOTE(review): the result is appended even if the subtraction left it
// empty - confirm that empty table keyed segments are acceptable downstream.
table_keyed_segments.push_back(new_segment);
143+
}
144+
145+
// Step 2: find the largest segment id currently used by the plan so the new
// segments do not collide with existing entries.
uint32_t max_id = 0;
146+
for (const auto& [id, _] : plan.segments()) {
147+
if (id > max_id) {
148+
max_id = id;
149+
}
150+
}
151+
152+
// New ids start at max_id + 1. When the plan has no segments max_id stays 0,
// so the first id handed out is 1.
uint32_t next_id = max_id + 1;
153+
auto* plan_segments = plan.mutable_segments();
154+
for (const SubsetDefinition& def : table_keyed_segments) {
155+
// operator[] on the segments map yields the entry for next_id, which is
// then filled in from def.
GlyphSegmentation::SubsetDefinitionToSegment(def,
156+
(*plan_segments)[next_id]);
157+
// Each new segment gets its own single-id non glyph segments entry.
SegmentsProto* segment_ids = plan.add_non_glyph_segments();
158+
segment_ids->add_values(next_id);
159+
next_id++;
160+
}
161+
}
162+
130163
static Status Main(const std::vector<char*> args) {
131164
hb_face_unique_ptr font =
132165
TRY(LoadFont(absl::GetFlag(FLAGS_input_font).c_str()));
@@ -137,12 +170,14 @@ static Status Main(const std::vector<char*> args) {
137170
CodepointSet font_codepoints = FontHelper::ToCodepointsSet(font.get());
138171
SubsetDefinition init_segment =
139172
config_util.SegmentProtoToSubsetDefinition(config.initial_segment());
173+
140174
std::vector<SubsetDefinition> segments;
141175
btree_map<SegmentSet, MergeStrategy> merge_groups =
142176
TRY(config_util.ConfigToMergeGroups(config, font_codepoints, segments));
143177

144-
ClosureGlyphSegmenter segmenter(config.brotli_quality(),
145-
config.brotli_quality_for_initial_font_merging());
178+
ClosureGlyphSegmenter segmenter(
179+
config.brotli_quality(),
180+
config.brotli_quality_for_initial_font_merging());
146181
GlyphSegmentation segmentation = TRY(segmenter.CodepointToGlyphSegments(
147182
font.get(), init_segment, segments, merge_groups,
148183
config.move_fallback_glyphs_into_initial_font()));
@@ -154,6 +189,10 @@ static Status Main(const std::vector<char*> args) {
154189
plan.clear_initial_codepoints();
155190
}
156191

192+
if (config.generate_table_keyed_segments()) {
193+
AddTableKeyedSegments(plan, merge_groups, segments, init_segment);
194+
}
195+
157196
// TODO(garretrieger): assign a basic (single segment) table keyed config.
158197
// Later on the input to this util should include information on how the
159198
// segments should be grouped together for the table keyed portion of the

0 commit comments

Comments
 (0)