Skip to content

Commit 8b04217

Browse files
authored
Add .clang-format (#34)
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent d7e19d4 commit 8b04217

File tree

4 files changed

+273
-25
lines changed

4 files changed

+273
-25
lines changed

.clang-format

+244
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,244 @@
1+
---
2+
Language: Cpp
3+
AccessModifierOffset: -1
4+
AlignAfterOpenBracket: AlwaysBreak
5+
AlignArrayOfStructures: None
6+
AlignConsecutiveAssignments:
7+
Enabled: false
8+
AcrossEmptyLines: false
9+
AcrossComments: false
10+
AlignCompound: false
11+
AlignFunctionPointers: false
12+
PadOperators: true
13+
AlignConsecutiveBitFields:
14+
Enabled: false
15+
AcrossEmptyLines: false
16+
AcrossComments: false
17+
AlignCompound: false
18+
AlignFunctionPointers: false
19+
PadOperators: true
20+
AlignConsecutiveDeclarations:
21+
Enabled: false
22+
AcrossEmptyLines: false
23+
AcrossComments: false
24+
AlignCompound: false
25+
AlignFunctionPointers: false
26+
PadOperators: true
27+
AlignConsecutiveMacros:
28+
Enabled: false
29+
AcrossEmptyLines: false
30+
AcrossComments: false
31+
AlignCompound: false
32+
AlignFunctionPointers: false
33+
PadOperators: true
34+
AlignConsecutiveShortCaseStatements:
35+
Enabled: false
36+
AcrossEmptyLines: false
37+
AcrossComments: false
38+
AlignCaseColons: false
39+
AlignEscapedNewlines: Left
40+
AlignOperands: DontAlign
41+
AlignTrailingComments:
42+
Kind: Never
43+
OverEmptyLines: 0
44+
AllowAllArgumentsOnNextLine: true
45+
AllowAllParametersOfDeclarationOnNextLine: false
46+
AllowBreakBeforeNoexceptSpecifier: Never
47+
AllowShortBlocksOnASingleLine: Never
48+
AllowShortCaseLabelsOnASingleLine: false
49+
AllowShortCompoundRequirementOnASingleLine: true
50+
AllowShortEnumsOnASingleLine: true
51+
AllowShortFunctionsOnASingleLine: Empty
52+
AllowShortIfStatementsOnASingleLine: Never
53+
AllowShortLambdasOnASingleLine: All
54+
AllowShortLoopsOnASingleLine: false
55+
AlwaysBreakAfterDefinitionReturnType: None
56+
AlwaysBreakAfterReturnType: None
57+
AlwaysBreakBeforeMultilineStrings: true
58+
AlwaysBreakTemplateDeclarations: Yes
59+
AttributeMacros:
60+
- __capability
61+
BinPackArguments: false
62+
BinPackParameters: false
63+
BitFieldColonSpacing: Both
64+
BraceWrapping:
65+
AfterCaseLabel: false
66+
AfterClass: false
67+
AfterControlStatement: Never
68+
AfterEnum: false
69+
AfterExternBlock: false
70+
AfterFunction: false
71+
AfterNamespace: false
72+
AfterObjCDeclaration: false
73+
AfterStruct: false
74+
AfterUnion: false
75+
BeforeCatch: false
76+
BeforeElse: false
77+
BeforeLambdaBody: false
78+
BeforeWhile: false
79+
IndentBraces: false
80+
SplitEmptyFunction: true
81+
SplitEmptyRecord: true
82+
SplitEmptyNamespace: true
83+
BreakAdjacentStringLiterals: true
84+
BreakAfterAttributes: Leave
85+
BreakAfterJavaFieldAnnotations: false
86+
BreakArrays: true
87+
BreakBeforeBinaryOperators: None
88+
BreakBeforeConceptDeclarations: Always
89+
BreakBeforeBraces: Attach
90+
BreakBeforeInlineASMColon: OnlyMultiline
91+
BreakBeforeTernaryOperators: true
92+
BreakConstructorInitializers: BeforeColon
93+
BreakInheritanceList: BeforeColon
94+
BreakStringLiterals: false
95+
ColumnLimit: 80
96+
CommentPragmas: '^ IWYU pragma:'
97+
CompactNamespaces: false
98+
ConstructorInitializerIndentWidth: 4
99+
ContinuationIndentWidth: 4
100+
Cpp11BracedListStyle: true
101+
DerivePointerAlignment: false
102+
DisableFormat: false
103+
EmptyLineAfterAccessModifier: Never
104+
EmptyLineBeforeAccessModifier: LogicalBlock
105+
ExperimentalAutoDetectBinPacking: false
106+
FixNamespaceComments: true
107+
ForEachMacros:
108+
- FOR_EACH
109+
- FOR_EACH_R
110+
- FOR_EACH_RANGE
111+
IfMacros:
112+
- KJ_IF_MAYBE
113+
IncludeBlocks: Preserve
114+
IncludeCategories:
115+
- Regex: '^<.*\.h(pp)?>'
116+
Priority: 1
117+
SortPriority: 0
118+
CaseSensitive: false
119+
- Regex: '^<.*'
120+
Priority: 2
121+
SortPriority: 0
122+
CaseSensitive: false
123+
- Regex: '.*'
124+
Priority: 3
125+
SortPriority: 0
126+
CaseSensitive: false
127+
IncludeIsMainRegex: '(Test)?$'
128+
IncludeIsMainSourceRegex: ''
129+
IndentAccessModifiers: false
130+
IndentCaseBlocks: false
131+
IndentCaseLabels: true
132+
IndentExternBlock: AfterExternBlock
133+
IndentGotoLabels: true
134+
IndentPPDirectives: None
135+
IndentRequiresClause: true
136+
IndentWidth: 2
137+
IndentWrappedFunctionNames: false
138+
InsertBraces: false
139+
InsertNewlineAtEOF: false
140+
InsertTrailingCommas: None
141+
IntegerLiteralSeparator:
142+
Binary: 0
143+
BinaryMinDigits: 0
144+
Decimal: 0
145+
DecimalMinDigits: 0
146+
Hex: 0
147+
HexMinDigits: 0
148+
JavaScriptQuotes: Leave
149+
JavaScriptWrapImports: true
150+
KeepEmptyLinesAtTheStartOfBlocks: false
151+
KeepEmptyLinesAtEOF: false
152+
LambdaBodyIndentation: Signature
153+
LineEnding: DeriveLF
154+
MacroBlockBegin: ''
155+
MacroBlockEnd: ''
156+
MaxEmptyLinesToKeep: 1
157+
NamespaceIndentation: None
158+
ObjCBinPackProtocolList: Auto
159+
ObjCBlockIndentWidth: 2
160+
ObjCBreakBeforeNestedBlockParam: true
161+
ObjCSpaceAfterProperty: false
162+
ObjCSpaceBeforeProtocolList: false
163+
PackConstructorInitializers: NextLine
164+
PenaltyBreakAssignment: 2
165+
PenaltyBreakBeforeFirstCallParameter: 1
166+
PenaltyBreakComment: 300
167+
PenaltyBreakFirstLessLess: 120
168+
PenaltyBreakOpenParenthesis: 0
169+
PenaltyBreakScopeResolution: 500
170+
PenaltyBreakString: 1000
171+
PenaltyBreakTemplateDeclaration: 10
172+
PenaltyExcessCharacter: 1000000
173+
PenaltyIndentedWhitespace: 0
174+
PenaltyReturnTypeOnItsOwnLine: 200
175+
PointerAlignment: Left
176+
PPIndentWidth: -1
177+
QualifierAlignment: Leave
178+
ReferenceAlignment: Pointer
179+
ReflowComments: true
180+
RemoveBracesLLVM: false
181+
RemoveParentheses: Leave
182+
RemoveSemicolon: false
183+
RequiresClausePosition: OwnLine
184+
RequiresExpressionIndentation: OuterScope
185+
SeparateDefinitionBlocks: Leave
186+
ShortNamespaceLines: 1
187+
SkipMacroDefinitionBody: false
188+
SortIncludes: CaseSensitive
189+
SortJavaStaticImport: Before
190+
SortUsingDeclarations: LexicographicNumeric
191+
SpaceAfterCStyleCast: false
192+
SpaceAfterLogicalNot: false
193+
SpaceAfterTemplateKeyword: true
194+
SpaceAroundPointerQualifiers: Default
195+
SpaceBeforeAssignmentOperators: true
196+
SpaceBeforeCaseColon: false
197+
SpaceBeforeCpp11BracedList: false
198+
SpaceBeforeCtorInitializerColon: true
199+
SpaceBeforeInheritanceColon: true
200+
SpaceBeforeJsonColon: false
201+
SpaceBeforeParens: ControlStatements
202+
SpaceBeforeParensOptions:
203+
AfterControlStatements: true
204+
AfterForeachMacros: true
205+
AfterFunctionDefinitionName: false
206+
AfterFunctionDeclarationName: false
207+
AfterIfMacros: true
208+
AfterOverloadedOperator: false
209+
AfterPlacementOperator: true
210+
AfterRequiresInClause: false
211+
AfterRequiresInExpression: false
212+
BeforeNonEmptyParentheses: false
213+
SpaceBeforeRangeBasedForLoopColon: true
214+
SpaceBeforeSquareBrackets: false
215+
SpaceInEmptyBlock: false
216+
SpacesBeforeTrailingComments: 1
217+
SpacesInAngles: Never
218+
SpacesInContainerLiterals: true
219+
SpacesInLineCommentPrefix:
220+
Minimum: 1
221+
Maximum: -1
222+
SpacesInParens: Never
223+
SpacesInParensOptions:
224+
InCStyleCasts: false
225+
InConditionalStatements: false
226+
InEmptyParentheses: false
227+
Other: false
228+
SpacesInSquareBrackets: false
229+
Standard: Latest
230+
StatementAttributeLikeMacros:
231+
- Q_EMIT
232+
StatementMacros:
233+
- Q_UNUSED
234+
- QT_REQUIRE_VERSION
235+
TabWidth: 8
236+
UseTab: Never
237+
VerilogBreakBetweenInstancePorts: true
238+
WhitespaceSensitiveMacros:
239+
- BOOST_PP_STRINGIZE
240+
- CF_SWIFT_NAME
241+
- NS_SWIFT_NAME
242+
- PP_STRINGIZE
243+
- STRINGIZE
244+
...

src/pre_tokenizer.cpp

+25-21
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
* This source code is licensed under the BSD-style license found in the
66
* LICENSE file in the root directory of this source tree.
77
*/
8+
// @lint-ignore-every LICENSELINT
89

910
// Local
1011
#include <pytorch/tokenizers/pre_tokenizer.h>
@@ -63,35 +64,37 @@ PreTokenizer::Ptr PreTokenizerConfig::create() const {
6364
"Missing pretokenizers for PreTokenizer of type Sequence");
6465
}
6566
std::vector<PreTokenizer::Ptr> pretoks;
66-
std::transform(pretokenizers->begin(), pretokenizers->end(),
67-
std::back_inserter(pretoks),
68-
[](const PreTokenizerConfig &cfg) { return cfg.create(); });
67+
std::transform(
68+
pretokenizers->begin(),
69+
pretokenizers->end(),
70+
std::back_inserter(pretoks),
71+
[](const PreTokenizerConfig& cfg) { return cfg.create(); });
6972
return PreTokenizer::Ptr(new SequencePreTokenizer(pretoks));
7073
}
7174
throw std::runtime_error("Unsupported PreTokenizer type: " + type);
7275
}
7376

74-
PreTokenizerConfig &PreTokenizerConfig::parse_json(const json &json_config) {
77+
PreTokenizerConfig& PreTokenizerConfig::parse_json(const json& json_config) {
7578
type = json_config.at("type");
7679
if (type == "Split") {
7780
try {
7881
pattern = json_config.at("pattern");
79-
} catch (json::out_of_range &) {
82+
} catch (json::out_of_range&) {
8083
}
8184
} else if (type == "Digits") {
8285
try {
8386
individual_digits = json_config.at("individual_digits");
84-
} catch (json::out_of_range &) {
87+
} catch (json::out_of_range&) {
8588
}
8689
} else if (type == "ByteLevel") {
8790
try {
8891
add_prefix_space = json_config.at("add_prefix_space");
89-
} catch (json::out_of_range &) {
92+
} catch (json::out_of_range&) {
9093
}
9194
// TODO: trim_offsets, use_regex
9295
} else if (type == "Sequence") {
9396
pretokenizers = std::vector<PreTokenizerConfig>();
94-
for (const auto &entry : json_config.at("pretokenizers")) {
97+
for (const auto& entry : json_config.at("pretokenizers")) {
9598
pretokenizers->push_back(PreTokenizerConfig().parse_json(entry));
9699
}
97100
} else {
@@ -102,14 +105,14 @@ PreTokenizerConfig &PreTokenizerConfig::parse_json(const json &json_config) {
102105

103106
// RegexPreTokenizer ///////////////////////////////////////////////////////////
104107

105-
RegexPreTokenizer::Re2UPtr
106-
RegexPreTokenizer::create_regex_(const std::string &pattern) {
108+
RegexPreTokenizer::Re2UPtr RegexPreTokenizer::create_regex_(
109+
const std::string& pattern) {
107110
assert(!pattern.empty());
108111
return std::make_unique<re2::RE2>("(" + pattern + ")");
109112
}
110113

111-
std::vector<std::string>
112-
RegexPreTokenizer::pre_tokenize(re2::StringPiece input) const {
114+
std::vector<std::string> RegexPreTokenizer::pre_tokenize(
115+
re2::StringPiece input) const {
113116
std::vector<std::string> result;
114117
std::string piece;
115118
while (RE2::FindAndConsume(&input, *regex_, &piece)) {
@@ -136,13 +139,14 @@ constexpr char GPT2_EXPR[] =
136139
// Construction //
137140
//////////////////
138141

139-
ByteLevelPreTokenizer::ByteLevelPreTokenizer(bool add_prefix_space,
140-
const std::string &pattern)
142+
ByteLevelPreTokenizer::ByteLevelPreTokenizer(
143+
bool add_prefix_space,
144+
const std::string& pattern)
141145
: pattern_(pattern.empty() ? GPT2_EXPR : pattern),
142146
add_prefix_space_(add_prefix_space) {}
143147

144-
std::vector<std::string>
145-
ByteLevelPreTokenizer::pre_tokenize(re2::StringPiece input) const {
148+
std::vector<std::string> ByteLevelPreTokenizer::pre_tokenize(
149+
re2::StringPiece input) const {
146150
// Add the prefix space if configured to do so
147151
std::string input_str(input);
148152
if (add_prefix_space_ && !input_str.empty() && input_str[0] != ' ') {
@@ -158,13 +162,13 @@ SequencePreTokenizer::SequencePreTokenizer(
158162
std::vector<PreTokenizer::Ptr> pre_tokenizers)
159163
: pre_tokenizers_(std::move(pre_tokenizers)) {}
160164

161-
std::vector<std::string>
162-
SequencePreTokenizer::pre_tokenize(re2::StringPiece input) const {
165+
std::vector<std::string> SequencePreTokenizer::pre_tokenize(
166+
re2::StringPiece input) const {
163167
std::vector<std::string> pieces{std::string(input)};
164-
for (const auto &pre_tokenizer : pre_tokenizers_) {
168+
for (const auto& pre_tokenizer : pre_tokenizers_) {
165169
std::vector<std::string> new_pieces;
166-
for (const auto &piece : pieces) {
167-
for (const auto &subpiece : pre_tokenizer->pre_tokenize(piece)) {
170+
for (const auto& piece : pieces) {
171+
for (const auto& subpiece : pre_tokenizer->pre_tokenize(piece)) {
168172
new_pieces.push_back(subpiece);
169173
}
170174
}

src/token_decoder.cpp

+3-3
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ TokenDecoder::Ptr TokenDecoderConfig::create() const {
3737
throw std::runtime_error("Unsupported TokenDecoder type: " + type);
3838
}
3939

40-
TokenDecoderConfig &TokenDecoderConfig::parse_json(const json &json_config) {
40+
TokenDecoderConfig& TokenDecoderConfig::parse_json(const json& json_config) {
4141
type = json_config.at("type");
4242
if (type == "ByteLevel") {
4343
// No parameters to parse
@@ -54,7 +54,7 @@ namespace {
5454
// Copied from llama.cpp
5555
// CITE:
5656
// https://github.com/ggerganov/llama.cpp/blob/master/src/llama-vocab.cpp#L20
57-
static std::string format(const char *fmt, ...) {
57+
static std::string format(const char* fmt, ...) {
5858
va_list ap;
5959
va_list ap2;
6060
va_start(ap, fmt);
@@ -84,7 +84,7 @@ std::string ByteLevelTokenDecoder::decode(re2::StringPiece token) const {
8484
const auto utf8 = unicode_cpt_to_utf8(cpt);
8585
try {
8686
decoded_text += unicode_utf8_to_byte(utf8);
87-
} catch (const std::out_of_range & /*e*/) {
87+
} catch (const std::out_of_range& /*e*/) {
8888
decoded_text += "[UNK_BYTE_0x";
8989
for (const auto c : utf8) {
9090
decoded_text += format("%02x", (uint8_t)c);

targets.bzl

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime", "get_executorch_supported_platforms")
1+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_executorch_supported_platforms", "runtime")
22
load("@fbsource//xplat/executorch/third-party:glob_defs.bzl", "subdir_glob")
33

44
PLATFORMS = get_executorch_supported_platforms()

0 commit comments

Comments
 (0)