diff --git a/targets.bzl b/targets.bzl index fdd69cf..4fbf61c 100644 --- a/targets.bzl +++ b/targets.bzl @@ -14,7 +14,7 @@ def define_common_targets(): name = "headers", exported_headers = subdir_glob([ ("include", "pytorch/tokenizers/*.h"), - ], exclude = ["pcre2_regex.h", "std_regex.h"]), + ]), visibility = [ "@EXECUTORCH_CLIENTS", "//pytorch/tokenizers/...", @@ -23,9 +23,6 @@ def define_common_targets(): platforms = PLATFORMS, ) - # TODO: add target for regex which does lookahed with pcre2 - # by adding "-DSUPPORT_REGEX_LOOKAHEAD" as a compiler flag - # and including pcre2 dependencies. runtime.cxx_library( name = "regex", srcs = [ @@ -43,6 +40,27 @@ def define_common_targets(): platforms = PLATFORMS, ) + runtime.cxx_library( + name = "regex_lookahead", + srcs = [ + "src/pcre2_regex.cpp", + "src/regex.cpp", + "src/re2_regex.cpp", + "src/std_regex.cpp", + ], + exported_deps = [ + ":headers", + ], + exported_external_deps = [ + "pcre2", + "re2", + ], + preprocessor_flags = ["-DSUPPORT_REGEX_LOOKAHEAD=ON"], + visibility = ["//pytorch/tokenizers/..."], + header_namespace = "", + platforms = PLATFORMS, + ) + runtime.cxx_library( name = "bpe_tokenizer_base", srcs = [ @@ -101,6 +119,29 @@ def define_common_targets(): platforms = PLATFORMS, ) + runtime.cxx_library( + name = "tiktoken_lookahead", + srcs = [ + "src/tiktoken.cpp", + ], + deps = [ + ":regex_lookahead", + ], + exported_deps = [ + ":bpe_tokenizer_base", + ":headers", + ], + exported_external_deps = [ + "pcre2", + "re2", + ], + visibility = [ + "@EXECUTORCH_CLIENTS", + "//pytorch/tokenizers/...", + ], + platforms = PLATFORMS, + ) + runtime.cxx_library( name = "hf_tokenizer", srcs = [