Skip to content

Commit de6b735

Browse files
committed
Add stub impl of json tokenizer to llama runner
1 parent 3230900 commit de6b735

File tree

4 files changed

+111
-12
lines changed

4 files changed

+111
-12
lines changed

examples/models/llama/runner/runner.cpp

+18-12
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
1919
#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
20+
#include <executorch/extension/llm/tokenizer/hf_tokenizer.h>
2021

2122
namespace example {
2223

@@ -75,20 +76,25 @@ Error Runner::load() {
7576
return Error::Ok;
7677
}
7778
ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward"));
78-
// load tokenizer. Assuming tiktoken is the default tokenizer
79+
// Load tokenizer.
7980
tokenizer_ = nullptr;
80-
tokenizer_ = get_tiktoken_for_llama();
81-
Error err = tokenizer_->load(tokenizer_path_);
82-
// Rely on tiktoken to throw error if the artifact is incompatible. Then we
83-
// fallback to BPE tokenizer.
84-
if (err == Error::InvalidArgument) {
85-
ET_LOG(
86-
Info,
87-
"Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
88-
tokenizer_path_.c_str());
89-
tokenizer_.reset();
90-
tokenizer_ = std::make_unique<llm::BPETokenizer>();
81+
// Check if tokenizer_path_ ends with ".json".
82+
if (tokenizer_path_.size() >= 5 && tokenizer_path_.compare(tokenizer_path_.size() - 5, 5, ".json") == 0) {
83+
tokenizer_ = std::make_unique<llm::HfTokenizer>();
9184
tokenizer_->load(tokenizer_path_);
85+
ET_LOG(Info, "Loaded tokenizer %s as HF tokenizer", tokenizer_path_.c_str());
86+
} else {
87+
// Else assume TikToken is the default tokenizer, using BPE as a fallback.
88+
tokenizer_ = get_tiktoken_for_llama();
89+
Error err = tokenizer_->load(tokenizer_path_);
90+
if (err == Error::InvalidArgument) {
91+
tokenizer_.reset();
92+
tokenizer_ = std::make_unique<llm::BPETokenizer>();
93+
tokenizer_->load(tokenizer_path_);
94+
ET_LOG(Info, "Loaded tokenizer %s as BPE tokenizer", tokenizer_path_.c_str());
95+
} else {
96+
ET_LOG(Info, "Loaded tokenizer %s as TikToken tokenizer", tokenizer_path_.c_str());
97+
}
9298
}
9399

94100
ET_LOG(Info, "Reading metadata from model");
+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Stub implementation of the Hugging Face (tokenizer.json) tokenizer.
//
// NOTE(review): the members declared in hf_tokenizer.h must be defined
// out-of-line here. The previous version re-declared a second, inline
// `class HfTokenizer` (via a stale include path) instead of implementing
// the header's class — an ODR violation that also left the header's
// declared constructor/destructor with no definition, breaking the link
// for any client of the `hf_tokenizer` target.

#include <executorch/extension/llm/tokenizer/hf_tokenizer.h>

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/result.h>

#include <string>
#include <vector>

namespace executorch {
namespace extension {
namespace llm {

HfTokenizer::HfTokenizer() : Tokenizer() {}

HfTokenizer::~HfTokenizer() {}

/// Loads the tokenizer artifact at `tokenizer_path`.
/// Stub: currently accepts any path without reading it.
/// TODO: Implement actual loading logic (parse the HF tokenizer.json).
::executorch::runtime::Error HfTokenizer::load(
    const std::string& tokenizer_path) {
  (void)tokenizer_path; // unused until real loading is implemented
  return ::executorch::runtime::Error::Ok;
}

/// Encodes `input` into token ids, optionally adding `bos`/`eos` markers.
/// Stub: currently returns an empty token list for every input.
/// TODO: Implement actual encoding logic.
::executorch::runtime::Result<std::vector<uint64_t>> HfTokenizer::encode(
    const std::string& input,
    int8_t bos,
    int8_t eos) const {
  (void)input;
  (void)bos;
  (void)eos;
  std::vector<uint64_t> tokens;
  return ::executorch::runtime::Result<std::vector<uint64_t>>(tokens);
}

/// Decodes `token` (given the preceding `prev_token`) back into text.
/// Stub: currently returns an empty string for every token.
/// TODO: Implement actual decoding logic.
::executorch::runtime::Result<std::string> HfTokenizer::decode(
    uint64_t prev_token,
    uint64_t token) const {
  (void)prev_token;
  (void)token;
  std::string decoded_string;
  return ::executorch::runtime::Result<std::string>(decoded_string);
}

} // namespace llm
} // namespace extension
} // namespace executorch
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <executorch/extension/llm/tokenizer/tokenizer.h>

namespace executorch {
namespace extension {
namespace llm {

/// Tokenizer backed by a Hugging Face `tokenizer.json` artifact.
/// Currently a stub; see hf_tokenizer.cpp for the (placeholder)
/// implementations of the members declared here.
class ET_EXPERIMENTAL HfTokenizer : public Tokenizer {
 public:
  explicit HfTokenizer();
  ~HfTokenizer() override;

  /// Loads the tokenizer model from `tokenizer_path`.
  /// @return Error::Ok on success.
  ::executorch::runtime::Error load(const std::string& tokenizer_path) override;

  /// Encodes `input` into token ids; `bos`/`eos` select whether
  /// begin-/end-of-sequence markers are added (semantics inherited from
  /// the Tokenizer base interface).
  ::executorch::runtime::Result<std::vector<uint64_t>>
  encode(const std::string& input, int8_t bos, int8_t eos) const override;

  /// Decodes `token` into text, with `prev_token` available for
  /// context-sensitive decoding (e.g. spacing between pieces).
  ::executorch::runtime::Result<std::string> decode(
      uint64_t prev_token,
      uint64_t token) const override;
};

} // namespace llm
} // namespace extension
} // namespace executorch

extension/llm/tokenizer/targets.bzl

+17
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,20 @@ def define_common_targets():
9595
"re2",
9696
],
9797
)
98+
99+
# Stub Hugging Face (tokenizer.json) tokenizer library.
# Exports hf_tokenizer.h to clients and compiles the placeholder
# implementation in hf_tokenizer.cpp.
runtime.cxx_library(
    name = "hf_tokenizer",
    srcs = [
        "hf_tokenizer.cpp",
    ],
    exported_headers = [
        "hf_tokenizer.h",
    ],
    exported_deps = [
        # Base Tokenizer interface that HfTokenizer derives from.
        ":tokenizer_header",
        # Error/Result vocabulary types used in the public signatures.
        "//executorch/runtime/core:core",
    ],
    visibility = [
        "@EXECUTORCH_CLIENTS",
    ],
)

0 commit comments

Comments (0)