diff --git a/README.md b/README.md
index 5cef03cf..4d1926b2 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ Then login with `huggingface-cli login`
 ## Downloading Weights
 Models tested/supported
 ```text
-tinyllamas/stories{15,42,100}
+tinyllamas/stories{15M,42M,110M}
 openlm-research/open_llama_7b
 meta-llama/Llama-2-7b-chat-hf
 meta-llama/Llama-2-13b-chat-hf
diff --git a/model.py b/model.py
index 0660bc2b..77945d64 100644
--- a/model.py
+++ b/model.py
@@ -63,7 +63,9 @@ def from_name(cls, name: str):
     "34B": dict(n_layer=48, n_head=64, dim=8192, vocab_size=32000, n_local_heads=8, intermediate_size=22016, rope_base=1000000), # CodeLlama-34B-Python-hf
     "70B": dict(n_layer=80, n_head=64, dim=8192, n_local_heads=8, intermediate_size=28672),
     "Mistral-7B": dict(n_layer=32, n_head=32, n_local_heads=8, dim=4096, intermediate_size=14336, vocab_size=32000),
+    # For the stories models, see https://github.com/karpathy/llama2.c?tab=readme-ov-file#models
     "stories15M": dict(n_layer=6, n_head=6, dim=288),
+    "stories42M": dict(n_layer=8, n_head=8, dim=512, intermediate_size=1376),
     "stories110M": dict(n_layer=12, n_head=12, dim=768),
     "Llama-3-8B": dict(block_size=8192, n_layer=32, n_head=32, n_local_heads=8, dim=4096, intermediate_size=14336, vocab_size=128256),
 }
diff --git a/quantize.py b/quantize.py
index fb566421..5c735f53 100644
--- a/quantize.py
+++ b/quantize.py
@@ -577,7 +577,7 @@ def quantize(
         quant_handler = WeightOnlyInt4GPTQQuantHandler(model, groupsize)
 
         tokenizer_path = checkpoint_path.parent / "tokenizer.model"
-        assert tokenizer_path.is_file(), str(tokenizer_path)
+        assert tokenizer_path.is_file(), f"{tokenizer_path} is not a file."
         tokenizer = get_tokenizer(tokenizer_path, checkpoint_path)
 
         quantized_state_dict = quant_handler.create_quantized_state_dict(
diff --git a/scripts/convert_hf_checkpoint.py b/scripts/convert_hf_checkpoint.py
index 8a221067..86943f65 100644
--- a/scripts/convert_hf_checkpoint.py
+++ b/scripts/convert_hf_checkpoint.py
@@ -25,6 +25,9 @@ def convert_hf_checkpoint(
     checkpoint_dir: Path = Path("checkpoints/meta-Transformer/Transformer-2-7b-chat-hf"),
     model_name: Optional[str] = None,
 ) -> None:
+    if "stories" in checkpoint_dir.name:
+        # No need to convert tinyllamas
+        return
     if model_name is None:
         model_name = checkpoint_dir.name
diff --git a/scripts/download.py b/scripts/download.py
index a968cf33..06a70328 100644
--- a/scripts/download.py
+++ b/scripts/download.py
@@ -7,11 +7,28 @@ from typing import Optional
 from requests.exceptions import HTTPError
+import subprocess
 
+def download_tinyllamas(repo_id: str, local_dir: str) -> None:
+    try:
+        model_name = repo_id.split("/")[-1]
+        # Download model weight
+        weight_url = "https://huggingface.co/karpathy/tinyllamas/resolve/main/" + model_name + ".pt"
+        weight_dst_path = os.path.join(local_dir, "model.pth")
+        subprocess.run(["wget", weight_url, "-O", weight_dst_path], check=True)
+        # Download tokenizer model
+        tokenizer_url = "https://github.com/karpathy/llama2.c/raw/master/tokenizer.model"
+        tokenizer_dst_path = os.path.join(local_dir, "tokenizer.model")
+        subprocess.run(["wget", tokenizer_url, "-O", tokenizer_dst_path], check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to download {repo_id}: {e}")
 
 def hf_download(repo_id: Optional[str] = None, hf_token: Optional[str] = None) -> None:
     from huggingface_hub import snapshot_download
     os.makedirs(f"checkpoints/{repo_id}", exist_ok=True)
+    if "stories" in repo_id:
+        download_tinyllamas(repo_id, f"checkpoints/{repo_id}")
+        return
     try:
         snapshot_download(repo_id, local_dir=f"checkpoints/{repo_id}", local_dir_use_symlinks=False, token=hf_token)
     except HTTPError as e:
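
A note on the `model.py` hunk: the explicit `intermediate_size=1376` for `stories42M` follows from a rounding mismatch between the two codebases. llama2.c rounds the SwiGLU hidden dimension up to a multiple of 32, while gpt-fast's `ModelArgs` default rounds up to a multiple of 256 (both multiples are taken from reading the respective sources, not from this diff, so treat them as assumptions). A minimal sketch of the arithmetic:

```python
# Sketch: why stories42M needs an explicit intermediate_size while 15M/110M
# can rely on the default. Assumes llama2.c rounds the FFN hidden dim up to a
# multiple of 32 and gpt-fast's ModelArgs rounds up to a multiple of 256.

def ffn_hidden(dim: int, multiple_of: int) -> int:
    n_hidden = int(2 * (4 * dim) / 3)  # SwiGLU sizing: 2/3 of the classic 4*dim
    return ((n_hidden + multiple_of - 1) // multiple_of) * multiple_of

for name, dim in [("stories15M", 288), ("stories42M", 512), ("stories110M", 768)]:
    print(name, ffn_hidden(dim, 32), ffn_hidden(dim, 256))
# stories15M   768  768  -> defaults agree
# stories42M  1376 1536  -> mismatch, hence intermediate_size=1376 in the config
# stories110M 2048 2048  -> defaults agree
```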
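
On the `scripts/download.py` hunk: `subprocess.run(["wget", ...])` assumes `wget` is on `PATH`, which is often not true on macOS or Windows. A hypothetical wget-free variant, assuming `huggingface_hub` is available (the script already imports `snapshot_download` from it) and using only the standard library for the tokenizer fetch:

```python
import os
import shutil
import urllib.request

from huggingface_hub import hf_hub_download

def download_tinyllamas_pure_python(repo_id: str, local_dir: str) -> None:
    model_name = repo_id.split("/")[-1]  # e.g. "stories15M"
    # Pull the weights through the hub client instead of shelling out to wget.
    cached = hf_hub_download(repo_id="karpathy/tinyllamas", filename=f"{model_name}.pt")
    shutil.copy(cached, os.path.join(local_dir, "model.pth"))
    # The tokenizer lives in the llama2.c GitHub repo, so plain urllib suffices.
    tokenizer_url = "https://github.com/karpathy/llama2.c/raw/master/tokenizer.model"
    urllib.request.urlretrieve(tokenizer_url, os.path.join(local_dir, "tokenizer.model"))
```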
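
With these changes, the stories models should slot into the repo's existing flow (flag names assumed from the current scripts, so verify locally): `python scripts/download.py --repo_id tinyllamas/stories15M` places `model.pth` and `tokenizer.model` under `checkpoints/tinyllamas/stories15M/`, and `convert_hf_checkpoint.py` now returns early for these checkpoints since, per the comment in the diff, the llama2.c `.pt` files need no conversion.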