From cc44323a9851e7e05202178d66851c5d26754231 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 24 Apr 2024 02:21:46 -0400
Subject: [PATCH 1/2] add download script for tinyllamas

Signed-off-by: yiliu30
---
 README.md                        |  2 +-
 scripts/convert_hf_checkpoint.py |  3 +++
 scripts/download.py              | 17 +++++++++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ff12ad9e..38d7c15d 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ Then login with `huggingface-cli login`
 ## Downloading Weights
 Models tested/supported
 ```text
-tinyllamas/stories{15,42,100}
+tinyllamas/stories{15M,42M,100M}
 openlm-research/open_llama_7b
 meta-llama/Llama-2-7b-chat-hf
 meta-llama/Llama-2-13b-chat-hf
diff --git a/scripts/convert_hf_checkpoint.py b/scripts/convert_hf_checkpoint.py
index b92114c4..72beec89 100644
--- a/scripts/convert_hf_checkpoint.py
+++ b/scripts/convert_hf_checkpoint.py
@@ -24,6 +24,9 @@ def convert_hf_checkpoint(
     checkpoint_dir: Path = Path("checkpoints/meta-Transformer/Transformer-2-7b-chat-hf"),
     model_name: Optional[str] = None,
 ) -> None:
+    if "stories" in checkpoint_dir.name:
+        # No need to convert tinyllamas
+        return
     if model_name is None:
         model_name = checkpoint_dir.name

diff --git a/scripts/download.py b/scripts/download.py
index a968cf33..68dabdf3 100644
--- a/scripts/download.py
+++ b/scripts/download.py
@@ -7,11 +7,28 @@
 from typing import Optional

 from requests.exceptions import HTTPError
+import subprocess


+def download_tinyllamas(repo_id: str, local_dir: str) -> None:
+    try:
+        model_name = repo_id.split("/")[-1]
+        # Download model weights
+        weight_url = "https://huggingface.co/karpathy/tinyllamas/resolve/main/" + model_name + ".pt"
+        weight_dst_path = os.path.join(local_dir, "model.pth")
+        subprocess.run(["wget", weight_url, "-O", weight_dst_path], check=True)
+        # Download tokenizer model
+        tokenizer_url = "https://github.com/karpathy/llama2.c/blob/master/tokenizer.model"
+        tokenizer_dst_path: str = os.path.join(local_dir, "tokenizer.model")
+        subprocess.run(["wget", tokenizer_url, "-O", tokenizer_dst_path], check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to download {repo_id}: {e}")
 def hf_download(repo_id: Optional[str] = None, hf_token: Optional[str] = None) -> None:
     from huggingface_hub import snapshot_download
     os.makedirs(f"checkpoints/{repo_id}", exist_ok=True)
+    if "stories" in repo_id:
+        download_tinyllamas(repo_id, f"checkpoints/{repo_id}")
+        return
     try:
         snapshot_download(repo_id, local_dir=f"checkpoints/{repo_id}", local_dir_use_symlinks=False, token=hf_token)
     except HTTPError as e:

From b9b5758a4555c6c447c42c29cadbfcaef81edfcc Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Sat, 18 May 2024 00:53:59 -0400
Subject: [PATCH 2/2] correct tokenizer model link

Signed-off-by: yiliu30
---
 model.py            | 2 ++
 quantize.py         | 2 +-
 scripts/download.py | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/model.py b/model.py
index fbb60405..0d195eec 100644
--- a/model.py
+++ b/model.py
@@ -63,7 +63,9 @@ def from_name(cls, name: str):
     "34B": dict(n_layer=48, n_head=64, dim=8192, vocab_size=32000, n_local_heads=8, intermediate_size=22016, rope_base=1000000), # CodeLlama-34B-Python-hf
     "70B": dict(n_layer=80, n_head=64, dim=8192, n_local_heads=8, intermediate_size=28672),
     "Mistral-7B": dict(n_layer=32, n_head=32, n_local_heads=8, dim=4096, intermediate_size=14336, vocab_size=32000),
+    # For the stories models, refer to https://github.com/karpathy/llama2.c?tab=readme-ov-file#models
     "stories15M": dict(n_layer=6, n_head=6, dim=288),
+    "stories42M": dict(n_layer=8, n_head=8, dim=512, intermediate_size=1376),
     "stories110M": dict(n_layer=12, n_head=12, dim=768),
 }

diff --git a/quantize.py b/quantize.py
index af17a698..91416d8d 100644
--- a/quantize.py
+++ b/quantize.py
@@ -578,7 +578,7 @@ def quantize(
         quant_handler = WeightOnlyInt4GPTQQuantHandler(model, groupsize)

         tokenizer_path = checkpoint_path.parent / "tokenizer.model"
-        assert tokenizer_path.is_file(), tokenizer_path
+        assert tokenizer_path.is_file(), f"{tokenizer_path} is not a file."
         tokenizer = SentencePieceProcessor(model_file=str(tokenizer_path))

         quantized_state_dict = quant_handler.create_quantized_state_dict(
diff --git a/scripts/download.py b/scripts/download.py
index 68dabdf3..06a70328 100644
--- a/scripts/download.py
+++ b/scripts/download.py
@@ -17,7 +17,7 @@ def download_tinyllamas(repo_id: str, local_dir: str) -> None:
         weight_dst_path = os.path.join(local_dir, "model.pth")
         subprocess.run(["wget", weight_url, "-O", weight_dst_path], check=True)
         # Download tokenizer model
-        tokenizer_url = "https://github.com/karpathy/llama2.c/blob/master/tokenizer.model"
+        tokenizer_url = "https://github.com/karpathy/llama2.c/raw/master/tokenizer.model"
         tokenizer_dst_path: str = os.path.join(local_dir, "tokenizer.model")
         subprocess.run(["wget", tokenizer_url, "-O", tokenizer_dst_path], check=True)
     except subprocess.CalledProcessError as e:
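
A note on the download_tinyllamas helper added above: it shells out to `wget`, which is not installed by default on every host (macOS, slim containers). A minimal pure-standard-library sketch of the same two downloads is below. This is an illustrative alternative, not part of the patches; the helper name is hypothetical, and the URLs are the ones the patches use after the tokenizer-link fix.

    import os
    import urllib.request


    def download_tinyllamas_stdlib(repo_id: str, local_dir: str) -> None:
        """Fetch a tinyllamas checkpoint plus tokenizer without relying on wget."""
        model_name = repo_id.split("/")[-1]  # e.g. "stories15M"
        targets = {
            # destination filename -> source URL (same URLs as the patches, post-fix)
            "model.pth": f"https://huggingface.co/karpathy/tinyllamas/resolve/main/{model_name}.pt",
            "tokenizer.model": "https://github.com/karpathy/llama2.c/raw/master/tokenizer.model",
        }
        os.makedirs(local_dir, exist_ok=True)
        for filename, url in targets.items():
            dst = os.path.join(local_dir, filename)
            print(f"Downloading {url} -> {dst}")
            urllib.request.urlretrieve(url, dst)

With either variant, the new branch in hf_download keeps the usual gpt-fast flow working, e.g. `python scripts/download.py --repo_id tinyllamas/stories15M`; the early return added to convert_hf_checkpoint.py then skips conversion for the stories checkpoints.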