Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions BitNet_LLM_OnPC.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 118
},
"id": "0ni13B9CW72b",
"outputId": "d827ca77-e6da-47a2-f6f6-81a4318db5a0"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading completed successfully\n"
]
}
],
"source": [
"# @title # BitNet.CPP On CPU - Google Colab {\"display-mode\":\"form\"}\n",
"# @markdown ## BitNet.CPP is Efficient LLM Inference on CPUs\n",
"# @markdown\n",
"# @markdown BitNet is a cutting-edge innovation by Microsoft Research, designed to make large language models (LLMs) more efficient. The model operates using 1.58-bit weights, reducing memory consumption, improving energy efficiency, and speeding up inference without sacrificing performance. This approach brings significant improvements over traditional FP16 or BF16 models, providing up to 4x faster inference and using 7x less memory.\n",
"# @markdown\n",
"# @markdown > Give it a try and see how BitNet is revolutionizing LLM performance!\n",
"# @markdown\n",
"# @markdown\n",
"from IPython.display import clear_output, display, HTML\n",
"\n",
"\n",
"def show_progress(percentage, activity, show=True):\n",
"    \"\"\"Replace the cell output with a fixed status banner for the current step.\n",
"\n",
"    percentage: kept for call-site compatibility (the indeterminate bar does not use it).\n",
"    activity:   status text rendered in the banner.\n",
"    show:       True renders an indeterminate progress bar under the text;\n",
"                False renders the text only (step finished).\n",
"    \"\"\"\n",
"    clear_output(wait=True)\n",
"    # Hide only the bar when show is False; the original guarded the whole display()\n",
"    # call with `if show:`, which made its inner display-none branch unreachable and\n",
"    # silently dropped any message passed with show=False.\n",
"    bar = '<progress style=\"width:100%\"></progress>' if show else ''\n",
"    display(HTML(f'''<div style=\"position:fixed;top:0;left:0;width:100%;background-color:#f0f0f0;padding:10px;text-align:center;\">\n",
"    <p>{activity}</p>{bar}</div>\n",
"    '''))\n",
"\n",
"\n",
"hf_model_name = \"1bitLLM/bitnet_b1_58-large\" # @param [\"1bitLLM/bitnet_b1_58-large\",\"1bitLLM/bitnet_b1_58-3B\",\"HF1BitLLM/Llama3-8B-1.58-100B-tokens\"]\n",
"model_quant_type = \"tl2\" # @param [\"tl2\",\"tl1\"]\n",
"\n",
"\n",
"# Install a recent LLVM/clang toolchain, required to build BitNet's C++ backend.\n",
"!bash -c \"$(wget -O - https://apt.llvm.org/llvm.sh)\"\n",
"\n",
"show_progress(0, \"Cloning BitNet...\")\n",
"!git clone --recursive https://github.com/microsoft/BitNet.git\n",
"%cd BitNet\n",
"\n",
"# Pip installation; output is appended to log.bitnet to keep the cell quiet.\n",
"show_progress(20, \"Upgrading pip and installing requirements.txt...\")\n",
"!pip install --upgrade pip >> log.bitnet\n",
"!pip install -r requirements.txt >> log.bitnet\n",
"\n",
"\n",
"# Download the model from Hugging Face, convert it to quantized gguf format, and build the project.\n",
"# Only tl1/tl2 are offered above: converting with i2_s failed every time.\n",
"show_progress(50, f\"Downloading and converting {hf_model_name} model to gguf...\")\n",
"!python3 setup_env.py --hf-repo {hf_model_name} -q {model_quant_type} >> log.bitnet\n",
"show_progress(50, f\"The model {hf_model_name} is Ready for inference\", False)\n",
"print(\"Downloading completed successfully\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iAuraiJjbeJ9",
"outputId": "e10446e6-177a-448c-fc92-e84621e6ddf4"
},
"outputs": [],
"source": [
"# @title # Test the completion with the model you just downloaded {\"display-mode\":\"form\"}\n",
"prompt = \"Youtube is \" # @param {\"type\":\"string\"}\n",
"token_number = 1200 # @param {\"type\":\"number\",\"placeholder\":\"how many tokens to be generated\"}\n",
"\n",
"show_progress(100, \"Complete\", False)\n",
"\n",
"# Re-derive the converted gguf path from the repo/quant choices made in the setup cell.\n",
"model_selected = hf_model_name.split('/')[1]\n",
"converted_model_path = f\"models/{model_selected}/ggml-model-{model_quant_type}.gguf\"\n",
"# -temp is the sampling temperature: 0.7 keeps output coherent (the original 7 would\n",
"# flatten the distribution to near-random token sampling).\n",
"command = f'run_inference.py -m {converted_model_path} -p \"{prompt}\" -n {token_number} -temp 0.7'\n",
"!python3 {command}\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}