diff --git a/README.md b/README.md
index f11e5d8..7df8bc5 100644
--- a/README.md
+++ b/README.md
@@ -126,7 +126,7 @@ result = code_generation_model.predict(["def print_hello_world():"])
 print(result)
 ```
 There are a few notable arguments that need to be considered:
-- ``model_name``: the name of the model, currently support ``codet5`` and ``causal-lm``.
+- ``model_name``: the name of the model, currently support ``codet5`` and ``causallm``.
 - ``model_type``: type of model for each model name, e.g. ``base``, ``codegen-350M-mono``, ``j-6B``, etc.
 - ``load_in_8bit`` and ``load_in_4bit``: inherit the dynamic quantization feature from [Huggingface Quantization](https://huggingface.co/docs/transformers/main/main_classes/quantization).
 - ``weight_sharding``: our advance feature that leverages [HuggingFace Sharded Checkpoint](https://huggingface.co/docs/accelerate/v0.19.0/en/package_reference/big_modeling#accelerate.load_checkpoint_and_dispatch) to split a large model in several smaller shards in different GPUs. Please consider using this if you are dealing with large models.
@@ -226,7 +226,7 @@ from codetf.performance.model_evaluator import ModelEvaluator
 os.environ["HF_ALLOW_CODE_EVAL"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "true"
 
-model_class = load_model_pipeline(model_name="causal-lm", task="pretrained",
+model_class = load_model_pipeline(model_name="causallm", task="pretrained",
                 model_type="codegen-350M-mono", is_eval=True,
                 load_in_8bit=True, weight_sharding=False)
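
For reference, a minimal sketch of how the renamed identifier would be used after this change, combining the ``predict`` call and the ``load_model_pipeline`` arguments shown in the two hunks above. The ``from codetf.models import load_model_pipeline`` import path is an assumption; only the ``codetf.performance`` import appears in this diff.

```python
# Minimal sketch, not part of the diff: load a causal LM under the renamed
# "causallm" identifier and run the prediction call shown around README line 126.
# The import path below is an assumption; it is not shown in the hunks above.
from codetf.models import load_model_pipeline

code_generation_model = load_model_pipeline(
    model_name="causallm",       # previously "causal-lm"
    task="pretrained",
    model_type="codegen-350M-mono",
    is_eval=True,
    load_in_8bit=True,           # dynamic quantization via Huggingface
    weight_sharding=False,       # enable when sharding a large model across GPUs
)

result = code_generation_model.predict(["def print_hello_world():"])
print(result)
```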