diff --git a/dockerfiles/llmbench/vllm-workspace/bench.sh b/dockerfiles/llmbench/vllm-workspace/bench.sh
index 66323fe..5be5141 100644
--- a/dockerfiles/llmbench/vllm-workspace/bench.sh
+++ b/dockerfiles/llmbench/vllm-workspace/bench.sh
@@ -9,8 +9,8 @@ declare -a isl_list=(3000 1000 500)
 declare -a osl_list=(150 1000 1000)
 
 for i in "${!isl_list[@]}"; do
-    ISL=${isl_list[$i]}
-    OSL=${osl_list[$i]}
+    isl=${isl_list[$i]}
+    osl=${osl_list[$i]}
 
     echo "======================================================"
-    echo " Running benchmark with 'ISL:OSL=${ISL}:${OSL}' "
+    echo " Running benchmark with 'ISL:OSL=${isl}:${osl}' "
     echo " OpenAI Server 'http://${HOST}:${PORT}' "
@@ -18,7 +18,13 @@ for i in "${!isl_list[@]}"; do
     echo " Tokenizer '${TOKENIZER}' "
     echo "======================================================"
 
-    ISL=${ISL} OSL=${OSL} bash benchmark_serving_concurrency.sh
+    bash benchmark_serving_concurrency_list.sh \
+        --host ${HOST} \
+        --port ${PORT} \
+        --model ${MODEL} \
+        --tokenizer ${TOKENIZER} \
+        -isl ${isl} \
+        -osl ${osl}
 
     echo ""
     echo ""
diff --git a/dockerfiles/llmbench/vllm-workspace/benchmark_cron.sh b/dockerfiles/llmbench/vllm-workspace/benchmark_cron.sh
new file mode 100644
index 0000000..9620eff
--- /dev/null
+++ b/dockerfiles/llmbench/vllm-workspace/benchmark_cron.sh
@@ -0,0 +1,252 @@
+#!/bin/bash
+
+# Default values
+PEAK_HOURS="9-10,13-15"
+MAX_CONCURRENCY="256"
+MIN_CONCURRENCY="16"
+HOST="localhost"
+PORT="8000"
+MODEL="deepseek-ai/DeepSeek-R1"
+TOKENIZER="/workspace/tokenizer/${MODEL}"
+ISL="1000"
+OSL="1000"
+TIMEZONE="$(date +%Z)"  # Default to system timezone
+OUTPUT_RESULT_FILE="benchmark_result_${ISL}_${OSL}.md"
+
+# Signal handling variables
+should_exit=0
+
+# Function to handle SIGINT (Ctrl+C)
+handle_sigint() {
+    echo -e "\nReceived interrupt signal (Ctrl+C), exiting gracefully..."
+    should_exit=1
+}
+
+# Trap SIGINT signal
+trap handle_sigint SIGINT
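+
+# Note: the trap only sets a flag instead of exiting immediately, so an
+# in-flight benchmark run is allowed to finish; the main loop at the bottom
+# of this script checks `should_exit` before starting the next run.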
+
+# Function to print help message
+print_help() {
+    echo "Usage: $0 [OPTIONS]"
+    echo
+    echo "Examples: $0 \\"
+    echo "    --host localhost \\"
+    echo "    --port 8000 \\"
+    echo "    --model Qwen/Qwen3-32B \\"
+    echo "    --tokenizer /workspace/tokenizer/Qwen/Qwen3-32B \\"
+    echo "    --peak-hours 9-10,13-15 \\"
+    echo "    --timezone Asia/Shanghai"
+    echo
+    echo "Options:"
+    echo "  --peak-hours               Comma-separated peak hour ranges, e.g. 9-10,13-15 (default: ${PEAK_HOURS})"
+    echo "  -max, --max-concurrency    Base concurrency during peak hours (default: ${MAX_CONCURRENCY})"
+    echo "  -min, --min-concurrency    Base concurrency during off-peak hours (default: ${MIN_CONCURRENCY})"
+    echo "  --timezone                 Timezone for hour calculation (default: ${TIMEZONE})"
+    echo "  --host                     Target host for inference requests (default: ${HOST})"
+    echo "  --port                     Target port for inference requests (default: ${PORT})"
+    echo "  --model                    Model ID to benchmark (default: ${MODEL})"
+    echo "  --tokenizer                Tokenizer path (default: ${TOKENIZER})"
+    echo "  -isl, --input-seq-len      Input sequence length (default: ${ISL})"
+    echo "  -osl, --output-seq-len     Output sequence length (default: ${OSL})"
+    echo "  -h, --help                 Show this help message and exit"
+    exit 0
+}
+
+# Function to get the current hour (0-23) in the configured timezone (TZ)
+get_current_hour() {
+    date +%-H
+}
+
+# Function to check if the given hour is within any peak range
+is_peak_hour() {
+    local current_hour=$1
+    for range in "${PEAK_RANGES[@]}"; do
+        IFS='-' read -r start end <<< "${range}"
+        if [[ ${current_hour} -ge ${start} && ${current_hour} -le ${end} ]]; then
+            return 0
+        fi
+    done
+    return 1
+}
+
+# Function to calculate the circular (wrap-around midnight) distance, in hours,
+# to the nearest peak-range boundary
+calculate_distance_to_peak() {
+    local current_hour=$1
+    local min_distance=24  # Initialize with maximum possible distance
+
+    for range in "${PEAK_RANGES[@]}"; do
+        IFS='-' read -r peak_start peak_end <<< "${range}"
+
+        # Distance to the peak period start, in both directions around the clock
+        x=${peak_start}
+        y=${current_hour}
+        distance_to_start_1=$(( ($y - $x + 24) % 24 ))
+        distance_to_start_2=$(( ($x - $y + 24) % 24 ))
+        if [[ ${distance_to_start_1} -lt ${distance_to_start_2} ]]; then
+            distance_to_start=${distance_to_start_1}
+        else
+            distance_to_start=${distance_to_start_2}
+        fi
+
+        # Distance to the peak period end, in both directions around the clock
+        x=${peak_end}
+        y=${current_hour}
+        distance_to_end_1=$(( ($y - $x + 24) % 24 ))
+        distance_to_end_2=$(( ($x - $y + 24) % 24 ))
+        if [[ ${distance_to_end_1} -lt ${distance_to_end_2} ]]; then
+            distance_to_end=${distance_to_end_1}
+        else
+            distance_to_end=${distance_to_end_2}
+        fi
+
+        # The actual distance is the minimum of these two values; this covers
+        # cases where the current time is before, during, or after the peak period
+        if [[ ${distance_to_start} -lt ${distance_to_end} ]]; then
+            current_distance=${distance_to_start}
+        else
+            current_distance=${distance_to_end}
+        fi
+
+        # Keep track of the smallest distance across all peak ranges
+        if [[ ${current_distance} -lt ${min_distance} ]]; then
+            min_distance=${current_distance}
+        fi
+    done
+
+    echo ${min_distance}
+}
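+
+# Worked example: with a peak range of 9-10 and current hour 22, the distance
+# to the start hour 9 is min((22-9+24)%24, (9-22+24)%24) = min(13, 11) = 11 and
+# the distance to the end hour 10 is min(12, 12) = 12, so this range reports 11.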
+
+# Function to run the benchmark
+run_benchmark() {
+    local concurrency=$1
+    local temp_dir=$(mktemp -d)
+    local result_file="${temp_dir}/bench_${ISL}_${OSL}_c${concurrency}.json"
+
+    # Initialize the results file once (header + column layout), so rows from
+    # successive runs accumulate instead of overwriting each other
+    if [[ ! -f ${OUTPUT_RESULT_FILE} ]]; then
+        echo "# Benchmark Result (ISL:OSL=${ISL}:${OSL})" > ${OUTPUT_RESULT_FILE}
+        echo "| Concurrency | Request Throughput | Output Throughput | Mean E2EL (ms) | P99 E2EL (ms) | Mean TTFT (ms) | P99 TTFT (ms) | Mean TPOT (ms) | P99 TPOT (ms) |" >> ${OUTPUT_RESULT_FILE}
+        echo "|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|" >> ${OUTPUT_RESULT_FILE}
+    fi
+
+    echo "[$(date +'%Y-%m-%d %H:%M:%S')] Running benchmark with concurrency: ${concurrency}"
+    uv run python3 vllm-benchmarks/benchmark_serving.py \
+        --backend openai-chat \
+        --model ${TOKENIZER} \
+        --served-model-name ${MODEL} \
+        --host ${HOST} --port ${PORT} \
+        --endpoint /v1/chat/completions \
+        --dataset-name random \
+        --random_input_len ${ISL} \
+        --random_output_len ${OSL} \
+        --max-concurrency ${concurrency} \
+        --num-prompts $((${concurrency}*10)) \
+        --save-result --result-filename ${result_file} \
+        --percentile-metrics ttft,tpot,itl,e2el \
+        --ignore-eos > /dev/null 2>&1
+    echo "[$(date +'%Y-%m-%d %H:%M:%S')] Finished benchmark with concurrency: ${concurrency}"
+
+    # Parse results into shell variables (rounded to 2 decimals) and append a table row
+    eval $(cat ${result_file} | jq -r '
+        . | {request_throughput, output_throughput, mean_e2el_ms, p99_e2el_ms, mean_ttft_ms, p99_ttft_ms, mean_tpot_ms, p99_tpot_ms}
+        | to_entries[]
+        | "\(.key)=\(.value | if type == "number" then (. * 100 | round) / 100 else . end)"
+    ')
+    echo "| ${concurrency} | ${request_throughput} | ${output_throughput} | ${mean_e2el_ms} | ${p99_e2el_ms} | ${mean_ttft_ms} | ${p99_ttft_ms} | ${mean_tpot_ms} | ${p99_tpot_ms} |" >> ${OUTPUT_RESULT_FILE}
+}
+
+# Function to precompute base concurrency for all hours
+precompute_concurrency() {
+    for hour in {0..23}; do
+        # Check if hour is within any peak range
+        if is_peak_hour ${hour}; then
+            HOURLY_CONCURRENCY[$hour]=${MAX_CONCURRENCY}
+        else
+            distance=$(calculate_distance_to_peak ${hour})
+            # Base concurrency decays linearly with distance from the nearest peak
+            HOURLY_CONCURRENCY[$hour]=$(( ${MIN_CONCURRENCY} + ( (${MAX_CONCURRENCY} - ${MIN_CONCURRENCY}) / 12 * (10 - ${distance}) ) ))
+            # Clamp to the floor so hours far from any peak never drop below MIN_CONCURRENCY
+            if [[ ${HOURLY_CONCURRENCY[$hour]} -lt ${MIN_CONCURRENCY} ]]; then
+                HOURLY_CONCURRENCY[$hour]=${MIN_CONCURRENCY}
+            fi
+        fi
+    done
+
+    # Print hourly concurrency
+    echo "Hourly Base Concurrency Preview:"
+    for hour in {0..23}; do
+        printf "Hour %2d: %3d\n" ${hour} ${HOURLY_CONCURRENCY[${hour}]}
+    done
+}
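+
+# Worked example of the mapping above with the defaults (MIN=16, MAX=256):
+# the per-hour step is (256-16)/12 = 20, so an hour at distance 9 from the
+# nearest peak gets 16 + 20*(10-9) = 36 and an hour at distance 3 gets
+# 16 + 20*7 = 156.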
+
+# Global variables
+declare -a HOURLY_CONCURRENCY
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --timezone)
+            TIMEZONE="$2"
+            shift 2
+            ;;
+        --peak-hours)
+            PEAK_HOURS="$2"
+            shift 2
+            ;;
+        -max|--max-concurrency)
+            MAX_CONCURRENCY="$2"
+            shift 2
+            ;;
+        -min|--min-concurrency)
+            MIN_CONCURRENCY="$2"
+            shift 2
+            ;;
+        --host)
+            HOST="$2"
+            shift 2
+            ;;
+        --port)
+            PORT="$2"
+            shift 2
+            ;;
+        --model)
+            MODEL="$2"
+            shift 2
+            ;;
+        --tokenizer)
+            TOKENIZER="$2"
+            shift 2
+            ;;
+        -isl|--input-seq-len)
+            ISL="$2"
+            shift 2
+            ;;
+        -osl|--output-seq-len)
+            OSL="$2"
+            shift 2
+            ;;
+        -h|--help)
+            print_help
+            ;;
+        *)
+            echo "Unknown option: $1"
+            exit 1
+            ;;
+    esac
+done
+
+# Recompute the result file name in case ISL/OSL were overridden above
+OUTPUT_RESULT_FILE="benchmark_result_${ISL}_${OSL}.md"
+
+export TZ=${TIMEZONE}
+
+# Split PEAK_HOURS into an array of ranges
+IFS=',' read -ra PEAK_RANGES <<< "${PEAK_HOURS}"
+
+# Precompute and show hourly concurrency
+precompute_concurrency
+
+# Simulate business tides with dynamic concurrency
+while [[ ${should_exit} -eq 0 ]]; do
+    current_hour=$(get_current_hour)
+    base_concurrency=${HOURLY_CONCURRENCY[${current_hour}]}
+
+    # Add random spikes so the load is not perfectly smooth
+    spike=$((RANDOM % 16))
+    concurrency=$((base_concurrency + spike))
+
+    # Run the benchmark
+    run_benchmark ${concurrency}
+done
diff --git a/dockerfiles/llmbench/vllm-workspace/benchmark_scaling.sh b/dockerfiles/llmbench/vllm-workspace/benchmark_scaling.sh
new file mode 100644
index 0000000..78de56b
--- /dev/null
+++ b/dockerfiles/llmbench/vllm-workspace/benchmark_scaling.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+# Default values
+INITIAL_CONCURRENCY=16
+MAX_CONCURRENCY=256
+INTERVAL_SECONDS=60
+HOST="localhost"
+PORT="8000"
+MODEL="deepseek-ai/DeepSeek-R1"
+TOKENIZER="/workspace/tokenizer/${MODEL}"
+ISL="1000"
+OSL="1000"
+MAX_CONCURRENCY_LOOP=3
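+
+# Example invocation (values are illustrative):
+#   bash benchmark_scaling.sh --initial-concurrency 16 --max-concurrency 256 \
+#     --max-concurrency-loop 3 --interval 60 --host localhost --port 8000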
+
+# Function to print help message
+print_help() {
+    echo "Usage: $0 [OPTIONS]"
+    echo
+    echo "Options:"
+    echo "  --initial-concurrency     Initial concurrency level (default: ${INITIAL_CONCURRENCY})"
+    echo "  --max-concurrency         Maximum concurrency level (default: ${MAX_CONCURRENCY})"
+    echo "  --max-concurrency-loop    Number of times to repeat max concurrency (default: ${MAX_CONCURRENCY_LOOP})"
+    echo "  --interval                Interval in seconds between concurrency increases (default: ${INTERVAL_SECONDS})"
+    echo "  --host                    Target host for inference requests (default: ${HOST})"
+    echo "  --port                    Target port for inference requests (default: ${PORT})"
+    echo "  --model                   Model ID to benchmark (default: ${MODEL})"
+    echo "  --tokenizer               Tokenizer path (default: ${TOKENIZER})"
+    echo "  -isl, --input-seq-len     Input sequence length (default: ${ISL})"
+    echo "  -osl, --output-seq-len    Output sequence length (default: ${OSL})"
+    echo "  -h, --help                Show this help message and exit"
+    exit 0
+}
+
+# Function to parse command line arguments
+get_options() {
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            --initial-concurrency)
+                INITIAL_CONCURRENCY="$2"
+                shift 2
+                ;;
+            --max-concurrency)
+                MAX_CONCURRENCY="$2"
+                shift 2
+                ;;
+            --max-concurrency-loop)
+                MAX_CONCURRENCY_LOOP="$2"
+                shift 2
+                ;;
+            --interval)
+                INTERVAL_SECONDS="$2"
+                shift 2
+                ;;
+            --host)
+                HOST="$2"
+                shift 2
+                ;;
+            --port)
+                PORT="$2"
+                shift 2
+                ;;
+            --model)
+                MODEL="$2"
+                shift 2
+                ;;
+            --tokenizer)
+                TOKENIZER="$2"
+                shift 2
+                ;;
+            -isl|--input-seq-len)
+                ISL="$2"
+                shift 2
+                ;;
+            -osl|--output-seq-len)
+                OSL="$2"
+                shift 2
+                ;;
+            -h|--help)
+                print_help
+                ;;
+            *)
+                echo "Unknown option: $1"
+                exit 1
+                ;;
+        esac
+    done
+}
+
+# Function to log messages
+log() {
+    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1"
+}
+
+# Function to run the benchmark
+run_benchmark() {
+    local concurrency=$1
+    local temp_dir=$(mktemp -d)
+    local result_file="${temp_dir}/bench_${ISL}_${OSL}_c${concurrency}.json"
+    local output_result_file="benchmark_result_${ISL}_${OSL}_c${concurrency}.md"
+
+    log "Running benchmark with concurrency: ${concurrency}"
+    uv run python3 vllm-benchmarks/benchmark_serving.py \
+        --backend openai-chat \
+        --model ${TOKENIZER} \
+        --served-model-name ${MODEL} \
+        --host ${HOST} --port ${PORT} \
+        --endpoint /v1/chat/completions \
+        --dataset-name random \
+        --random_input_len ${ISL} \
+        --random_output_len ${OSL} \
+        --max-concurrency ${concurrency} \
+        --num-prompts $((${concurrency} * 10)) \
+        --save-result --result-filename ${result_file} \
+        --percentile-metrics ttft,tpot,itl,e2el \
+        --ignore-eos
+
+    # Generate markdown table for benchmark result
+    eval $(cat ${result_file} | jq -r '
+        . | {request_throughput, output_throughput, mean_e2el_ms, p99_e2el_ms, mean_ttft_ms, p99_ttft_ms, mean_tpot_ms, p99_tpot_ms}
+        | to_entries[]
+        | "\(.key)=\(.value | if type == "number" then (. * 100 | round) / 100 else . end)"
+    ')
+    log "Benchmark completed. Results saved to '${output_result_file}'"
+    echo "# Benchmark Result (ISL:OSL=${ISL}:${OSL})
+| Concurrency | Request Throughput (req/s) | Output Token Throughput (tok/s) | Mean E2E Latency (ms) | P99 E2E Latency (ms) | Mean TTFT (ms) | P99 TTFT (ms) | Mean TPOT (ms) | P99 TPOT (ms) |
+|:-----------:|:---------------------------:|:-------------------------------:|:---------------------:|:-------------------:|:--------------:|:-------------:|:--------------:|:-------------:|
+| ${concurrency} | ${request_throughput} | ${output_throughput} | ${mean_e2el_ms} | ${p99_e2el_ms} | ${mean_ttft_ms} | ${p99_ttft_ms} | ${mean_tpot_ms} | ${p99_tpot_ms} |
+" > ${output_result_file}
+}
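+
+# The main flow below produces a trapezoidal load pattern: concurrency doubles
+# from INITIAL_CONCURRENCY up to MAX_CONCURRENCY, holds at the maximum for
+# MAX_CONCURRENCY_LOOP runs, then halves back down to the initial level.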
end)" -') -echo "[tke-llmbench] save result to '${RESULT_FILENAME}'" -echo "# Benchmark Result +# Default values +HOST="localhost" +PORT="8000" +MODEL="deepseek-ai/DeepSeek-R1" +TOKENIZER="/workspace/tokenizer/${MODEL}" +CONCURRENCY="64" +ISL="1000" +OSL="1000" +# Function to print help message +print_help() { + echo "Usage: $0 [OPTIONS]" + echo + echo "Options:" + echo " --host Target host for inference requests (default: ${HOST})" + echo " --port Target port for inference requests (default: ${PORT})" + echo " --model Model ID to benchmark (default: ${MODEL})" + echo " --tokenizer Tokenizer path (default: ${TOKENIZER})" + echo " -isl, --input-sequence-length Input sequence length (default: ${ISL})" + echo " -osl, --output-sequence-length Output sequence length (default: ${OSL})" + echo " --concurrency Concurrency level (default: ${CONCURRENCY})" + echo " -h, --help Show this help message and exit" + echo + exit 0 +} + +# Function to parse command line arguments +get_options() { + while [[ $# -gt 0 ]]; do + case $1 in + --host) + HOST="$2" + shift 2 + ;; + --port) + PORT="$2" + shift 2 + ;; + --model) + MODEL="$2" + shift 2 + ;; + --tokenizer) + TOKENIZER="$2" + shift 2 + ;; + -isl|--input-sequence-length) + ISL="$2" + shift 2 + ;; + -osl|--output-sequence-length) + OSL="$2" + shift 2 + ;; + --concurrency) + CONCURRENCY="$2" + shift 2 + ;; + -h|--help) + print_help + exit 1 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac + done +} + +# Function to log messages +log() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" +} + +run_benchmark() { + local temp_dir=$(mktemp -d) + local result_file="${temp_dir}/bench_${ISL}_${OSL}_c${CONCURRENCY}.json" + local output_result_file="benchmark_result_${ISL}_${OSL}_c${CONCURRENCY}.md" + + + log "Running benchmark with concurrency: ${CONCURRENCY}" + uv run python3 vllm-benchmarks/benchmark_serving.py \ + --backend openai-chat \ + --model ${TOKENIZER} \ + --served-model-name ${MODEL} \ + --host ${HOST} --port ${PORT} \ + --endpoint /v1/chat/completions \ + --dataset-name random \ + --random_input_len ${ISL} \ + --random_output_len ${OSL} \ + --max-concurrency ${CONCURRENCY} \ + --num-prompts $(($CONCURRENCY*10)) \ + --save-result --result-filename ${result_file} \ + --percentile-metrics ttft,tpot,itl,e2el \ + --ignore-eos + + # generate markdown table for benchmark result + eval $(cat ${result_file} | jq -r ' + . | {request_throughput, output_throughput, mean_e2el_ms, p99_e2el_ms, mean_ttft_ms, p99_ttft_ms, mean_tpot_ms, p99_tpot_ms} + | to_entries[] + | "\(.key)=\(.value | if type == "number" then (. * 100 | round) / 100 else . end)" + ') + log "Benchmark completed. 
+
+# Function to print help message
+print_help() {
+    echo "Usage: $0 [OPTIONS]"
+    echo
+    echo "Options:"
+    echo "  --host                            Target host for inference requests (default: ${HOST})"
+    echo "  --port                            Target port for inference requests (default: ${PORT})"
+    echo "  --model                           Model ID to benchmark (default: ${MODEL})"
+    echo "  --tokenizer                       Tokenizer path (default: ${TOKENIZER})"
+    echo "  -isl, --input-sequence-length     Input sequence length (default: ${ISL})"
+    echo "  -osl, --output-sequence-length    Output sequence length (default: ${OSL})"
+    echo "  --concurrency                     Concurrency level (default: ${CONCURRENCY})"
+    echo "  -h, --help                        Show this help message and exit"
+    echo
+    exit 0
+}
+
+# Function to parse command line arguments
+get_options() {
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            --host)
+                HOST="$2"
+                shift 2
+                ;;
+            --port)
+                PORT="$2"
+                shift 2
+                ;;
+            --model)
+                MODEL="$2"
+                shift 2
+                ;;
+            --tokenizer)
+                TOKENIZER="$2"
+                shift 2
+                ;;
+            -isl|--input-sequence-length)
+                ISL="$2"
+                shift 2
+                ;;
+            -osl|--output-sequence-length)
+                OSL="$2"
+                shift 2
+                ;;
+            --concurrency)
+                CONCURRENCY="$2"
+                shift 2
+                ;;
+            -h|--help)
+                print_help
+                ;;
+            *)
+                echo "Unknown option: $1"
+                exit 1
+                ;;
+        esac
+    done
+}
+
+# Function to log messages
+log() {
+    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1"
+}
+
+run_benchmark() {
+    local temp_dir=$(mktemp -d)
+    local result_file="${temp_dir}/bench_${ISL}_${OSL}_c${CONCURRENCY}.json"
+    local output_result_file="benchmark_result_${ISL}_${OSL}_c${CONCURRENCY}.md"
+
+    log "Running benchmark with concurrency: ${CONCURRENCY}"
+    uv run python3 vllm-benchmarks/benchmark_serving.py \
+        --backend openai-chat \
+        --model ${TOKENIZER} \
+        --served-model-name ${MODEL} \
+        --host ${HOST} --port ${PORT} \
+        --endpoint /v1/chat/completions \
+        --dataset-name random \
+        --random_input_len ${ISL} \
+        --random_output_len ${OSL} \
+        --max-concurrency ${CONCURRENCY} \
+        --num-prompts $(($CONCURRENCY*10)) \
+        --save-result --result-filename ${result_file} \
+        --percentile-metrics ttft,tpot,itl,e2el \
+        --ignore-eos
+
+    # Generate markdown table for benchmark result
+    eval $(cat ${result_file} | jq -r '
+        . | {request_throughput, output_throughput, mean_e2el_ms, p99_e2el_ms, mean_ttft_ms, p99_ttft_ms, mean_tpot_ms, p99_tpot_ms}
+        | to_entries[]
+        | "\(.key)=\(.value | if type == "number" then (. * 100 | round) / 100 else . end)"
+    ')
+    log "Benchmark completed. Results saved to '${output_result_file}'"
+    echo "# Benchmark Result (ISL:OSL=${ISL}:${OSL})
-| Concurrency | Reqeuest Throughput (req/s) | Output Token Throughput (tok/s) | Mean E2E Lantency (ms) | P99 E2E Lantency (ms) | Mean TTFT (ms) | P99 TTFT (ms) | Mean TPOT (ms) | P99 TPOT (ms) |
-|:-----------:|:--------------------------:|:-------------------------------:|:-----------------------:|:---------------------:|:--------------:|:-------------:|:-------------:|:------------:|
+| Concurrency | Request Throughput (req/s) | Output Token Throughput (tok/s) | Mean E2E Latency (ms) | P99 E2E Latency (ms) | Mean TTFT (ms) | P99 TTFT (ms) | Mean TPOT (ms) | P99 TPOT (ms) |
+|:-----------:|:---------------------------:|:-------------------------------:|:---------------------:|:-------------------:|:--------------:|:-------------:|:--------------:|:-------------:|
 | ${CONCURRENCY} | ${request_throughput} | ${output_throughput} | ${mean_e2el_ms} | ${p99_e2el_ms} | ${mean_ttft_ms} | ${p99_ttft_ms} | ${mean_tpot_ms} | ${p99_tpot_ms} |
-" > ${RESULT_FILENAME}
+" > ${output_result_file}
+}
+
+# Main script execution
+main() {
+    # Parse command line arguments
+    get_options "$@"
+
+    # Run the benchmark with the given concurrency
+    run_benchmark
+}
+
+# Execute the main function
+main "$@"
\ No newline at end of file
diff --git a/dockerfiles/llmbench/vllm-workspace/benchmark_serving_concurrency.sh b/dockerfiles/llmbench/vllm-workspace/benchmark_serving_concurrency.sh
deleted file mode 100644
index a12328d..0000000
--- a/dockerfiles/llmbench/vllm-workspace/benchmark_serving_concurrency.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-HOST=${HOST:-"localhost"}
-PORT=${PORT:-"8000"}
-MODEL=${MODEL:-"deepseek-ai/DeepSeek-R1"}
-TOKENIZER=${TOKENIZER:-"/workspace/tokenizer/${MODEL}"}
-ISL=${ISL:-"1000"}
-OSL=${OSL:-"1000"}
-OUTPUT_RESULT_FILE=${OUTPUT_RESULT_FILE:-"benchmark_result_${ISL}_${OSL}.md"}
-
-BENCH_LOOP=${BENCH_LOOP:-"1"}
-
-mkdir -p /tmp/llmbench
-echo "[tke-llmbench] the result will save to '${OUTPUT_RESULT_FILE}'"
-echo "# Benchmark Result (ISL:OSL=${ISL}:${OSL})
-
-| Concurrency | Reqeuest Throughput (req/s) | Output Token Throughput (tok/s) | Mean E2E Lantency (ms) | P99 E2E Lantency (ms) | Mean TTFT (ms) | P99 TTFT (ms) | Mean TPOT (ms) | P99 TPOT (ms) |
-|:-----------:|:--------------------------:|:-------------------------------:|:-----------------------:|:---------------------:|:--------------:|:-------------:|:-------------:|:------------:|" > ${OUTPUT_RESULT_FILE}
-
-for i in {1..${BENCH_LOOP}}; do
-    for concurrency in 1 2 4 8 16 32 64 128; do
-        set -x
-        uv run python3 vllm-benchmarks/benchmark_serving.py \
-            --backend openai-chat \
-            --model ${TOKENIZER} \
-            --served-model-name ${MODEL} \
-            --host ${HOST} --port ${PORT} \
-            --endpoint /v1/chat/completions \
-            --dataset-name random \
-            --random_input_len ${ISL} \
-            --random_output_len ${OSL} \
-            --max-concurrency ${concurrency} \
-            --num-prompts $((${concurrency}*10)) \
-            --save-result --result-filename /tmp/llmbench/bench_${ISL}_${OSL}_c${concurrency}.json \
-            --percentile-metrics ttft,tpot,itl,e2el \
-            --ignore-eos
-        set +x
-
-        eval $(cat /tmp/llmbench/bench_${ISL}_${OSL}_c${concurrency}.json | jq -r '
-        . | {request_throughput, output_throughput, mean_e2el_ms, p99_e2el_ms, mean_ttft_ms, p99_ttft_ms, mean_tpot_ms, p99_tpot_ms}
-        | to_entries[]
-        | "\(.key)=\(.value | if type == "number" then (. * 100 | round) / 100 else . end)"
-        ')
-        echo "| ${concurrency} | ${request_throughput} | ${output_throughput} | ${mean_e2el_ms} | ${p99_e2el_ms} | ${mean_ttft_ms} | ${p99_ttft_ms} | ${mean_tpot_ms} | ${p99_tpot_ms} |" >> ${OUTPUT_RESULT_FILE}
-    done
-done
-
-cat ${OUTPUT_RESULT_FILE}
\ No newline at end of file
end)" - ') - echo "| ${concurrency} | ${request_throughput} | ${output_throughput} | ${mean_e2el_ms} | ${p99_e2el_ms} | ${mean_ttft_ms} | ${p99_ttft_ms} | ${mean_tpot_ms} | ${p99_tpot_ms} |" >> ${OUTPUT_RESULT_FILE} - done -done - -cat ${OUTPUT_RESULT_FILE} \ No newline at end of file diff --git a/dockerfiles/llmbench/vllm-workspace/benchmark_serving_concurrency_list.sh b/dockerfiles/llmbench/vllm-workspace/benchmark_serving_concurrency_list.sh new file mode 100644 index 0000000..ff31d5f --- /dev/null +++ b/dockerfiles/llmbench/vllm-workspace/benchmark_serving_concurrency_list.sh @@ -0,0 +1,158 @@ +#!/bin/bash +set -euo pipefail + +# Default values +HOST="localhost" +PORT="8000" +MODEL="deepseek-ai/DeepSeek-R1" +TOKENIZER="/workspace/tokenizer/${MODEL}" +ISL="1000" +OSL="1000" +BENCH_LOOP="1" +OUTPUT_RESULT_FILE="benchmark_result_${ISL}_${OSL}.md" +CONCURRENCY_LIST="1,2,4,8,16,32,64,128" + +# Function to print help message +print_help() { + echo "Usage: $0 [OPTIONS]" + echo + echo "Options:" + echo " --host Target host for inference requests (default: ${HOST})" + echo " --port Target port for inference requests (default: ${PORT})" + echo " --model Model ID to benchmark (default: ${MODEL})" + echo " --tokenizer Tokenizer path (default: ${TOKENIZER})" + echo " -isl, --input-sequence-length Input sequence length (default: ${ISL})" + echo " -osl, --output-sequence-length Output sequence length (default: ${OSL})" + echo " --bench-loop Number of benchmark loops (default: ${BENCH_LOOP})" + echo " --concurrency Comma-separated concurrency levels (default: ${CONCURRENCY_LIST})" + echo " -h, --help Show this help message and exit" + echo + exit 0 +} + +# Function to parse command line arguments +get_options() { + while [[ $# -gt 0 ]]; do + case $1 in + --host) + HOST="$2" + shift 2 + ;; + --port) + PORT="$2" + shift 2 + ;; + --model) + MODEL="$2" + shift 2 + ;; + --tokenizer) + TOKENIZER="$2" + shift 2 + ;; + -isl|--input-sequence-length) + ISL="$2" + shift 2 + ;; + -osl|--output-sequence-length) + OSL="$2" + shift 2 + ;; + --bench-loop) + BENCH_LOOP="$2" + shift 2 + ;; + --concurrency) + CONCURRENCY_LIST="$2" + shift 2 + ;; + -h|--help) + print_help + exit 1 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac + done + + OUTPUT_RESULT_FILE="benchmark_result_${ISL}_${OSL}.md" +} + + +# Function to log messages +log() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" +} + +# Function to validate concurrency values +validate_concurrency() { + for val in "${concurrency_array[@]}"; do + if ! [[ "$val" =~ ^[0-9]+$ ]] || [ "$val" -le 0 ]; then + log "Error: Invalid concurrency value '$val'. Must be a positive integer." + exit 1 + fi + done +} + +# Function to run the benchmark +run_benchmark() { + local concurrency=$1 + local temp_dir=$(mktemp -d) + local result_file="${temp_dir}/bench_${ISL}_${OSL}_c${concurrency}.json" + + log "Running benchmark with concurrency: ${concurrency}" + set -x + uv run python3 vllm-benchmarks/benchmark_serving.py \ + --backend openai-chat \ + --model ${TOKENIZER} \ + --served-model-name ${MODEL} \ + --host ${HOST} --port ${PORT} \ + --endpoint /v1/chat/completions \ + --dataset-name random \ + --random_input_len ${ISL} \ + --random_output_len ${OSL} \ + --max-concurrency ${concurrency} \ + --num-prompts $((${concurrency}*10)) \ + --save-result --result-filename ${result_file} \ + --percentile-metrics ttft,tpot,itl,e2el \ + --ignore-eos + { set +x; } 2>/dev/null + + # Parse and log results + eval $(cat ${result_file} | jq -r ' + . 
+
+# Function to print help message
+print_help() {
+    echo "Usage: $0 [OPTIONS]"
+    echo
+    echo "Options:"
+    echo "  --host                            Target host for inference requests (default: ${HOST})"
+    echo "  --port                            Target port for inference requests (default: ${PORT})"
+    echo "  --model                           Model ID to benchmark (default: ${MODEL})"
+    echo "  --tokenizer                       Tokenizer path (default: ${TOKENIZER})"
+    echo "  -isl, --input-sequence-length     Input sequence length (default: ${ISL})"
+    echo "  -osl, --output-sequence-length    Output sequence length (default: ${OSL})"
+    echo "  --bench-loop                      Number of benchmark loops (default: ${BENCH_LOOP})"
+    echo "  --concurrency                     Comma-separated concurrency levels (default: ${CONCURRENCY_LIST})"
+    echo "  -h, --help                        Show this help message and exit"
+    echo
+    exit 0
+}
+
+# Function to parse command line arguments
+get_options() {
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            --host)
+                HOST="$2"
+                shift 2
+                ;;
+            --port)
+                PORT="$2"
+                shift 2
+                ;;
+            --model)
+                MODEL="$2"
+                shift 2
+                ;;
+            --tokenizer)
+                TOKENIZER="$2"
+                shift 2
+                ;;
+            -isl|--input-sequence-length)
+                ISL="$2"
+                shift 2
+                ;;
+            -osl|--output-sequence-length)
+                OSL="$2"
+                shift 2
+                ;;
+            --bench-loop)
+                BENCH_LOOP="$2"
+                shift 2
+                ;;
+            --concurrency)
+                CONCURRENCY_LIST="$2"
+                shift 2
+                ;;
+            -h|--help)
+                print_help
+                ;;
+            *)
+                echo "Unknown option: $1"
+                exit 1
+                ;;
+        esac
+    done
+
+    # Recompute the result file name in case ISL/OSL were overridden above
+    OUTPUT_RESULT_FILE="benchmark_result_${ISL}_${OSL}.md"
+}
+
+# Function to log messages
+log() {
+    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1"
+}
+
+# Function to validate concurrency values
+validate_concurrency() {
+    for val in "${concurrency_array[@]}"; do
+        if ! [[ "$val" =~ ^[0-9]+$ ]] || [ "$val" -le 0 ]; then
+            log "Error: Invalid concurrency value '$val'. Must be a positive integer."
+            exit 1
+        fi
+    done
+}
+
+# Function to run the benchmark
+run_benchmark() {
+    local concurrency=$1
+    local temp_dir=$(mktemp -d)
+    local result_file="${temp_dir}/bench_${ISL}_${OSL}_c${concurrency}.json"
+
+    log "Running benchmark with concurrency: ${concurrency}"
+    set -x
+    uv run python3 vllm-benchmarks/benchmark_serving.py \
+        --backend openai-chat \
+        --model ${TOKENIZER} \
+        --served-model-name ${MODEL} \
+        --host ${HOST} --port ${PORT} \
+        --endpoint /v1/chat/completions \
+        --dataset-name random \
+        --random_input_len ${ISL} \
+        --random_output_len ${OSL} \
+        --max-concurrency ${concurrency} \
+        --num-prompts $((${concurrency}*10)) \
+        --save-result --result-filename ${result_file} \
+        --percentile-metrics ttft,tpot,itl,e2el \
+        --ignore-eos
+    { set +x; } 2>/dev/null
+
+    # Parse results into shell variables (rounded to 2 decimals) and append a table row
+    eval $(cat ${result_file} | jq -r '
+        . | {request_throughput, output_throughput, mean_e2el_ms, p99_e2el_ms, mean_ttft_ms, p99_ttft_ms, mean_tpot_ms, p99_tpot_ms}
+        | to_entries[]
+        | "\(.key)=\(.value | if type == "number" then (. * 100 | round) / 100 else . end)"
+    ')
+    echo "| ${concurrency} | ${request_throughput} | ${output_throughput} | ${mean_e2el_ms} | ${p99_e2el_ms} | ${mean_ttft_ms} | ${p99_ttft_ms} | ${mean_tpot_ms} | ${p99_tpot_ms} |" >> ${OUTPUT_RESULT_FILE}
+}
+
+# Main script execution
+main() {
+    # Parse command line arguments
+    get_options "$@"
+
+    IFS=',' read -r -a concurrency_array <<< "${CONCURRENCY_LIST}"
+    validate_concurrency
+
+    log "Starting benchmark. Results will be saved to '${OUTPUT_RESULT_FILE}'"
+    echo "# Benchmark Result (ISL:OSL=${ISL}:${OSL})
+
+| Concurrency | Request Throughput (req/s) | Output Token Throughput (tok/s) | Mean E2E Latency (ms) | P99 E2E Latency (ms) | Mean TTFT (ms) | P99 TTFT (ms) | Mean TPOT (ms) | P99 TPOT (ms) |
+|:-----------:|:--------------------------:|:-------------------------------:|:---------------------:|:-------------------:|:--------------:|:-------------:|:--------------:|:-------------:|" > ${OUTPUT_RESULT_FILE}
+
+    for i in $(seq 1 ${BENCH_LOOP}); do
+        log "Benchmark loop ${i}/${BENCH_LOOP}"
+        for concurrency in "${concurrency_array[@]}"; do
+            run_benchmark ${concurrency}
+        done
+    done
+
+    log "Benchmark completed. Results saved to '${OUTPUT_RESULT_FILE}'"
+    cat ${OUTPUT_RESULT_FILE}
+}
+
+# Execute the main function
+main "$@"
\ No newline at end of file
diff --git a/dockerfiles/llmbench/vllm-workspace/sla_benchmark.sh b/dockerfiles/llmbench/vllm-workspace/sla_benchmark.sh
new file mode 100644
index 0000000..2cc6604
--- /dev/null
+++ b/dockerfiles/llmbench/vllm-workspace/sla_benchmark.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+HOST=${HOST:-"localhost"}
+PORT=${PORT:-"8000"}
+MODEL=${MODEL:-"deepseek-ai/DeepSeek-R1"}
+TOKENIZER=${TOKENIZER:-"/workspace/tokenizer/${MODEL}"}
+
+ISL=${ISL:-"2500"}
+OSL=${OSL:-"500"}
+TTFT=${TTFT:-"5000"}
+TPOT=${TPOT:-"100"}
+OUTPUT_RESULT_FILE=${OUTPUT_RESULT_FILE:-"sla_ttft${TTFT}_tpot${TPOT}_benchmark_result_${ISL}_${OSL}.md"}
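+
+# Search strategy: concurrency starts at 1 and doubles until it reaches 64,
+# then grows in steps of 8. The loop stops at the first level whose mean TTFT
+# or mean TPOT exceeds the SLA targets above, so the last row within SLA is
+# the highest sustainable concurrency.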
+
+TEMP_DIR=$(mktemp -d)
+echo "[tke-llmbench] the result will be saved to '${OUTPUT_RESULT_FILE}'"
+echo "# Benchmark Result (ISL:OSL=${ISL}:${OSL})
+
+| Concurrency | Request Throughput (req/s) | Output Token Throughput (tok/s) | Mean E2E Latency (ms) | P99 E2E Latency (ms) | Mean TTFT (ms) | P99 TTFT (ms) | Mean TPOT (ms) | P99 TPOT (ms) |
+|:-----------:|:--------------------------:|:-------------------------------:|:---------------------:|:-------------------:|:--------------:|:-------------:|:--------------:|:-------------:|" > ${OUTPUT_RESULT_FILE}
+
+concurrency=1
+while true; do
+    RESULT_FILE="${TEMP_DIR}/bench_${ISL}_${OSL}_c${concurrency}.json"
+
+    # Use the same benchmark_serving.py flags as the other scripts in this
+    # directory; the TTFT/TPOT SLA thresholds are enforced by the checks
+    # below rather than passed to the benchmark itself.
+    set -x
+    uv run python3 vllm-benchmarks/benchmark_serving.py \
+        --backend openai-chat \
+        --model ${TOKENIZER} \
+        --served-model-name ${MODEL} \
+        --host ${HOST} --port ${PORT} \
+        --endpoint /v1/chat/completions \
+        --dataset-name random \
+        --random_input_len ${ISL} \
+        --random_output_len ${OSL} \
+        --max-concurrency ${concurrency} \
+        --num-prompts $((${concurrency}*10)) \
+        --save-result --result-filename ${RESULT_FILE} \
+        --percentile-metrics ttft,tpot,itl,e2el \
+        --ignore-eos
+    set +x
+
+    eval $(cat ${RESULT_FILE} | jq -r '
+    . | {request_throughput, output_throughput, mean_e2el_ms, p99_e2el_ms, mean_ttft_ms, p99_ttft_ms, mean_tpot_ms, p99_tpot_ms}
+    | to_entries[]
+    | "\(.key)=\(.value | if type == "number" then (. * 100 | round) / 100 else . end)"
+    ')
+    echo "| ${concurrency} | ${request_throughput} | ${output_throughput} | ${mean_e2el_ms} | ${p99_e2el_ms} | ${mean_ttft_ms} | ${p99_ttft_ms} | ${mean_tpot_ms} | ${p99_tpot_ms} |" >> ${OUTPUT_RESULT_FILE}
+
+    # mean_ttft_ms / mean_tpot_ms are floats, so compare with awk instead of
+    # the integer-only `-gt` test
+    if awk -v v="${mean_ttft_ms}" -v sla="${TTFT}" 'BEGIN { exit !(v > sla) }'; then
+        echo "[concurrency:${concurrency}] The mean_ttft_ms is ${mean_ttft_ms}, which is greater than SLA ${TTFT} ms."
+        break
+    fi
+    if awk -v v="${mean_tpot_ms}" -v sla="${TPOT}" 'BEGIN { exit !(v > sla) }'; then
+        echo "[concurrency:${concurrency}] The mean_tpot_ms is ${mean_tpot_ms}, which is greater than SLA ${TPOT} ms."
+        break
+    fi
+    echo "[concurrency:${concurrency}] The mean_ttft_ms is ${mean_ttft_ms}, which is within SLA ${TTFT} ms."
+    echo "[concurrency:${concurrency}] The mean_tpot_ms is ${mean_tpot_ms}, which is within SLA ${TPOT} ms."
+
+    if [ ${concurrency} -ge 64 ]; then
+        concurrency=$((${concurrency} + 8))
+    else
+        concurrency=$((${concurrency} * 2))
+    fi
+    echo "Increasing concurrency to ${concurrency}"
+done