add prod-focused configurations (#302)

YifanYuan3 · meta-codesync[bot] · commit 7c44c03266ac · 2025-11-17T18:27:08.000-08:00
Summary: Pull Request resolved: #302. This diff adds: (1) one new microbenchmark (glibc `memcmp`); (2) configurations for running all microbenchmarks so that they focus on the most relevant and representative operations; (3) a new benchpress job and run script for (2); (4) supporting pieces such as parsers. Scoring (i.e. how to interpret the results) and sizing (e.g. dataset size) will come in the next diff. Reviewed By: charles-typ Differential Revision: D85820722 fbshipit-source-id: 7ef69914b05fa3d5e3c7089da0ab693ce5229108
diff --git a/benchpress/config/jobs_wdl.yml b/benchpress/config/jobs_wdl.yml
@@ -175,3 +175,26 @@
         after:
           - 'benchmarks/wdl_bench/wdl_bench_results.txt'
           - 'benchmarks/wdl_bench/out_*.json'
+
+# Job "prod_set": invokes the wdl_bench benchmark with the argument templates
+# below. `vars` provides the values for the `{type}` / `{output}` placeholders
+# (presumably substituted by benchpress before launch -- confirm).
+- name: prod_set
+  benchmark: wdl_bench
+  description: >
+    a set of most popular and ubiquitous WDLs across Meta's fleet, with configs close to real production.
+  args:
+    - '--type {type}'
+    - '--output {output}'
+  vars:
+    - 'type=prod'
+    - 'output=wdl_bench_results.txt'
+  hooks:
+    # Same hook set as the preceding job's visible tail: mpstat sampling plus
+    # a copymove of the result files.
+    - hook: cpu-mpstat
+      options:
+        args:
+          - '-u'   # utilization
+          - '1'    # second interval
+    - hook: copymove
+      options:
+        # NOTE(review): presumably moves (rather than copies) the files listed
+        # under `after` once the run finishes -- confirm against the hook.
+        is_move: true
+        after:
+          - 'benchmarks/wdl_bench/wdl_bench_results.txt'
+          - 'benchmarks/wdl_bench/out_*.json'
diff --git a/packages/wdl_bench/convert.py b/packages/wdl_bench/convert.py
@@ -16,14 +16,20 @@
 
 
 with open(input_file_name) as f:
-    if sys.argv[1] == "concurrent_hash_map_benchmark":
+    if sys.argv[1] == "concurrency_concurrent_hash_map_benchmark":
         parse_line.parse_line_chm(f, sum_c)
     elif sys.argv[1] == "lzbench":
         parse_line.parse_line_lzbench(f, sum_c)
     elif sys.argv[1] == "openssl":
         parse_line.parse_line_openssl(f, sum_c)
     elif sys.argv[1] == "vdso_bench":
         parse_line.parse_line_vdso_bench(f, sum_c)
+    elif sys.argv[1] == "libaegis_benchmark":
+        parse_line.parse_line_libaegis_benchmark(f, sum_c)
+    elif sys.argv[1] == "xxhash_benchmark":
+        parse_line.parse_line_xxhash_benchmark(f, sum_c)
+    elif sys.argv[1] == "container_hash_maps_bench":
+        parse_line.parse_line_container_hash_maps_bench(f, sum_c)
     else:
         parse_line.parse_line(f, sum_c)
 
diff --git a/packages/wdl_bench/install_wdl_bench.sh b/packages/wdl_bench/install_wdl_bench.sh
@@ -5,6 +5,8 @@
 # LICENSE file in the root directory of this source tree.
 set -Eeuo pipefail
 
+GLIBC_VERSION=$(getconf GNU_LIBC_VERSION | cut -f 2 -d\  )
+
 ##################### BENCHMARK CONFIG #########################
 
 declare -A REPOS=(
@@ -15,16 +17,18 @@ declare -A REPOS=(
     ['vdso']='https://github.com/leitao/debug.git'
     ['libaegis']='https://github.com/aegis-aead/libaegis.git'
     ['xxhash']='https://github.com/Cyan4973/xxHash.git'
+    ['glibc']='https://sourceware.org/git/glibc.git'
 )
 
 declare -A TAGS=(
-    ['folly']='v2025.11.03.00'
-    ['fbthrift']='v2025.11.03.00'
+    ['folly']='v2025.11.17.00'
+    ['fbthrift']='v2025.11.17.00'
     ['lzbench']='v2.2'
     ['openssl']='openssl-3.6.0'
     ['vdso']='a90085a8e4e1e07a93cc45a68da246fa98a9f831'
     ['libaegis']='0.4.2'
     ['xxhash']='136cc1f8fe4d5ea62a7c16c8424d4fa5158f6d68'
+    ['glibc']="glibc-${GLIBC_VERSION}"
 )
 
 declare -A DATASETS=(
@@ -48,12 +52,12 @@ LINUX_DIST_ID="$(awk -F "=" '/^ID=/ {print $2}' /etc/os-release | tr -d '"')"
 if [ "$LINUX_DIST_ID" = "ubuntu" ]; then
   apt install -y cmake autoconf automake flex bison \
     nasm clang patch git libssl-dev \
-    tar unzip perl openssl python3-dev
+    tar unzip perl openssl python3-dev gawk
 
 elif [ "$LINUX_DIST_ID" = "centos" ]; then
   dnf install -y cmake autoconf automake flex bison \
     meson nasm clang patch \
-    git tar unzip perl openssl-devel python3-devel
+    git tar unzip perl openssl-devel python3-devel gawk
 fi
 
 
@@ -67,6 +71,11 @@ fi
 
 ##################### BUILD AND INSTALL FUNCTIONS #########################
 
+folly_benchmark_list="concurrency_concurrent_hash_map_bench hash_hash_benchmark container_hash_maps_bench stats_digest_builder_benchmark fibers_fibers_benchmark crypto_lt_hash_benchmark memcpy_benchmark memset_benchmark io_async_event_base_benchmark io_iobuf_benchmark function_benchmark random_benchmark synchronization_small_locks_benchmark synchronization_lifo_sem_bench range_find_benchmark"
+
+fbthrift_benchmark_list="ProtocolBench VarintUtilsBench"
+
+
 clone()
 {
     lib=$1
@@ -104,6 +113,10 @@ build_folly()
 
     python3 ./build/fbcode_builder/getdeps.py --allow-system-packages build --scratch-path "${WDL_BUILD}"
 
+    for benchmark in $folly_benchmark_list; do
+      cp "$WDL_BUILD/build/folly/$benchmark" "$WDL_ROOT/$benchmark"
+    done
+
     popd || exit
 }
 
@@ -119,6 +132,10 @@ build_fbthrift()
 
     python3 ./build/fbcode_builder/getdeps.py --allow-system-packages build fbthrift --scratch-path "${WDL_BUILD}" --extra-cmake-defines='{"enable_tests": "1"}'
 
+    for benchmark in $fbthrift_benchmark_list; do
+      cp "$WDL_BUILD/build/fbthrift/bin/$benchmark" "$WDL_ROOT/$benchmark"
+    done
+
     popd || exit
 }
 
@@ -198,7 +215,22 @@ build_xxhash()
     clone $lib || echo "Failed to clone $lib"
     cd "$lib" || exit
     make -C ./tests/bench/ -j
-    cp ./test/bench/benchHash "${WDL_ROOT}/xxhash_benchmark" || exit
+    cp ./tests/bench/benchHash "${WDL_ROOT}/xxhash_benchmark" || exit
+
+    popd || exit
+}
+
+# Clone glibc at the tag matching the host's libc version, build it in an
+# out-of-tree `build` directory, and copy the `bench-memcmp` binary into
+# ${WDL_ROOT}. Relies on ${WDL_SOURCE}, ${WDL_ROOT} and clone() from this script.
+build_glibc()
+{
+    lib='glibc'
+    pushd "${WDL_SOURCE}"
+    clone $lib || echo "Failed to clone $lib"
+    cd "$lib" || exit
+    # -p keeps re-runs working when `build` already exists. With plain
+    # `mkdir build && cd build`, a failing mkdir skipped the cd without
+    # tripping `set -e`, so configure then ran from the source directory.
+    mkdir -p build
+    cd build || exit
+    ../configure --prefix="${WDL_SOURCE}/glibc/build"
+    # Cap parallelism at the CPU count; a bare `-j` places no limit on jobs.
+    make -j"$(nproc)"
+    # NOTE(review): `make bench` also runs the benchmark suite; if only the
+    # binary is needed, `make bench-build` may be sufficient -- confirm.
+    make bench
+    cp "${WDL_SOURCE}/glibc/build/benchtests/bench-memcmp" "${WDL_ROOT}/" || exit
 
     popd || exit
 }
@@ -215,21 +247,10 @@ build_openssl
 build_vdso
 build_libaegis
 build_xxhash
-
-folly_benchmark_list="concurrency_concurrent_hash_map_bench hash_hash_benchmark container_hash_maps_bench stats_digest_builder_benchmark fibers_fibers_benchmark crypto_lt_hash_benchmark memcpy_benchmark memset_benchmark io_async_event_base_benchmark io_iobuf_benchmark function_benchmark random_benchmark synchronization_small_locks_benchmark range_find_benchmark"
-
-fbthrift_benchmark_list="ProtocolBench"
-
-for benchmark in $folly_benchmark_list; do
-  cp "$WDL_BUILD/build/folly/$benchmark" "$WDL_ROOT/$benchmark"
-done
-
-for benchmark in $fbthrift_benchmark_list; do
-  cp "$WDL_BUILD/build/fbthrift/bin/$benchmark" "$WDL_ROOT/$benchmark"
-done
-
+build_glibc
 
 cp "${BPKGS_WDL_ROOT}/run.sh" ./
+cp "${BPKGS_WDL_ROOT}/run_prod.sh" ./
 cp "${BPKGS_WDL_ROOT}/convert.py" ./
 cp "${BPKGS_WDL_ROOT}/aggregate_result.py" ./
 cp "${BPKGS_WDL_ROOT}/parse_line.py" ./
diff --git a/packages/wdl_bench/parse_line.py b/packages/wdl_bench/parse_line.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 
 
+import json
 import re
 
 
@@ -130,3 +131,62 @@ def parse_line_vdso_bench(f, sum_c):
             name = elements[4]
             value = float(elements[7])
             sum_c[name + ": M/s"] = value
+
+
+def parse_line_libaegis_benchmark(f, sum_c):
+    """Collect Mb/s figures from rows whose first field contains "128L".
+
+    For each matching row the key is every field except the last three,
+    concatenated without separators, plus the suffix ": Mb/s"; the value is
+    the second-to-last field converted to float.
+
+    Args:
+        f: iterable of text lines (e.g. an open file).
+        sum_c: mapping updated in place with the parsed values.
+
+    Raises:
+        ValueError: if the second-to-last field of a matching row is not a
+            number.
+    """
+    for line in f:
+        elements = line.split()
+        # Blank lines split to [] and one-field lines have no value column;
+        # indexing either used to raise IndexError, so skip them.
+        if len(elements) < 2:
+            continue
+        if "128L" in elements[0]:
+            name = "".join(elements[:-3])
+            sum_c[name + ": Mb/s"] = float(elements[-2])
+
+
+def parse_line_xxhash_benchmark(f, sum_c):
+    """Parse sectioned, comma-separated xxhash benchmark output into `sum_c`.
+
+    A line containing one of the known section titles (case-insensitive)
+    starts a section and sets ``sum_c[section]`` to an empty dict. Each later
+    row of the form ``name, v1, v2, ...`` is stored as
+    ``sum_c[section][name] = {size_key: int(v), ...}``. Rows that appear
+    before any section title are ignored.
+
+    Args:
+        f: iterable of text lines (e.g. an open file).
+        sum_c: mapping updated in place.
+
+    Raises:
+        ValueError: if a non-empty value field of a row is not an integer.
+    """
+    # (title substring, section key), checked in this order.
+    sections = (
+        ("benchmarking large inputs", "large_inputs"),
+        ("throughput small inputs of fixed size", "throughput_small_fixed"),
+        ("benchmarking random size inputs", "random_size_inputs"),
+        ("latency for small inputs of fixed size", "latency_small_fixed"),
+        ("latency for small inputs of random size", "latency_small_random"),
+    )
+    # Bind before the loop: a data row preceding the first section title used
+    # to raise UnboundLocalError.
+    current_section = None
+    for line in f:
+        line = line.strip()
+        lowered = line.lower()
+        header = next((key for title, key in sections if title in lowered), None)
+        if header is not None:
+            current_section = header
+            sum_c[current_section] = {}
+        elif "," in line and current_section:
+            # Data row (format: "xxh3   , value1, value2, ...").
+            parts = [p.strip() for p in line.split(",")]
+            values = [int(v) for v in parts[1:] if v]
+            if current_section == "large_inputs":
+                # log9 to log27 (512 bytes to 128 MB)
+                data = {f"log{9 + i}": v for i, v in enumerate(values)}
+            else:
+                # 1 to N bytes
+                data = {f"{i + 1}_bytes": v for i, v in enumerate(values)}
+            sum_c[current_section][parts[0]] = data
+
+
+def parse_line_container_hash_maps_bench(f, sum_c):
+    """Copy selected entries of the JSON object read from `f` into `sum_c`.
+
+    An entry is kept when its key matches the operation-name pattern below.
+    """
+    # NOTE(review): "^" binds only to the first alternative, so "Find" must
+    # start the key while the other names match anywhere in it -- confirm
+    # this is intended.
+    wanted = re.compile("^(Find)|(Insert)|(InsertSqBr)|(Erase)|(Iter)")
+    for key, val in json.load(f).items():
+        if wanted.search(key) is not None:
+            sum_c[key] = val
diff --git a/packages/wdl_bench/run.sh b/packages/wdl_bench/run.sh
@@ -120,7 +120,9 @@ main() {
     done
 
 
-
+    if [ "$run_type" = "prod" ]; then
+        bash "${WDL_ROOT}/run_prod.sh"
+    fi
 
     set -u  # Enable unbound variables check from here onwards
     benchreps_tell_state "working on config"
diff --git a/packages/wdl_bench/run_prod.sh b/packages/wdl_bench/run_prod.sh