From 3b71af0aef1cc911b3410478237b663f319d10ef Mon Sep 17 00:00:00 2001
From: Finbarr Timbers
Date: Thu, 30 Oct 2025 10:20:56 -0600
Subject: [PATCH] Removed dead code

---
 AGENTS.md                                     | 10 ++++
 CLAUDE.md                                     | 11 +---
 .../IFEvalG/instructions_registry.py          | 18 -------
 open_instruct/code_utils/code_utils.py        |  9 ----
 open_instruct/code_utils/testing_util.py      | 11 ----
 open_instruct/context_window_checker.py       | 16 ------
 open_instruct/dataset_processor.py            | 15 ------
 open_instruct/dataset_transformation.py       | 46 ----------------
 open_instruct/grpo_vllm_thread_ray_gtrl.py    | 12 -----
 open_instruct/model_utils.py                  | 54 ++-----------------
 open_instruct/ppo2.py                         | 12 -----
 open_instruct/ppo_vllm_thread_ray_gtrl.py     | 12 -----
 open_instruct/utils.py                        | 51 ------------------
 scripts/convert_olmo_1124_weights_to_hf.py    |  2 -
 ...eate_nq_tqa_hotpotqa_2wiki_simplqa_data.py |  5 --
 scripts/synth_pref/parse_preferences.py       |  2 -
 16 files changed, 16 insertions(+), 270 deletions(-)
 create mode 100644 AGENTS.md

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000000..3d2e569a76
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,10 @@
+# Bash commands
+- `uv run pytest`: Run the tests.
+- `make style && make quality`: Run the linter + formatter.
+
+# Workflow
+- Always run the linter and make sure the tests pass before finishing a task.
+- Prefer running single tests, not the whole suite, when developing.
+- To run the `./scripts/train/build_image_and_launch.sh` script, you must commit the current changes.
+- Launch tool use experiments by running `./scripts/train/build_image_and_launch.sh scripts/train/debug/tool_grpo_fast.sh`.
+- Launch multi-node non-tool experiments by running `./scripts/train/build_image_and_launch.sh scripts/train/debug/large_test_script.sh`.
diff --git a/CLAUDE.md b/CLAUDE.md
index 3d2e569a76..43c994c2d3 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,10 +1 @@
-# Bash commands
-- `uv run pytest`: Run the tests.
-- `make style && make quality` run the linter + formatter.
-
-# Workflow
-- Always run the linter and make sure the tests pass before finishing a task.
-- Prefer running single tests, not the whole suite, when developing.
-- To run the `./scripts/train/build_image_and_launch.sh` script, you must commit the current changes.
-- Launch tool use experiments by running `./scripts/train/build_image_and_launch.sh scripts/train/debug/tool_grpo_fast.sh`.
-- Launch multi-node non-tool experiments by running `./scripts/train/build_image_and_launch.sh scripts/train/debug/large_test_script.sh`.
+@AGENTS.md
diff --git a/open_instruct/IFEvalG/instructions_registry.py b/open_instruct/IFEvalG/instructions_registry.py
index 71db32f973..1229b62a75 100644
--- a/open_instruct/IFEvalG/instructions_registry.py
+++ b/open_instruct/IFEvalG/instructions_registry.py
@@ -292,21 +292,3 @@
     _KEYWORD + "keyword_specific_position": {_KEYWORD + "keyword_specific_position"},
     _KEYWORD + "start_end": {_KEYWORD + "start_end"},
 }
-
-
-def conflict_make(conflicts):
-  """Makes sure if A conflicts with B, B will conflict with A.
-
-  Args:
-    conflicts: Dictionary of potential conflicts where key is instruction id
-      and value is set of instruction ids that it conflicts with.
-
-  Returns:
-    Revised version of the dictionary. All instructions conflict with
-    themselves. If A conflicts with B, B will conflict with A.
-  """
-  for key in conflicts:
-    for k in conflicts[key]:
-      conflicts[k].add(key)
-    conflicts[key].add(key)
-  return conflicts
diff --git a/open_instruct/code_utils/code_utils.py b/open_instruct/code_utils/code_utils.py
index 2abcc398ef..727dc1a0fd 100644
--- a/open_instruct/code_utils/code_utils.py
+++ b/open_instruct/code_utils/code_utils.py
@@ -39,15 +39,6 @@
 original_builtins = __builtins__
 
 
-def encode_tests(tests: list) -> str:
-    if not tests:
-        return ""
-    pickled_data = pickle.dumps(tests)
-    compressed_data = zlib.compress(pickled_data)
-    b64_encoded_data = base64.b64encode(compressed_data)
-    return b64_encoded_data.decode("utf-8")
-
-
 def decode_tests(tests: Any) -> list:
     if not tests:
         return []
diff --git a/open_instruct/code_utils/testing_util.py b/open_instruct/code_utils/testing_util.py
index 9dd1956c97..d32178a8ea 100644
--- a/open_instruct/code_utils/testing_util.py
+++ b/open_instruct/code_utils/testing_util.py
@@ -25,17 +25,6 @@
 import_string = "from string import *\nfrom re import *\nfrom datetime import *\nfrom collections import *\nfrom heapq import *\nfrom bisect import *\nfrom copy import *\nfrom math import *\nfrom random import *\nfrom statistics import *\nfrom itertools import *\nfrom functools import *\nfrom operator import *\nfrom io import *\nfrom sys import *\nfrom json import *\nfrom builtins import *\nfrom typing import *\nimport string\nimport re\nimport datetime\nimport collections\nimport heapq\nimport bisect\nimport copy\nimport math\nimport random\nimport statistics\nimport itertools\nimport functools\nimport operator\nimport io\nimport sys\nimport json\nsys.setrecursionlimit(50000)\n"
 
 
-def truncatefn(s, length=300):
-    if isinstance(s, str):
-        pass
-    else:
-        s = str(s)
-    if len(s) <= length:
-        return s
-
-    return s[: length // 2] + "...(truncated) ..." + s[-length // 2 :]
-
-
 class CODE_TYPE(Enum):
     call_based = 0
     standard_input = 1
diff --git a/open_instruct/context_window_checker.py b/open_instruct/context_window_checker.py
index 01b81316ca..de48fa688d 100644
--- a/open_instruct/context_window_checker.py
+++ b/open_instruct/context_window_checker.py
@@ -385,22 +385,6 @@ async def safe_acompletion_with_context_check(
 
 
 # Convenience function for quick context checking
-def will_exceed_context_window(
-    messages: list[dict[str, str]],
-    max_completion_tokens: int,
-    model_name: str,
-    max_context_length: int = 8192,
-    safety_margin: int = 100,
-) -> bool:
-    """
-    Quick check to see if a request would exceed the context window.
-
-    Returns:
-        bool: True if the request would exceed context window, False otherwise
-    """
-    return not check_context_window_limit(
-        messages, max_completion_tokens, model_name, max_context_length, safety_margin
-    )
 
 
 def truncate_str_for_prompt_template(
diff --git a/open_instruct/dataset_processor.py b/open_instruct/dataset_processor.py
index d88d22ef91..1eb038628f 100644
--- a/open_instruct/dataset_processor.py
+++ b/open_instruct/dataset_processor.py
@@ -229,11 +229,6 @@ def get_num_proc(dataset_len: int, num_available_cpus: int, example_per_second_p
     return min(num_required_cpus, num_available_cpus, dataset_len)
 
 
-def select_nested(dataset: DatasetDict, max_examples_per_split: int):
-    """select the dataset nested in a DatasetDict"""
-    return {key: dataset[key].select(range(min(max_examples_per_split, len(dataset[key])))) for key in dataset}
-
-
 class DatasetProcessor:
     def __init__(self, tokenizer: PreTrainedTokenizer, config: DatasetConfig) -> None:
         self.tokenizer = tokenizer
@@ -475,16 +470,6 @@ def get_token_length_visualization(self, dataset: DatasetDict, save_path: str =
     )
 
 
-def convert_preference_dataset_to_binary_dataset(ds: Dataset):
-    binary_ds = defaultdict(list)
-    for i in tqdm(range(len(ds))):
-        binary_ds[SFT_MESSAGE_KEY].append(ds[i]["chosen"])
-        binary_ds[BINARY_LABEL_KEY].append(True)
-        binary_ds[SFT_MESSAGE_KEY].append(ds[i]["rejected"])
-        binary_ds[BINARY_LABEL_KEY].append(False)
-    return Dataset.from_dict(binary_ds)
-
-
 def visualize_token(tokens: list[int], tokenizer: PreTrainedTokenizer):
     i = 0
     console = Console()
diff --git a/open_instruct/dataset_transformation.py b/open_instruct/dataset_transformation.py
index f88da72109..765ae13023 100644
--- a/open_instruct/dataset_transformation.py
+++ b/open_instruct/dataset_transformation.py
@@ -144,19 +144,6 @@ def visualize_token(tokens: list[int], tokenizer: PreTrainedTokenizer):
     console.print(rich_text)
 
 
-def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrainedTokenizer):
-    i = 0
-    console = Console()
-    rich_text = Text()
-    # for i, token in enumerate():
-    for i in range(min(len(tokens), len(masks))):
-        token = tokens[i]
-        color = COLORS[masks[i] % len(COLORS)]
-        decoded_token = tokenizer.decode(token)
-        rich_text.append(f"{decoded_token}", style=color)
-    console.print(rich_text)
-
-
 # ----------------------------------------------------------------------------
 # Tokenization
 # Chat templates
@@ -1274,25 +1261,6 @@ def rlvr_tokenize_v3(
     return row
 
 
-def rlvr_filter_v1(
-    row: Dict[str, Any],
-    tokenizer: PreTrainedTokenizer,
-    need_contain_labels: bool = True,
-    max_prompt_token_length: Optional[int] = None,
-    max_token_length: Optional[int] = None,
-):
-    max_prompt_token_length_ok = True
-    if max_prompt_token_length is not None:
-        max_prompt_token_length_ok = len(row[INPUT_IDS_PROMPT_KEY]) <= max_prompt_token_length
-
-    max_token_length_ok = True
-    if max_token_length is not None:
-        max_token_length_ok = len(row[INPUT_IDS_KEY]) <= max_token_length
-
-    contain_some_labels = any(x != -100 for x in row[LABELS_KEY])
-    return max_prompt_token_length_ok and max_token_length_ok and (contain_some_labels or not need_contain_labels)
-
-
 def rlvr_max_length_filter_v2(
     row: Dict[str, Any], tokenizer: PreTrainedTokenizer, max_prompt_token_length: Optional[int] = None
 ):
@@ -1686,20 +1654,6 @@ def count_tokens(sample):
     return loaded_dataset, all_statistics
 
 
-def get_cached_dataset(
-    dcs: List[DatasetConfig],
-    tc: TokenizerConfig,
-    hf_entity: Optional[str] = None,
-    dataset_local_cache_dir: Optional[str] = None,
-    dataset_skip_cache: bool = False,
-) -> Union[Dataset, Tuple[Dataset, Dict[str, Any]]]:
-    if dataset_local_cache_dir is not None:
-        cache = LocalDatasetTransformationCache(dataset_local_cache_dir=dataset_local_cache_dir)
-    else:
-        cache = DatasetTransformationCache(hf_entity=hf_entity)
-    return cache.load_or_transform_dataset(dcs, tc, dataset_skip_cache=dataset_skip_cache)
-
-
 def get_cached_dataset_tulu_with_statistics(
     dataset_mixer_list: List[str],
     dataset_mixer_list_splits: List[str],
diff --git a/open_instruct/grpo_vllm_thread_ray_gtrl.py b/open_instruct/grpo_vllm_thread_ray_gtrl.py
index bc59fbdb25..63955244cd 100644
--- a/open_instruct/grpo_vllm_thread_ray_gtrl.py
+++ b/open_instruct/grpo_vllm_thread_ray_gtrl.py
@@ -47,7 +47,6 @@
 
 import asyncio
 import gc
-import json
 import math
 import random
 import shutil
@@ -367,17 +366,6 @@ def __post_init__(self):
         print("WARNING: number_samples_per_prompt is 1. This reduces GRPO to REINFORCE. ")
 
 
-def process_dataset_mixer(value) -> tuple[dict | None, str | None]:
-    # if passed through cli: convert the dataset mixers to dictionaries
-    if isinstance(value, str):
-        return json.loads(value), value
-    # if passed through yaml: convert the dataset mixers to strings
-    elif isinstance(value, dict):
-        return value, json.dumps(value)
-    else:
-        raise ValueError("Input must be either a string or a dictionary")
-
-
 def get_train_ds_config(
     offload,
     adam_offload=False,
diff --git a/open_instruct/model_utils.py b/open_instruct/model_utils.py
index 79a3e0648c..280d4c06bc 100644
--- a/open_instruct/model_utils.py
+++ b/open_instruct/model_utils.py
@@ -14,30 +14,23 @@
 # limitations under the License.
 
 
+import asyncio
 import itertools
 from collections import OrderedDict, defaultdict
-from contextlib import contextmanager
 from dataclasses import dataclass
-from typing import Literal, Union
-
-try:
-    import deepspeed
-    from deepspeed.runtime.engine import DeepSpeedEngine
-except ImportError:
-    pass
-import asyncio
+from typing import Literal
 
 import pandas as pd
 import torch
 import transformers
 from accelerate import Accelerator
 from accelerate.state import AcceleratorState
+from deepspeed.runtime.engine import DeepSpeedEngine
 from huggingface_hub import HfApi
 from rich import print as rprint
 from rich.console import Console
 from rich.panel import Panel
 from rich.table import Table
-from torch.nn.parallel.distributed import DistributedDataParallel
 from transformers import PreTrainedModel, PreTrainedTokenizer
 
 from open_instruct import logger_utils
@@ -419,24 +412,6 @@ def generate(
     return torch.cat((queries, output.sequences[:, context_length:]), dim=1), logits
 
 
-@torch.no_grad()
-def batch_generation(
-    model: torch.nn.Module,
-    queries: torch.Tensor,
-    local_rollout_forward_batch_size: int,
-    pad_token_id: int,
-    generation_config: dict,
-):
-    query_responses = []
-    logitss = []
-    for i in range(0, queries.shape[0], local_rollout_forward_batch_size):
-        query = queries[i : i + local_rollout_forward_batch_size]
-        query_response, logits = generate(model, query, pad_token_id, generation_config)
-        query_responses.append(query_response)
-        logitss.append(logits)
-    return torch.cat(query_responses, 0), torch.cat(logitss, 0)
-
-
 def get_olmo3_generation_config(tokenizer):
     return transformers.GenerationConfig(
         temperature=None,
@@ -556,7 +531,7 @@ def iter_params(module, recurse=False):
     return [param for _, param in get_all_parameters(module, recurse)]
 
 
-def remove_hooks(model: "DeepSpeedEngine") -> None:
+def remove_hooks(model: DeepSpeedEngine) -> None:
     """Removes the optimizer hooks from a DeepSpeed ZeRO-3 model."""
     if model.optimizer is not None and hasattr(model.optimizer, "parameter_offload"):
         optimizer_offload = model.optimizer.parameter_offload
@@ -575,7 +550,7 @@ def remove_hooks(model: "DeepSpeedEngine") -> None:
     optimizer_offload.backward_hooks = []
 
 
-def add_hooks(model: "DeepSpeedEngine") -> None:
+def add_hooks(model: DeepSpeedEngine) -> None:
     """Adds the optimizer hooks from a DeepSpeed ZeRO-3 model."""
     if model.optimizer is not None and hasattr(model.optimizer, "parameter_offload"):
         optimizer_offload = model.optimizer.parameter_offload
@@ -584,25 +559,6 @@ def add_hooks(model: "DeepSpeedEngine") -> None:
         optimizer_offload._register_hooks_recursively(optimizer_offload.module)
 
 
-@contextmanager
-def unwrap_model_for_generation(
-    model: Union["DistributedDataParallel", "DeepSpeedEngine"], accelerator: "Accelerator", is_peft_model: bool = False
-) -> Union["transformers.PreTrainedModel", "DeepSpeedEngine"]:
-    """Context manager to unwrap a model for generation.
-    For ZeRO-3 models, we gather the weights once to speed up generation.
-    """
-    unwrapped_model = accelerator.unwrap_model(model)
-    if is_peft_model:
-        unwrapped_model.pretrained_model.disable_adapter()
-    if accelerator.state.deepspeed_plugin is not None and accelerator.state.deepspeed_plugin.zero_stage == 3:
-        with deepspeed.zero.GatheredParameters(model.parameters()):
-            remove_hooks(model)
-            yield accelerator.unwrap_model(model)
-            add_hooks(model)
-    else:
-        yield unwrapped_model
-
-
 def prepare_deepspeed(model: torch.nn.Module, per_device_train_batch_size: int, mixed_precision: str):
     """
     Prepares the model for training with DeepSpeed (both for stage 2 and 3), configuring the appropriate settings based on the model and
diff --git a/open_instruct/ppo2.py b/open_instruct/ppo2.py
index a64ded9772..1d2398e5a7 100644
--- a/open_instruct/ppo2.py
+++ b/open_instruct/ppo2.py
@@ -52,7 +52,6 @@
 # isort: on
 
 import gc
-import json
 import math
 import random
 import shutil
@@ -352,17 +351,6 @@ class Args:
     """What dataset to upload the metadata to. If unset, don't upload metadata"""
 
 
-def process_dataset_mixer(value) -> tuple[dict | None, str | None]:
-    # if passed through cli: convert the dataset mixers to dictionaries
-    if isinstance(value, str):
-        return json.loads(value), value
-    # if passed through yaml: convert the dataset mixers to strings
-    elif isinstance(value, dict):
-        return value, json.dumps(value)
-    else:
-        raise ValueError("Input must be either a string or a dictionary")
-
-
 def get_train_ds_config(
     offload,
     adam_offload=False,
diff --git a/open_instruct/ppo_vllm_thread_ray_gtrl.py b/open_instruct/ppo_vllm_thread_ray_gtrl.py
index bd5530876b..4c393af06a 100644
--- a/open_instruct/ppo_vllm_thread_ray_gtrl.py
+++ b/open_instruct/ppo_vllm_thread_ray_gtrl.py
@@ -47,7 +47,6 @@
 
 import asyncio
 import gc
-import json
 import math
 import random
 import shutil
@@ -369,17 +368,6 @@ class Args:
     """whether to apply a performance penalty to the code verifier"""
 
 
-def process_dataset_mixer(value) -> tuple[dict | None, str | None]:
-    # if passed through cli: convert the dataset mixers to dictionaries
-    if isinstance(value, str):
-        return json.loads(value), value
-    # if passed through yaml: convert the dataset mixers to strings
-    elif isinstance(value, dict):
-        return value, json.dumps(value)
-    else:
-        raise ValueError("Input must be either a string or a dictionary")
-
-
 def get_train_ds_config(
     offload,
     adam_offload=False,
diff --git a/open_instruct/utils.py b/open_instruct/utils.py
index 5a7da0636c..00702e3ddb 100644
--- a/open_instruct/utils.py
+++ b/open_instruct/utils.py
@@ -134,18 +134,6 @@ def ray_get_with_progress(
 
 # ----------------------------------------------------------------------------
 # Dataset utilities
-def is_openai_format(messages: Any) -> bool:
-    """
-    Check if the input messages are in OpenAI format.
-    Args:
-        messages (`Any`):
-            Messages to check.
-    Returns:
-        `bool`: Whether the messages are in OpenAI format.
-    """
-    if isinstance(messages, list) and all(isinstance(message, dict) for message in messages):
-        return all("role" in message and "content" in message for message in messages)
-    return False
 
 
 # functions for handling different formats of messages
@@ -1043,17 +1031,6 @@ def live_subprocess_output(cmd: list[str]) -> str:
     return "\n".join(output_lines)
 
 
-def download_from_hf(model_name_or_path: str, revision: str) -> None:
-    cmd = ["huggingface-cli", "download", model_name_or_path, "--revision", revision]
-    print(f"Downloading from HF with command: {cmd}")
-    output = live_subprocess_output(cmd)
-    # for some reason, sometimes the output includes the line including some loading message.
-    # so do some minor cleaning.
-    if "\n" in output:
-        output = output.split("\n")[-1].strip()
-    return output
-
-
 [
     "gsutil",
     "-o",
     "GSUtil:parallel_thread_count=1",
     "-o",
     "GSUtil:sliced_object_download_threshold=150",
     "-m",
     "cp",
     "-r",
 ]
 
 
-def download_from_gs_bucket(src_path: str, dest_path: str) -> None:
-    cmd = [
-        "gsutil",
-        "-o",
-        "GSUtil:parallel_thread_count=1",
-        "-o",
-        "GSUtil:sliced_object_download_threshold=150",
-        "-m",
-        "cp",
-        "-r",
-        src_path,
-        dest_path,
-    ]
-    print(f"Downloading from GS bucket with command: {cmd}")
-    live_subprocess_output(cmd)
-
-
 def gs_folder_exists(path: str) -> bool:
     cmd = ["gsutil", "ls", path]
     process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -1092,12 +1052,6 @@ def gs_folder_exists(path: str) -> bool:
     return process.returncode == 0
 
 
-def upload_to_gs_bucket(src_path: str, dest_path: str) -> None:
-    cmd = ["gsutil", "-o", "GSUtil:parallel_composite_upload_threshold=150M", "cp", "-r", src_path, dest_path]
-    print(f"Copying model to GS bucket with command: {cmd}")
-    live_subprocess_output(cmd)
-
-
 def sync_gs_bucket(src_path: str, dest_path: str) -> None:
     cmd = [
         "gsutil",
@@ -1402,11 +1356,6 @@ def _z3_params_to_fetch(param_list):
     return [p for p in param_list if hasattr(p, "ds_id") and p.ds_status == ZeroParamStatus.NOT_AVAILABLE]
 
 
-def get_ray_address() -> str | None:
-    """Get the Ray address from the environment variable."""
-    return os.environ.get("RAY_ADDRESS")
-
-
 _SET_AFFINITY = False
diff --git a/scripts/convert_olmo_1124_weights_to_hf.py b/scripts/convert_olmo_1124_weights_to_hf.py
index 08b68e8bb7..4cb35fb848 100644
--- a/scripts/convert_olmo_1124_weights_to_hf.py
+++ b/scripts/convert_olmo_1124_weights_to_hf.py
@@ -39,8 +39,6 @@
 """
 
 
-def compute_intermediate_size(n, ffn_dim_multiplier=1, multiple_of=256):
-    return multiple_of * ((int(ffn_dim_multiplier * int(8 * n / 3)) + multiple_of - 1) // multiple_of)
 
 
 def read_json(path):
diff --git a/scripts/data/create_nq_tqa_hotpotqa_2wiki_simplqa_data.py b/scripts/data/create_nq_tqa_hotpotqa_2wiki_simplqa_data.py
index 398010c5f1..f63c8c28df 100644
--- a/scripts/data/create_nq_tqa_hotpotqa_2wiki_simplqa_data.py
+++ b/scripts/data/create_nq_tqa_hotpotqa_2wiki_simplqa_data.py
@@ -91,11 +91,6 @@ def convert_nq_to_rlvr_format_with_context(no_prompt: bool):
     dataset.push_to_hub("hamishivi/nq_rlvr" + ("_no_prompt" if no_prompt else ""), split="test")
 
 
-def check_nq_rlvr_dataset():
-    # Load the NQ dataset
-    nq_dataset = datasets.load_dataset("hamishivi/nq_rlvr")
-    nq_data = nq_dataset["train"]
-    import pdb; pdb.set_trace()
 
 
 def convert_tqa_to_rlvr_format(no_prompt: bool):
diff --git a/scripts/synth_pref/parse_preferences.py b/scripts/synth_pref/parse_preferences.py
index bb4b961b7a..d389220452 100644
--- a/scripts/synth_pref/parse_preferences.py
+++ b/scripts/synth_pref/parse_preferences.py
@@ -70,8 +70,6 @@ def parse_openai(
     }
     aspects = list(aspects_map.values())
 
-    def find_key(d: dict[str, list[str]], value: str) -> Optional[str]:
-        return next((k for k, v in d.items() if value in v), None)
 
     def get_resp(resp: dict[str, Any]) -> str:
         message = resp["body"]["choices"][0]["message"]