File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1414from typing import List
1515
1616from litellm import acompletion
17+
18+ from art .utils .strip_logprobs import strip_logprobs
1719from litellm .types .utils import ModelResponse
1820from openai .types .chat .chat_completion_message_param import ChatCompletionMessageParam
1921from pydantic import BaseModel , Field
@@ -287,9 +289,10 @@ async def ruler_score_group(
287289 new_trajectories .append (new_traj )
288290
289291 # Extract message lists and preserve original rewards for comparison
292+ # Strip logprobs to avoid sending huge token probability data to the judge
290293 message_lists : list [list [ChatCompletionMessageParam ]] = []
291294 for traj in new_trajectories :
292- message_lists .append (traj .messages ())
295+ message_lists .append (strip_logprobs ( traj .messages () ))
293296 traj .metrics ["independent_reward" ] = traj .reward
294297
295298 try :
You can’t perform that action at this time.
0 commit comments