Source code for are.simulation.validation.judgment
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from collections import Counter
from dataclasses import dataclass, field
from enum import Enum
@dataclass
class ToolCallCountsFailure(Failure):
    """Failure reported when agent and oracle tool-call counts disagree.

    ``str(failure)`` produces a multi-line report listing every tool whose
    call count differs between the two counters, followed by any mismatch
    in the number of messages sent to the user.
    """

    # Per-tool call counts recorded for the agent, keyed by tool.
    agent_counter: Counter
    # Number of messages the agent sent to the user.
    agent_aui_count: int
    # Per-tool call counts recorded for the oracle, keyed by tool.
    oracle_counter: Counter
    # Number of messages the oracle sent to the user.
    oracle_aui_count: int
    # How many messages beyond the oracle's count the agent may send
    # before the surplus is reported.
    extra_send_message_to_user_allowed: int = 0

    def __str__(self) -> str:
        """Return a human-readable description of every count mismatch."""
        report_lines = []

        # Symmetric multiset difference: Counter subtraction keeps only
        # positive counts, so the two surpluses have disjoint keys and
        # adding them yields every tool whose counts differ.
        agent_surplus = self.agent_counter - self.oracle_counter
        oracle_surplus = self.oracle_counter - self.agent_counter
        mismatched_tools = agent_surplus + oracle_surplus

        if mismatched_tools:
            per_tool = []
            for tool in mismatched_tools:
                agent_calls = self.agent_counter.get(tool, 0)
                oracle_calls = self.oracle_counter.get(tool, 0)
                per_tool.append(
                    f"- Tool '{tool}': Agent count {agent_calls}, "
                    f"Oracle count {oracle_calls}"
                )
            discrepancies = "\n".join(per_tool)
            report_lines.append(
                "Agent and oracle counters do not match for the following "
                f"tools:\n{discrepancies}"
            )

        # The oracle sent more user messages than the agent did.
        missing_messages = self.oracle_aui_count - self.agent_aui_count
        if missing_messages > 0:
            report_lines.append(
                f"Oracle sent {missing_messages} more message(s) than agent."
            )

        # The agent sent more user messages than the allowed surplus.
        allowed_extra = self.extra_send_message_to_user_allowed
        if self.agent_aui_count - self.oracle_aui_count > allowed_extra:
            report_lines.append(
                "Agent message to user count exceeds oracle AUI count by "
                f"more than {allowed_extra}."
            )

        return "Failure: \n" + "\n".join(report_lines)
@dataclass
class OracleEventMatchingFailure(Failure):
    """Failure reported when no agent action matched an oracle tool call.

    ``str(failure)`` lists the oracle tool name, its arguments (each
    rendered line capped at 200 characters) and every recorded matching
    attempt.
    """

    # Name of the oracle tool the agent failed to call.
    oracle_tool_name: str
    # Arguments of the oracle tool call, keyed by argument name.
    oracle_tool_args: dict[str, str]
    # One entry per attempted match; each is rendered with ``str``.
    comparison_failures: list[EventComparisonFailure]

    def __str__(self) -> str:
        """Return a human-readable description of the unmatched tool call."""
        max_len = 200

        rendered_args = []
        for arg_name, arg_value in self.oracle_tool_args.items():
            entry = f"-{arg_name}: {arg_value}"
            # Cap long argument lines and mark the cut with an ellipsis.
            if len(entry) > max_len:
                entry = entry[:max_len] + "..."
            rendered_args.append(entry)
        args_block = "\n".join(rendered_args)

        header = "Failure: Agent did not perform the following oracle tool call:"
        details = (
            f"\ntool name: {self.oracle_tool_name}\n"
            f"tool args:\n{args_block}\n"
        )
        attempts = "\n".join(
            "-" + str(attempt) for attempt in self.comparison_failures
        )
        return "".join(
            [header, details, "\nList of matching attempts:\n", attempts]
        )
@dataclass
class EnvOracleMatchingFailure(Failure):
    """Failure reported when an oracle env/user event could not be matched."""

    # Identifier of the oracle event that could not be matched.
    oracle_event_id: str

    def __str__(self) -> str:
        """Return a one-line description naming the unmatched event."""
        event_id = self.oracle_event_id
        return (
            f"Failure: Oracle env/user event {event_id} could not be matched. "
            "This is likely a bug !"
        )