Source code for iclbench.agents.self_refine

import copy
import re

from iclbench.agents.base import BaseAgent
from iclbench.client import LLMClientWrapper


class SelfRefineAgent(BaseAgent):
    """Agent that drafts an answer and then alternates critique and revision.

    On each step the agent asks the LLM for an initial response, then runs up
    to ``max_iterations`` rounds of "generate feedback on the response" followed
    by "rewrite the response using that feedback". The loop stops early when
    the feedback contains the phrase ``No further improvements needed``. The
    action is finally parsed out of the last response.

    NOTE(review): ``self.client`` and ``self.prompt_builder`` are presumably
    set by ``BaseAgent.__init__`` -- confirm against the base class.
    """

    # Phrase the feedback prompt asks the model to emit when it has no more
    # suggestions; its presence in the feedback ends the refinement loop.
    _STOP_PHRASE = "No further improvements needed"

    # Everything except ASCII letters, whitespace and ':' is stripped before
    # looking for the action marker.
    _NON_LETTERS = re.compile(r"[^a-zA-Z\s:]")

    # The prompts ask for "Action: <action>", so the marker must be matched
    # case-insensitively ("ACTION:" is still accepted). The word boundary
    # keeps words such as "reaction:" from being treated as the marker.
    _ACTION_MARKER = re.compile(r"\baction:", re.IGNORECASE)

    _ACTION_FORMAT_RULES = (
        "You can only output one of the above actions at a time, "
        "and always have to output an action until the episode terminates.\n"
        "Please provide reasoning first and only after reasoning provide "
        "the final answer in the form of\n"
        "Action: <action>"
    )

    def __init__(self, client_factory: LLMClientWrapper, prompt_builder, max_iterations=3):
        """Create the agent.

        Args:
            client_factory: Forwarded unchanged to ``BaseAgent.__init__``.
            prompt_builder: Forwarded unchanged to ``BaseAgent.__init__``.
            max_iterations: Upper bound on feedback/refinement rounds per
                call to :meth:`act`.
        """
        super().__init__(client_factory, prompt_builder)
        self.max_iterations = max_iterations

    def act(self, obs, prev_action=None):
        """Choose an action for ``obs`` using iterative self-refinement.

        Args:
            obs: Current observation; passed to
                ``prompt_builder.update_observation``.
            prev_action: Previously taken action. When truthy it is recorded
                with ``prompt_builder.update_action`` before the observation.

        Returns:
            The last LLM response, post-processed by
            :meth:`_extract_final_answer`.
        """
        if prev_action:
            self.prompt_builder.update_action(prev_action)
        self.prompt_builder.update_observation(obs)

        # Work on a private copy of the conversation so neither the appended
        # instructions nor the refinement turns leak into whatever object the
        # prompt builder handed out. Only the containers that get modified
        # are copied; message payloads themselves are shared.
        messages = list(self.prompt_builder.get_prompt())
        last_message = dict(messages[-1])
        last_parts = list(last_message["parts"])

        initial_instructions = (
            "Please provide an answer to the given question or task. "
            "After your response, I will provide feedback for improvement.\n"
            + self._ACTION_FORMAT_RULES
        )
        last_parts[0] += "\n\n" + initial_instructions
        last_message["parts"] = last_parts
        messages[-1] = last_message

        # Initial draft.
        response = self.client.generate(messages)

        feedback_prompt = (
            "You are an AI assistant tasked with providing feedback on the "
            "following response.\n"
            "Analyze the response for clarity, completeness, and accuracy.\n"
            f'Suggest specific improvements or write "{self._STOP_PHRASE}" '
            "if no further improvements are needed.\n"
            "Response to evaluate:"
        )

        for _ in range(self.max_iterations):
            draft_text = self._response_text(response)

            # Ask for a critique of the current draft. The feedback request is
            # sent on a throwaway copy so it never enters the main history.
            feedback_messages = [
                *messages,
                {"role": "user", "parts": [feedback_prompt + "\n\n" + draft_text]},
            ]
            feedback_text = self._response_text(self.client.generate(feedback_messages))

            if self._STOP_PHRASE in feedback_text:
                break

            # The draft is not otherwise part of ``messages``, so it is quoted
            # here; without it the model would be asked to refine a response
            # it cannot see.
            refine_prompt = (
                f"Your previous response:\n{draft_text}\n\n"
                f"Feedback: {feedback_text}\n\n"
                "Please refine your previous response based on this feedback.\n"
                + self._ACTION_FORMAT_RULES
            )
            messages.append({"role": "user", "parts": [refine_prompt]})

            response = self.client.generate(messages)

        return self._extract_final_answer(response)

    @staticmethod
    def _response_text(response):
        """Return the generated text carried by an LLM response object.

        ``_extract_final_answer`` reads the text from ``response.completion``,
        so that attribute is preferred here as well. The
        ``choices[0].message.content`` layout is kept as a fallback for
        response objects that expose only that shape.
        """
        completion = getattr(response, "completion", None)
        if completion is not None:
            return completion
        return response.choices[0].message.content

    def _extract_final_answer(self, reasoning):
        """Split a response into its full reasoning text and the bare action.

        Args:
            reasoning: Response object exposing a ``completion`` attribute and
                a namedtuple-style ``_replace`` accepting ``reasoning`` and
                ``completion`` fields.

        Returns:
            A copy of ``reasoning`` whose ``reasoning`` field holds the
            original completion text and whose ``completion`` field holds the
            text after the last ``Action:`` marker (any letter case), with
            everything but letters, whitespace and ``:`` removed and
            surrounding whitespace stripped. If no marker is present the
            whole filtered text is used.
        """
        answer = copy.deepcopy(reasoning)
        full_text = answer.completion
        answer = answer._replace(reasoning=full_text)

        letters_only = self._NON_LETTERS.sub("", full_text)
        action = self._ACTION_MARKER.split(letters_only)[-1].strip()
        return answer._replace(completion=action)