RobotecAI · maciejmajek · Sep 17, 2024 · Aug 16, 2024 · Aug 19, 2024 · Aug 21, 2024
diff --git a/.gitignore b/.gitignore
@@ -171,3 +171,4 @@ logs/
 !examples/imgs/*.md
 
 src/examples/*-demo
+artifact_database.pkl
diff --git a/examples/rosbot-xl-generic-node-demo.py b/examples/rosbot-xl-generic-node-demo.py
@@ -20,17 +20,19 @@
 import rclpy.qos
 import rclpy.subscription
 import rclpy.task
+from langchain.tools.render import render_text_description_and_args
 from langchain_openai import ChatOpenAI
 
 from rai.agents.state_based import create_state_based_agent
-from rai.node import RaiNode, describe_ros_image, wait_for_2s
+from rai.node import RaiNode, describe_ros_image
 from rai.tools.ros.native import (
     GetCameraImage,
     GetMsgFromTopic,
     Ros2ShowMsgInterfaceTool,
 )
 from rai.tools.ros.native_actions import Ros2RunActionSync
 from rai.tools.ros.tools import GetOccupancyGridTool
+from rai.tools.time import WaitForSecondsTool
 
 
 def main():
@@ -68,10 +70,9 @@ def main():
         "/wait",
     ]
 
-    SYSTEM_PROMPT = "You are an autonomous robot connected to ros2 environment. Your main goal is to fulfill the user's requests. "
-    "Do not make assumptions about the environment you are currently in. "
-    "Use the tooling provided to gather information about the environment."
-    "You can use ros2 topics, services and actions to operate."
+    # TODO(boczekbartek): refactor system prompt
+
+    SYSTEM_PROMPT = ""
 
     node = RaiNode(
         llm=ChatOpenAI(
@@ -84,7 +85,7 @@ def main():
     )
 
     tools = [
-        wait_for_2s,
+        WaitForSecondsTool(),
         GetMsgFromTopic(node=node),
         Ros2RunActionSync(node=node),
         GetCameraImage(node=node),
@@ -94,6 +95,18 @@ def main():
 
     state_retriever = node.get_robot_state
 
+    SYSTEM_PROMPT = f"""You are an autonomous robot connected to ros2 environment. Your main goal is to fulfill the user's requests.
+    Do not make assumptions about the environment you are currently in.
+    Use the tooling provided to gather information about the environment:
+
+    {render_text_description_and_args(tools)}
+
+    You can use ros2 topics, services and actions to operate. """
+
+    node.get_logger().info(f"{SYSTEM_PROMPT=}")
+
+    node.system_prompt = node.initialize_system_prompt(SYSTEM_PROMPT)
+
     app = create_state_based_agent(
         llm=llm,
         tools=tools,

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,7 +28,7 @@ langchain-aws = "^0.1.7"
 langchain-openai = "^0.1.8"
 langchain-community = "^0.2.4"
 transforms3d = "^0.4.1"
-langgraph = "^0.0.66"
+langgraph = "^0.1.0"
 tabulate = "^0.9.0"
 lark = "^1.1.9"
 langfuse = "^2.36.1"

diff --git a/src/rai/rai/agents/state_based.py b/src/rai/rai/agents/state_based.py
@@ -14,8 +14,10 @@
 #
 
 import logging
+import pickle
 import time
 from functools import partial
+from pathlib import Path
 from typing import (
     Any,
     Callable,
@@ -75,11 +77,49 @@ class Report(BaseModel):
     steps: List[str] = Field(
         ..., title="Steps", description="The steps taken to solve the problem"
     )
+    success: bool = Field(
+        ..., title="Success", description="Whether the problem was solved"
+    )
     response_to_user: str = Field(
         ..., title="Response", description="The response to the user"
     )
 
 
+def get_stored_artifacts(
+    tool_call_id: str, db_path="artifact_database.pkl"
+) -> List[Any]:
+    """Return the artifacts stored for ``tool_call_id``, or ``[]`` if none exist."""
+    # TODO(boczekbartek): refactor
+    database_file = Path(db_path)
+    if database_file.is_file():
+        # NOTE: pickle.load can execute arbitrary code; only read trusted files.
+        with database_file.open("rb") as handle:
+            stored = pickle.load(handle)
+        if tool_call_id in stored:
+            return stored[tool_call_id]
+
+    return []
+
+
+def store_artifacts(
+    tool_call_id: str, artifacts: List[Any], db_path="artifact_database.pkl"
+):
+    """Append ``artifacts`` under ``tool_call_id`` in the pickle file at ``db_path``.
+
+    The file is created if missing; entries for an existing id are extended.
+    """
+    # TODO(boczekbartek): refactor
+    db_path = Path(db_path)
+    artifact_database = {}
+    if db_path.is_file():
+        # Honor ``db_path`` (previously the default filename was always used).
+        with db_path.open("rb") as file:
+            artifact_database = pickle.load(file)
+    artifact_database.setdefault(tool_call_id, []).extend(artifacts)
+    with db_path.open("wb") as file:
+        pickle.dump(artifact_database, file)
+
+
 class ToolRunner(RunnableCallable):
     def __init__(
         self,
@@ -126,13 +166,15 @@ def run_one(call: ToolCall):
                     content=f"Failed to run tool. Error: {e}",
                     name=call["name"],
                     tool_call_id=call["id"],
+                    status="error",
                 )
             except Exception as e:
                 self.logger.info(f'Error in "{call["name"]}", error: {e}')
                 output = ToolMessage(
                     content=f"Failed to run tool. Error: {e}",
                     name=call["name"],
                     tool_call_id=call["id"],
+                    status="error",
                 )
 
             if output.artifact is not None:
@@ -143,6 +185,7 @@ def run_one(call: ToolCall):
                     )
 
                 artifact = cast(MultimodalArtifact, artifact)
+                store_artifacts(output.tool_call_id, [artifact])
 
             if artifact is not None:  # multimodal case
                 return ToolMultimodalMessage(
@@ -160,7 +203,9 @@ def run_one(call: ToolCall):
             outputs: List[Any] = []
             for raw_output in raw_outputs:
                 if isinstance(raw_output, ToolMultimodalMessage):
-                    outputs.extend(raw_output.postprocess())
+                    outputs.extend(
+                        raw_output.postprocess()
+                    )  # openai please allow tool messages with images!
                 else:
                     outputs.append(raw_output)
 
@@ -258,7 +303,7 @@ def retriever_wrapper(
     info = str_output(retrieved_info)
     state["messages"].append(
         HumanMultimodalMessage(
-            content="Retrieved state: {}".format(info), images=images, audios=audios
+            content=f"Retrieved state: {info}", images=images, audios=audios
         )
     )
     return state

diff --git a/src/rai/rai/messages/multimodal.py b/src/rai/rai/messages/multimodal.py
@@ -58,6 +58,10 @@ def __init__(
             _content.extend(_image_content)
         self.content = _content
 
+    @property
+    def text(self) -> str:
+        return self.content[0]["text"]  # NOTE(review): assumes entry 0 is the text part — confirm
+
 
 class HumanMultimodalMessage(HumanMessage, MultimodalMessage):
     def __repr_args__(self) -> Any:
@@ -104,6 +108,7 @@ def _postprocess_openai(self):
             human_message = HumanMultimodalMessage(
                 content=f"Image returned by a tool call {self.tool_call_id}",
                 images=self.images,
+                tool_call_id=self.tool_call_id,
             )
             # at this point self.content is a list of dicts
             # we need to extract the text from each dict