Add date to system prompt and clarifications on scroll tool
Browse files
- app.py +0 -17
- e2bqwen.py +4 -2
- eval.py +1 -1
app.py
CHANGED
|
@@ -774,23 +774,6 @@ _Please note that we store the task logs by default so **do not write any person
|
|
| 774 |
def set_logs_source(session_state):
|
| 775 |
session_state["replay_log"] = "udupp2fyavq_1743170323"
|
| 776 |
|
| 777 |
-
# replay_btn.click(
|
| 778 |
-
# fn=clear_and_set_view_only,
|
| 779 |
-
# inputs=[task_input],
|
| 780 |
-
# outputs=[sandbox_html]
|
| 781 |
-
# ).then(
|
| 782 |
-
# set_logs_source,
|
| 783 |
-
# inputs=[session_state]
|
| 784 |
-
# ).then(
|
| 785 |
-
# agent_ui.interact_with_agent,
|
| 786 |
-
# inputs=[task_input, stored_messages, session_state, session_hash_state],
|
| 787 |
-
# outputs=[chatbot_display]
|
| 788 |
-
# ).then(
|
| 789 |
-
# fn=set_interactive,
|
| 790 |
-
# inputs=[],
|
| 791 |
-
# outputs=[sandbox_html]
|
| 792 |
-
# )
|
| 793 |
-
|
| 794 |
demo.load(
|
| 795 |
fn=lambda: True, # dummy to trigger the load
|
| 796 |
outputs=[is_interactive],
|
|
|
|
| 774 |
def set_logs_source(session_state):
|
| 775 |
session_state["replay_log"] = "udupp2fyavq_1743170323"
|
| 776 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 777 |
demo.load(
|
| 778 |
fn=lambda: True, # dummy to trigger the load
|
| 779 |
outputs=[is_interactive],
|
e2bqwen.py
CHANGED
|
@@ -16,8 +16,10 @@ from smolagents.agents import populate_template
|
|
| 16 |
from smolagents.monitoring import LogLevel
|
| 17 |
from smolagents.agent_types import AgentImage
|
| 18 |
from PIL import ImageDraw
|
|
|
|
| 19 |
|
| 20 |
E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
|
|
|
|
| 21 |
<action process>
|
| 22 |
You will be given a task to solve in several steps. At each step you will perform an action.
|
| 23 |
After each action, you'll receive an updated screenshot.
|
|
@@ -142,7 +144,7 @@ Always analyze the latest screenshot carefully before performing actions.
|
|
| 142 |
Desktop menus usually expand with more options, the tiny triangle next to some text in a menu means that menu expands. For example in Office in the Applications menu expands showing presentation or writing applications.
|
| 143 |
NEVER CLICK THE WEB BROWSER ICON TO OPEN THE WEB BROWSER: use open_url
|
| 144 |
</general_guidelines>
|
| 145 |
-
"""
|
| 146 |
|
| 147 |
|
| 148 |
def draw_marker_on_image(image_copy, click_coordinates):
|
|
@@ -419,7 +421,7 @@ REMEMBER TO ALWAYS CLICK IN THE MIDDLE OF THE TEXT, NOT ON THE SIDE, NOT UNDER.
|
|
| 419 |
Args:
|
| 420 |
x: The x coordinate (horizontal position) of the element to scroll/zoom
|
| 421 |
y: The y coordinate (vertical position) of the element to scroll/zoom
|
| 422 |
-
direction: The direction to scroll ("up" or "down"), defaults to "down"
|
| 423 |
amount: The amount to scroll. A good amount is 1 or 2.
|
| 424 |
"""
|
| 425 |
self.desktop.move_mouse(x, y)
|
|
|
|
| 16 |
from smolagents.monitoring import LogLevel
|
| 17 |
from smolagents.agent_types import AgentImage
|
| 18 |
from PIL import ImageDraw
|
| 19 |
+
from datetime import datetime
|
| 20 |
|
| 21 |
E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
|
| 22 |
+
The current date is <<current_date>>.
|
| 23 |
<action process>
|
| 24 |
You will be given a task to solve in several steps. At each step you will perform an action.
|
| 25 |
After each action, you'll receive an updated screenshot.
|
|
|
|
| 144 |
Desktop menus usually expand with more options, the tiny triangle next to some text in a menu means that menu expands. For example in Office in the Applications menu expands showing presentation or writing applications.
|
| 145 |
NEVER CLICK THE WEB BROWSER ICON TO OPEN THE WEB BROWSER: use open_url
|
| 146 |
</general_guidelines>
|
| 147 |
+
""".replace("<<current_date>>", datetime.now().strftime("%A, %d-%B-%Y"))
|
| 148 |
|
| 149 |
|
| 150 |
def draw_marker_on_image(image_copy, click_coordinates):
|
|
|
|
| 421 |
Args:
|
| 422 |
x: The x coordinate (horizontal position) of the element to scroll/zoom
|
| 423 |
y: The y coordinate (vertical position) of the element to scroll/zoom
|
| 424 |
+
direction: The direction to scroll ("up" or "down"), defaults to "down". For zoom, "up" zooms in, "down" zooms out.
|
| 425 |
amount: The amount to scroll. A good amount is 1 or 2.
|
| 426 |
"""
|
| 427 |
self.desktop.move_mouse(x, y)
|
eval.py
CHANGED
|
@@ -347,7 +347,7 @@ def main():
|
|
| 347 |
"commute": "Check the commuting time between Bern and Zurich on Google maps",
|
| 348 |
"hello": "Write 'Hello World' in a text editor",
|
| 349 |
"wiki": "When was Temple Grandin introduced to the American Academy of Arts and Sciences, according to Wikipedia?",
|
| 350 |
-
"flight": "Search a flight Rome
|
| 351 |
"pond": "What's the name of the pond just south of Château de Fontainebleau in Google maps?",
|
| 352 |
"flux": "Go on the Hugging Face Hub, find a Space for FLUX1.dev, and generate a picture of the Golden Gate bridge.",
|
| 353 |
"hf": "Download me a picture of a puppy from Google, then head to Hugging Face, find a Space dedicated to background removal, and use it to remove the puppy picture's background",
|
|
|
|
| 347 |
"commute": "Check the commuting time between Bern and Zurich on Google maps",
|
| 348 |
"hello": "Write 'Hello World' in a text editor",
|
| 349 |
"wiki": "When was Temple Grandin introduced to the American Academy of Arts and Sciences, according to Wikipedia?",
|
| 350 |
+
"flight": "Search a flight from Rome to Berlin for May 3rd, 2025.",
|
| 351 |
"pond": "What's the name of the pond just south of Château de Fontainebleau in Google maps?",
|
| 352 |
"flux": "Go on the Hugging Face Hub, find a Space for FLUX1.dev, and generate a picture of the Golden Gate bridge.",
|
| 353 |
"hf": "Download me a picture of a puppy from Google, then head to Hugging Face, find a Space dedicated to background removal, and use it to remove the puppy picture's background",
|