Spaces:

albertosanzdev
/

multi-modal-agent

Sleeping

App Files Files Community

AlbertoSanz committed 14 days ago

Commit

8814dcc

1 Parent(s): 52d2ab8

commit para HF

Browse files

Files changed (12) hide show

multi-modal-ia-agent.py → app/__init__.py +0 -0
app/agent.py +64 -0
app/config.py +26 -0
app/gradio_interface.py +39 -0
app/multimodal_handler.py +30 -0
app/tools.py +38 -0
app/utils.py +26 -0
main.py +9 -0
multi-modal-agent +1 -0
scripts/deploy.sh +0 -0
tests/test_agent.py +0 -0
tests/test_interface.py +0 -0

multi-modal-ia-agent.py → app/__init__.py RENAMED Viewed

File without changes

app/agent.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import base64
+from io import BytesIO
+from PIL import Image
+from pydub import AudioSegment
+from pydub.playback import play
+import openai
+from app.utils import tools
+from app.config import MODEL_TEXT, MODEL_IMAGE, MODEL_AUDIO, OPENAI_API_KEY, SYSTEM_MESSAGE
+from app.tools import handle_tool_call  # Asegúrate de que esté definido ahí
+def initialize_agents():
+    openai.api_key = OPENAI_API_KEY
+    return {
+        "chat": chat,
+        "artist": artist,
+        "talker": talker
+    }
+def artist(city):
+    image_response = openai.images.generate(
+            model="dall-e-3",
+            prompt=f"Una imagen que representa unas vacaciones en {city}, mostrando lugares turísticos y todo lo único de {city}, en un vibrante estilo pop-art",
+            size="1024x1024",
+            n=1,
+            response_format="b64_json",
+        )
+    image_base64 = image_response.data[0].b64_json
+    image_data = base64.b64decode(image_base64)
+    return Image.open(BytesIO(image_data))
+def talker(message):
+    if not message:
+        print("Mensaje vacío, no se genera audio")
+        return
+    response = openai.audio.speech.create(
+        model="tts-1",
+        voice="onyx",
+        input=message
+    )
+    audio_stream = BytesIO(response.content)
+    audio = AudioSegment.from_file(audio_stream, format="mp3")
+    play(audio)
+def chat(history):
+    messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + history
+    response = openai.chat.completions.create(model=MODEL_TEXT, messages=messages, functions=tools,
+    function_call="auto")
+    image = None
+    if response.choices[0].finish_reason=="tool_calls":
+        message = response.choices[0].message
+        response, city = handle_tool_call(message)
+        messages.append(message)
+        messages.append(response)
+        image = artist(city)
+        response = openai.chat.completions.create(model=MODEL_TEXT, messages=messages)
+    reply = response.choices[0].message.content
+    history += [{"role": "assistant", "content": reply}]
+    talker(reply)
+    return history, image

app/config.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import os
+from dotenv import load_dotenv
+load_dotenv()  # Load environment variables from .env
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+# OpenAI models for the different uses
+MODEL_TEXT = "gpt-4o-mini"        # Model for text/chat
+MODEL_IMAGE = "dall-e-3"          # Model for image generation
+MODEL_AUDIO = "whisper-1"         # Model for audio processing
+def check_api_key():
+    if OPENAI_API_KEY:
+        print(f"OpenAI API Key exists and begins with: {OPENAI_API_KEY[:8]}")
+    else:
+        print("⚠️ OpenAI Key not configured")
+SYSTEM_MESSAGE = (  # System prompt; app.agent.chat places it first in the messages it sends
+    "Eres un asistente útil para una aerolínea llamada FlightAI. "
+    "Da respuestas breves y corteses, de no más de una oración. "
+    "Se siempre preciso. Si no sabes la respuesta, dilo."
+)
+TICKET_PRICES = {"londres": "$799", "parís": "$899", "tokyo": "$1400", "berlín": "$499"}  # Lowercase city name -> price string; read by app.tools.get_ticket_price

app/gradio_interface.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import gradio as gr
+from app.agent import initialize_agents
+agents = initialize_agents()
+history = [{"role": "system", "content": "SYSTEM_MESSAGE"}]  # inicializa con sistema
+def chat_with_agent(user_input):
+    global history
+    history, image = agents["chat"](history)
+    # history debe tener el formato adecuado:
+    # [{"role": "user", "content": "..."} , {"role": "assistant", "content": "..."}]
+    return history, history  # Devuelve la historia para actualizar el chatbot y su estado
+# def chat_with_agent(user_input):
+#     global history
+#     # Añadir mensaje del usuario al historial antes de pasar a chat()
+#     history.append({"role": "user", "content": user_input})
+#     history, image = agents["chat"](history)
+#     # Preparar para Gradio: crear lista de pares (usuario, asistente)
+#     history_display = []
+#     for i in range(1, len(history), 2):
+#         user_msg = history[i]["content"] if i < len(history) else ""
+#         assistant_msg = history[i+1]["content"] if (i+1) < len(history) else ""
+#         history_display.append((user_msg, assistant_msg))
+#     return history_display
+def create_interface():
+    with gr.Blocks() as app_interface:
+        chatbot = gr.Chatbot(type="messages")
+        txt = gr.Textbox(show_label=False, placeholder="Escribe tu mensaje...")
+        txt.submit(chat_with_agent, inputs=txt, outputs=chatbot)
+    return app_interface

app/multimodal_handler.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import openai
+from app.agent import chat, artist, talker
+from app.config import MODEL_TEXT, MODEL_IMAGE, MODEL_AUDIO, OPENAI_API_KEY, SYSTEM_MESSAGE
+from app.utils import get_ticket_price
+# def chat_handler(message, history):
+#     messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + history + [{"role": "user", "content": message}]
+#     response = openai.chat.completions.create(model=MODEL_TEXT, messages=messages)
+#     return response.choices[0].message.content
+def initialize_agents():
+    return {
+        "chat": chat,
+        "artist": artist,
+        "talker": talker
+    }
+def handle_message(history=[]):
+    response_text = chat(history)
+    return response_text
+def handle_image(city):
+    return artist(city)
+def handle_audio(text):
+    return talker(text)

app/tools.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import json
+from app.config import TICKET_PRICES
+def get_ticket_price(destination_city):
+    print(f"Tool get_ticket_price called for {destination_city}")
+    city = destination_city.lower()
+    return TICKET_PRICES.get(city, "Unknown")
+price_function = {  # Schema describing get_ticket_price: name, description and JSON-schema parameters
+    "name": "get_ticket_price",
+    "description": "Obtén el precio de un billete de ida y vuelta a la ciudad de destino...",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "destination_city": {
+                "type": "string",
+                "description": "La ciudad a la que el cliente desea viajar",
+            },
+        },
+        "required": ["destination_city"],
+        "additionalProperties": False
+    }
+}
+tools = [{"type": "function", "function": price_function}]  # price_function wrapped as a {"type": "function"} tool entry
+def handle_tool_call(message):
+    tool_call = message.tool_calls[0]
+    arguments = json.loads(tool_call.function.arguments)
+    city = arguments.get('destination_city')
+    price = get_ticket_price(city)
+    response = {
+        "role": "tool",
+        "content": json.dumps({"destination_city": city,"price": price}),
+        "tool_call_id": message.tool_calls[0].id
+    }
+    return response, city

app/utils.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from app.config import TICKET_PRICES
+from app.tools import price_function, get_ticket_price
+tools = [  # Bare function schemas (name/description/parameters), without the {"type": "function"} wrapper used in app.tools
+    {
+        "name": "get_ticket_price",
+        "description": "Obtiene el precio del billete a una ciudad de destino",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "destination_city": {
+                    "type": "string",
+                    "description": "Ciudad a la que se quiere viajar"
+                }
+            },
+            "required": ["destination_city"]
+        }
+    }
+]
+# def get_ticket_price(destination_city):
+#     print(f"Tool get_ticket_price called for {destination_city}")
+#     city = destination_city.lower()
+#     return TICKET_PRICES.get(city, "Unknown")

main.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from app.gradio_interface import create_interface
+def main():
+    interface = create_interface()
+    interface.launch()
+if __name__ == "__main__":
+    main()

multi-modal-agent ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit bebe87828691fbe445ffbdc7fb421935ad490047

scripts/deploy.sh ADDED Viewed

File without changes

tests/test_agent.py ADDED Viewed

File without changes

tests/test_interface.py ADDED Viewed

File without changes