Spaces:
Sleeping
Sleeping
AlbertoSanz
committed on
Commit
·
8814dcc
1
Parent(s):
52d2ab8
commit para HF
Browse files- multi-modal-ia-agent.py → app/__init__.py +0 -0
- app/agent.py +64 -0
- app/config.py +26 -0
- app/gradio_interface.py +39 -0
- app/multimodal_handler.py +30 -0
- app/tools.py +38 -0
- app/utils.py +26 -0
- main.py +9 -0
- multi-modal-agent +1 -0
- scripts/deploy.sh +0 -0
- tests/test_agent.py +0 -0
- tests/test_interface.py +0 -0
multi-modal-ia-agent.py → app/__init__.py
RENAMED
|
File without changes
|
app/agent.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
from io import BytesIO
|
| 3 |
+
from PIL import Image
|
| 4 |
+
from pydub import AudioSegment
|
| 5 |
+
from pydub.playback import play
|
| 6 |
+
import openai
|
| 7 |
+
from app.utils import tools
|
| 8 |
+
from app.config import MODEL_TEXT, MODEL_IMAGE, MODEL_AUDIO, OPENAI_API_KEY, SYSTEM_MESSAGE
|
| 9 |
+
from app.tools import handle_tool_call # Asegúrate de que esté definido ahí
|
| 10 |
+
|
| 11 |
+
def initialize_agents():
    """Set the OpenAI API key and return the agent callables by name.

    Returns:
        dict: maps "chat", "artist" and "talker" to the functions of the
        same name defined in this module.
    """
    # The key must be set on the openai module before any agent is called.
    openai.api_key = OPENAI_API_KEY
    registry = dict(chat=chat, artist=artist, talker=talker)
    return registry
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def artist(city):
    """Generate a pop-art holiday illustration for ``city``.

    Args:
        city: Destination name interpolated into the image prompt.

    Returns:
        PIL.Image.Image: the picture decoded from the base64 payload of
        the image-generation response.
    """
    image_response = openai.images.generate(
        # Use the configured image model rather than a duplicated literal,
        # so changing MODEL_IMAGE in the config actually takes effect.
        model=MODEL_IMAGE,
        prompt=f"Una imagen que representa unas vacaciones en {city}, mostrando lugares turísticos y todo lo único de {city}, en un vibrante estilo pop-art",
        size="1024x1024",
        n=1,
        response_format="b64_json",
    )
    image_base64 = image_response.data[0].b64_json
    image_data = base64.b64decode(image_base64)
    return Image.open(BytesIO(image_data))
|
| 31 |
+
|
| 32 |
+
def talker(message):
    """Synthesize ``message`` as speech and play it.

    Args:
        message: Text to speak. A falsy value is reported on stdout and
            no audio is generated.

    Returns:
        None.
    """
    if message:
        speech = openai.audio.speech.create(
            model="tts-1",
            voice="onyx",
            input=message,
        )
        mp3_buffer = BytesIO(speech.content)
        # Decode the MP3 bytes and hand the segment to pydub's play().
        play(AudioSegment.from_file(mp3_buffer, format="mp3"))
        return
    print("Mensaje vacío, no se genera audio")
|
| 44 |
+
|
| 45 |
+
def chat(history):
    """Run one assistant turn, resolving a tool call when the model asks for one.

    Args:
        history: List of ``{"role": ..., "content": ...}`` message dicts.
            The assistant reply is appended to this list in place.

    Returns:
        tuple: ``(history, image)`` where ``image`` is the result of
        ``artist(city)`` when the model called the tool with a city, and
        ``None`` otherwise.
    """
    messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + history
    # handle_tool_call() reads message.tool_calls, which is only populated
    # when the request uses `tools` (the legacy `functions` parameter yields
    # message.function_call instead). Bare function schemas are wrapped so
    # either spec shape works.
    tool_specs = [
        spec if "type" in spec else {"type": "function", "function": spec}
        for spec in tools
    ]
    response = openai.chat.completions.create(
        model=MODEL_TEXT,
        messages=messages,
        tools=tool_specs,
    )
    image = None

    message = response.choices[0].message
    if message.tool_calls:
        # NOTE(review): handle_tool_call() answers only the first tool call.
        tool_response, city = handle_tool_call(message)
        messages.append(message)
        messages.append(tool_response)
        # Skip image generation when the model supplied no city.
        if city:
            image = artist(city)
        response = openai.chat.completions.create(model=MODEL_TEXT, messages=messages)

    reply = response.choices[0].message.content
    history += [{"role": "assistant", "content": reply}]

    talker(reply)

    return history, image
|
app/config.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
load_dotenv()  # Load environment variables from a .env file

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# OpenAI models used for the different modalities
MODEL_TEXT = "gpt-4o-mini"  # Model for text/chat
MODEL_IMAGE = "dall-e-3"  # Model for image generation
MODEL_AUDIO = "whisper-1"  # Model for audio processing
|
| 12 |
+
|
| 13 |
+
def check_api_key():
    """Print whether an OpenAI API key is configured.

    When a key is present its first eight characters are shown; otherwise
    a warning is printed.
    """
    if not OPENAI_API_KEY:
        print("⚠️ OpenAI Key not configured")
        return
    print(f"OpenAI API Key exists and begins with: {OPENAI_API_KEY[:8]}")
|
| 18 |
+
|
| 19 |
+
# System prompt for the FlightAI assistant.
SYSTEM_MESSAGE = "".join(
    [
        "Eres un asistente útil para una aerolínea llamada FlightAI. ",
        "Da respuestas breves y corteses, de no más de una oración. ",
        "Se siempre preciso. Si no sabes la respuesta, dilo.",
    ]
)

# Ticket price per destination, keyed by lowercase city name.
TICKET_PRICES = {
    "londres": "$799",
    "parís": "$899",
    "tokyo": "$1400",
    "berlín": "$499",
}
|
app/gradio_interface.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from app.agent import initialize_agents
|
| 3 |
+
|
| 4 |
+
agents = initialize_agents()
# Conversation shared by all requests. It starts empty: the chat agent
# prepends the real system prompt itself, so seeding a system entry here
# (previously the literal text "SYSTEM_MESSAGE") only sent the model a
# bogus second system message.
history = []
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def chat_with_agent(user_input):
    """Send ``user_input`` to the chat agent and return the updated transcript.

    Args:
        user_input: Text submitted from the textbox.

    Returns:
        list: the conversation as ``{"role": ..., "content": ...}`` dicts
        for the single chatbot output, with system entries removed.
    """
    global history
    # Blank submissions leave the conversation untouched.
    if user_input and user_input.strip():
        # The agent only sees what is in history, so record the user turn first.
        history.append({"role": "user", "content": user_input})
        # The generated image is discarded: the interface has no image output.
        history, _image = agents["chat"](history)
    # Exactly one value is returned because the event has one output component.
    return [turn for turn in history if turn.get("role") != "system"]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# def chat_with_agent(user_input):
|
| 17 |
+
# global history
|
| 18 |
+
# # Añadir mensaje del usuario al historial antes de pasar a chat()
|
| 19 |
+
# history.append({"role": "user", "content": user_input})
|
| 20 |
+
|
| 21 |
+
# history, image = agents["chat"](history)
|
| 22 |
+
|
| 23 |
+
# # Preparar para Gradio: crear lista de pares (usuario, asistente)
|
| 24 |
+
# history_display = []
|
| 25 |
+
# for i in range(1, len(history), 2):
|
| 26 |
+
# user_msg = history[i]["content"] if i < len(history) else ""
|
| 27 |
+
# assistant_msg = history[i+1]["content"] if (i+1) < len(history) else ""
|
| 28 |
+
# history_display.append((user_msg, assistant_msg))
|
| 29 |
+
|
| 30 |
+
# return history_display
|
| 31 |
+
|
| 32 |
+
def create_interface():
    """Build the Gradio Blocks UI: a chatbot panel fed by a text box.

    Returns:
        The assembled ``gr.Blocks`` interface (not launched here).
    """
    with gr.Blocks() as app_interface:
        conversation = gr.Chatbot(type="messages")
        prompt_box = gr.Textbox(
            placeholder="Escribe tu mensaje...",
            show_label=False,
        )
        # Submitting the text box runs the agent and updates the chatbot.
        prompt_box.submit(fn=chat_with_agent, inputs=prompt_box, outputs=conversation)
    return app_interface
|
app/multimodal_handler.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
from app.agent import chat, artist, talker
|
| 3 |
+
from app.config import MODEL_TEXT, MODEL_IMAGE, MODEL_AUDIO, OPENAI_API_KEY, SYSTEM_MESSAGE
|
| 4 |
+
from app.utils import get_ticket_price
|
| 5 |
+
|
| 6 |
+
# def chat_handler(message, history):
|
| 7 |
+
# messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + history + [{"role": "user", "content": message}]
|
| 8 |
+
# response = openai.chat.completions.create(model=MODEL_TEXT, messages=messages)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# return response.choices[0].message.content
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def initialize_agents():
    """Return the imported agent callables keyed by name.

    Returns:
        dict: maps "chat", "artist" and "talker" to the functions imported
        from ``app.agent``.
    """
    return dict(chat=chat, artist=artist, talker=talker)
|
| 21 |
+
|
| 22 |
+
def handle_message(history=None):
    """Run one chat turn over ``history`` and return the agent's result.

    Args:
        history: Conversation as a list of message dicts. A fresh empty
            list is used when omitted.

    Returns:
        Whatever ``chat(history)`` returns, unchanged.
    """
    # chat() appends the reply to the list it receives, so a literal `[]`
    # default would be shared between calls and leak turns across them.
    if history is None:
        history = []
    return chat(history)
|
| 25 |
+
|
| 26 |
+
def handle_image(city):
    """Return the image that ``artist`` produces for ``city``."""
    generated = artist(city)
    return generated
|
| 28 |
+
|
| 29 |
+
def handle_audio(text):
    """Pass ``text`` to ``talker`` and return its result."""
    outcome = talker(text)
    return outcome
|
app/tools.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from app.config import TICKET_PRICES
|
| 3 |
+
|
| 4 |
+
def get_ticket_price(destination_city):
    """Look up the ticket price for a destination city.

    Args:
        destination_city: City name. Matching ignores case and surrounding
            whitespace; ``None`` or an empty string is treated as unknown.

    Returns:
        The price stored in ``TICKET_PRICES`` for the city, or "Unknown"
        when there is no entry.
    """
    print(f"Tool get_ticket_price called for {destination_city}")
    # `or ""` keeps a missing argument from raising AttributeError on .lower().
    city = (destination_city or "").strip().lower()
    return TICKET_PRICES.get(city, "Unknown")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Function schema describing get_ticket_price to the model.
price_function = dict(
    name="get_ticket_price",
    description="Obtén el precio de un billete de ida y vuelta a la ciudad de destino...",
    parameters=dict(
        type="object",
        properties=dict(
            destination_city=dict(
                type="string",
                description="La ciudad a la que el cliente desea viajar",
            ),
        ),
        required=["destination_city"],
        additionalProperties=False,
    ),
)

# Tool list wrapping the schema as a "function" tool.
tools = [dict(type="function", function=price_function)]
|
| 27 |
+
|
| 28 |
+
def handle_tool_call(message):
    """Execute the first tool call on ``message`` and build the tool reply.

    Args:
        message: Assistant message exposing ``tool_calls``; the first call
            must provide ``id`` and ``function.arguments`` (a JSON string).

    Returns:
        tuple: ``(response, city)`` where ``response`` is the "tool" role
        message answering that call and ``city`` is the requested
        ``destination_city`` (``None`` when it was not supplied).
    """
    tool_call = message.tool_calls[0]
    # Arguments are model-generated text: tolerate malformed or non-object
    # JSON instead of failing the whole chat turn.
    try:
        arguments = json.loads(tool_call.function.arguments)
    except (TypeError, ValueError):
        arguments = {}
    if not isinstance(arguments, dict):
        arguments = {}
    city = arguments.get("destination_city")
    # Only look up a price for a usable city name; anything else is unknown.
    if isinstance(city, str) and city.strip():
        price = get_ticket_price(city)
    else:
        price = "Unknown"
    response = {
        "role": "tool",
        "content": json.dumps({"destination_city": city, "price": price}),
        "tool_call_id": tool_call.id,
    }
    return response, city
|
app/utils.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.config import TICKET_PRICES
|
| 2 |
+
from app.tools import price_function, get_ticket_price
|
| 3 |
+
|
| 4 |
+
# Function specs offered to the chat model. Reuse the schema imported from
# app.tools instead of keeping a hand-copied duplicate that can drift from it.
tools = [price_function]
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# def get_ticket_price(destination_city):
|
| 24 |
+
# print(f"Tool get_ticket_price called for {destination_city}")
|
| 25 |
+
# city = destination_city.lower()
|
| 26 |
+
# return TICKET_PRICES.get(city, "Unknown")
|
main.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.gradio_interface import create_interface
|
| 2 |
+
|
| 3 |
+
def main():
    """Build the Gradio interface and launch it."""
    create_interface().launch()


if __name__ == "__main__":
    main()
|
multi-modal-agent
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit bebe87828691fbe445ffbdc7fb421935ad490047
|
scripts/deploy.sh
ADDED
|
File without changes
|
tests/test_agent.py
ADDED
|
File without changes
|
tests/test_interface.py
ADDED
|
File without changes
|