AlbertoSanz committed on
Commit
8814dcc
·
1 Parent(s): 52d2ab8

commit para HF

Browse files
multi-modal-ia-agent.py → app/__init__.py RENAMED
File without changes
app/agent.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from io import BytesIO
3
+ from PIL import Image
4
+ from pydub import AudioSegment
5
+ from pydub.playback import play
6
+ import openai
7
+ from app.utils import tools
8
+ from app.config import MODEL_TEXT, MODEL_IMAGE, MODEL_AUDIO, OPENAI_API_KEY, SYSTEM_MESSAGE
9
+ from app.tools import handle_tool_call # Asegúrate de que esté definido ahí
10
+
11
def initialize_agents():
    """Configure the OpenAI client and expose the agent callables by name.

    Side effect: assigns ``OPENAI_API_KEY`` to ``openai.api_key``.

    Returns:
        dict: maps ``"chat"``, ``"artist"`` and ``"talker"`` to the
        functions of the same name defined in this module.
    """
    openai.api_key = OPENAI_API_KEY
    registry = dict(chat=chat, artist=artist, talker=talker)
    return registry
18
+
19
+
20
def artist(city):
    """Generate a pop-art style holiday image for ``city``.

    Requests a single 1024x1024 image from the OpenAI images endpoint as
    base64 JSON, decodes it and opens it with Pillow.

    Args:
        city: Name of the destination, interpolated into the prompt.

    Returns:
        PIL.Image.Image: the generated image, backed by an in-memory buffer.
    """
    image_response = openai.images.generate(
        # Use the configured image model rather than a hard-coded name so
        # that app.config stays the single place to change it.
        model=MODEL_IMAGE,
        prompt=f"Una imagen que representa unas vacaciones en {city}, mostrando lugares turísticos y todo lo único de {city}, en un vibrante estilo pop-art",
        size="1024x1024",
        n=1,
        response_format="b64_json",
    )
    image_base64 = image_response.data[0].b64_json
    image_data = base64.b64decode(image_base64)
    return Image.open(BytesIO(image_data))
31
+
32
def talker(message):
    """Synthesize ``message`` to speech and play it on the local audio device.

    A falsy message (``None`` or empty string) is reported on stdout and
    skipped without calling the API.

    Args:
        message: Text to speak.

    Returns:
        None.
    """
    if message:
        speech = openai.audio.speech.create(model="tts-1", voice="onyx", input=message)
        # The response body is MP3 bytes; decode from memory and play.
        clip = AudioSegment.from_file(BytesIO(speech.content), format="mp3")
        play(clip)
        return
    print("Mensaje vacío, no se genera audio")
44
+
45
def chat(history):
    """Run one assistant turn over ``history`` and speak the reply.

    The system prompt is prepended on every call, so ``history`` should hold
    only user/assistant messages. If the model requests the ticket-price
    tool, the tool result is fed back for a final answer and an illustration
    of the destination city is generated.

    Args:
        history: List of ``{"role": ..., "content": ...}`` message dicts.
            It is extended in place with the assistant reply.

    Returns:
        tuple: ``(history, image)`` where ``image`` is the value returned by
        ``artist`` when a tool call happened, otherwise ``None``.
    """
    # The module-level ``tools`` name comes from app.utils and uses the legacy
    # "functions" schema. The tool-call flow below (finish_reason
    # "tool_calls", message.tool_calls) only works with the ``tools``
    # parameter, whose schema is defined in app.tools.
    from app.tools import tools as tool_definitions

    messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + history
    response = openai.chat.completions.create(
        model=MODEL_TEXT,
        messages=messages,
        tools=tool_definitions,
    )
    image = None

    choice = response.choices[0]
    if choice.finish_reason == "tool_calls":
        message = choice.message
        tool_response, city = handle_tool_call(message)
        # The assistant's tool request must precede the tool result.
        messages.append(message)
        messages.append(tool_response)
        image = artist(city)
        response = openai.chat.completions.create(model=MODEL_TEXT, messages=messages)

    reply = response.choices[0].message.content
    history += [{"role": "assistant", "content": reply}]

    talker(reply)

    return history, image
app/config.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv() # Carga variables de entorno desde .env
5
+
6
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
7
+
8
+ # Modelos OpenAI para distintos usos
9
+ MODEL_TEXT = "gpt-4o-mini" # Modelo para texto/chat
10
+ MODEL_IMAGE = "dall-e-3" # Modelo para generación de imágenes
11
+ MODEL_AUDIO = "whisper-1" # Modelo para procesamiento de audio
12
+
13
def check_api_key():
    """Print whether an OpenAI API key was loaded from the environment.

    When a key is present, its first eight characters are printed;
    otherwise a warning is printed.
    """
    if not OPENAI_API_KEY:
        print("⚠️ OpenAI Key not configured")
        return
    print(f"OpenAI API Key exists and begins with: {OPENAI_API_KEY[:8]}")
18
+
19
+ SYSTEM_MESSAGE = (
20
+ "Eres un asistente útil para una aerolínea llamada FlightAI. "
21
+ "Da respuestas breves y corteses, de no más de una oración. "
22
+ "Se siempre preciso. Si no sabes la respuesta, dilo."
23
+ )
24
+
25
+
26
+ TICKET_PRICES = {"londres": "$799", "parís": "$899", "tokyo": "$1400", "berlín": "$499"}
app/gradio_interface.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from app.agent import initialize_agents
3
+
4
agents = initialize_agents()
# The chat agent prepends the real system prompt on every call, so the stored
# conversation holds only user/assistant turns.
# NOTE(review): this is module-level state, so it is shared by every session
# served by this process.
history = []


def chat_with_agent(user_input):
    """Send the user's text to the chat agent and return the conversation.

    Args:
        user_input: Text submitted from the textbox.

    Returns:
        list: the conversation as ``{"role": ..., "content": ...}`` dicts,
        the format expected by ``gr.Chatbot(type="messages")``. A single
        value is returned because the event has a single output component.
    """
    global history
    # Ignore empty submissions instead of sending a blank turn to the model.
    if not user_input or not user_input.strip():
        return history
    # The agent only sees what is in the history, so record the user turn first.
    history.append({"role": "user", "content": user_input})
    history, _image = agents["chat"](history)
    return history
14
+
15
+
16
+ # def chat_with_agent(user_input):
17
+ # global history
18
+ # # Añadir mensaje del usuario al historial antes de pasar a chat()
19
+ # history.append({"role": "user", "content": user_input})
20
+
21
+ # history, image = agents["chat"](history)
22
+
23
+ # # Preparar para Gradio: crear lista de pares (usuario, asistente)
24
+ # history_display = []
25
+ # for i in range(1, len(history), 2):
26
+ # user_msg = history[i]["content"] if i < len(history) else ""
27
+ # assistant_msg = history[i+1]["content"] if (i+1) < len(history) else ""
28
+ # history_display.append((user_msg, assistant_msg))
29
+
30
+ # return history_display
31
+
32
def create_interface():
    """Build the Gradio Blocks UI: a chatbot pane plus a text input.

    Submitting the textbox calls ``chat_with_agent`` with the textbox value
    and routes its result to the chatbot component.

    Returns:
        gr.Blocks: the assembled, not yet launched, interface.
    """
    app_interface = gr.Blocks()
    with app_interface:
        conversation_view = gr.Chatbot(type="messages")
        prompt_box = gr.Textbox(show_label=False, placeholder="Escribe tu mensaje...")
        prompt_box.submit(chat_with_agent, inputs=prompt_box, outputs=conversation_view)
    return app_interface
app/multimodal_handler.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ from app.agent import chat, artist, talker
3
+ from app.config import MODEL_TEXT, MODEL_IMAGE, MODEL_AUDIO, OPENAI_API_KEY, SYSTEM_MESSAGE
4
+ from app.utils import get_ticket_price
5
+
6
+ # def chat_handler(message, history):
7
+ # messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + history + [{"role": "user", "content": message}]
8
+ # response = openai.chat.completions.create(model=MODEL_TEXT, messages=messages)
9
+
10
+
11
+ # return response.choices[0].message.content
12
+
13
+
14
+
15
def initialize_agents():
    """Return the agent callables imported from ``app.agent``, keyed by name.

    Returns:
        dict: maps ``"chat"``, ``"artist"`` and ``"talker"`` to the
        corresponding functions.
    """
    registry = dict(chat=chat, artist=artist, talker=talker)
    return registry
21
+
22
def handle_message(history=None):
    """Run one chat turn over ``history`` and return the agent's result.

    Args:
        history: List of message dicts. When omitted, a fresh empty list is
            used for this call. A shared default list would accumulate
            replies across calls, because ``chat`` extends it in place.

    Returns:
        Whatever ``chat`` returns for this history.
    """
    if history is None:
        history = []
    return chat(history)
25
+
26
def handle_image(city):
    """Delegate to ``artist`` and return the image it produces for ``city``."""
    generated = artist(city)
    return generated
28
+
29
def handle_audio(text):
    """Delegate to ``talker`` to speak ``text`` and return its result."""
    outcome = talker(text)
    return outcome
app/tools.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from app.config import TICKET_PRICES
3
+
4
def get_ticket_price(destination_city):
    """Look up the ticket price for ``destination_city``.

    Matching ignores case, surrounding whitespace and accents, so "Paris",
    " PARÍS " and "parís" all resolve to the same entry.

    Args:
        destination_city: City name as supplied by the model. May be
            ``None`` or empty when the tool call omitted the argument.

    Returns:
        str: the price from ``TICKET_PRICES``, or ``"Unknown"`` when the
        city is missing or not listed.
    """
    import unicodedata

    print(f"Tool get_ticket_price called for {destination_city}")
    if not destination_city:
        return "Unknown"

    def _fold(text):
        # Decompose accented characters and drop the combining marks.
        decomposed = unicodedata.normalize("NFKD", text.strip().casefold())
        return "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    requested = str(destination_city)

    # Fast path: exact (case-insensitive) key match, as before.
    city = requested.strip().lower()
    if city in TICKET_PRICES:
        return TICKET_PRICES[city]

    # Fallback: accent-insensitive comparison against the known cities.
    wanted = _fold(requested)
    for known_city, price in TICKET_PRICES.items():
        if _fold(known_city) == wanted:
            return price
    return "Unknown"
8
+
9
+
10
+ price_function = {
11
+ "name": "get_ticket_price",
12
+ "description": "Obtén el precio de un billete de ida y vuelta a la ciudad de destino...",
13
+ "parameters": {
14
+ "type": "object",
15
+ "properties": {
16
+ "destination_city": {
17
+ "type": "string",
18
+ "description": "La ciudad a la que el cliente desea viajar",
19
+ },
20
+ },
21
+ "required": ["destination_city"],
22
+ "additionalProperties": False
23
+ }
24
+ }
25
+
26
+ tools = [{"type": "function", "function": price_function}]
27
+
28
def handle_tool_call(message):
    """Execute the first tool call in ``message`` and build the tool reply.

    Only ``message.tool_calls[0]`` is processed. Its JSON arguments are
    parsed, the ``destination_city`` value is priced with
    ``get_ticket_price``, and the result is wrapped as a ``"tool"`` role
    message carrying the originating call id.

    Args:
        message: Assistant message object exposing ``tool_calls``.

    Returns:
        tuple: ``(tool_message, city)`` where ``tool_message`` is the dict
        to append to the conversation and ``city`` is the parsed
        destination (``None`` if the argument was absent).
    """
    first_call = message.tool_calls[0]
    city = json.loads(first_call.function.arguments).get('destination_city')
    payload = json.dumps({"destination_city": city, "price": get_ticket_price(city)})
    tool_message = {
        "role": "tool",
        "content": payload,
        "tool_call_id": first_call.id,
    }
    return tool_message, city
app/utils.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.config import TICKET_PRICES
2
+ from app.tools import price_function, get_ticket_price
3
+
4
+ tools = [
5
+ {
6
+ "name": "get_ticket_price",
7
+ "description": "Obtiene el precio del billete a una ciudad de destino",
8
+ "parameters": {
9
+ "type": "object",
10
+ "properties": {
11
+ "destination_city": {
12
+ "type": "string",
13
+ "description": "Ciudad a la que se quiere viajar"
14
+ }
15
+ },
16
+ "required": ["destination_city"]
17
+ }
18
+ }
19
+ ]
20
+
21
+
22
+
23
+ # def get_ticket_price(destination_city):
24
+ # print(f"Tool get_ticket_price called for {destination_city}")
25
+ # city = destination_city.lower()
26
+ # return TICKET_PRICES.get(city, "Unknown")
main.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from app.gradio_interface import create_interface
2
+
3
def main():
    """Build the Gradio interface and launch it."""
    create_interface().launch()
7
+
8
+ if __name__ == "__main__":
9
+ main()
multi-modal-agent ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit bebe87828691fbe445ffbdc7fb421935ad490047
scripts/deploy.sh ADDED
File without changes
tests/test_agent.py ADDED
File without changes
tests/test_interface.py ADDED
File without changes