Maximofn committed on
Commit
159fcfe
1 Parent(s): 20d2599

Refactor HuggingFace model integration and simplify token handling

Browse files

- Replace HuggingFaceEndpoint with InferenceClient for direct API interaction
- Remove environment variable loading and token logging
- Add message conversion between LangChain and HuggingFace formats
- Implement a new /test-token endpoint for authentication verification
- Simplify model invocation and response processing

Files changed (1) hide show
  1. app.py +48 -40
app.py CHANGED
@@ -1,46 +1,40 @@
1
- import os
2
-
3
  from fastapi import FastAPI, HTTPException
4
  from pydantic import BaseModel
5
- from dotenv import load_dotenv
6
 
7
- from langchain_huggingface import HuggingFaceEndpoint
8
- from langchain_core.messages import HumanMessage
9
  from langgraph.checkpoint.memory import MemorySaver
10
  from langgraph.graph import START, MessagesState, StateGraph
11
 
12
- # Cargar variables de entorno (útil para desarrollo local)
13
- load_dotenv()
14
-
15
- # Obtener token de HuggingFace
16
- # En HuggingFace Spaces, el token estará disponible como variable de entorno
17
- HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")
18
- if not HUGGINGFACE_TOKEN:
19
- raise ValueError("No se encontró la variable de entorno HUGGINGFACE_TOKEN o HF_TOKEN")
20
-
21
- # Después de cargar el token
22
- if HUGGINGFACE_TOKEN:
23
- print(f"Token cargado: {HUGGINGFACE_TOKEN[:5]}...{HUGGINGFACE_TOKEN[-5:] if len(HUGGINGFACE_TOKEN) > 10 else ''}")
24
- print(f"Longitud del token: {len(HUGGINGFACE_TOKEN)}")
25
- else:
26
- print("¡ADVERTENCIA! No se encontró el token de HuggingFace")
27
-
28
- # Inicializar el modelo
29
- model = HuggingFaceEndpoint(
30
- repo_id="Qwen/Qwen2.5-72B-Instruct",
31
- huggingfacehub_api_token=HUGGINGFACE_TOKEN,
32
- max_new_tokens=64,
33
- temperature=0.5,
34
- top_p=0.7,
35
  )
36
 
37
- # Definir el grafo
38
  workflow = StateGraph(state_schema=MessagesState)
39
 
40
- # Definir la función que llama al modelo
41
  def call_model(state: MessagesState):
42
- response = model.invoke(state["messages"])
43
- return {"messages": response}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  # Definir el nodo en el grafo
46
  workflow.add_edge(START, "model")
@@ -55,28 +49,28 @@ class QueryRequest(BaseModel):
55
  query: str
56
  thread_id: str = "default"
57
 
58
- # Crear la aplicación FastAPI
59
- app = FastAPI(title="LangChain FastAPI", description="API para generar texto usando LangChain y LangGraph")
60
 
61
  @app.get("/")
62
  async def root():
63
- """Endpoint de bienvenida"""
64
  return {"detail": "Welcome to FastAPI, Langchain, Docker tutorial"}
65
 
66
  @app.post("/generate")
67
  async def generate(request: QueryRequest):
68
- """Endpoint para generar texto usando el modelo de lenguaje"""
69
  try:
70
- # Configurar el ID del hilo
71
  config = {"configurable": {"thread_id": request.thread_id}}
72
 
73
- # Crear el mensaje de entrada
74
  input_messages = [HumanMessage(content=request.query)]
75
 
76
- # Invocar el grafo
77
  output = graph_app.invoke({"messages": input_messages}, config)
78
 
79
- # Obtener la respuesta del modelo
80
  response = output["messages"][-1].content
81
 
82
  return {
@@ -86,6 +80,20 @@ async def generate(request: QueryRequest):
86
  except Exception as e:
87
  raise HTTPException(status_code=500, detail=f"Error al generar texto: {str(e)}")
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  if __name__ == "__main__":
90
  import uvicorn
91
  uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
+ from huggingface_hub import InferenceClient
4
 
5
+ from langchain_core.messages import HumanMessage, AIMessage
 
6
  from langgraph.checkpoint.memory import MemorySaver
7
  from langgraph.graph import START, MessagesState, StateGraph
8
 
9
+ # Initialize the HuggingFace Inference API client for the chat model.
10
+ client = InferenceClient(
11
+ model="Qwen/Qwen2.5-72B-Instruct",




















12
  )
13
 
14
+ # Define the graph
15
  workflow = StateGraph(state_schema=MessagesState)
16
 
17
+ # Define the function that calls the model
18
def call_model(state: MessagesState):
    """Graph node: send the conversation to the HF Inference API and return the reply.

    Converts the LangChain message history into the OpenAI-style chat format
    expected by ``InferenceClient.chat_completion``, invokes the remote model,
    and returns the assistant's answer as an ``AIMessage``.

    Args:
        state: LangGraph ``MessagesState`` holding the conversation so far.

    Returns:
        dict: ``{"messages": [AIMessage]}`` — only the NEW message. The
        ``add_messages`` reducer behind ``MessagesState`` appends returned
        messages to the existing history, so returning
        ``state["messages"] + [ai_message]`` (as before) duplicated the whole
        conversation on every turn. Returning just the new message fixes that.
    """
    # Convert LangChain messages to the HuggingFace chat format.
    hf_messages = []
    for msg in state["messages"]:
        if isinstance(msg, HumanMessage):
            hf_messages.append({"role": "user", "content": msg.content})
        elif isinstance(msg, AIMessage):
            hf_messages.append({"role": "assistant", "content": msg.content})

    # Call the Inference API.
    response = client.chat_completion(
        messages=hf_messages,
        temperature=0.5,
        max_tokens=64,
        top_p=0.7,
    )

    # Convert the response back to LangChain format; return ONLY the new
    # message — the MessagesState reducer handles appending to history.
    ai_message = AIMessage(content=response.choices[0].message.content)
    return {"messages": [ai_message]}
38
 
39
  # Definir el nodo en el grafo
40
  workflow.add_edge(START, "model")
 
49
  query: str
50
  thread_id: str = "default"
51
 
52
+ # Create the FastAPI application
53
+ app = FastAPI(title="LangChain FastAPI", description="API to generate text using LangChain and LangGraph")
54
 
55
@app.get("/")
async def root():
    """Welcome endpoint: static greeting for the API root."""
    greeting = "Welcome to FastAPI, Langchain, Docker tutorial"
    return {"detail": greeting}
59
 
60
  @app.post("/generate")
61
  async def generate(request: QueryRequest):
62
+ """Endpoint to generate text using the language model"""
63
  try:
64
+ # Configure the thread ID
65
  config = {"configurable": {"thread_id": request.thread_id}}
66
 
67
+ # Create the input message
68
  input_messages = [HumanMessage(content=request.query)]
69
 
70
+ # Invoke the graph
71
  output = graph_app.invoke({"messages": input_messages}, config)
72
 
73
+ # Get the model response
74
  response = output["messages"][-1].content
75
 
76
  return {
 
80
  except Exception as e:
81
  raise HTTPException(status_code=500, detail=f"Error al generar texto: {str(e)}")
82
 
83
+ # Add an endpoint to test the token directly
84
# Add an endpoint to test the token directly
@app.get("/test-token")
async def test_token():
    """Endpoint to test the authentication with HuggingFace"""
    try:
        # One tiny request is enough to prove the token is accepted.
        probe = client.chat_completion(
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=10,
        )
        reply = probe.choices[0].message.content
    except Exception as e:
        return {"status": "error", "message": str(e)}
    return {"status": "success", "message": "Token is valid", "response": reply}
96
+
97
if __name__ == "__main__":
    # Launch a local development server when executed directly.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)