Maximofn committed on
Commit
159fcfe
1 Parent(s): 20d2599

Refactor HuggingFace model integration and simplify token handling

Browse files

- Replace HuggingFaceEndpoint with InferenceClient for direct API interaction
- Remove environment variable loading and token logging
- Add message conversion between LangChain and HuggingFace formats
- Implement a new /test-token endpoint for authentication verification
- Simplify model invocation and response processing

Files changed (1) hide show
  1. app.py +48 -40
app.py CHANGED
@@ -1,46 +1,40 @@
1
- import os
2
-
3
  from fastapi import FastAPI, HTTPException
4
  from pydantic import BaseModel
5
- from dotenv import load_dotenv
6
 
7
- from langchain_huggingface import HuggingFaceEndpoint
8
- from langchain_core.messages import HumanMessage
9
  from langgraph.checkpoint.memory import MemorySaver
10
  from langgraph.graph import START, MessagesState, StateGraph
11
 
12
- # Cargar variables de entorno (útil para desarrollo local)
13
- load_dotenv()
14
-
15
- # Obtener token de HuggingFace
16
- # En HuggingFace Spaces, el token estará disponible como variable de entorno
17
- HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")
18
- if not HUGGINGFACE_TOKEN:
19
- raise ValueError("No se encontró la variable de entorno HUGGINGFACE_TOKEN o HF_TOKEN")
20
-
21
- # Después de cargar el token
22
- if HUGGINGFACE_TOKEN:
23
- print(f"Token cargado: {HUGGINGFACE_TOKEN[:5]}...{HUGGINGFACE_TOKEN[-5:] if len(HUGGINGFACE_TOKEN) > 10 else ''}")
24
- print(f"Longitud del token: {len(HUGGINGFACE_TOKEN)}")
25
- else:
26
- print("¡ADVERTENCIA! No se encontró el token de HuggingFace")
27
-
28
- # Inicializar el modelo
29
- model = HuggingFaceEndpoint(
30
- repo_id="Qwen/Qwen2.5-72B-Instruct",
31
- huggingfacehub_api_token=HUGGINGFACE_TOKEN,
32
- max_new_tokens=64,
33
- temperature=0.5,
34
- top_p=0.7,
35
  )
36
 
37
- # Definir el grafo
38
  workflow = StateGraph(state_schema=MessagesState)
39
 
40
- # Definir la función que llama al modelo
41
  def call_model(state: MessagesState):
42
- response = model.invoke(state["messages"])
43
- return {"messages": response}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  # Definir el nodo en el grafo
46
  workflow.add_edge(START, "model")
@@ -55,28 +49,28 @@ class QueryRequest(BaseModel):
55
  query: str
56
  thread_id: str = "default"
57
 
58
- # Crear la aplicación FastAPI
59
- app = FastAPI(title="LangChain FastAPI", description="API para generar texto usando LangChain y LangGraph")
60
 
61
  @app.get("/")
62
  async def root():
63
- """Endpoint de bienvenida"""
64
  return {"detail": "Welcome to FastAPI, Langchain, Docker tutorial"}
65
 
66
  @app.post("/generate")
67
  async def generate(request: QueryRequest):
68
- """Endpoint para generar texto usando el modelo de lenguaje"""
69
  try:
70
- # Configurar el ID del hilo
71
  config = {"configurable": {"thread_id": request.thread_id}}
72
 
73
- # Crear el mensaje de entrada
74
  input_messages = [HumanMessage(content=request.query)]
75
 
76
- # Invocar el grafo
77
  output = graph_app.invoke({"messages": input_messages}, config)
78
 
79
- # Obtener la respuesta del modelo
80
  response = output["messages"][-1].content
81
 
82
  return {
@@ -86,6 +80,20 @@ async def generate(request: QueryRequest):
86
  except Exception as e:
87
  raise HTTPException(status_code=500, detail=f"Error al generar texto: {str(e)}")
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  if __name__ == "__main__":
90
  import uvicorn
91
  uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
+ from huggingface_hub import InferenceClient
4
 
5
+ from langchain_core.messages import HumanMessage, AIMessage
 
6
  from langgraph.checkpoint.memory import MemorySaver
7
  from langgraph.graph import START, MessagesState, StateGraph
8
 
9
+ # Initialize the HuggingFace Inference API client for the chat model.
10
+ client = InferenceClient(
11
+ model="Qwen/Qwen2.5-72B-Instruct",




















12
  )
13
 
14
+ # Define the graph
15
  workflow = StateGraph(state_schema=MessagesState)
16
 
17
+ # Define the function that calls the model
18
def call_model(state: MessagesState):
    """Graph node: send the conversation to the HF Inference API and return the reply.

    Converts the LangChain message history into the OpenAI-style chat format
    expected by ``InferenceClient.chat_completion``, invokes the remote model,
    and returns the assistant's answer as an ``AIMessage``.

    Args:
        state: LangGraph ``MessagesState`` holding the conversation so far.

    Returns:
        dict: ``{"messages": [AIMessage]}`` — only the NEW message. The
        ``add_messages`` reducer behind ``MessagesState`` appends returned
        messages to the existing history, so returning
        ``state["messages"] + [ai_message]`` (as before) duplicated the whole
        conversation on every turn. Returning just the new message fixes that.
    """
    # Convert LangChain messages to the HuggingFace chat format.
    hf_messages = []
    for msg in state["messages"]:
        if isinstance(msg, HumanMessage):
            hf_messages.append({"role": "user", "content": msg.content})
        elif isinstance(msg, AIMessage):
            hf_messages.append({"role": "assistant", "content": msg.content})

    # Call the Inference API.
    response = client.chat_completion(
        messages=hf_messages,
        temperature=0.5,
        max_tokens=64,
        top_p=0.7,
    )

    # Convert the response back to LangChain format; return ONLY the new
    # message — the MessagesState reducer handles appending to history.
    ai_message = AIMessage(content=response.choices[0].message.content)
    return {"messages": [ai_message]}
38
 
39
  # Definir el nodo en el grafo
40
  workflow.add_edge(START, "model")
 
49
  query: str
50
  thread_id: str = "default"
51
 
52
+ # Create the FastAPI application
53
+ app = FastAPI(title="LangChain FastAPI", description="API to generate text using LangChain and LangGraph")
54
 
55
@app.get("/")
async def root():
    """Welcome endpoint: static greeting for the API root."""
    greeting = "Welcome to FastAPI, Langchain, Docker tutorial"
    return {"detail": greeting}
59
 
60
  @app.post("/generate")
61
  async def generate(request: QueryRequest):
62
+ """Endpoint to generate text using the language model"""
63
  try:
64
+ # Configure the thread ID
65
  config = {"configurable": {"thread_id": request.thread_id}}
66
 
67
+ # Create the input message
68
  input_messages = [HumanMessage(content=request.query)]
69
 
70
+ # Invoke the graph
71
  output = graph_app.invoke({"messages": input_messages}, config)
72
 
73
+ # Get the model response
74
  response = output["messages"][-1].content
75
 
76
  return {
 
80
  except Exception as e:
81
  raise HTTPException(status_code=500, detail=f"Error al generar texto: {str(e)}")
82
 
83
+ # Add an endpoint to test the token directly
84
# Add an endpoint to test the token directly
@app.get("/test-token")
async def test_token():
    """Endpoint to test the authentication with HuggingFace"""
    try:
        # One tiny request is enough to prove the token is accepted.
        probe = client.chat_completion(
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=10,
        )
        reply = probe.choices[0].message.content
    except Exception as e:
        return {"status": "error", "message": str(e)}
    return {"status": "success", "message": "Token is valid", "response": reply}
96
+
97
if __name__ == "__main__":
    # Launch a local development server when executed directly.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)