Spaces:

luciagomez
/

MisPhil_v3

Sleeping

App Files Files Community

luciagomez committed on Aug 25

Commit

74ae99d

verified ·

1 Parent(s): 91b3ba9

Update retriever.py

Browse files

Files changed (1) hide show

retriever.py +32 -22

retriever.py CHANGED Viewed

@@ -8,14 +8,11 @@ import json
 # Define HF tokens
 HF_TOKEN_read = os.environ.get("HF_TOKEN_read")
-HF_TOKEN_inference = os.environ.get("HF_TOKEN_inf")
 # Make sure cache is redirected to /tmp
 os.environ["HF_HUB_CACHE"] = "/tmp/hf_cache"
-# Setup InferenceClient for embeddings
-client = InferenceClient(provider="nebius", api_key=HF_TOKEN_inference)
 # Dataset repo (private)
 DATASET_REPO = "luciagomez/MrPhil_vector"
@@ -78,29 +75,42 @@ examples = [
         ]
-#model = FlagICLModel(
-#    "BAAI/bge-en-icl",
-#    query_instruction_for_retrieval="Given a mission statement, retrieve foundations with aligned purposes.",
-#    examples_for_task=examples,
-#    use_fp16=False
-#)
-def encode_query(perspective: str):
-    response = client.feature_extraction(
-        perspective,model="BAAI/bge-en-icl",
-    )
-    return np.array(response)
 # -------------------------------------------------------------------
 # 4. Retrieval function
 # -------------------------------------------------------------------
-def find_similar_foundations(perspective, top_k=5):
-    q_emb = encode_query(perspective).astype("float32")
-    faiss.normalize_L2(q_emb)
     scores, idxs = index.search(q_emb, top_k)
-    return [
-        {"title": df.iloc[i]["Title"], "purpose": df.iloc[i]["Purpose"], "similarity": float(scores[0][j])}
-        for j, i in enumerate(idxs[0])
-    ]

 # Define HF tokens
 HF_TOKEN_read = os.environ.get("HF_TOKEN_read")
+#HF_TOKEN_inference = os.environ.get("HF_TOKEN_inf")
 # Make sure cache is redirected to /tmp
 os.environ["HF_HUB_CACHE"] = "/tmp/hf_cache"
 # Dataset repo (private)
 DATASET_REPO = "luciagomez/MrPhil_vector"
         ]
+model = FlagICLModel(
+    "BAAI/bge-en-icl",
+    query_instruction_for_retrieval="Given a mission statement, retrieve foundations with aligned purposes.",
+    examples_for_task=examples,
+    use_fp16=False, # set True if GPU with enough memory
+)
+# -------------------------------
+# Helper to encode queries
+# -------------------------------
+def encode_query(query: str) -> np.ndarray:
+    return model.encode_queries([query])[0].astype("float32")  #  Encode a user query using BGE-EN-ICL.
 # -------------------------------------------------------------------
 # 4. Retrieval function
 # -------------------------------------------------------------------
+def find_similar_foundations(perspective: str, top_k: int = 5):
+    """
+    Given a user perspective, retrieve top-k foundations aligned with it.
+    """
+    # Encode perspective
+    q_emb = encode_query(perspective).reshape(1, -1)  # FAISS expects 2D
+    # Search FAISS index
     scores, idxs = index.search(q_emb, top_k)
+    # Retrieve foundation info
+    results = []
+    for score, idx in zip(scores[0], idxs[0]):
+        foundation_info = {
+            "Title": foundations.iloc[idx]["Title"],
+            "Purpose": foundations.iloc[idx]["Purpose"],
+            "Score": float(score)
+        }
+        results.append(foundation_info)
+    return results