import os
import pandas as pd
import numpy as np
import gzip
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
from together import Together
# Load pre-trained Sentence Transformer model
model = SentenceTransformer('LaBSE')
# Load questions and answers from the CSV file
df = pd.read_csv('combined_questions_and_answers.csv')
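# The CSV is assumed to contain 'Question' and 'Answer' columns; retrieval below
# matches against 'Question' and returns the paired 'Answer' to the user.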
# Encode all questions in the dataset
question_embeddings = model.encode(df['Question'].tolist())
# Together API setup
client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
def compressed_length(s):
    """Length in bytes of the gzip-compressed UTF-8 encoding of s."""
    return len(gzip.compress(s.encode('utf-8')))

def ncd(x, y):
    """
    Normalized Compression Distance for strings x and y.
    """
    Cx = compressed_length(x)
    Cy = compressed_length(y)
    Cxy = compressed_length(x + " " + y)
    return (Cxy - min(Cx, Cy)) / max(Cx, Cy)
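
# Quick illustration of ncd() behaviour (a sketch; exact values depend on the
# gzip implementation, so the comparisons below are qualitative, not exact):
#   ncd("who can donate blood", "who is eligible to donate blood")  # lower (more similar)
#   ncd("who can donate blood", "what is the capital of France")    # higher (less similar)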
def normalize_scores(scores, reverse=False):
    """
    Min-max scale a list of scores to [0, 1]; with reverse=True, lower raw
    scores map to higher normalized values.
    """
    min_score = min(scores)
    max_score = max(scores)
    if max_score == min_score:
        return [0] * len(scores)
    if reverse:
        return [(max_score - x) / (max_score - min_score) for x in scores]
    return [(x - min_score) / (max_score - min_score) for x in scores]
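
# Example of the min-max scaling above:
#   normalize_scores([2, 4, 6])                -> [0.0, 0.5, 1.0]
#   normalize_scores([2, 4, 6], reverse=True)  -> [1.0, 0.5, 0.0]
# reverse=True is used for NCD, where a *lower* distance should score higher.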
def hybrid_retrieval(query, passages, embeddings, alpha=0.7, beta=0.3):
    """
    Combine cosine similarity (SentenceTransformer) and
    Normalized Compression Distance (NCD) for retrieval.
    """
    query_embedding = model.encode(query)
    cosine_similarities = cosine_similarity([query_embedding], embeddings)[0]
    # Normalize cosine similarities to [0, 1]
    normalized_cosine_similarities = normalize_scores(cosine_similarities)
    # Calculate NCD against every passage; reverse so higher means more similar
    ncd_values = [ncd(query, passage) for passage in passages]
    normalized_ncd_values = normalize_scores(ncd_values, reverse=True)
    # Combine the two signals with a weighted sum
    final_scores = [
        alpha * cos_sim + beta * ncd_sim
        for cos_sim, ncd_sim in zip(normalized_cosine_similarities, normalized_ncd_values)
    ]
    most_similar_index = np.argmax(final_scores)
    return (
        most_similar_index,
        cosine_similarities[most_similar_index],
        ncd_values[most_similar_index],
        final_scores[most_similar_index],
    )
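
# Usage sketch (relies on the module-level df, model, and question_embeddings):
#   idx, cos_sim, ncd_val, score = hybrid_retrieval(
#       "Who can donate blood?", df['Question'].tolist(), question_embeddings
#   )
#   best_question = df.iloc[idx]['Question']   # closest stored question
#   best_answer   = df.iloc[idx]['Answer']     # its paired answer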
def llama_query(prompt, system_content):
    """
    Send a prompt to the Together LLaMa model and return the response.
    """
    response = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": prompt}
        ],
        max_tokens=512,
        temperature=0.7,
        top_p=0.7,
        top_k=50,
        repetition_penalty=1,
        stop=["<|eot_id|>", "<|eom_id|>"],
        stream=False
    )
    return response.choices[0].message.content
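
# Minimal usage sketch (requires TOGETHER_API_KEY to be set in the environment):
#   reply = llama_query(
#       "How often can someone donate blood?",
#       "You are an assistant knowledgeable about blood donation."
#   )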
def check_blood_donation_relevance(question):
    """
    Use LLaMa to check whether 'question' is about blood donation.
    """
    prompt = f"Is the following question related to blood donation? Answer ONLY with 'Yes' or 'No': {question}"
    system_content = "You are an assistant that determines if a question is related to blood donation."
    response = llama_query(prompt, system_content).strip().lower()
    # Accept answers such as "Yes." that add punctuation after the keyword
    return response.startswith('yes')

def detect_language(text):
    """
    Use LLaMa to detect language (English or Swahili).
    Returns 'swahili' or 'english'.
    """
    prompt = (
        "Detect the language of this text. If it's Swahili, return 'Swahili'. "
        "If it's English, return 'English'. Here's the text:\n\n"
        f"{text}"
    )
    system_content = "You are a language detection assistant."
    response = llama_query(prompt, system_content).strip().lower()
    # Look for 'swahili' or 'english' anywhere in the response
    if "swahili" in response:
        return "swahili"
    if "english" in response:
        return "english"
    # Fallback: default to English
    return "english"

def translate_to_english(text):
    """
    Translate Swahili text to English using LLaMa.
    """
    prompt = f"Translate the following Swahili text to English: {text}"
    system_content = "You are a translation assistant that translates from Swahili to English."
    response = llama_query(prompt, system_content)
    return response.strip()

def translate_to_swahili(text):
    """
    Translate any text to simple Swahili using LLaMa.
    """
    prompt = f"Translate the following text to simple Swahili, avoiding difficult words: {text}"
    system_content = "You are a translation assistant that translates to simple Swahili."
    response = llama_query(prompt, system_content)
    return response.strip()

def refine_answer(question, retrieved_answer):
    """
    Refine the retrieved answer, making it more relevant and natural.
    """
    prompt = (
        f"Question: {question}\n\n"
        f"Retrieved Answer: {retrieved_answer}\n\n"
        "Please refine the retrieved answer so it's direct, clear, and specifically addresses the question."
    )
    system_content = "You are an assistant that refines answers to make them more relevant and natural."
    return llama_query(prompt, system_content).strip()
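
# Example (hypothetical arguments, shown only to illustrate the call shape):
#   refined = refine_answer("Who can donate blood?", df.iloc[0]['Answer'])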
def get_answer(user_question, threshold=0.3):
    """
    Answer a blood-donation question asked in English or Swahili.
    Returns (answer_text, retrieval_score); the score is 0.0 for off-topic questions.
    """
    # 1) Detect user language
    language = detect_language(user_question)
    # 2) Convert user question to English for checking & retrieval
    if language == 'swahili':
        english_question = translate_to_english(user_question)
    else:
        english_question = user_question
    # 3) Check if the question is about blood donation using LLaMa
    is_blood_related = check_blood_donation_relevance(english_question)
    if not is_blood_related:
        # Off-topic response
        off_topic_message = (
            "I'm sorry, but your question doesn't seem to be related to blood donation. "
            "Could you please ask a question about blood donation?"
        )
        if language == 'swahili':
            off_topic_message = translate_to_swahili(off_topic_message)
        return off_topic_message, 0.0
    # If it is about blood donation, proceed with hybrid retrieval
    index, cosine_sim, ncd_value, final_score = hybrid_retrieval(
        english_question,
        df['Question'].tolist(),
        question_embeddings
    )
    # 4) If retrieval confidence is high enough, refine the CSV answer
    if final_score > threshold:
        retrieved_answer = df.iloc[index]['Answer']
        refined_answer_english = refine_answer(english_question, retrieved_answer)
        # Translate back to user language if needed
        if language == 'swahili':
            return translate_to_swahili(refined_answer_english), final_score
        return refined_answer_english, final_score
    # 5) If retrieval is below threshold, ask LLaMa for a general blood-donation-related answer
    llama_response_english = llama_query(
        f"Please provide a concise, accurate answer about blood donation for the question: {english_question}",
        "You are an assistant knowledgeable about blood donation. Provide concise, accurate answers."
    ).strip()
    # Translate back to user language if needed
    if language == 'swahili':
        return translate_to_swahili(llama_response_english), final_score
    return llama_response_english, final_score
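
# Example call (hypothetical Swahili question, roughly "Who can donate blood?"):
#   answer, score = get_answer("Je, ni nani anaweza kuchangia damu?")
#   # score is the hybrid retrieval score, or 0.0 when the question is off-topic
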
# Gradio app
def gradio_app(user_question):
    answer, similarity = get_answer(user_question)
    return f"Similarity: {similarity:.2f}\nAnswer: {answer}"

iface = gr.Interface(
    fn=gradio_app,
    inputs=gr.Textbox(label="Enter your question"),
    outputs=gr.Textbox(label="Answer"),
    title="Blood Donation Q&A",
    description="Ask questions about blood donation in English or Swahili. The system first checks if it's related to blood donation."
)
iface.launch()
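
# launch() with no arguments is what a hosted Space expects; for local testing,
# iface.launch(share=True) would also create a temporary public link.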