lyimo committed on
Commit
af987ea
·
verified ·
1 Parent(s): de389b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -10
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import pandas as pd
3
  import numpy as np
 
4
  from sentence_transformers import SentenceTransformer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  import gradio as gr
@@ -18,6 +19,38 @@ question_embeddings = model.encode(df['Question'].tolist())
18
  # Together API setup
19
  client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def llama_query(prompt, system_content):
22
  response = client.chat.completions.create(
23
  model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
@@ -75,26 +108,21 @@ def get_answer(user_question, threshold=0.01):
75
  else:
76
  english_question = user_question
77
 
78
- user_embedding = model.encode(english_question)
79
-
80
- similarities = cosine_similarity([user_embedding], question_embeddings)
81
-
82
- max_similarity = np.max(similarities)
83
 
84
- if max_similarity > threshold:
85
- similar_question_idx = np.argmax(similarities)
86
- retrieved_answer = df.iloc[similar_question_idx]['Answer']
87
  refined_answer = refine_answer(english_question, retrieved_answer)
88
 
89
  if language == 'swahili':
90
  refined_answer = translate_to_swahili(refined_answer)
91
 
92
- return refined_answer, max_similarity
93
  else:
94
  default_message = "The system couldn't find a sufficient answer to your question. Do you want to learn anything else about blood donation?"
95
  if language == 'swahili':
96
  default_message = translate_to_swahili(default_message)
97
- return default_message, max_similarity
98
 
99
  # Gradio app
100
  def gradio_app(user_question):
 
1
  import os
2
  import pandas as pd
3
  import numpy as np
4
+ import gzip
5
  from sentence_transformers import SentenceTransformer
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  import gradio as gr
 
19
  # Together API setup
20
  client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
21
 
22
def compressed_length(s):
    """Return the gzip-compressed size, in bytes, of the string ``s``.

    The string is encoded as UTF-8 before compression, so the result
    includes gzip's fixed header/trailer overhead even for an empty string.
    """
    raw_bytes = s.encode('utf-8')
    packed = gzip.compress(raw_bytes)
    return len(packed)
24
+
25
def ncd(x, y):
    """Compute a normalized compression distance between two strings.

    Each string is compressed on its own and then once more joined by a
    single space. The result is
    ``(C(x + " " + y) - min(C(x), C(y))) / max(C(x), C(y))``, where ``C`` is
    ``compressed_length``. The value shrinks as the joined text compresses
    better relative to its parts.
    """
    size_x = compressed_length(x)
    size_y = compressed_length(y)
    size_joined = compressed_length(" ".join((x, y)))

    # sorted() yields (smaller, larger), i.e. the min and max of the pair.
    smaller, larger = sorted((size_x, size_y))
    return (size_joined - smaller) / larger
30
+
31
def normalize_scores(scores, reverse=False):
    """Min-max scale ``scores`` into the range [0, 1].

    Args:
        scores: Sequence of numeric scores (a list or a 1-D NumPy array).
        reverse: When true, invert the scale so the smallest input maps to
            1 and the largest to 0 (useful for distance-like scores where
            lower is better).

    Returns:
        A list with one scaled value per input score. An empty input gives
        an empty list. When every score is identical (including a single
        score) there is no spread to scale by, so every value is 0.0.
    """
    # Use len() rather than truthiness: NumPy arrays raise on ``not array``.
    if len(scores) == 0:
        return []

    min_score = min(scores)
    max_score = max(scores)
    span = max_score - min_score

    if span == 0:
        # Constant input would otherwise divide by zero (ZeroDivisionError
        # for Python numbers, NaN for NumPy scalars).
        return [0.0 for _ in scores]

    if reverse:
        return [(max_score - x) / span for x in scores]
    return [(x - min_score) / span for x in scores]
37
+
38
def hybrid_retrieval(query, passages, embeddings, alpha=0.7, beta=0.3):
    """Pick the passage that best matches ``query`` using two blended signals.

    The first signal is the cosine similarity between the encoded query and
    each row of ``embeddings``. The second is the compression distance
    (``ncd``) between the query text and each passage, inverted during
    normalization so that a smaller distance scores higher. Both signals are
    min-max normalized across the candidates and combined as
    ``alpha * cosine + beta * ncd``.

    Args:
        query: Query text.
        passages: Candidate passage strings.
        embeddings: Precomputed passage embeddings; presumably one row per
            passage, produced by the same ``model`` (confirm against caller).
        alpha: Weight of the normalized cosine similarity.
        beta: Weight of the normalized (inverted) compression distance.

    Returns:
        A tuple ``(index, cosine_similarity, ncd_value, final_score)`` for
        the top-scoring passage. The cosine and NCD values are the raw,
        un-normalized ones; ``final_score`` is the blended normalized score.
    """
    # Embedding-based signal.
    encoded_query = model.encode(query)
    cosine_scores = cosine_similarity([encoded_query], embeddings)[0]
    semantic_part = normalize_scores(cosine_scores)

    # Compression-based signal; reverse=True turns a distance into a similarity.
    ncd_scores = [ncd(query, candidate) for candidate in passages]
    lexical_part = normalize_scores(ncd_scores, reverse=True)

    # Weighted blend of the two normalized signals, candidate by candidate.
    combined = []
    for semantic, lexical in zip(semantic_part, lexical_part):
        combined.append(alpha * semantic + beta * lexical)

    best = np.argmax(combined)
    return best, cosine_scores[best], ncd_scores[best], combined[best]
53
+
54
  def llama_query(prompt, system_content):
55
  response = client.chat.completions.create(
56
  model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
 
108
  else:
109
  english_question = user_question
110
 
111
+ index, cosine_sim, ncd_value, final_score = hybrid_retrieval(english_question, df['Question'].tolist(), question_embeddings)
 
 
 
 
112
 
113
+ if final_score > threshold:
114
+ retrieved_answer = df.iloc[index]['Answer']
 
115
  refined_answer = refine_answer(english_question, retrieved_answer)
116
 
117
  if language == 'swahili':
118
  refined_answer = translate_to_swahili(refined_answer)
119
 
120
+ return refined_answer, final_score
121
  else:
122
  default_message = "The system couldn't find a sufficient answer to your question. Do you want to learn anything else about blood donation?"
123
  if language == 'swahili':
124
  default_message = translate_to_swahili(default_message)
125
+ return default_message, final_score
126
 
127
  # Gradio app
128
  def gradio_app(user_question):