alx-d committed
Commit 4cbdaf1 · verified · 1 Parent(s): f840733

Upload folder using huggingface_hub

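The commit message above says the folder was pushed with huggingface_hub. For reference, a minimal sketch of such an upload follows; the folder_path, repo_id, and repo_type are illustrative placeholders, not values taken from this commit.

from huggingface_hub import HfApi

# Push a local working directory to a Hub repo as a single commit.
# The token is read from the HF_TOKEN environment variable or the cached login.
api = HfApi()
api.upload_folder(
    folder_path=".",                # local folder to upload (placeholder)
    repo_id="alx-d/psyllm",         # placeholder repo id
    repo_type="space",              # assumption: the target is a Space
    commit_message="Upload folder using huggingface_hub",
)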
advanced_rag.py CHANGED
@@ -42,6 +42,27 @@ from langchain_community.document_loaders import PyMuPDFLoader # Updated loader
 import tempfile
 import mimetypes
 
+# Add OpenAI import for NEBIUS with version check
+try:
+    import openai
+    from importlib.metadata import version as pkg_version
+    openai_version = pkg_version("openai")
+    print(f"OpenAI import success, version: {openai_version}")
+    if tuple(map(int, openai_version.split("."))) < (1, 0, 0):
+        print("ERROR: openai version must be >= 1.0.0 for NEBIUS support. Please upgrade with: pip install --upgrade openai")
+        sys.exit(1)
+    from openai import OpenAI
+    OPENAI_AVAILABLE = True
+except ImportError as e:
+    OPENAI_AVAILABLE = False
+    print("OpenAI import failed:", e)
+except Exception as e:
+    print("OpenAI version check failed:", e)
+    OPENAI_AVAILABLE = False
+
+# API Key Configuration
+NEBIUS_API_KEY = os.environ.get("NEBIUS_API_KEY", "")
+
 # Add batch processing helper functions
 def generate_parameter_values(min_val, max_val, num_values):
     """Generate evenly spaced values between min and max"""
@@ -54,6 +75,11 @@ def process_batch_query(query, model_choice, max_tokens, param_configs, slider_v
     """Process a batch of queries with different parameter combinations"""
     results = []
 
+    # Update model if it has changed
+    if hasattr(rag_chain, 'llm_choice') and rag_chain.llm_choice != model_choice:
+        rag_chain.update_llm_pipeline(model_choice, rag_chain.temperature, rag_chain.top_p, rag_chain.top_k, rag_chain.prompt_template, rag_chain.bm25_weight, rag_chain.max_tokens)
+        debug_print(f"Model updated to {model_choice}")
+
     # Generate all parameter combinations
     temp_values = [slider_values['temperature']] if param_configs['temperature'] == "Constant" else generate_parameter_values(0.1, 1.0, int(param_configs['temperature'].split()[2]))
     top_p_values = [slider_values['top_p']] if param_configs['top_p'] == "Constant" else generate_parameter_values(0.1, 0.99, int(param_configs['top_p'].split()[2]))
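The batch path expands each varying parameter with generate_parameter_values; only its signature and docstring appear in this diff, so the body below is a minimal sketch consistent with that docstring, not the actual implementation. The "Vary over 3 values" config string is also an assumption, chosen so that .split()[2] yields the count as in the lines above.

import numpy as np

def generate_parameter_values(min_val, max_val, num_values):
    """Generate evenly spaced values between min and max"""
    # Hypothetical body: an evenly spaced grid, rounded for readable CSV output.
    return [round(float(v), 3) for v in np.linspace(min_val, max_val, num_values)]

# A temperature config like "Vary over 3 values" would expand to:
print(generate_parameter_values(0.1, 1.0, 3))  # [0.1, 0.55, 1.0]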
@@ -174,10 +200,10 @@ def submit_batch_query_async(query, model_choice, max_tokens, temp_config, top_p
                              temp_slider, top_p_slider, top_k_slider, bm25_slider, use_history):
     """Handle batch query submission with async processing"""
     if not query:
-        return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
+        return "Please enter a non-empty query", None, "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
 
     if not hasattr(rag_chain, 'elevated_rag_chain') or not rag_chain.raw_data:
-        return "Please load files first.", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
+        return "Please load files first.", None, "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
 
     # Get slider values
     slider_values = {
@@ -395,6 +421,11 @@ def submit_query_async(query, model_choice, max_tokens_slider, temperature, top_
     if not query:
         return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0"
 
+    # Update model if it has changed
+    if hasattr(rag_chain, 'llm_choice') and rag_chain.llm_choice != model_choice:
+        rag_chain.update_llm_pipeline(model_choice, temperature, top_p, top_k, rag_chain.prompt_template, bm25_weight, max_tokens_slider)
+        debug_print(f"Model updated to {model_choice}")
+
     # Update BM25 weight and recreate ensemble retriever if needed
     if hasattr(rag_chain, 'bm25_weight') and rag_chain.bm25_weight != bm25_weight:
         rag_chain.bm25_weight = bm25_weight
@@ -790,7 +821,7 @@ def load_file_from_google_drive(link: str) -> list:
 class ElevatedRagChain:
     def __init__(self, llm_choice: str = "Meta-Llama-3", prompt_template: str = default_prompt,
                  bm25_weight: float = 0.6, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 50,
-                 embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2") -> None:
+                 embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2", max_tokens: int = 3000) -> None:
         debug_print(f"Initializing ElevatedRagChain with model: {llm_choice}")
         self.embedding_model = embedding_model
         self.embed_func = self._create_embedding_function(embedding_model)
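With the widened signature, callers can cap generation length when the chain is constructed. A hypothetical instantiation using the new keyword (the argument values simply mirror the defaults in the signature above, not anything from the app's UI):

# Hypothetical construction call; values mirror the defaults shown in the diff.
rag_chain = ElevatedRagChain(
    llm_choice="🟦 Qwen3-32B (Nebius)",
    bm25_weight=0.6,
    temperature=0.5,
    top_p=0.95,
    top_k=50,
    max_tokens=3000,   # new parameter introduced by this commit
)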
@@ -800,6 +831,7 @@ class ElevatedRagChain:
         self.llm_choice = llm_choice
         self.temperature = temperature
         self.top_p = top_p
+        self.max_tokens = max_tokens
         self.prompt_template = prompt_template
         self.context = ""
         self.conversation_history: List[Dict[str, str]] = []
@@ -929,7 +961,7 @@ class ElevatedRagChain:
         if not model_key:
             raise ValueError(f"Unsupported model: {normalized}")
         model = model_map[model_key]
-        max_tokens = model_token_limits.get(model, 4096)
+        max_tokens = self.max_tokens
         if max_tokens_override is not None:
             max_tokens = min(max_tokens_override, max_tokens)
         pricing_info = model_pricing.get(model_key, {"USD": {"input": 0.00, "output": 0.00}, "RON": {"input": 0.00, "output": 0.00}})
@@ -1100,6 +1132,81 @@ class ElevatedRagChain:
             debug_print("Mistral API pipeline created successfully.")
             return mistral_llm
 
+        # Nebius models
+        elif self.llm_choice in ["🟦 GPT OSS 120b (Nebius)", "🟦 GPT OSS 20b (Nebius)", "🟦 Google Gemma 3 27b-Instruct (Nebius)",
+                                 "🟦 DeepSeek-R1-0528 (Nebius)", "🟦 DeepSeek-V3 (Nebius)", "🟦 Meta-Llama-3.1-70B-Instruct (Nebius)",
+                                 "🟦 Meta-Llama-3.1-405B-Instruct (Nebius)", "🟦 Qwen3-235B-A22B (Nebius)", "🟦 Qwen3-32B (Nebius)",
+                                 "🟦 Hermes 4 405B (Nebius)", "🟦 GLM-4.5 AIR (Nebius)"]:
+
+            if not OPENAI_AVAILABLE:
+                raise ImportError("openai package is required for NEBIUS models.")
+
+            # Map display names to backend names
+            nebius_model_mapping = {
+                "🟦 GPT OSS 120b (Nebius)": "openai/gpt-oss-120b",
+                "🟦 GPT OSS 20b (Nebius)": "openai/gpt-oss-20b",
+                "🟦 Google Gemma 3 27b-Instruct (Nebius)": "google/gemma-3-27b-it",
+                "🟦 DeepSeek-R1-0528 (Nebius)": "deepseek-ai/DeepSeek-R1-0528",
+                "🟦 DeepSeek-V3 (Nebius)": "deepseek-ai/DeepSeek-V3",
+                "🟦 Meta-Llama-3.1-70B-Instruct (Nebius)": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+                "🟦 Meta-Llama-3.1-405B-Instruct (Nebius)": "meta-llama/Meta-Llama-3.1-405B-Instruct",
+                "🟦 Qwen3-235B-A22B (Nebius)": "Qwen/Qwen3-235B-A22B",
+                "🟦 Qwen3-32B (Nebius)": "Qwen/Qwen3-32B",
+                "🟦 Hermes 4 405B (Nebius)": "NousResearch/Hermes-4-405B",
+                "🟦 GLM-4.5 AIR (Nebius)": "zai-org/GLM-4.5-Air"
+            }
+
+            backend_model = nebius_model_mapping[self.llm_choice]
+
+            class NebiusLLM(LLM):
+                model: str
+                temperature: float = 0.5
+                top_p: float = 0.95
+                top_k: int = 50
+                max_tokens: int = 3000
+                _client: Any = PrivateAttr()
+
+                def __init__(self, model: str, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 50, max_tokens: int = 3000, **kwargs: Any):
+                    if not OPENAI_AVAILABLE:
+                        raise ImportError("openai package is required for NEBIUS models.")
+                    super().__init__(**kwargs)
+                    api_key = NEBIUS_API_KEY or os.environ.get("NEBIUS_API_KEY")
+                    if not api_key:
+                        raise ValueError("Please set the NEBIUS_API_KEY either in the code or as an environment variable.")
+                    self.model = model
+                    self.temperature = temperature
+                    self.top_p = top_p
+                    self.top_k = top_k
+                    self.max_tokens = max_tokens
+                    # Use object.__setattr__ to bypass Pydantic field validation
+                    object.__setattr__(self, "_client", OpenAI(base_url="https://api.studio.nebius.com/v1/", api_key=api_key))
+
+                @property
+                def _llm_type(self) -> str:
+                    return "nebius_llm"
+
+                def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+                    try:
+                        completion = self._client.chat.completions.create(
+                            model=self.model,
+                            messages=[{"role": "user", "content": prompt}],
+                            temperature=self.temperature,
+                            top_p=self.top_p,
+                            max_tokens=self.max_tokens
+                        )
+                        return completion.choices[0].message.content if hasattr(completion.choices[0].message, 'content') else str(completion.choices[0].message)
+                    except Exception as e:
+                        return f"Error from Nebius: {str(e)}"
+
+                @property
+                def _identifying_params(self) -> dict:
+                    return {"model": self.model, "temperature": self.temperature, "top_p": self.top_p}
+
+            debug_print(f"Creating Nebius LLM for model: {backend_model}")
+            nebius_llm = NebiusLLM(model=backend_model, temperature=self.temperature, top_p=self.top_p, top_k=self.top_k, max_tokens=self.max_tokens)
+            debug_print("Nebius API pipeline created successfully.")
+            return nebius_llm
+
         else:
             raise ValueError(f"Unsupported model choice: {self.llm_choice}")
 
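The NebiusLLM wrapper added above is a thin LangChain LLM around Nebius' OpenAI-compatible chat endpoint. Stripped of the wrapper, the underlying call looks roughly like this (the model id and prompt are illustrative, and NEBIUS_API_KEY must be set in the environment):

import os
from openai import OpenAI  # requires openai >= 1.0.0, as the import guard at the top of the file enforces

client = OpenAI(base_url="https://api.studio.nebius.com/v1/",
                api_key=os.environ["NEBIUS_API_KEY"])
completion = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct",  # any backend id from nebius_model_mapping
    messages=[{"role": "user", "content": "Summarize the retrieved context in one sentence."}],
    temperature=0.5,
    top_p=0.95,
    max_tokens=300,
)
print(completion.choices[0].message.content)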
@@ -1121,12 +1228,13 @@ class ElevatedRagChain:
             return ErrorLLM()
 
 
-    def update_llm_pipeline(self, new_model_choice: str, temperature: float, top_p: float, top_k: int, prompt_template: str, bm25_weight: float):
+    def update_llm_pipeline(self, new_model_choice: str, temperature: float, top_p: float, top_k: int, prompt_template: str, bm25_weight: float, max_tokens: int = 3000):
         debug_print(f"Updating chain with new model: {new_model_choice}")
         self.llm_choice = new_model_choice
         self.temperature = temperature
         self.top_p = top_p
         self.top_k = top_k
+        self.max_tokens = max_tokens
         self.prompt_template = prompt_template
         self.bm25_weight = bm25_weight
         self.faiss_weight = 1.0 - bm25_weight
@@ -1312,7 +1420,7 @@ def update_model(new_model: str):
     global rag_chain
     if rag_chain and rag_chain.raw_data:
         rag_chain.update_llm_pipeline(new_model, rag_chain.temperature, rag_chain.top_p, rag_chain.top_k,
-                                      rag_chain.prompt_template, rag_chain.bm25_weight)
+                                      rag_chain.prompt_template, rag_chain.bm25_weight, rag_chain.max_tokens)
         debug_print(f"Model updated to {rag_chain.llm_choice}")
         return f"Model updated to: {rag_chain.llm_choice}"
     else:
@@ -1758,6 +1866,18 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
                 "🇺🇸 o3-mini",
                 "🇺🇸 Remote Meta-Llama-3",
                 "🇪🇺 Mistral-API",
+                # Nebius Models
+                "🟦 GPT OSS 120b (Nebius)",
+                "🟦 GPT OSS 20b (Nebius)",
+                "🟦 Google Gemma 3 27b-Instruct (Nebius)",
+                "🟦 DeepSeek-R1-0528 (Nebius)",
+                "🟦 DeepSeek-V3 (Nebius)",
+                "🟦 Meta-Llama-3.1-70B-Instruct (Nebius)",
+                "🟦 Meta-Llama-3.1-405B-Instruct (Nebius)",
+                "🟦 Qwen3-235B-A22B (Nebius)",
+                "🟦 Qwen3-32B (Nebius)",
+                "🟦 Hermes 4 405B (Nebius)",
+                "🟦 GLM-4.5 AIR (Nebius)",
             ],
             value="🇪🇺 Mistral-API",
             label="Query Model"
@@ -1874,6 +1994,18 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
                 "🇺🇸 o3-mini",
                 "🇺🇸 Remote Meta-Llama-3",
                 "🇪🇺 Mistral-API",
+                # Nebius Models
+                "🟦 GPT OSS 120b (Nebius)",
+                "🟦 GPT OSS 20b (Nebius)",
+                "🟦 Google Gemma 3 27b-Instruct (Nebius)",
+                "🟦 DeepSeek-R1-0528 (Nebius)",
+                "🟦 DeepSeek-V3 (Nebius)",
+                "🟦 Meta-Llama-3.1-70B-Instruct (Nebius)",
+                "🟦 Meta-Llama-3.1-405B-Instruct (Nebius)",
+                "🟦 Qwen3-235B-A22B (Nebius)",
+                "🟦 Qwen3-32B (Nebius)",
+                "🟦 Hermes 4 405B (Nebius)",
+                "🟦 GLM-4.5 AIR (Nebius)",
             ],
             value="🇪🇺 Mistral-API",
             label="Query Model"
@@ -2247,10 +2379,43 @@ def create_csv_from_batch_results(results: List[Dict], job_id: str,
         return "unknown"
     # Remove emojis and get the actual model name
     clean_name = full_name.split(" ", 1)[-1] if " " in full_name else full_name
-    # Get first few characters and last few characters
-    if len(clean_name) > 8:
-        return clean_name[:4] + clean_name[-4:]
-    return clean_name
+
+    # Remove parentheses and replace with underscores, also clean other special characters
+    clean_name = clean_name.replace("(", "_").replace(")", "").replace(" ", "_").replace(",", "").replace("-", "_")
+
+    # For embedding models, extract the unique suffix part
+    if "sentence_transformers/" in clean_name:
+        # Extract the part after "sentence_transformers/"
+        suffix = clean_name.replace("sentence_transformers/", "")
+        # Take the last part after the last underscore for uniqueness
+        if "_" in suffix:
+            parts = suffix.split("_")
+            # Take the last 2-3 parts to ensure uniqueness
+            if len(parts) >= 2:
+                return "_".join(parts[-2:])
+            else:
+                return suffix
+        else:
+            return suffix
+    else:
+        # For other models, use a cleaner approach
+        # Remove common prefixes and take meaningful parts
+        if "Nebius" in clean_name:
+            # For Nebius models, extract the model name part
+            parts = clean_name.split("_")
+            # Filter out common words and take meaningful parts
+            meaningful_parts = [p for p in parts if p not in ["Nebius", "the", "and", "or", "of", "in", "on", "at", "to", "for", "with", "by"]]
+            if meaningful_parts:
+                return "_".join(meaningful_parts[-2:]) if len(meaningful_parts) >= 2 else meaningful_parts[0]
+            else:
+                return clean_name
+        else:
+            # For other models, take first and last parts
+            parts = clean_name.split("_")
+            if len(parts) >= 2:
+                return "_".join([parts[0], parts[-1]])
+            else:
+                return clean_name
 
 def get_param_variation_name(param_configs):
     """Get the parameter that was varied"""
 
batch_dim_fast_Instruct__const_20250917_204636.csv ADDED
@@ -0,0 +1,2 @@
+Temperature,Top-p,Top-k,BM25 Weight,Model,Main Answer,Input Tokens,Output Tokens,Conversation History,Progress,Elapsed Time (s)
+0.5,0.95,50,0.6,Unknown,Error: 'ElevatedRagChain' object has no attribute 'max_tokens',N/A,N/A,0,Query 1/1,0.00
psyllm.py CHANGED
@@ -598,7 +598,7 @@ models = [
     # NEBIUS
     {"display": "🟦 GPT OSS 120b (Nebius)", "backend": "openai/gpt-oss-120b", "provider": "nebius"},
     {"display": "🟦 GPT OSS 20b (Nebius)", "backend": "openai/gpt-oss-20b", "provider": "nebius"},
-    {"display": "🟦 Google Gemma 3 27b-Instruct (Nebius)", "backend": "google/gemma-3-27b-it ", "provider": "nebius"},
+    {"display": "🟦 Google Gemma 3 27b-Instruct (Nebius)", "backend": "google/gemma-3-27b-it", "provider": "nebius"},
     {"display": "🟦 Kimi K2", "backend": "moonshotai/Kimi-K2-Instruct", "provider": "nebius"},
     {"display": "🟦 DeepSeek-R1-0528 (Nebius)", "backend": "deepseek-ai/DeepSeek-R1-0528", "provider": "nebius"},
     {"display": "🟦 DeepSeek-V3-0324 (Nebius)", "backend": "deepseek-ai/DeepSeek-V3-0324", "provider": "nebius"},