alx-d committed
Commit 4cbdaf1 · verified · 1 Parent(s): f840733

Upload folder using huggingface_hub

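The commit message above says the folder was pushed with huggingface_hub. For reference, a minimal sketch of such an upload follows; the folder_path, repo_id, and repo_type are illustrative placeholders, not values taken from this commit.

from huggingface_hub import HfApi

# Push a local working directory to a Hub repo as a single commit.
# The token is read from the HF_TOKEN environment variable or the cached login.
api = HfApi()
api.upload_folder(
    folder_path=".",                # local folder to upload (placeholder)
    repo_id="alx-d/psyllm",         # placeholder repo id
    repo_type="space",              # assumption: the target is a Space
    commit_message="Upload folder using huggingface_hub",
)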
advanced_rag.py CHANGED
@@ -42,6 +42,27 @@ from langchain_community.document_loaders import PyMuPDFLoader # Updated loader
 import tempfile
 import mimetypes
 
+# Add OpenAI import for NEBIUS with version check
+try:
+    import openai
+    from importlib.metadata import version as pkg_version
+    openai_version = pkg_version("openai")
+    print(f"OpenAI import success, version: {openai_version}")
+    if tuple(map(int, openai_version.split("."))) < (1, 0, 0):
+        print("ERROR: openai version must be >= 1.0.0 for NEBIUS support. Please upgrade with: pip install --upgrade openai")
+        sys.exit(1)
+    from openai import OpenAI
+    OPENAI_AVAILABLE = True
+except ImportError as e:
+    OPENAI_AVAILABLE = False
+    print("OpenAI import failed:", e)
+except Exception as e:
+    print("OpenAI version check failed:", e)
+    OPENAI_AVAILABLE = False
+
+# API Key Configuration
+NEBIUS_API_KEY = os.environ.get("NEBIUS_API_KEY", "")
+
 # Add batch processing helper functions
 def generate_parameter_values(min_val, max_val, num_values):
     """Generate evenly spaced values between min and max"""
@@ -54,6 +75,11 @@ def process_batch_query(query, model_choice, max_tokens, param_configs, slider_v
     """Process a batch of queries with different parameter combinations"""
     results = []
 
+    # Update model if it has changed
+    if hasattr(rag_chain, 'llm_choice') and rag_chain.llm_choice != model_choice:
+        rag_chain.update_llm_pipeline(model_choice, rag_chain.temperature, rag_chain.top_p, rag_chain.top_k, rag_chain.prompt_template, rag_chain.bm25_weight, rag_chain.max_tokens)
+        debug_print(f"Model updated to {model_choice}")
+
     # Generate all parameter combinations
     temp_values = [slider_values['temperature']] if param_configs['temperature'] == "Constant" else generate_parameter_values(0.1, 1.0, int(param_configs['temperature'].split()[2]))
     top_p_values = [slider_values['top_p']] if param_configs['top_p'] == "Constant" else generate_parameter_values(0.1, 0.99, int(param_configs['top_p'].split()[2]))
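The batch path expands each varying parameter with generate_parameter_values; only its signature and docstring appear in this diff, so the body below is a minimal sketch consistent with that docstring, not the actual implementation. The "Vary over 3 values" config string is also an assumption, chosen so that .split()[2] yields the count as in the lines above.

import numpy as np

def generate_parameter_values(min_val, max_val, num_values):
    """Generate evenly spaced values between min and max"""
    # Hypothetical body: an evenly spaced grid, rounded for readable CSV output.
    return [round(float(v), 3) for v in np.linspace(min_val, max_val, num_values)]

# A temperature config like "Vary over 3 values" would expand to:
print(generate_parameter_values(0.1, 1.0, 3))  # [0.1, 0.55, 1.0]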
@@ -174,10 +200,10 @@ def submit_batch_query_async(query, model_choice, max_tokens, temp_config, top_p
                              temp_slider, top_p_slider, top_k_slider, bm25_slider, use_history):
     """Handle batch query submission with async processing"""
     if not query:
-        return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
+        return "Please enter a non-empty query", None, "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
 
     if not hasattr(rag_chain, 'elevated_rag_chain') or not rag_chain.raw_data:
-        return "Please load files first.", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
+        return "Please load files first.", None, "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
 
     # Get slider values
     slider_values = {
@@ -395,6 +421,11 @@ def submit_query_async(query, model_choice, max_tokens_slider, temperature, top_
     if not query:
         return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0"
 
+    # Update model if it has changed
+    if hasattr(rag_chain, 'llm_choice') and rag_chain.llm_choice != model_choice:
+        rag_chain.update_llm_pipeline(model_choice, temperature, top_p, top_k, rag_chain.prompt_template, bm25_weight, max_tokens_slider)
+        debug_print(f"Model updated to {model_choice}")
+
     # Update BM25 weight and recreate ensemble retriever if needed
     if hasattr(rag_chain, 'bm25_weight') and rag_chain.bm25_weight != bm25_weight:
         rag_chain.bm25_weight = bm25_weight
@@ -790,7 +821,7 @@ def load_file_from_google_drive(link: str) -> list:
 class ElevatedRagChain:
     def __init__(self, llm_choice: str = "Meta-Llama-3", prompt_template: str = default_prompt,
                  bm25_weight: float = 0.6, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 50,
-                 embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2") -> None:
+                 embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2", max_tokens: int = 3000) -> None:
         debug_print(f"Initializing ElevatedRagChain with model: {llm_choice}")
         self.embedding_model = embedding_model
         self.embed_func = self._create_embedding_function(embedding_model)
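With the widened signature, callers can cap generation length when the chain is constructed. A hypothetical instantiation using the new keyword (the argument values simply mirror the defaults in the signature above, not anything from the app's UI):

# Hypothetical construction call; values mirror the defaults shown in the diff.
rag_chain = ElevatedRagChain(
    llm_choice="🟦 Qwen3-32B (Nebius)",
    bm25_weight=0.6,
    temperature=0.5,
    top_p=0.95,
    top_k=50,
    max_tokens=3000,   # new parameter introduced by this commit
)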
@@ -800,6 +831,7 @@ class ElevatedRagChain:
         self.llm_choice = llm_choice
         self.temperature = temperature
         self.top_p = top_p
+        self.max_tokens = max_tokens
         self.prompt_template = prompt_template
         self.context = ""
         self.conversation_history: List[Dict[str, str]] = []
@@ -929,7 +961,7 @@ class ElevatedRagChain:
         if not model_key:
             raise ValueError(f"Unsupported model: {normalized}")
         model = model_map[model_key]
-        max_tokens = model_token_limits.get(model, 4096)
+        max_tokens = self.max_tokens
         if max_tokens_override is not None:
             max_tokens = min(max_tokens_override, max_tokens)
         pricing_info = model_pricing.get(model_key, {"USD": {"input": 0.00, "output": 0.00}, "RON": {"input": 0.00, "output": 0.00}})
@@ -1100,6 +1132,81 @@ class ElevatedRagChain:
             debug_print("Mistral API pipeline created successfully.")
             return mistral_llm
 
+        # Nebius models
+        elif self.llm_choice in ["🟦 GPT OSS 120b (Nebius)", "🟦 GPT OSS 20b (Nebius)", "🟦 Google Gemma 3 27b-Instruct (Nebius)",
+                                 "🟦 DeepSeek-R1-0528 (Nebius)", "🟦 DeepSeek-V3 (Nebius)", "🟦 Meta-Llama-3.1-70B-Instruct (Nebius)",
+                                 "🟦 Meta-Llama-3.1-405B-Instruct (Nebius)", "🟦 Qwen3-235B-A22B (Nebius)", "🟦 Qwen3-32B (Nebius)",
+                                 "🟦 Hermes 4 405B (Nebius)", "🟦 GLM-4.5 AIR (Nebius)"]:
+
+            if not OPENAI_AVAILABLE:
+                raise ImportError("openai package is required for NEBIUS models.")
+
+            # Map display names to backend names
+            nebius_model_mapping = {
+                "🟦 GPT OSS 120b (Nebius)": "openai/gpt-oss-120b",
+                "🟦 GPT OSS 20b (Nebius)": "openai/gpt-oss-20b",
+                "🟦 Google Gemma 3 27b-Instruct (Nebius)": "google/gemma-3-27b-it",
+                "🟦 DeepSeek-R1-0528 (Nebius)": "deepseek-ai/DeepSeek-R1-0528",
+                "🟦 DeepSeek-V3 (Nebius)": "deepseek-ai/DeepSeek-V3",
+                "🟦 Meta-Llama-3.1-70B-Instruct (Nebius)": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+                "🟦 Meta-Llama-3.1-405B-Instruct (Nebius)": "meta-llama/Meta-Llama-3.1-405B-Instruct",
+                "🟦 Qwen3-235B-A22B (Nebius)": "Qwen/Qwen3-235B-A22B",
+                "🟦 Qwen3-32B (Nebius)": "Qwen/Qwen3-32B",
+                "🟦 Hermes 4 405B (Nebius)": "NousResearch/Hermes-4-405B",
+                "🟦 GLM-4.5 AIR (Nebius)": "zai-org/GLM-4.5-Air"
+            }
+
+            backend_model = nebius_model_mapping[self.llm_choice]
+
+            class NebiusLLM(LLM):
+                model: str
+                temperature: float = 0.5
+                top_p: float = 0.95
+                top_k: int = 50
+                max_tokens: int = 3000
+                _client: Any = PrivateAttr()
+
+                def __init__(self, model: str, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 50, max_tokens: int = 3000, **kwargs: Any):
+                    if not OPENAI_AVAILABLE:
+                        raise ImportError("openai package is required for NEBIUS models.")
+                    super().__init__(**kwargs)
+                    api_key = NEBIUS_API_KEY or os.environ.get("NEBIUS_API_KEY")
+                    if not api_key:
+                        raise ValueError("Please set the NEBIUS_API_KEY either in the code or as an environment variable.")
+                    self.model = model
+                    self.temperature = temperature
+                    self.top_p = top_p
+                    self.top_k = top_k
+                    self.max_tokens = max_tokens
+                    # Use object.__setattr__ to bypass Pydantic field validation
+                    object.__setattr__(self, "_client", OpenAI(base_url="https://api.studio.nebius.com/v1/", api_key=api_key))
+
+                @property
+                def _llm_type(self) -> str:
+                    return "nebius_llm"
+
+                def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+                    try:
+                        completion = self._client.chat.completions.create(
+                            model=self.model,
+                            messages=[{"role": "user", "content": prompt}],
+                            temperature=self.temperature,
+                            top_p=self.top_p,
+                            max_tokens=self.max_tokens
+                        )
+                        return completion.choices[0].message.content if hasattr(completion.choices[0].message, 'content') else str(completion.choices[0].message)
+                    except Exception as e:
+                        return f"Error from Nebius: {str(e)}"
+
+                @property
+                def _identifying_params(self) -> dict:
+                    return {"model": self.model, "temperature": self.temperature, "top_p": self.top_p}
+
+            debug_print(f"Creating Nebius LLM for model: {backend_model}")
+            nebius_llm = NebiusLLM(model=backend_model, temperature=self.temperature, top_p=self.top_p, top_k=self.top_k, max_tokens=self.max_tokens)
+            debug_print("Nebius API pipeline created successfully.")
+            return nebius_llm
+
         else:
             raise ValueError(f"Unsupported model choice: {self.llm_choice}")
 
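The NebiusLLM wrapper added above is a thin LangChain LLM around Nebius' OpenAI-compatible chat endpoint. Stripped of the wrapper, the underlying call looks roughly like this (the model id and prompt are illustrative, and NEBIUS_API_KEY must be set in the environment):

import os
from openai import OpenAI  # requires openai >= 1.0.0, as the import guard at the top of the file enforces

client = OpenAI(base_url="https://api.studio.nebius.com/v1/",
                api_key=os.environ["NEBIUS_API_KEY"])
completion = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct",  # any backend id from nebius_model_mapping
    messages=[{"role": "user", "content": "Summarize the retrieved context in one sentence."}],
    temperature=0.5,
    top_p=0.95,
    max_tokens=300,
)
print(completion.choices[0].message.content)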
@@ -1121,12 +1228,13 @@ class ElevatedRagChain:
             return ErrorLLM()
 
 
-    def update_llm_pipeline(self, new_model_choice: str, temperature: float, top_p: float, top_k: int, prompt_template: str, bm25_weight: float):
+    def update_llm_pipeline(self, new_model_choice: str, temperature: float, top_p: float, top_k: int, prompt_template: str, bm25_weight: float, max_tokens: int = 3000):
         debug_print(f"Updating chain with new model: {new_model_choice}")
         self.llm_choice = new_model_choice
         self.temperature = temperature
         self.top_p = top_p
         self.top_k = top_k
+        self.max_tokens = max_tokens
         self.prompt_template = prompt_template
         self.bm25_weight = bm25_weight
         self.faiss_weight = 1.0 - bm25_weight
@@ -1312,7 +1420,7 @@ def update_model(new_model: str):
     global rag_chain
     if rag_chain and rag_chain.raw_data:
         rag_chain.update_llm_pipeline(new_model, rag_chain.temperature, rag_chain.top_p, rag_chain.top_k,
-                                      rag_chain.prompt_template, rag_chain.bm25_weight)
+                                      rag_chain.prompt_template, rag_chain.bm25_weight, rag_chain.max_tokens)
         debug_print(f"Model updated to {rag_chain.llm_choice}")
         return f"Model updated to: {rag_chain.llm_choice}"
     else:
@@ -1758,6 +1866,18 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
                 "🇺🇸 o3-mini",
                 "🇺🇸 Remote Meta-Llama-3",
                 "🇪🇺 Mistral-API",
+                # Nebius Models
+                "🟦 GPT OSS 120b (Nebius)",
+                "🟦 GPT OSS 20b (Nebius)",
+                "🟦 Google Gemma 3 27b-Instruct (Nebius)",
+                "🟦 DeepSeek-R1-0528 (Nebius)",
+                "🟦 DeepSeek-V3 (Nebius)",
+                "🟦 Meta-Llama-3.1-70B-Instruct (Nebius)",
+                "🟦 Meta-Llama-3.1-405B-Instruct (Nebius)",
+                "🟦 Qwen3-235B-A22B (Nebius)",
+                "🟦 Qwen3-32B (Nebius)",
+                "🟦 Hermes 4 405B (Nebius)",
+                "🟦 GLM-4.5 AIR (Nebius)",
             ],
             value="🇪🇺 Mistral-API",
             label="Query Model"
@@ -1874,6 +1994,18 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
                 "🇺🇸 o3-mini",
                 "🇺🇸 Remote Meta-Llama-3",
                 "🇪🇺 Mistral-API",
+                # Nebius Models
+                "🟦 GPT OSS 120b (Nebius)",
+                "🟦 GPT OSS 20b (Nebius)",
+                "🟦 Google Gemma 3 27b-Instruct (Nebius)",
+                "🟦 DeepSeek-R1-0528 (Nebius)",
+                "🟦 DeepSeek-V3 (Nebius)",
+                "🟦 Meta-Llama-3.1-70B-Instruct (Nebius)",
+                "🟦 Meta-Llama-3.1-405B-Instruct (Nebius)",
+                "🟦 Qwen3-235B-A22B (Nebius)",
+                "🟦 Qwen3-32B (Nebius)",
+                "🟦 Hermes 4 405B (Nebius)",
+                "🟦 GLM-4.5 AIR (Nebius)",
             ],
             value="🇪🇺 Mistral-API",
             label="Query Model"
@@ -2247,10 +2379,43 @@ def create_csv_from_batch_results(results: List[Dict], job_id: str,
         return "unknown"
     # Remove emojis and get the actual model name
     clean_name = full_name.split(" ", 1)[-1] if " " in full_name else full_name
-    # Get first few characters and last few characters
-    if len(clean_name) > 8:
-        return clean_name[:4] + clean_name[-4:]
-    return clean_name
+
+    # Remove parentheses and replace with underscores, also clean other special characters
+    clean_name = clean_name.replace("(", "_").replace(")", "").replace(" ", "_").replace(",", "").replace("-", "_")
+
+    # For embedding models, extract the unique suffix part
+    if "sentence_transformers/" in clean_name:
+        # Extract the part after "sentence_transformers/"
+        suffix = clean_name.replace("sentence_transformers/", "")
+        # Take the last part after the last underscore for uniqueness
+        if "_" in suffix:
+            parts = suffix.split("_")
+            # Take the last 2-3 parts to ensure uniqueness
+            if len(parts) >= 2:
+                return "_".join(parts[-2:])
+            else:
+                return suffix
+        else:
+            return suffix
+    else:
+        # For other models, use a cleaner approach
+        # Remove common prefixes and take meaningful parts
+        if "Nebius" in clean_name:
+            # For Nebius models, extract the model name part
+            parts = clean_name.split("_")
+            # Filter out common words and take meaningful parts
+            meaningful_parts = [p for p in parts if p not in ["Nebius", "the", "and", "or", "of", "in", "on", "at", "to", "for", "with", "by"]]
+            if meaningful_parts:
+                return "_".join(meaningful_parts[-2:]) if len(meaningful_parts) >= 2 else meaningful_parts[0]
+            else:
+                return clean_name
+        else:
+            # For other models, take first and last parts
+            parts = clean_name.split("_")
+            if len(parts) >= 2:
+                return "_".join([parts[0], parts[-1]])
+            else:
+                return clean_name
 
 def get_param_variation_name(param_configs):
     """Get the parameter that was varied"""
 
batch_dim_fast_Instruct__const_20250917_204636.csv ADDED
@@ -0,0 +1,2 @@
+Temperature,Top-p,Top-k,BM25 Weight,Model,Main Answer,Input Tokens,Output Tokens,Conversation History,Progress,Elapsed Time (s)
+0.5,0.95,50,0.6,Unknown,Error: 'ElevatedRagChain' object has no attribute 'max_tokens',N/A,N/A,0,Query 1/1,0.00
psyllm.py CHANGED
@@ -598,7 +598,7 @@ models = [
     # NEBIUS
     {"display": "🟦 GPT OSS 120b (Nebius)", "backend": "openai/gpt-oss-120b", "provider": "nebius"},
     {"display": "🟦 GPT OSS 20b (Nebius)", "backend": "openai/gpt-oss-20b", "provider": "nebius"},
-    {"display": "🟦 Google Gemma 3 27b-Instruct (Nebius)", "backend": "google/gemma-3-27b-it ", "provider": "nebius"},
+    {"display": "🟦 Google Gemma 3 27b-Instruct (Nebius)", "backend": "google/gemma-3-27b-it", "provider": "nebius"},
     {"display": "🟦 Kimi K2", "backend": "moonshotai/Kimi-K2-Instruct", "provider": "nebius"},
     {"display": "🟦 DeepSeek-R1-0528 (Nebius)", "backend": "deepseek-ai/DeepSeek-R1-0528", "provider": "nebius"},
     {"display": "🟦 DeepSeek-V3-0324 (Nebius)", "backend": "deepseek-ai/DeepSeek-V3-0324", "provider": "nebius"},