Update app.py
app.py CHANGED
@@ -41,6 +41,11 @@ def chat(message: str, history: list, stm_state: torch.Tensor, llm_history: list
 
     llm_chat_history = llm_model.tokenize_chat_template(llm_tokenizer, llm_history, message, max_seq_len=llm_seq_len, use_simplified_format=True)
 
+    llm_chat_history = {
+        'input_ids': llm_chat_history['input_ids'].to(device),
+        'attention_mask': llm_chat_history['attention_mask'].to(device)
+    }
+
     with torch.amp.autocast(device_type=device.type, dtype=torch.bfloat16):
         for token_id in llm_model.generate(**llm_chat_history, max_seq_len=llm_seq_len, temperature=temperature, top_p=top_p):
             llm_response += model.stringify_token(token_id, show_memory_update=False)
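The added block moves the tokenized inputs onto the model's device before generation. The likely reason (an assumption, since the commit has no description) is that tokenize_chat_template returns CPU tensors while the model's weights live on CUDA once the ZeroGPU hardware is attached, and calling generate with CPU inputs against CUDA weights raises a device-mismatch RuntimeError. A minimal sketch of the pattern, assuming a standard PyTorch setup; the tensor values and the dict comprehension are illustrative, not the Space's actual code:

# Sketch of the device-placement pattern this commit introduces, assuming
# the model's parameters already live on `device` (e.g. CUDA on a ZeroGPU
# Space). The batch contents below are placeholders, not the Space's data.
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Tokenizers typically return CPU tensors; such a batch must be moved to
# the model's device before any forward/generate call, otherwise PyTorch
# raises a device-mismatch RuntimeError.
batch = {
    'input_ids': torch.tensor([[101, 2023, 102]]),         # CPU by default
    'attention_mask': torch.ones(1, 3, dtype=torch.long),  # CPU by default
}
batch = {k: v.to(device) for k, v in batch.items()}

# All tensors now sit on the same device as the model.
assert all(v.device.type == device.type for v in batch.values())

The comprehension above is equivalent to the commit's explicit two-key dict; the explicit form has the advantage of making it obvious that only input_ids and attention_mask are forwarded to generate.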