""" Helion-V1 Inference Script Safe and helpful conversational AI model """ import torch from transformers import AutoTokenizer, AutoModelForCausalLM from typing import List, Dict import warnings warnings.filterwarnings('ignore') class HelionInference: def __init__(self, model_name: str = "DeepXR/Helion-V1", device: str = "auto"): """ Initialize the Helion model for inference. Args: model_name: HuggingFace model identifier device: Device to run inference on ('cuda', 'cpu', or 'auto') """ print(f"Loading Helion-V1 model from {model_name}...") self.tokenizer = AutoTokenizer.from_pretrained(model_name) self.model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.bfloat16, device_map=device, trust_remote_code=True ) self.model.eval() print("Model loaded successfully!") # Safety keywords to monitor self.safety_keywords = [ "harm", "illegal", "weapon", "violence", "dangerous", "exploit", "hack", "steal", "abuse" ] def check_safety(self, text: str) -> bool: """ Basic safety check on input text. Args: text: Input text to check Returns: True if text appears safe, False otherwise """ text_lower = text.lower() for keyword in self.safety_keywords: if keyword in text_lower: return False return True def generate_response( self, messages: List[Dict[str, str]], max_new_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.9, do_sample: bool = True ) -> str: """ Generate a response from the model. Args: messages: List of message dictionaries with 'role' and 'content' max_new_tokens: Maximum number of tokens to generate temperature: Sampling temperature top_p: Nucleus sampling parameter do_sample: Whether to use sampling Returns: Generated response text """ # Apply chat template input_ids = self.tokenizer.apply_chat_template( messages, add_generation_prompt=True, return_tensors="pt" ).to(self.model.device) # Generate response with torch.no_grad(): output = self.model.generate( input_ids, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=do_sample, pad_token_id=self.tokenizer.pad_token_id, eos_token_id=self.tokenizer.eos_token_id ) # Decode response response = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) return response.strip() def chat(self): """Interactive chat mode.""" print("\n" + "="*60) print("Helion-V1 Interactive Chat") print("Type 'quit' or 'exit' to end the conversation") print("="*60 + "\n") conversation_history = [] while True: user_input = input("You: ").strip() if user_input.lower() in ['quit', 'exit']: print("Goodbye! Have a great day!") break if not user_input: continue # Basic safety check if not self.check_safety(user_input): print("Helion: I apologize, but I can't assist with that request. 
" "Let me know if there's something else I can help you with!") continue # Add user message to history conversation_history.append({ "role": "user", "content": user_input }) # Generate response try: response = self.generate_response(conversation_history) print(f"Helion: {response}\n") # Add assistant response to history conversation_history.append({ "role": "assistant", "content": response }) except Exception as e: print(f"Error generating response: {e}") conversation_history.pop() # Remove failed user message def main(): """Main function for CLI usage.""" import argparse parser = argparse.ArgumentParser(description="Helion-V1 Inference") parser.add_argument("--model", default="DeepXR/Helion-V1", help="Model name or path") parser.add_argument("--device", default="auto", help="Device to use (cuda/cpu/auto)") parser.add_argument("--interactive", action="store_true", help="Start interactive chat") parser.add_argument("--prompt", type=str, help="Single prompt to process") args = parser.parse_args() # Initialize model helion = HelionInference(model_name=args.model, device=args.device) if args.interactive: helion.chat() elif args.prompt: messages = [{"role": "user", "content": args.prompt}] response = helion.generate_response(messages) print(f"Response: {response}") else: print("Please specify --interactive or --prompt") if __name__ == "__main__": main()