import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import torch
import bitsandbytes as bnb  # Required for 4-bit quantization

# Check if CUDA is available, and decide on the device. Note that the
# bitsandbytes 4-bit kernels require a CUDA GPU; the CPU fallback below
# lets the app start, but generation will be slow or unsupported,
# depending on the bitsandbytes version.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"

# Load the tokenizer and the quantized LLaMA model once and cache them:
# Streamlit reruns the whole script on every widget interaction, so an
# uncached load would re-download and re-initialize the model each time.
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        load_in_4bit=True,  # Enable 4-bit quantization
        device_map="auto" if device == "cuda" else {"": "cpu"},
    )
    return tokenizer, model

tokenizer, model = load_model()
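# Aside (a hedged sketch, not strictly required for the pre-quantized
# checkpoint above): recent transformers releases deprecate passing
# load_in_4bit=True directly to from_pretrained in favor of an explicit
# BitsAndBytesConfig. An equivalent call would look like:
#
#     from transformers import BitsAndBytesConfig
#
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name,
#         quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#         device_map="auto",
#     )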
# Streamlit interface
st.title("Keyword Extractor using LLaMA 4-bit Model")

# Text input area for user input
user_input = st.text_area("Enter text for keyword extraction")
if user_input:
    # Prepare the instruction prompt for keyword extraction
    prompt_template = (
        "Extract keywords and variables from the prompt:\n"
        "{}\n"
    )
    prompt = prompt_template.format(user_input)

    # Tokenize the input text and move it to the model's device
    inputs = tokenizer([prompt], return_tensors="pt").to(device)
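    # Note: Llama-3.2 "Instruct" checkpoints are trained on a chat
    # template, so wrapping the request with the tokenizer's chat-template
    # helper usually yields cleaner output than a bare prompt. A minimal
    # sketch (assuming a transformers version with apply_chat_template):
    #
    #     messages = [{"role": "user", "content": prompt}]
    #     inputs = tokenizer.apply_chat_template(
    #         messages, add_generation_prompt=True,
    #         return_dict=True, return_tensors="pt",
    #     ).to(device)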
    # Set up the text streamer. Note that TextStreamer prints tokens to
    # the server's stdout as they are generated; it does not stream into
    # the Streamlit page itself.
    text_streamer = TextStreamer(tokenizer)
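    # To stream tokens into the web page instead, one option (a sketch,
    # assuming a Streamlit version with st.write_stream, 1.31+) is
    # TextIteratorStreamer, which yields decoded text chunks while
    # generation runs in a background thread:
    #
    #     from threading import Thread
    #     from transformers import TextIteratorStreamer
    #
    #     streamer = TextIteratorStreamer(
    #         tokenizer, skip_prompt=True, skip_special_tokens=True
    #     )
    #     Thread(target=model.generate,
    #            kwargs=dict(**inputs, streamer=streamer,
    #                        max_new_tokens=128)).start()
    #     st.write_stream(streamer)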
    # Generate keywords and extract variables
    with torch.no_grad():
        output = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

    # Decode only the newly generated tokens; output[0] also contains the
    # prompt tokens, which would otherwise be echoed back in the result
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Display the result in the Streamlit app
    st.write("Extracted Keywords and Variables:")
    st.write(generated_text)
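# To try the app locally (assuming the dependencies streamlit, torch,
# transformers, and bitsandbytes are installed), save this file as
# app.py and run:
#
#     streamlit run app.py
#
# then open the local URL that Streamlit prints in a browser.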