zenobot / generate_tokenizer.py
Zeno0007's picture
Upload folder using huggingface_hub
d64662a verified
raw
history blame contribute delete
919 Bytes
import os
import sys

from dotenv import load_dotenv
from transformers import AutoTokenizer
# Load environment variables from .env file
load_dotenv()

# Get the Hugging Face access token from the environment. It is passed to
# AutoTokenizer.from_pretrained further down in this script.
hf_token = os.environ.get("HF_TOKENIZER_READ_TOKEN")
if not hf_token:
    # Diagnostics go to stderr so they are not mixed with normal output.
    print(
        "Error: HF_TOKENIZER_READ_TOKEN not found in environment variables",
        file=sys.stderr,
    )
    print(
        "Please set this variable in your .env file or environment",
        file=sys.stderr,
    )
    # sys.exit rather than the site-provided exit(): the latter is an
    # interactive-shell helper and is not guaranteed to exist in programs.
    sys.exit(1)
# Use the same model ID as in your main application
MODEL_ID = "meta-llama/Llama-3.2-3B"  # Make sure this matches your app.py

# Download the tokenizer for MODEL_ID, authenticating with the token read
# from the environment earlier in this script.
print(f"Loading tokenizer from {MODEL_ID} using token from environment...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=hf_token)

# Write the tokenizer files next to the application rather than into a
# "./tokenizer" subdirectory. NOTE: "./" is resolved against the current
# working directory, so run this script from the project root.
print("Saving tokenizer files to the root directory...")
tokenizer.save_pretrained("./")
print("Tokenizer files generated successfully!")