File size: 919 Bytes
66dc162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d64662a
 
 
66dc162
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
"""Download and save the Llama tokenizer files to the repository root.

Reads a Hugging Face read token from the HF_TOKENIZER_READ_TOKEN
environment variable (optionally supplied via a .env file), fetches the
tokenizer for MODEL_ID, and writes its files directly to "./".

Exits with status 1 if the token is not configured.
"""
import os
import sys

from dotenv import load_dotenv
from transformers import AutoTokenizer

# Load environment variables from a .env file, if one is present.
load_dotenv()

# Gated Llama repositories require an authenticated read token.
hf_token = os.environ.get("HF_TOKENIZER_READ_TOKEN")
if not hf_token:
    print("Error: HF_TOKENIZER_READ_TOKEN not found in environment variables")
    print("Please set this variable in your .env file or environment")
    # sys.exit is reliable everywhere; the builtin exit() is injected by the
    # site module and may be absent (e.g. under `python -S` or when frozen).
    sys.exit(1)

# Use the same model ID as in your main application
MODEL_ID = "meta-llama/Llama-3.2-3B"  # Make sure this matches your app.py

print(f"Loading tokenizer from {MODEL_ID} using token from environment...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=hf_token)

# Save the tokenizer files directly to the root directory so the main app
# can load them from "./" without a nested tokenizer/ folder.
print("Saving tokenizer files to the root directory...")
tokenizer.save_pretrained("./")

print("Tokenizer files generated successfully!")