# Hugging Face Space — status at capture time: Runtime error
| import os | |
| import warnings | |
| import torch | |
| import gc | |
| from transformers import AutoModelForVision2Seq, AutoProcessor | |
| from peft import PeftModel | |
| from PIL import Image | |
| import gradio as gr | |
| from huggingface_hub import login | |
# --- Basic settings ---
# Suppress all warnings and restrict the process to CUDA device 0.
warnings.filterwarnings('ignore')
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# --- Global state ---
# Both start out empty; load_model_and_processor() assigns them.
model = None
processor = None

# --- CUDA housekeeping ---
# When a CUDA device is available, release cached memory and run the
# garbage collector before anything is loaded.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    gc.collect()
    print("เคลียร์ CUDA cache เรียบร้อยแล้ว")

# --- Hugging Face Hub login ---
# Log in only when a token is supplied through the environment; otherwise
# print a warning and continue without logging in.
if 'HUGGING_FACE_HUB_TOKEN' not in os.environ:
    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
else:
    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
def load_model_and_processor():
    """Load the processor, the base vision model and the PEFT adapter.

    Everything is loaded into local variables first and the module-level
    ``model`` / ``processor`` globals are only assigned once every step has
    succeeded, so a failure part-way through never leaves the module with a
    processor but no model.

    Returns:
        bool: ``True`` when all three loads succeeded, ``False`` if any of
        them raised (the error message is printed).
    """
    global model, processor
    print("กำลังโหลดโมเดลและ processor...")

    # Model paths
    base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
    adapter_path = "Aekanun/thai-handwriting-llm"

    try:
        # Load processor from base model.
        # `token=True` uses the credentials stored by the Hub login;
        # `use_auth_token` is the deprecated spelling of the same option.
        print("กำลังโหลด processor...")
        loaded_processor = AutoProcessor.from_pretrained(
            base_model_path,
            token=True,
        )

        # Load base model
        print("กำลังโหลด base model...")
        base_model = AutoModelForVision2Seq.from_pretrained(
            base_model_path,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            # NOTE(review): trust_remote_code allows code shipped in the model
            # repo to run locally — confirm it is actually required here.
            trust_remote_code=True,
            token=True,
        )

        # Load adapter on top of the base model
        print("กำลังโหลด adapter...")
        loaded_model = PeftModel.from_pretrained(
            base_model,
            adapter_path,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            token=True,
        )
    except Exception as e:
        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
        return False

    # Publish both objects together, only after every load succeeded.
    model, processor = loaded_model, loaded_processor
    print("โหลดโมเดลสำเร็จ!")
    return True
def process_handwriting(image):
    """Transcribe Thai handwriting from an image (Gradio callback).

    Args:
        image: A ``PIL.Image.Image``, any other object accepted by
            ``Image.fromarray``, or ``None`` when nothing was uploaded.

    Returns:
        str: The model's transcription with surrounding whitespace stripped;
        a request to upload a picture when ``image`` is ``None``; or an
        error message if any step raised.
    """
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"

    try:
        # Ensure image is in PIL format
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Create prompt
        prompt = """Transcribe the Thai handwritten text from the provided image.
Only return the transcription in Thai language."""

        # Create model inputs
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image", "image": image},
                ],
            }
        ]

        # Render the chat template with the assistant header appended so the
        # model starts generating the answer directly.
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # The rendered template already contains the begin-of-text token, so
        # the tokenizer must not add special tokens a second time.
        inputs = processor(
            text=text,
            images=image,
            add_special_tokens=False,
            return_tensors="pt",
        )
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        prompt_length = inputs["input_ids"].shape[-1]

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id,
            )

        # generate() returns prompt + continuation; decode only the newly
        # generated tokens so the prompt text is not echoed back to the user.
        generated_tokens = outputs[0][prompt_length:]
        transcription = processor.decode(generated_tokens, skip_special_tokens=True)
        return transcription.strip()
    except Exception as e:
        return f"เกิดข้อผิดพลาด: {str(e)}"
# Initialize application: build the UI only if the model loaded successfully.
print("กำลังเริ่มต้นแอปพลิเคชัน...")
if load_model_and_processor():
    # Offer only the example images that are actually present on disk; a
    # missing example file is expected to make interface construction fail
    # at startup. Pass None (no examples) when none of them exist.
    example_rows = [
        [path]
        for path in ("example1.jpg", "example2.jpg")
        if os.path.isfile(path)
    ]

    # Create Gradio interface
    demo = gr.Interface(
        fn=process_handwriting,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        title="Thai Handwriting Recognition",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
        examples=example_rows or None,
    )

    # Launch only when run as a script, not when imported.
    if __name__ == "__main__":
        demo.launch()
else:
    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")