from flask import Flask, request, jsonify, render_template |
from transformers import AutoTokenizer, AutoModelForCausalLM |
from peft import PeftModel |
import torch |
import os
from huggingface_hub import login

app = Flask(__name__) |

# Authenticate with the Hugging Face Hub before loading the model.
access_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if access_token is None:
    raise ValueError("The HUGGING_FACE_HUB_TOKEN environment variable is not set")
login(token=access_token) |
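
# Base model checkpoint on the Hub and path to the locally fine-tuned PEFT adapter.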
BASE_MODEL = "openchat/openchat-3.5-0106" |
ADAPTER_PATH = "./chatbot-gpt35-peft" |
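
# Prefer the GPU when one is available.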
device = "cuda" if torch.cuda.is_available() else "cpu" |
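
# Load the tokenizer and the base model; float16 halves GPU memory use versus float32.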
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True) |
base_model = AutoModelForCausalLM.from_pretrained( |
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
) |
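
# Attach the fine-tuned PEFT adapter on top of the base model. device_map="auto"
# has already placed the weights, so no extra .to(device) call is needed here.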
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
model.eval()

@app.route("/") |
def home(): |
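    # Serve the chat UI from templates/index.html.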
    return render_template("index.html")

@app.route("/chat", methods=["POST"]) |
def chat(): |
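    # Expects JSON like {"message": "..."}; returns JSON {"response": "..."}.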
    user_input = (request.get_json(silent=True) or {}).get("message", "")
    prompt = f"User: {user_input}\nAI:"

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens, not the echoed prompt; slicing the
    # decoded string by len(prompt) breaks when the tokenizer normalizes whitespace.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return jsonify({"response": response})

if __name__ == "__main__": |
    # debug=True is convenient for local development; disable it in production.
    app.run(host="0.0.0.0", port=5000, debug=True)