deltarunemen committed on
Commit
3944bf9
·
verified ·
1 Parent(s): 9c42a91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -29
app.py CHANGED
@@ -1,46 +1,37 @@
1
  from flask import Flask, request, jsonify
2
- from sentence_transformers import SentenceTransformer, util
3
- import torch
4
- import re
5
 
6
  app = Flask(__name__)
7
 
8
- # Load model SBERT tiếng Việt
9
- retriever = SentenceTransformer("keepitreal/vietnamese-sbert")
 
 
10
 
11
- def extract_summary(context, topic, k=5, threshold=0.15):
12
- raw_sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', context) if s.strip()]
13
- if len(raw_sentences) == 0:
14
- return ""
15
- context_emb = retriever.encode(raw_sentences, convert_to_tensor=True)
16
- query_emb = retriever.encode(topic, convert_to_tensor=True)
17
- cosine_scores = util.pytorch_cos_sim(query_emb, context_emb)[0]
18
-
19
- k_eff = min(k, len(raw_sentences))
20
- topk = torch.topk(cosine_scores, k=k_eff)
21
- top_indices = topk.indices.tolist()
22
- top_scores = topk.values.tolist()
23
-
24
- filtered = [(i, s) for i, s in zip(top_indices, top_scores) if s >= threshold]
25
- if len(filtered) == 0:
26
- filtered = [(top_indices[0], top_scores[0])]
27
- filtered.sort(key=lambda x: x[0])
28
- selected_text = " ".join([raw_sentences[i] for i, _ in filtered])
29
- return selected_text
30
 
31
  @app.route("/")
32
  def home():
33
- return jsonify({"message": "✅ AI Extractive Text Summarization API (Vietnamese) running."})
 
 
 
34
 
35
  @app.route("/summarize", methods=["POST"])
36
  def summarize():
37
  data = request.get_json(force=True)
38
- if not data or "text" not in data or "topic" not in data:
39
- return jsonify({"error": "Thiếu trường 'text' hoặc 'topic'"}), 400
 
40
  text = data["text"]
41
- topic = data["topic"]
42
  try:
43
- summary = extract_summary(text, topic)
 
44
  return jsonify({"summary": summary})
45
  except Exception as e:
46
  return jsonify({"error": str(e)}), 500
 
1
  from flask import Flask, request, jsonify
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
 
3
 
4
  app = Flask(__name__)
5
 
6
+ # --- Load mô hình tóm tắt tiếng Việt ---
7
+ model_name = "NlpHUST/t5-small-vi-summarization"
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
9
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
10
 
11
+ summarizer = pipeline(
12
+ "summarization",
13
+ model=model,
14
+ tokenizer=tokenizer
15
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  @app.route("/")
18
  def home():
19
+ return jsonify({
20
+ "message": "✅ AI Text Summarization API is running (Vietnamese, NlpHUST/t5-small-vi).",
21
+ "usage": "POST /summarize với JSON: { 'text': '...', 'topic': '...' }"
22
+ })
23
 
24
  @app.route("/summarize", methods=["POST"])
25
  def summarize():
26
  data = request.get_json(force=True)
27
+ if not data or "text" not in data:
28
+ return jsonify({"error": "Thiếu trường 'text'"}), 400
29
+
30
  text = data["text"]
31
+ # topic không cần thiết, mô hình này tự tóm tắt
32
  try:
33
+ result = summarizer(text, max_length=150, min_length=40, do_sample=False)
34
+ summary = result[0]['summary_text']
35
  return jsonify({"summary": summary})
36
  except Exception as e:
37
  return jsonify({"error": str(e)}), 500