Spaces:

taha092
/

HumanizerV2

Runtime error

App Files Files Community

HumanizerV2 / app.py

taha092

Update app.py

31d085e verified 4 months ago

raw

history blame contribute delete

10.2 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	from transformers.pipelines import pipeline
	from sentence_transformers import SentenceTransformer, util
	import numpy as np
	import gradio.themes as grthemes
	import random
	import re

	# ----------------------
	# Paraphrasing Model Setup (Pegasus + T5)
	# ----------------------
	PEGASUS_MODEL_NAME = "tuner007/pegasus_paraphrase"
	T5_MODEL_NAME = "Vamsi/T5_Paraphrase_Paws"

	# Use CUDA when torch reports it as available; otherwise stay on the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# Both checkpoints are loaded once, when the module is executed, and each
	# model is moved to `device` right after it is loaded.
	pegasus_tokenizer = AutoTokenizer.from_pretrained(PEGASUS_MODEL_NAME)
	pegasus_model = AutoModelForSeq2SeqLM.from_pretrained(PEGASUS_MODEL_NAME).to(device)
	t5_tokenizer = AutoTokenizer.from_pretrained(T5_MODEL_NAME)
	t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_MODEL_NAME).to(device)

	# ----------------------
	# Semantic Similarity Model
	# ----------------------
	# Sentence-embedding model; semantic_similarity() encodes both texts with it.
	similarity_model = SentenceTransformer("all-MiniLM-L6-v2")

	# ----------------------
	# Local AI Detector (roberta-base-openai-detector)
	# ----------------------
	AI_DETECTOR_MODEL = "roberta-base-openai-detector"
	# The pipeline takes a device index: 0 when CUDA is available, -1 otherwise.
	ai_detector = pipeline(
	    "text-classification",
	    model=AI_DETECTOR_MODEL,
	    device=0 if torch.cuda.is_available() else -1,
	)

	# ----------------------
	# Prompt Variations for Humanization
	# ----------------------
	# pegasus_paraphrase() picks one of these at random and puts it in front of
	# the sentence it is about to rewrite.
	PEGASUS_PROMPTS = [
	    "Paraphrase this naturally:",
	    "Rewrite as if explaining to a friend:",
	    "Make this sound like a real conversation:",
	    "Express this in a casual, human way:",
	    "Reword this with natural flow:",
	    "Make this sound less robotic:",
	    "Rewrite in a friendly, informal tone:",
	    "Paraphrase in a way a student would say it:",
	]

	# t5_paraphrase() picks one of these at random in the same way.
	T5_PROMPTS = [
	    "Paraphrase the following text in a formal, academic tone:",
	    "Paraphrase the following text in a casual, conversational tone:",
	    "Paraphrase the following text in a friendly, approachable tone:",
	    "Paraphrase the following text to bypass AI detectors and sound as human as possible:",
	]

	# ----------------------
	# Sentence Splitter
	# ----------------------
	def split_sentences(text):
	    """Split *text* into sentence-like chunks.

	    The text is stripped and then cut at every run of whitespace that comes
	    directly after ``.``, ``!`` or ``?``; the punctuation stays attached to
	    the chunk before the cut. Empty chunks are dropped, so an empty or
	    whitespace-only input yields an empty list.
	    """
	    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
	    return list(filter(None, pieces))

	# ----------------------
	# Aggressive Post-Processing
	# ----------------------
	# Long form (lower-cased) -> contraction used by postprocess_text().
	_CONTRACTIONS = {
	    "do not": "don't", "cannot": "can't", "will not": "won't", "i am": "I'm",
	    "is not": "isn't", "are not": "aren't", "did not": "didn't", "it is": "it's",
	    "does not": "doesn't", "have not": "haven't", "has not": "hasn't",
	}
	# One case-insensitive pattern for all long forms; the keys are plain words
	# and spaces, so they can be joined without escaping.
	_CONTRACTION_RE = re.compile(
	    r'\b(?:' + '|'.join(_CONTRACTIONS) + r')\b', flags=re.IGNORECASE
	)
	_IDIOMS = (
	    "at the end of the day", "to be honest", "as a matter of fact", "for what it's worth",
	    "in a nutshell", "the bottom line is", "all things considered",
	)
	_TRANSITIONS = (
	    "Interestingly,", "In fact,", "To be clear,", "As a result,", "For example,",
	    "On the other hand,", "In other words,",
	)


	def _contract(match):
	    """Return the contraction for a matched long form, keeping its capitalization."""
	    found = match.group(0)
	    short = _CONTRACTIONS[found.lower()]
	    # The match is case-insensitive, so "Do not" at the start of a sentence
	    # must become "Don't", not "don't".
	    if found[0].isupper():
	        return short[0].upper() + short[1:]
	    return short


	def postprocess_text(text):
	    """Apply contractions and random "human-like" touches to *text*.

	    Steps, in order:

	    1. Replace long forms such as "do not" / "it is" with their contractions,
	       matching case-insensitively and preserving a leading capital.
	    2. With probability 0.3, append a random idiom followed by a period.
	    3. With probability 0.3, prepend a random transition phrase.
	    4. With probability 0.2, lower-case one random word that is neither the
	       first nor the last (only when the text has more than three words).
	       This step re-joins the words with single spaces.

	    Returns the modified text. Steps 2-4 use the global ``random`` state, so
	    the result is not deterministic unless the caller seeds it.
	    """
	    text = _CONTRACTION_RE.sub(_contract, text)
	    if random.random() < 0.3:
	        text += " " + random.choice(_IDIOMS) + "."
	    if random.random() < 0.3:
	        text = random.choice(_TRANSITIONS) + " " + text
	    # Randomly lower-case a word to mimic human error
	    if random.random() < 0.2:
	        words = text.split()
	        if len(words) > 3:
	            idx = random.randint(1, len(words) - 2)
	            words[idx] = words[idx].lower()
	            text = ' '.join(words)
	    return text

	# ----------------------
	# Multi-Model, Multi-Pass Paraphrasing
	# ----------------------
	def pegasus_paraphrase(sentence):
	    """Rewrite one sentence with the Pegasus model.

	    A random entry of ``PEGASUS_PROMPTS`` is placed in front of *sentence*,
	    the result is tokenized (truncated to 60 tokens) and decoded with
	    5-beam search. Returns the first decoded sequence, or *sentence*
	    unchanged if nothing was decoded.
	    """
	    # NOTE(review): the prompt prefix is part of the model input; confirm
	    # against the model card that this checkpoint follows instructions
	    # rather than paraphrasing the prefix itself.
	    instruction = random.choice(PEGASUS_PROMPTS)
	    encoded = pegasus_tokenizer(
	        [f"{instruction} {sentence}"],
	        truncation=True,
	        padding='longest',
	        max_length=60,
	        return_tensors="pt",
	    ).to(device)
	    generated = pegasus_model.generate(
	        **encoded,
	        max_length=60,
	        num_beams=5,
	        num_return_sequences=1,
	        temperature=1.0,
	    )
	    decoded = pegasus_tokenizer.batch_decode(generated, skip_special_tokens=True)
	    if not decoded:
	        return sentence
	    return decoded[0]

	def t5_paraphrase(sentence):
	    """Rewrite one sentence with the T5 model using sampling.

	    A random entry of ``T5_PROMPTS`` is prepended to *sentence*, the text is
	    encoded (truncated to 256 tokens) and a single sequence is sampled.
	    Returns that sequence decoded without special tokens.
	    """
	    model_input = random.choice(T5_PROMPTS) + " " + sentence
	    input_ids = t5_tokenizer.encode(
	        model_input, return_tensors="pt", max_length=256, truncation=True
	    ).to(device)
	    # Sampling settings are kept together so they are easy to compare/tune.
	    sampling = dict(
	        do_sample=True,
	        top_k=120,
	        top_p=0.95,
	        temperature=0.7,
	        repetition_penalty=1.2,
	        max_length=256,
	        num_return_sequences=1,
	    )
	    generated = t5_model.generate(input_ids, **sampling)
	    return t5_tokenizer.decode(generated[0], skip_special_tokens=True)

	# ----------------------
	# Feedback Loop with AI Detector
	# ----------------------
	def check_ai_score(text):
	    """Score *text* with the local AI detector.

	    Returns a ``(ai_probability, error)`` pair:

	    * ``(score, None)`` when the detector labels the text ``LABEL_1``/``Fake``;
	    * ``(1.0 - score, None)`` when it labels the text ``LABEL_0``/``Real``;
	    * ``(0.5, None)`` when no prediction carries a recognised label;
	    * ``(None, message)`` when the detector (or reading its output) raises.
	    """
	    try:
	        # Truncate to 512 tokens so long passages are scored on their
	        # leading part instead of raising inside the model.
	        predictions = ai_detector(text, truncation=True, max_length=512)
	        for prediction in predictions:
	            label = prediction['label']
	            if label in ['LABEL_1', 'Fake']:
	                return prediction['score'], None
	            if label in ['LABEL_0', 'Real']:
	                return 1.0 - prediction['score'], None
	        return 0.5, None
	    except Exception as e:
	        # Deliberately broad: callers treat any failure as "no score".
	        return None, f"AI detection error: {str(e)}"

	# ----------------------
	# Main Humanizer Pipeline
	# ----------------------
	def humanize_pipeline(text, tone, max_feedback_loops=2):
	    """Paraphrase *text* sentence by sentence, retrying while it looks AI-written.

	    Each pass splits the current text into sentences, runs every sentence
	    through Pegasus and then T5, joins the results with spaces and applies
	    ``postprocess_text``. After the first pass, up to *max_feedback_loops*
	    detector checks are made; a pass is repeated on the already processed
	    text unless the detector returns an AI probability below 0.5. A failed
	    detector call (probability ``None``) also triggers another pass.

	    *tone* is accepted for interface compatibility but is not read here.
	    Returns the text produced by the last pass.
	    """

	    def rewrite_once(source):
	        # One full pass: Pegasus first, T5 second, for every sentence in order.
	        rewritten = [
	            t5_paraphrase(pegasus_paraphrase(piece))
	            for piece in split_sentences(source)
	        ]
	        return postprocess_text(' '.join(rewritten))

	    candidate = rewrite_once(text)
	    for _attempt in range(max_feedback_loops):
	        ai_prob, _error = check_ai_score(candidate)
	        looks_human = ai_prob is not None and ai_prob < 0.5
	        if looks_human:
	            break
	        # Still flagged (or detector failed): re-paraphrase every sentence.
	        candidate = rewrite_once(candidate)
	    return candidate

	# ----------------------
	# Semantic Similarity Function
	# ----------------------
	def semantic_similarity(text1, text2):
	    """Return the cosine similarity of the two texts' sentence embeddings.

	    Each text is encoded separately with ``similarity_model``; the result
	    is converted to a plain Python float.
	    """
	    first, second = (
	        similarity_model.encode(sample, convert_to_tensor=True)
	        for sample in (text1, text2)
	    )
	    return util.pytorch_cos_sim(first, second).item()

	# ----------------------
	# Humanization Score & Rating
	# ----------------------
	def humanization_score(sim, ai_prob):
	    """Combine similarity and AI probability into a single score.

	    The score is the equally weighted sum of ``1 - sim`` and
	    ``1 - ai_prob``; it rises as the text differs more from the original
	    and as the detector's AI probability falls.
	    """
	    # NOTE(review): lower similarity raises the score - confirm that
	    # rewarding divergence from the source text is intended.
	    difference_part = 1.0 - sim
	    human_part = 1.0 - ai_prob
	    return difference_part * 0.5 + human_part * 0.5

	def humanization_rating(score):
	    """Return a human-readable label for a humanization *score*.

	    Scores below 0.7 are "Still AI-like", scores below 0.85 are
	    "Acceptable", everything else is "Highly Humanized". The score is
	    appended in parentheses with two decimals.
	    """
	    if score < 0.7:
	        label = "⚠️ Still AI-like"
	    elif score < 0.85:
	        label = "👍 Acceptable"
	    else:
	        label = "✅ Highly Humanized"
	    return f"{label} ({score:.2f})"

	# ----------------------
	# Main Processing Function
	# ----------------------
	def process(text, tone):
	    """Humanize *text* and compute the values shown in the UI.

	    Three candidates are generated with ``humanize_pipeline``; the one with
	    the lowest detector AI probability is kept. If no candidate can be
	    scored below 1.0, the first candidate is used with a score of 0.

	    Returns a 6-tuple matching the click handler's outputs:
	    ``(humanized_text, ai_score_markdown, similarity, rating,
	    humanization_score_percent, "")``. For empty input every field is
	    empty/zero; on detector or paraphrasing failure the error message is
	    placed in the second or first field respectively.
	    """
	    # Treat None like empty text instead of failing on .strip().
	    if not text or not text.strip():
	        return "", "", 0.0, "", 0.0, ""
	    pre_ai_prob, pre_err = check_ai_score(text)
	    if pre_ai_prob is None:
	        return "", f"AI Detection Error: {pre_err}", 0.0, "", 0.0, ""
	    try:
	        # Generate 3 versions for user choice
	        outputs = [humanize_pipeline(text, tone) for _ in range(3)]
	    except Exception as e:
	        # UI boundary: surface the failure in the output box.
	        return f"[Paraphrasing error: {str(e)}]", "", 0.0, "", 0.0, ""

	    # Pick the most human-like version (lowest ai_prob)
	    best = None
	    best_score = 0.0
	    best_ai_prob = 1.0
	    best_sim = None
	    for out in outputs:
	        post_ai_prob, _ = check_ai_score(out)
	        if post_ai_prob is None or post_ai_prob >= best_ai_prob:
	            continue
	        # Similarity is only needed for candidates that become the best one.
	        best_sim = semantic_similarity(text, out)
	        best = out
	        best_score = humanization_score(best_sim, post_ai_prob)
	        best_ai_prob = post_ai_prob
	    if best is None:
	        # No candidate could be scored as better than "fully AI".
	        best = outputs[0]
	        best_sim = semantic_similarity(text, best)

	    rating = humanization_rating(best_score)
	    ai_score_str = (
	        f"Pre: {100 * (1 - pre_ai_prob):.1f}% human | "
	        f"Post: {100 * (1 - best_ai_prob):.1f}% human"
	    )
	    return (
	        best,
	        ai_score_str,
	        best_sim,
	        rating,
	        best_score * 100,
	        ""
	    )

	# ----------------------
	# Gradio UI
	# ----------------------
	# Blue/slate palette; passed as `theme=` to the Blocks app.
	custom_theme = grthemes.Base(primary_hue="blue", secondary_hue="blue", neutral_hue="slate")

	# Page layout: header, one row with an input column and an output column,
	# the click wiring, and a footer.
	with gr.Blocks(theme=custom_theme, title="AI Humanizer - Made by Taha") as demo:
	    gr.Markdown("""
	# 🧠 AI Humanizer
	<div style='display:flex;justify-content:space-between;align-items:center;'>
	<span style='font-size:1.2em;color:#7bb1ff;'>Rewrite AI text to sound 100% human</span>
	<span style='font-weight:bold;color:#7bb1ff;'>Made by Taha</span>
	</div>
	""", elem_id="header")
	    with gr.Row():
	        # Left column: user input and controls.
	        with gr.Column():
	            text_in = gr.Textbox(label="Paste AI-generated text here", lines=8, placeholder="Paste your text...", elem_id="input-box")
	            tone = gr.Dropdown(["Academic", "Casual", "Friendly", "Stealth"], value="Stealth", label="Tone Selector")
	            btn = gr.Button("Humanize", elem_id="humanize-btn")
	        # Right column: results returned by process().
	        with gr.Column():
	            text_out = gr.Textbox(label="Humanized Output", lines=8, interactive=False, elem_id="output-box")
	            ai_scores = gr.Markdown("", elem_id="ai-scores")
	            sim_score = gr.Number(label="Similarity (0=very different, 1=very similar)", interactive=False)
	            rating = gr.Markdown("", elem_id="rating")
	            human_score = gr.Number(label="Humanization Score (%)", interactive=False)
	    # process() returns six values; the sixth goes to a hidden textbox.
	    btn.click(
	        process,
	        inputs=[text_in, tone],
	        outputs=[text_out, ai_scores, sim_score, rating, human_score, gr.Textbox(visible=False)],
	        api_name="humanize"
	    )
	    # Plain "|" separators: a backslash-escaped pipe is an invalid Python
	    # escape sequence and would show up literally inside the HTML block.
	    gr.Markdown("""
	<div style='text-align:center;color:#7bb1ff;margin-top:2em;'>
	<b>Made by Taha</b> | Free for unlimited use | Optimized for students and creators
	</div>
	""", elem_id="footer")

	demo.launch()