KrishnaKarthik committed on
Commit
c25db68
·
verified ·
1 Parent(s): ee15050

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModel
4
+ import torch.nn as nn
5
+ import numpy as np
6
+
7
class HybridCodeClassifier(nn.Module):
    """Pretrained transformer encoder topped with a single linear classifier.

    The hidden state at sequence position 0 of the encoder output is passed
    through one linear layer that yields one logit per label.
    """

    def __init__(self, model_name="microsoft/codebert-base", num_labels=4):
        """Create the encoder and the classification head.

        Args:
            model_name: Identifier handed to ``AutoModel.from_pretrained``.
            num_labels: Number of output classes (width of the logit vector).
        """
        super().__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        # Size the head from the encoder's own config so a non-768-wide
        # ``model_name`` still works; 768 stays as the fallback so existing
        # checkpoints trained with the default encoder keep loading.
        hidden_size = getattr(self.encoder.config, "hidden_size", 768)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return the classification logits for a tokenized batch.

        Args:
            input_ids: Token ids, forwarded unchanged to the encoder.
            attention_mask: Attention mask, forwarded unchanged to the encoder.

        Returns:
            Output of the linear head applied to ``last_hidden_state[:, 0, :]``
            (presumably shaped ``(batch, num_labels)`` -- confirm with caller).
        """
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        first_token_state = outputs.last_hidden_state[:, 0, :]
        return self.classifier(first_token_state)
16
+
17
# Label mappings for Task C: class index -> display name shown in the UI.
# The emoji were mis-encoded (mojibake); they are restored to real UTF-8 here.
label_mappings = {
    0: "Human-written 👨‍💻",
    1: "Machine-generated 🤖",
    2: "Hybrid 🔄",
    3: "Adversarial ⚔️",
}

# Class index -> newline-separated bullet list describing typical traits of
# that class; shown next to the primary prediction.
label_descriptions = {
    0: "• Natural code patterns\n• Imperfections and TODOs\n• Personal coding style\n• Practical solutions",
    1: "• Perfect structure\n• Comprehensive docs\n• Consistent formatting\n• Over-engineered",
    2: "• Mixed patterns\n• Some AI elements\n• Some human elements\n• Inconsistent style",
    3: "• Designed to mimic humans\n• Strategic imperfections\n• Hard to detect\n• Evasive patterns",
}
31
+
32
@torch.no_grad()
def load_taskC_model():
    """Build the Task C classifier and load its fine-tuned weights.

    The checkpoint ``taskC_model.pth`` is fetched from the
    ``KrishnaKarthik/ai-code-detector`` Hub repository and loaded on CPU.

    Returns:
        The ``HybridCodeClassifier`` switched to eval mode, or ``None`` when
        the download or the weight loading fails (the error is printed, not
        raised, so callers must handle ``None``).
    """
    model = HybridCodeClassifier(num_labels=4)

    try:
        # Imported lazily so a missing huggingface_hub is reported through
        # the same error path as a failed download.
        from huggingface_hub import hf_hub_download

        model_path = hf_hub_download(
            repo_id="KrishnaKarthik/ai-code-detector",
            filename="taskC_model.pth",
        )
        # NOTE(security): torch.load unpickles the file, and this file is
        # downloaded at runtime. If the installed torch supports it, consider
        # passing weights_only=True -- confirm the minimum torch version first.
        state_dict = torch.load(model_path, map_location="cpu")
        model.load_state_dict(state_dict)
        print("✅ Loaded Task C Hybrid Code Detector!")
    except Exception as e:
        # Best-effort by design: the app still starts without weights.
        print(f"❌ Error: {e}")
        return None

    model.eval()
    return model
50
+
51
# Loaded once at import time and shared by every request.
# model_taskC is None when the checkpoint could not be loaded.
model_taskC = load_taskC_model()
tokenizer_taskC = AutoTokenizer.from_pretrained("microsoft/codebert-base")


def _confidence_label(probability):
    """Map the top-class probability to the confidence banner shown in the UI."""
    if probability >= 0.8:
        return "🟢 HIGH confidence"
    if probability >= 0.6:
        return "🟡 MEDIUM confidence"
    return "🔴 LOW confidence"


def _format_results(probs):
    """Render a header plus one numbered line per class with its probability."""
    lines = ["🔍 DETECTION RESULTS:", "=" * 50]
    lines.extend(
        f"{rank}. {label_mappings[label_id]:20} {probs[label_id]:.1%}"
        for rank, label_id in enumerate(label_mappings, start=1)
    )
    return "\n".join(lines) + "\n"


def detect_hybrid_code(code):
    """Detect human, machine, hybrid, or adversarial code.

    Args:
        code: Source code text from the UI. ``None``, empty, or
            whitespace-only input is treated as "nothing entered".

    Returns:
        A 4-tuple of strings ``(results, prediction, description, confidence)``:
        the per-class probability table, the top label, its description and a
        confidence banner. On failure the first element carries an error
        message, the second is ``"Error"`` and the rest are empty.
    """
    # Guard against None as well as blank text; calling .strip() on None
    # would raise before the try block below could catch it.
    if not code or not code.strip():
        return "Please enter code", "", "", ""

    # load_taskC_model() returns None on failure; report that explicitly
    # instead of failing with "'NoneType' object is not callable".
    if model_taskC is None:
        return (
            "Error: model weights could not be loaded; check the startup logs.",
            "Error",
            "",
            "",
        )

    try:
        # Input longer than 512 tokens is truncated by the tokenizer.
        inputs = tokenizer_taskC(code, return_tensors="pt", truncation=True, max_length=512)

        with torch.no_grad():
            logits = model_taskC(**inputs)
            probs = torch.softmax(logits, dim=1)[0].numpy()

        # Plain int so the dict lookups below do not depend on numpy scalars.
        main_pred_idx = int(np.argmax(probs))
        main_pred_prob = probs[main_pred_idx]

        return (
            _format_results(probs),
            label_mappings[main_pred_idx],
            label_descriptions[main_pred_idx],
            _confidence_label(main_pred_prob),
        )

    except Exception as e:
        # UI boundary: surface the failure in the result box rather than crash.
        return f"Error: {e}", "Error", "", ""
93
+
94
# Gradio Interface
with gr.Blocks(title="Hybrid Code Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔄 Hybrid Code Detector")
    gr.Markdown("Classify code as: **Human** 👨‍💻 | **Machine** 🤖 | **Hybrid** 🔄 | **Adversarial** ⚔️")

    with gr.Row():
        code_input = gr.Textbox(
            label="Paste code to analyze",
            placeholder="def hello_world():\n    print('Hello, World!')",
            lines=10,
        )

    analyze_btn = gr.Button("Analyze Code", variant="primary", size="lg")

    with gr.Row():
        with gr.Column():
            results_output = gr.Textbox(label="Detection Results", lines=8)
            confidence_output = gr.Textbox(label="Confidence Level")
        with gr.Column():
            prediction_output = gr.Textbox(label="Primary Prediction")
            description_output = gr.Textbox(label="Characteristics", lines=4)

    gr.Markdown("### 💡 Examples to Test:")
    # One value per row because there is a single input component. The
    # original rows carried a second, unused label value, kept here as
    # comments instead.
    examples = [
        # Human-like
        ["def calc(x):\n    # quick hack\n    result = x * 2\n    if x > 10:\n        result += 5\n    return result"],
        # AI-like
        [
            "def calculate_sum(numbers):\n"
            "    '''\n"
            "    Calculate the sum of all numbers in the input list.\n"
            "\n"
            "    Args:\n"
            "        numbers (List[int]): Input list of numbers\n"
            "\n"
            "    Returns:\n"
            "        int: Sum of all numbers\n"
            "    '''\n"
            "    return sum(numbers)"
        ],
    ]

    gr.Examples(examples=examples, inputs=code_input)

    # Output order must match the 4-tuple returned by detect_hybrid_code:
    # (results, prediction, description, confidence).
    analyze_btn.click(
        fn=detect_hybrid_code,
        inputs=code_input,
        outputs=[results_output, prediction_output, description_output, confidence_output],
    )

if __name__ == "__main__":
    demo.launch()