# IS41720_Zone / app.py
import os
import shutil
import html
import math

import gradio as gr
import openai
import PyPDF2
import numpy as np
# Tracks which optional dependencies loaded; error strings are surfaced in the UI.
MODEL_STATUS = {
    'tiktoken': False,
    'transformers': False,
    'torch': False,
    'model_loaded': False,
    'error_messages': []
}
try:
import tiktoken
gpt_tokenizer = tiktoken.get_encoding("gpt2")
MODEL_STATUS['tiktoken'] = True
except Exception as e:
MODEL_STATUS['error_messages'].append(f"tiktoken error: {str(e)}")
gpt_tokenizer = None
# WEEK 3
# try:
# from transformers import AutoTokenizer, AutoModel
# import torch
# MODEL_STATUS['transformers'] = True
# MODEL_STATUS['torch'] = True
#
# print("Loading model...")
# tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-tiny")
# model = AutoModel.from_pretrained("prajjwal1/bert-tiny")
# MODEL_STATUS['model_loaded'] = True
# print("model loaded successfully!")
#
# except Exception as e:
# MODEL_STATUS['error_messages'].append(f"Model loading error: {str(e)}")
# tokenizer = None
# model = None
# Placeholders until the Week 3 attention model block above is re-enabled
tokenizer = None
model = None
# OpenAI setup. Request code below builds its own openai.OpenAI client;
# the legacy module-level key is set here only for compatibility.
OPENAI_API_KEY = os.getenv("openAI_TOKEN")
if OPENAI_API_KEY:
    openai.api_key = OPENAI_API_KEY
else:
    MODEL_STATUS['error_messages'].append("OpenAI API key not found")
# Housekeeping: clear the Hugging Face cache if it has grown beyond 40GB.
cache_dir = os.path.expanduser("~/.cache/huggingface")
if os.path.exists(cache_dir):
try:
        # Sum all file sizes under the cache directory and convert bytes to GB.
        total_size = sum(
            os.path.getsize(os.path.join(dirpath, filename))
            for dirpath, dirnames, filenames in os.walk(cache_dir)
            for filename in filenames
        ) / (1024**3)
if total_size > 40:
shutil.rmtree(cache_dir)
print(f"Cleared {total_size:.2f}GB cache")
except Exception as e:
print(f"Cache cleanup error: {e}")
# model_functions supplies the Week 4/5 and assignment helpers used below:
# generate_with_temperature, generate_with_system_prompt, generate_with_examples,
# generate_combined_techniques, generate_tot_response, generate_self_consistency,
# compare_prompt_structures, handle_assignment_experiment.
from model_functions import *
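# A hedged sketch of the shape these helpers are assumed to take (the real
# implementations live in model_functions.py; this commented example is
# illustration only, not the actual code):
#
# def generate_with_temperature(prompt, temperature):
#     client = openai.OpenAI(api_key=OPENAI_API_KEY)
#     response = client.chat.completions.create(
#         model="gpt-3.5-turbo",
#         messages=[{"role": "user", "content": prompt}],
#         temperature=temperature,
#         max_tokens=200,
#     )
#     return response.choices[0].message.content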
def tokenize_text(text):
if not text.strip():
return [], 0, "Enter some text to see tokenization"
if gpt_tokenizer:
try:
tokens = gpt_tokenizer.encode(text)
token_strings = []
for token in tokens:
try:
decoded = gpt_tokenizer.decode([token])
token_strings.append(decoded)
except UnicodeDecodeError:
token_strings.append(f"<token_{token}>")
            return token_strings, len(tokens), f"Text tokenized successfully → {len(tokens)} tokens"
except Exception as e:
return [], 0, f"Tokenization error: {str(e)}"
else:
# Fallback: simple whitespace tokenization
tokens = text.split()
        return tokens, len(tokens), f"Using fallback tokenization → {len(tokens)} tokens (tiktoken unavailable)"
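# Example: tokenize_text("Hello world") should yield two GPT-2 BPE tokens,
# "Hello" and " world" (the leading space folds into the second token).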
def get_next_token_predictions(text):
"""Get next token predictions using OpenAI API"""
if not text.strip():
return "Enter some text to see predictions"
if not OPENAI_API_KEY:
return "OpenAI API key not available - cannot generate predictions"
try:
client = openai.OpenAI(api_key=OPENAI_API_KEY)
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "Complete the following text with the next most likely word. Provide exactly 3 options with their approximate probabilities."},
{"role": "user", "content": f"Text: '{text}'\n\nNext word options:"}
],
temperature=0.1,
max_tokens=50
)
return response.choices[0].message.content
except Exception as e:
return f"Error getting predictions: {str(e)}"
def merge_subword_tokens(tokens, attention_matrix):
"""Merge subword tokens back into words for cleaner viz"""
merged_tokens = []
merged_attention = []
current_word = ""
current_indices = []
for i, token in enumerate(tokens):
if token.startswith('##'):
current_word += token[2:]
current_indices.append(i)
else:
if current_word:
merged_tokens.append(current_word)
merged_attention.append(current_indices)
current_word = token
current_indices = [i]
if current_word:
merged_tokens.append(current_word)
merged_attention.append(current_indices)
# Merge attention weights by averaging
merged_matrix = np.zeros((len(merged_tokens), len(merged_tokens)))
for i, i_indices in enumerate(merged_attention):
for j, j_indices in enumerate(merged_attention):
# Average attention between word groups
weights = []
for ii in i_indices:
for jj in j_indices:
if ii < attention_matrix.shape[0] and jj < attention_matrix.shape[1]:
weights.append(attention_matrix[ii, jj])
if weights:
merged_matrix[i, j] = np.mean(weights)
return merged_tokens, merged_matrix
def create_attention_network_svg(text):
if not text.strip():
return "Enter text to see attention network"
if not MODEL_STATUS['model_loaded']:
return f"Attention model not available. Errors: {MODEL_STATUS['error_messages']}"
try:
# Tokenize input
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=64)
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
with torch.no_grad():
outputs = model(**inputs, output_attentions=True)
# Remove special tokens
clean_tokens = []
clean_indices = []
for i, token in enumerate(tokens):
if token not in ['[CLS]', '[SEP]', '[PAD]']:
clean_tokens.append(token)
clean_indices.append(i)
if len(clean_indices) < 2:
return "Need at least 2 valid tokens for attention visualisation."
# SEARCH for best head: max variance
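        # Higher variance means the head spreads attention unevenly, which tends
        # to give a more readable graph than a head that attends near-uniformly.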
best_attention = None
best_name = ""
best_tokens = []
best_variance = -1
debug_info = f"Total Layers: {len(outputs.attentions)}\n"
for layer_idx, layer_att in enumerate(outputs.attentions):
num_heads = layer_att.shape[1]
for head_idx in range(num_heads):
attn_matrix = layer_att[0, head_idx].numpy()
trimmed_attention = attn_matrix[np.ix_(clean_indices, clean_indices)]
variance = np.var(trimmed_attention)
                debug_info += f"Layer {layer_idx}, Head {head_idx} - Variance: {variance:.5f}\n"
if variance > best_variance:
best_attention = trimmed_attention
best_name = f"Layer {layer_idx}, Head {head_idx}"
best_tokens = clean_tokens
best_variance = variance
if best_attention is None:
return "Could not extract valid attention."
# Merge subwords
merged_tokens, merged_attention = merge_subword_tokens(best_tokens, best_attention)
n_tokens = len(merged_tokens)
if n_tokens < 2:
return "Too few tokens after merging for attention graph."
# SVG dimensions
width, height = 1000, 500
margin = 50
# Linear positions
positions = []
for i in range(n_tokens):
x = margin + (width - 2*margin) * i / (n_tokens - 1)
y = height // 2
positions.append((x, y))
# Start SVG
svg = f'<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg">'
svg += '<style>.token-text { font-family: Arial; font-size: 14px; text-anchor: middle; font-weight: bold; }'
svg += '.debug-text { font-family: monospace; font-size: 10px; fill: #666; }</style>'
# Choose top-N attention connections
num_top_connections = 20
pairs = []
for i in range(n_tokens):
for j in range(n_tokens):
if i != j:
pairs.append((merged_attention[i, j], i, j))
pairs.sort(reverse=True)
top_pairs = pairs[:num_top_connections]
# Draw attention arcs
for weight, i, j in top_pairs:
x1, y1 = positions[i]
x2, y2 = positions[j]
mid_x = (x1 + x2) / 2
curve_y = y1 - 80 if (i + j) % 2 == 0 else y1 + 80
# Color coding
if weight > 0.08:
color = "#d32f2f" # red
opacity = "0.8"
elif weight > 0.04:
color = "#ff9800" # orange
opacity = "0.6"
else:
color = "#2196f3" # blue
opacity = "0.4"
thickness = max(2, weight * 10)
svg += f'<path d="M {x1},{y1} Q {mid_x},{curve_y} {x2},{y2}" '
svg += f'stroke="{color}" stroke-width="{thickness}" fill="none" opacity="{opacity}"/>'
# Draw nodes
        for i, (token, (x, y)) in enumerate(zip(merged_tokens, positions)):
            svg += f'<circle cx="{x}" cy="{y}" r="25" fill="white" stroke="black" stroke-width="2"/>'
            # Escape the token text so characters like < or & cannot break the SVG.
            svg += f'<text x="{x}" y="{y+5}" class="token-text">{html.escape(token[:10])}</text>'
# Legend and info
svg += f'<text x="20" y="{height - 130}" style="font-family: Arial; font-size: 16px; font-weight: bold;">'
svg += f'Attention Network - {best_name}</text>'
svg += f'<text x="20" y="{height - 110}" style="font-family: Arial; font-size: 12px;">'
svg += f'Red: Strong | Orange: Medium | Blue: Weak | Showing top {num_top_connections} connections</text>'
# Debug info (limited lines)
for i, line in enumerate(debug_info.split('\n')[:8]):
svg += f'<text x="20" y="{height - 90 + 12*i}" class="debug-text">{line}</text>'
svg += '</svg>'
return svg
except Exception as e:
return f"Error generating attention network: {str(e)}"
with gr.Blocks() as demo:
gr.Markdown("# Language Models & Methods Lab Interface")
with gr.Tabs() as tabs:
# Week 3 Tab
with gr.Tab("Week 3: Text Processing"):
gr.Markdown("# How Language Models Process Text")
gr.Markdown("Explore tokenization, context windows, and attention mechanisms")
with gr.Tabs() as week3_tabs:
with gr.Tab("Tokenization Explorer"):
gr.Markdown("### See how text gets broken into tokens")
with gr.Row():
token_input = gr.Textbox(
label="Enter your text",
placeholder="Type any text to see how it gets tokenized...",
lines=3,
value="The quick brown fox jumps over the lazy dog."
)
with gr.Row():
tokenize_btn = gr.Button("Tokenize Text")
with gr.Row():
token_display = gr.Textbox(label="Tokens", lines=3, interactive=False)
token_count = gr.Number(label="Token Count", interactive=False)
with gr.Row():
token_info = gr.Textbox(label="Tokenization Info", lines=2, interactive=False)
with gr.Tab("Context & Predictions"):
gr.Markdown("### Next-word predictions and context understanding")
with gr.Row():
context_input = gr.Textbox(
label="Enter incomplete text",
placeholder="I went to the bank to",
lines=2,
value="I went to the bank to"
)
with gr.Row():
predict_btn = gr.Button("Get Next Word Predictions")
with gr.Row():
predictions_output = gr.Textbox(label="Most Likely Next Words", lines=5, interactive=False)
with gr.Row():
context_window_info = gr.Textbox(
label="Context Window Status",
value="Click 'Get Predictions' to see token usage",
interactive=False
)
with gr.Tab("Attention Network"):
gr.Markdown("### Network visualisation of attention patterns")
gr.Markdown("See how words connect to each other through attention mechanisms")
with gr.Row():
attention_input = gr.Textbox(
label="Enter a sentence (shorter sentences work better)",
placeholder="The bank was closed.",
lines=2,
value="The bank was closed."
)
with gr.Row():
analyze_attention_btn = gr.Button("Generate Attention Network")
with gr.Row():
attention_network = gr.HTML(label="Attention Network Visualisation")
# Week 3 Event Handlers
def update_tokenization(text):
tokens, count, info = tokenize_text(text)
token_str = " | ".join(tokens) if tokens else ""
return token_str, count, info
def update_predictions_with_context(text):
if not text.strip():
return "Enter text to get predictions", "No text to analyze"
# Get token count for context window
_, token_count, _ = tokenize_text(text)
context_status = f"Current text: {token_count} tokens / 4096 (GPT-3.5 limit) = {token_count/4096*100:.1f}% used"
# Get predictions
predictions = get_next_token_predictions(text)
return predictions, context_status
def generate_network_visualization(text):
return create_attention_network_svg(text)
# Connect event handlers
tokenize_btn.click(
update_tokenization,
inputs=[token_input],
outputs=[token_display, token_count, token_info]
)
# Auto-update tokenization as user types
token_input.change(
update_tokenization,
inputs=[token_input],
outputs=[token_display, token_count, token_info]
)
predict_btn.click(
update_predictions_with_context,
inputs=[context_input],
outputs=[predictions_output, context_window_info]
)
analyze_attention_btn.click(
generate_network_visualization,
inputs=[attention_input],
outputs=[attention_network]
)
# OTHER WEEKS
with gr.Tab("Week 4: Controlling Model Behaviour"):
gr.Markdown("# Controlling Model Behaviour Through Prompting")
gr.Markdown("Explore how different prompting techniques and parameters affect model outputs")
with gr.Tabs() as week4_tabs:
with gr.Tab("Temperature Effects"):
gr.Markdown("### Compare how temperature affects creativity and consistency")
with gr.Row():
temp_input = gr.Textbox(
label="Enter your prompt",
placeholder="Type your question or prompt here...",
lines=3,
value="Write a creative opening sentence for a story about a robot looking for a friend."
)
with gr.Row():
temp_slider1 = gr.Slider(
minimum=0.1,
maximum=0.4,
value=0.2,
step=0.1,
label="Low Temperature (More Focused & Consistent)"
)
temp_slider2 = gr.Slider(
minimum=0.7,
maximum=1.0,
value=0.9,
step=0.1,
label="High Temperature (More Creative & Varied)"
)
with gr.Row():
generate_temp = gr.Button("Generate Both Responses")
with gr.Row():
focused_output = gr.Textbox(
label="Focused Output (Low Temperature)",
lines=5
)
creative_output = gr.Textbox(
label="Creative Output (High Temperature)",
lines=5
)
with gr.Tab("System Prompts"):
gr.Markdown("### See how system prompts shape model behaviour")
with gr.Row():
system_input = gr.Textbox(
label="Enter your prompt",
placeholder="Type your question or prompt here...",
lines=3,
value="Explain what a database index is."
)
with gr.Row():
system_prompt_dropdown = gr.Dropdown(
choices=[
"You are a helpful assistant providing accurate, concise answers.",
"You are a data scientist explaining technical concepts with precision and examples.",
"You are a creative storyteller who uses vivid metaphors and analogies.",
"You are a critical reviewer who evaluates information carefully and points out limitations.",
"You are a friendly teacher explaining concepts to someone learning for the first time."
],
label="Choose System Prompt",
value="You are a helpful assistant providing accurate, concise answers."
)
with gr.Row():
generate_system = gr.Button("Generate Response")
with gr.Row():
system_output = gr.Textbox(label="Output", lines=6)
with gr.Tab("Prompting Techniques"):
gr.Markdown("""
### Compare Zero-Shot, Few-Shot, and Chain-of-Thought
- **Zero-shot:** Direct question without examples
- **Few-shot:** You should provide similar examples to guide the response
- **Chain-of-thought:** Asks model to break down reasoning step-by-step
""")
with gr.Row():
shot_input = gr.Textbox(
label="Enter your task",
placeholder="Enter a task that requires reasoning...",
lines=3,
value="Classify the sentiment: 'The product works okay but customer service was terrible.'"
)
with gr.Row():
approach_type = gr.Radio(
["zero-shot", "few-shot", "chain-of-thought"],
label="Select Prompting Technique",
value="zero-shot"
)
with gr.Row():
generate_shot = gr.Button("Generate Response")
with gr.Row():
shot_output = gr.Textbox(label="Output", lines=8)
with gr.Tab("Combining Techniques"):
gr.Markdown("### Experiment with combining multiple techniques")
with gr.Row():
combo_input = gr.Textbox(
label="Enter your task",
placeholder="Enter a complex task...",
lines=3,
value="Analyse this review and suggest improvements: 'App crashes sometimes but has good features.'"
)
with gr.Row():
combo_system = gr.Dropdown(
choices=[
"None (default)",
"You are a product analyst providing structured feedback.",
"You are a UX researcher focused on user experience.",
],
label="System Prompt (optional)",
value="None (default)"
)
with gr.Row():
combo_examples = gr.Checkbox(
label="Include few-shot examples",
value=False
)
combo_cot = gr.Checkbox(
label="Use chain-of-thought reasoning",
value=False
)
with gr.Row():
combo_temp = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.5,
step=0.1,
label="Temperature"
)
with gr.Row():
generate_combo = gr.Button("Generate Response")
with gr.Row():
combo_output = gr.Textbox(label="Output", lines=8)
combo_info = gr.Textbox(label="Techniques Applied", lines=4)
generate_temp.click(
lambda x, t1, t2: [
generate_with_temperature(x, t1),
generate_with_temperature(x, t2)
],
inputs=[temp_input, temp_slider1, temp_slider2],
outputs=[focused_output, creative_output]
)
generate_system.click(
generate_with_system_prompt,
inputs=[system_input, system_prompt_dropdown],
outputs=system_output
)
generate_shot.click(
generate_with_examples,
inputs=[shot_input, approach_type],
outputs=shot_output
)
generate_combo.click(
generate_combined_techniques,
inputs=[combo_input, combo_system, combo_examples, combo_cot, combo_temp],
outputs=[combo_output, combo_info]
)
with gr.Tab("Week 5: Advanced Prompting"):
gr.Markdown("# Advanced Prompt Engineering Techniques")
gr.Markdown("Explore sophisticated prompting strategies and visualise reasoning patterns")
with gr.Tabs() as week5_tabs:
with gr.Tab("Tree of Thought Explorer"):
gr.Markdown("""
### Visualise Multi-Path Reasoning
The model will break down your problem into multiple approaches, evaluate each one, and select the best path.
""")
with gr.Row():
tot_input = gr.Textbox(
label="Enter a problem to solve",
placeholder="e.g., How can we improve user engagement on a mobile app?",
lines=3,
value="How should a startup decide between building a mobile app or a web application first?"
)
with gr.Row():
generate_tot = gr.Button("Generate Tree of Thought", variant="primary")
with gr.Row():
tot_output = gr.Textbox(
label="Reasoning Process",
lines=12
)
with gr.Row():
tot_visualization = gr.HTML(
label="Tree Visualisation"
)
with gr.Tab("Self-Consistency Testing"):
gr.Markdown("""
### Test Response Consistency
Run the same prompt multiple times to identify consistent patterns and areas of uncertainty.
""")
with gr.Row():
consistency_input = gr.Textbox(
label="Enter your prompt",
placeholder="Ask a question that requires reasoning...",
lines=3,
value="What are the three most important factors in choosing a database system?"
)
with gr.Row():
num_runs = gr.Slider(
minimum=3,
maximum=5,
value=3,
step=1,
label="Number of generations"
)
consistency_temp = gr.Slider(
minimum=0.3,
maximum=0.9,
value=0.7,
step=0.1,
label="Temperature"
)
with gr.Row():
generate_consistency = gr.Button("Generate Multiple Responses", variant="primary")
with gr.Row():
consistency_analysis = gr.Textbox(
label="Analysis Guide",
lines=4
)
with gr.Row():
consistency_output1 = gr.Textbox(label="Response 1", lines=5)
consistency_output2 = gr.Textbox(label="Response 2", lines=5)
with gr.Row():
consistency_output3 = gr.Textbox(label="Response 3", lines=5)
consistency_output4 = gr.Textbox(label="Response 4 (if selected)", lines=5, visible=True)
with gr.Row():
consistency_output5 = gr.Textbox(label="Response 5 (if selected)", lines=5, visible=True)
with gr.Tab("Prompt Structure Comparison"):
gr.Markdown("""
### Compare Structural Strategies
Test how different prompt structures affect model attention and output quality.
""")
with gr.Row():
structure_input = gr.Textbox(
label="Enter your task",
placeholder="Enter a task or question...",
lines=3,
value=""
)
with gr.Row():
gr.Markdown("### Select ONE structure to test:")
with gr.Row():
structure_radio = gr.Radio(
choices=[
"Baseline (no special structure)",
"Front-loading (critical instruction first)",
"Delimiter strategy (section separation)",
"Sandwich technique (instruction at start and end)"
],
label="Prompt Structure",
value="Baseline (no special structure)"
)
with gr.Row():
generate_structure = gr.Button("Generate Response", variant="primary")
with gr.Row():
structure_output = gr.Textbox(
label="Response",
lines=8
)
structure_info = gr.Textbox(
label="Structure Information",
lines=8
)
# Week 5 Event Handlers
def handle_tot(task):
text_output, svg_output = generate_tot_response(task)
return text_output, svg_output
            def handle_consistency(prompt, runs, temp):
                responses, analysis = generate_self_consistency(prompt, int(runs), temp)
                # Pad to five entries so every fixed output box receives a value.
                while len(responses) < 5:
                    responses.append("")
                return analysis, responses[0], responses[1], responses[2], responses[3], responses[4]
def handle_structure(task, structure_choice):
use_frontload = "Front-loading" in structure_choice
use_delimiters = "Delimiter" in structure_choice
use_sandwich = "Sandwich" in structure_choice
output, info = compare_prompt_structures(task, use_frontload, use_delimiters, use_sandwich)
return output, info
generate_tot.click(
handle_tot,
inputs=[tot_input],
outputs=[tot_output, tot_visualization]
)
generate_consistency.click(
handle_consistency,
inputs=[consistency_input, num_runs, consistency_temp],
outputs=[consistency_analysis, consistency_output1, consistency_output2,
consistency_output3, consistency_output4, consistency_output5]
)
generate_structure.click(
handle_structure,
inputs=[structure_input, structure_radio],
outputs=[structure_output, structure_info]
)
# with gr.Tab("Week 8: Error Detection"):
# # Week 8 content here
# pass
with gr.Tab("Assignment 1"):
gr.Markdown("# Assignment 1: Prompting Strategy Evaluation")
gr.Markdown("""
Test different prompting strategies for your chosen NLP task.
Remember: You need 3 documents, with 2 different strategies tested per document (6 total experiments).
""")
with gr.Row():
assignment_task = gr.Dropdown(
choices=["Sentiment Analysis", "Summarisation"],
label="Select NLP Task",
value="Sentiment Analysis"
)
with gr.Row():
with gr.Column():
assignment_text = gr.Textbox(
label="Enter Text",
placeholder="Paste your document text here...",
lines=6
)
with gr.Column():
assignment_file = gr.File(
label="OR Upload a File (TXT or PDF)",
file_types=[".txt", ".pdf"],
type="binary"
)
gr.Markdown("### Select Your Prompting Strategy")
with gr.Row():
strategy_type = gr.Radio(
choices=[
"Direct (no special technique)",
"Chain-of-thought (step-by-step reasoning)",
"Role-based (uses system prompt)",
"Combined (role + chain-of-thought)"
],
label="Prompting Strategy",
value="Direct (no special technique)",
info="Choose how the model should approach the task"
)
with gr.Row():
system_role = gr.Dropdown(
choices=[
"None",
"Technical analyst",
"Creative assistant"
],
label="System Role (for role-based strategies)",
value="None",
info="Only applies if you selected a role-based strategy"
)
with gr.Row():
assignment_temp = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.5,
step=0.1,
label="Temperature (0.1 = focused, 1.0 = creative)"
)
with gr.Row():
generate_assignment = gr.Button("Generate Response", variant="primary")
with gr.Row():
assignment_output = gr.Textbox(
label="Model Output",
lines=12
)
with gr.Row():
assignment_info = gr.Textbox(
label="Strategy Applied",
lines=3,
info="Documents which settings were used for this experiment"
)
generate_assignment.click(
handle_assignment_experiment,
inputs=[assignment_text, assignment_file, assignment_task, strategy_type, system_role, assignment_temp],
outputs=[assignment_output, assignment_info]
)
demo.launch()