"""Gradio lab interface for the Language Models & Methods course.

The module builds one ``gr.Blocks`` app with a tab per teaching week:

* Week 3 - tokenization, next-word prediction and attention visualisation
  (implemented in this file).
* Week 4 / Week 5 / Assignment 1 - prompting experiments whose generation
  functions are provided by ``model_functions`` (star-imported below).

Importing/running this module has side effects: it may clear the Hugging Face
cache directory and it launches the Gradio server.
"""

import html
import math
import os
import shutil

import gradio as gr
import numpy as np
import openai
import PyPDF2

# Availability flags for optional dependencies plus collected error messages,
# surfaced to the user when a feature cannot run.
MODEL_STATUS = {
    'tiktoken': False,
    'transformers': False,
    'torch': False,
    'model_loaded': False,
    'error_messages': []
}

try:
    import tiktoken
    gpt_tokenizer = tiktoken.get_encoding("gpt2")
    MODEL_STATUS['tiktoken'] = True
except Exception as e:
    # tiktoken is optional: tokenize_text() falls back to whitespace splitting.
    MODEL_STATUS['error_messages'].append(f"tiktoken error: {str(e)}")
    gpt_tokenizer = None

# WEEK 3
# The attention model is currently disabled; re-enable this block to load it.
# try:
#     from transformers import AutoTokenizer, AutoModel
#     import torch
#     MODEL_STATUS['transformers'] = True
#     MODEL_STATUS['torch'] = True
#
#     print("Loading model...")
#     tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-tiny")
#     model = AutoModel.from_pretrained("prajjwal1/bert-tiny")
#     MODEL_STATUS['model_loaded'] = True
#     print("model loaded successfully!")
#
# except Exception as e:
#     MODEL_STATUS['error_messages'].append(f"Model loading error: {str(e)}")
#     tokenizer = None
#     model = None
tokenizer = None
model = None

# OpenAI setup
OPENAI_API_KEY = os.getenv("openAI_TOKEN")
if OPENAI_API_KEY:
    openai.api_key = OPENAI_API_KEY
else:
    MODEL_STATUS['error_messages'].append("OpenAI API key not found")

# Hugging Face cache is wiped once it grows beyond this many gigabytes.
CACHE_SIZE_LIMIT_GB = 40

# Context window size quoted in the "Context Window Status" box.
CONTEXT_WINDOW_TOKENS = 4096


def _directory_size_gb(path):
    """Return the total size of all files below *path* in gigabytes.

    Files that cannot be stat'ed (e.g. broken symlinks, files removed while
    walking) are skipped so that one bad entry does not abort the whole scan.
    """
    total_bytes = 0
    for dirpath, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            try:
                total_bytes += os.path.getsize(os.path.join(dirpath, filename))
            except OSError:
                continue
    return total_bytes / (1024 ** 3)


cache_dir = os.path.expanduser("~/.cache/huggingface")
if os.path.exists(cache_dir):
    try:
        total_size = _directory_size_gb(cache_dir)
        if total_size > CACHE_SIZE_LIMIT_GB:
            shutil.rmtree(cache_dir)
            print(f"Cleared {total_size:.2f}GB cache")
    except Exception as e:
        # Best effort only: a failed cleanup must not stop the app starting.
        print(f"Cache cleanup error: {e}")

# NOTE: kept as a star import at this exact position on purpose - the module
# supplies the Week 4/5 and assignment callbacks used below, and moving or
# narrowing the import could change which names it rebinds in this namespace.
from model_functions import *


def tokenize_text(text):
    """Split *text* into tokens for display.

    Returns:
        A ``(token_strings, token_count, info_message)`` tuple. When tiktoken
        is unavailable the text is split on whitespace instead. On empty input
        or on a tokenizer error the list is empty and the count is 0.
    """
    if not text.strip():
        return [], 0, "Enter some text to see tokenization"

    if not gpt_tokenizer:
        # Fallback: simple whitespace tokenization
        tokens = text.split()
        return (
            tokens,
            len(tokens),
            f"Using fallback tokenization → {len(tokens)} tokens (tiktoken unavailable)",
        )

    try:
        tokens = gpt_tokenizer.encode(text)
        token_strings = []
        for token in tokens:
            try:
                token_strings.append(gpt_tokenizer.decode([token]))
            except UnicodeDecodeError:
                # NOTE(review): the original placeholder literal was lost
                # (empty f-string); showing the raw token id is a
                # reconstruction - confirm the intended text.
                token_strings.append(f"<token_{token}>")
        return (
            token_strings,
            len(tokens),
            f"Text tokenized successfully → {len(tokens)} tokens",
        )
    except Exception as e:
        return [], 0, f"Tokenization error: {str(e)}"


def get_next_token_predictions(text):
    """Get next token predictions using OpenAI API.

    Returns the model's reply text, or a human-readable message when the
    input is empty, the API key is missing, or the request fails.
    """
    if not text.strip():
        return "Enter some text to see predictions"
    if not OPENAI_API_KEY:
        return "OpenAI API key not available - cannot generate predictions"

    try:
        client = openai.OpenAI(api_key=OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Complete the following text with the next most likely word. Provide exactly 3 options with their approximate probabilities."},
                {"role": "user", "content": f"Text: '{text}'\n\nNext word options:"}
            ],
            temperature=0.1,
            max_tokens=50
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error getting predictions: {str(e)}"


def merge_subword_tokens(tokens, attention_matrix):
    """Merge subword tokens back into words for cleaner viz.

    Tokens starting with ``##`` are treated as continuations of the previous
    token. The attention between two merged words is the mean of the
    attention between all of their constituent subword positions.

    Args:
        tokens: Token strings, one per row/column of *attention_matrix*.
        attention_matrix: 2-D numpy array of attention weights.

    Returns:
        ``(merged_tokens, merged_matrix)`` where *merged_matrix* is a square
        numpy array with one row/column per merged word.
    """
    merged_tokens = []
    index_groups = []
    current_word = ""
    current_indices = []

    for i, token in enumerate(tokens):
        if token.startswith('##'):
            current_word += token[2:]
            current_indices.append(i)
        else:
            if current_word:
                merged_tokens.append(current_word)
                index_groups.append(current_indices)
            current_word = token
            current_indices = [i]
    if current_word:
        merged_tokens.append(current_word)
        index_groups.append(current_indices)

    n_rows, n_cols = attention_matrix.shape[0], attention_matrix.shape[1]
    merged_matrix = np.zeros((len(merged_tokens), len(merged_tokens)))
    for i, i_indices in enumerate(index_groups):
        # Ignore indices that fall outside the matrix instead of failing.
        rows = [ii for ii in i_indices if ii < n_rows]
        for j, j_indices in enumerate(index_groups):
            cols = [jj for jj in j_indices if jj < n_cols]
            if rows and cols:
                # Average attention between word groups
                merged_matrix[i, j] = np.mean(attention_matrix[np.ix_(rows, cols)])

    return merged_tokens, merged_matrix


def _select_highest_variance_head(attentions, clean_indices):
    """Pick the attention head whose trimmed matrix has the largest variance.

    Returns ``(matrix, head_name, debug_lines)``; *matrix* is ``None`` when
    *attentions* contains no heads.
    """
    best_attention = None
    best_name = ""
    best_variance = -1
    debug_lines = [f"Total Layers: {len(attentions)}"]

    for layer_idx, layer_att in enumerate(attentions):
        for head_idx in range(layer_att.shape[1]):
            attn_matrix = layer_att[0, head_idx].numpy()
            # Drop the rows/columns that belong to special tokens.
            trimmed = attn_matrix[np.ix_(clean_indices, clean_indices)]
            variance = np.var(trimmed)
            debug_lines.append(
                f"Layer {layer_idx}, Head {head_idx} — Variance: {variance:.5f}"
            )
            if variance > best_variance:
                best_attention = trimmed
                best_name = f"Layer {layer_idx}, Head {head_idx}"
                best_variance = variance

    return best_attention, best_name, debug_lines


def _render_attention_svg(tokens, attention, head_name, debug_lines):
    """Render tokens as nodes on a line with arcs for the strongest attention.

    NOTE(review): the SVG markup literals were missing from the original
    source (empty strings); the elements below are reconstructed from the
    geometry/colour variables the original computed - confirm styling.
    Requires at least two tokens (positions divide by ``len(tokens) - 1``).
    """
    n_tokens = len(tokens)
    width, height = 1000, 500
    margin = 50
    num_top_connections = 20

    # Linear positions: tokens evenly spaced along the horizontal mid-line.
    y_mid = height // 2
    positions = [
        (margin + (width - 2 * margin) * i / (n_tokens - 1), y_mid)
        for i in range(n_tokens)
    ]

    parts = [
        f'<svg width="{width}" height="{height}" viewBox="0 0 {width} {height}" '
        f'xmlns="http://www.w3.org/2000/svg">',
        '<rect width="100%" height="100%" fill="#fafafa"/>',
    ]

    # Choose top-N attention connections (self-attention is excluded).
    pairs = [
        (float(attention[i, j]), i, j)
        for i in range(n_tokens)
        for j in range(n_tokens)
        if i != j
    ]
    pairs.sort(reverse=True)

    # Draw attention arcs
    for weight, i, j in pairs[:num_top_connections]:
        x1, y1 = positions[i]
        x2, y2 = positions[j]
        mid_x = (x1 + x2) / 2
        # Alternate arcs above/below the token line to reduce overlap.
        curve_y = y1 - 80 if (i + j) % 2 == 0 else y1 + 80

        # Color coding
        if weight > 0.08:
            color, opacity = "#d32f2f", "0.8"  # red
        elif weight > 0.04:
            color, opacity = "#ff9800", "0.6"  # orange
        else:
            color, opacity = "#2196f3", "0.4"  # blue
        thickness = max(2, weight * 10)

        parts.append(
            f'<path d="M {x1:.1f} {y1:.1f} Q {mid_x:.1f} {curve_y:.1f} '
            f'{x2:.1f} {y2:.1f}" stroke="{color}" stroke-width="{thickness:.2f}" '
            f'fill="none" opacity="{opacity}"/>'
        )

    # Draw nodes
    for token, (x, y) in zip(tokens, positions):
        parts.append(
            f'<circle cx="{x:.1f}" cy="{y:.1f}" r="22" fill="#e3f2fd" '
            f'stroke="#1976d2" stroke-width="2"/>'
        )
        # Token text originates from user input, so escape it.
        parts.append(
            f'<text x="{x:.1f}" y="{y + 4:.1f}" text-anchor="middle" '
            f'font-family="Arial" font-size="11" fill="#000">'
            f'{html.escape(token[:10])}</text>'
        )

    # Legend and info
    parts.append(
        f'<rect x="10" y="8" width="{width - 20}" height="46" fill="#ffffff" '
        f'stroke="#cccccc"/>'
    )
    parts.append(
        f'<text x="20" y="28" font-family="Arial" font-size="16" '
        f'font-weight="bold" fill="#000">Attention Network - '
        f'{html.escape(head_name)}</text>'
    )
    parts.append(
        f'<text x="20" y="46" font-family="Arial" font-size="12" fill="#555">'
        f'Red: Strong | Orange: Medium | Blue: Weak | Showing top '
        f'{num_top_connections} connections</text>'
    )

    # Debug info (limited lines)
    for i, line in enumerate(debug_lines[:8]):
        parts.append(
            f'<text x="20" y="{height - 115 + i * 14}" font-family="monospace" '
            f'font-size="10" fill="#777">{html.escape(line)}</text>'
        )

    parts.append('</svg>')
    return "".join(parts)


def create_attention_network_svg(text):
    """Build an SVG graph of the most "interesting" attention head for *text*.

    The head with the highest variance in its attention weights is chosen,
    subword tokens are merged into words, and the strongest connections are
    drawn as arcs. Returns SVG markup, or a plain message when the input is
    empty, the attention model is not loaded, or an error occurs.
    """
    if not text.strip():
        return "Enter text to see attention network"
    if not MODEL_STATUS['model_loaded']:
        return f"Attention model not available. Errors: {MODEL_STATUS['error_messages']}"

    try:
        # Imported here because the module-level import lives in the
        # (currently commented-out) model loading block.
        import torch

        # Tokenize input
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=64)
        tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

        with torch.no_grad():
            outputs = model(**inputs, output_attentions=True)

        # Remove special tokens
        special_tokens = ('[CLS]', '[SEP]', '[PAD]')
        clean_indices = [i for i, tok in enumerate(tokens) if tok not in special_tokens]
        clean_tokens = [tokens[i] for i in clean_indices]
        if len(clean_indices) < 2:
            return "Need at least 2 valid tokens for attention visualisation."

        # SEARCH for best head: max variance
        best_attention, best_name, debug_lines = _select_highest_variance_head(
            outputs.attentions, clean_indices
        )
        if best_attention is None:
            return "Could not extract valid attention."

        # Merge subwords
        merged_tokens, merged_attention = merge_subword_tokens(clean_tokens, best_attention)
        if len(merged_tokens) < 2:
            return "Too few tokens after merging for attention graph."

        return _render_attention_svg(merged_tokens, merged_attention, best_name, debug_lines)
    except Exception as e:
        return f"Error generating attention network: {str(e)}"


with gr.Blocks() as demo:
    gr.Markdown("# Language Models & Methods Lab Interface")

    with gr.Tabs() as tabs:
        # Week 3 Tab
        with gr.Tab("Week 3: Text Processing"):
            gr.Markdown("# How Language Models Process Text")
            gr.Markdown("Explore tokenization, context windows, and attention mechanisms")

            with gr.Tabs() as week3_tabs:
                with gr.Tab("Tokenization Explorer"):
                    gr.Markdown("### See how text gets broken into tokens")
                    with gr.Row():
                        token_input = gr.Textbox(
                            label="Enter your text",
                            placeholder="Type any text to see how it gets tokenized...",
                            lines=3,
                            value="The quick brown fox jumps over the lazy dog."
                        )
                    with gr.Row():
                        tokenize_btn = gr.Button("Tokenize Text")
                    with gr.Row():
                        token_display = gr.Textbox(label="Tokens", lines=3, interactive=False)
                        token_count = gr.Number(label="Token Count", interactive=False)
                    with gr.Row():
                        token_info = gr.Textbox(label="Tokenization Info", lines=2, interactive=False)

                with gr.Tab("Context & Predictions"):
                    gr.Markdown("### Next-word predictions and context understanding")
                    with gr.Row():
                        context_input = gr.Textbox(
                            label="Enter incomplete text",
                            placeholder="I went to the bank to",
                            lines=2,
                            value="I went to the bank to"
                        )
                    with gr.Row():
                        predict_btn = gr.Button("Get Next Word Predictions")
                    with gr.Row():
                        predictions_output = gr.Textbox(label="Most Likely Next Words", lines=5, interactive=False)
                    with gr.Row():
                        context_window_info = gr.Textbox(
                            label="Context Window Status",
                            value="Click 'Get Predictions' to see token usage",
                            interactive=False
                        )

                with gr.Tab("Attention Network"):
                    gr.Markdown("### Network visualisation of attention patterns")
                    gr.Markdown("See how words connect to each other through attention mechanisms")
                    with gr.Row():
                        attention_input = gr.Textbox(
                            label="Enter a sentence (shorter sentences work better)",
                            placeholder="The bank was closed.",
                            lines=2,
                            value="The bank was closed."
                        )
                    with gr.Row():
                        analyze_attention_btn = gr.Button("Generate Attention Network")
                    with gr.Row():
                        attention_network = gr.HTML(label="Attention Network Visualisation")

            # Week 3 Event Handlers
            def update_tokenization(text):
                """Return (joined tokens, count, info) for the tokenization tab."""
                tokens, count, info = tokenize_text(text)
                token_str = " | ".join(tokens) if tokens else ""
                return token_str, count, info

            def update_predictions_with_context(text):
                """Return (predictions, context-window usage message) for *text*."""
                if not text.strip():
                    return "Enter text to get predictions", "No text to analyze"

                # Get token count for context window
                # NOTE(review): counted with the "gpt2" encoding, so the figure
                # is only an approximation of GPT-3.5 token usage.
                _, n_tokens, _ = tokenize_text(text)
                context_status = (
                    f"Current text: {n_tokens} tokens / {CONTEXT_WINDOW_TOKENS} "
                    f"(GPT-3.5 limit) = {n_tokens / CONTEXT_WINDOW_TOKENS * 100:.1f}% used"
                )

                # Get predictions
                predictions = get_next_token_predictions(text)
                return predictions, context_status

            def generate_network_visualization(text):
                """Return the attention-network SVG (or message) for *text*."""
                return create_attention_network_svg(text)

            # Connect event handlers
            tokenize_btn.click(
                update_tokenization,
                inputs=[token_input],
                outputs=[token_display, token_count, token_info]
            )
            # Auto-update tokenization as user types
            token_input.change(
                update_tokenization,
                inputs=[token_input],
                outputs=[token_display, token_count, token_info]
            )
            predict_btn.click(
                update_predictions_with_context,
                inputs=[context_input],
                outputs=[predictions_output, context_window_info]
            )
            analyze_attention_btn.click(
                generate_network_visualization,
                inputs=[attention_input],
                outputs=[attention_network]
            )

        # OTHER WEEKS
        with gr.Tab("Week 4: Controlling Model Behaviour"):
            gr.Markdown("# Controlling Model Behaviour Through Prompting")
            gr.Markdown("Explore how different prompting techniques and parameters affect model outputs")

            with gr.Tabs() as week4_tabs:
                with gr.Tab("Temperature Effects"):
                    gr.Markdown("### Compare how temperature affects creativity and consistency")
                    with gr.Row():
                        temp_input = gr.Textbox(
                            label="Enter your prompt",
                            placeholder="Type your question or prompt here...",
                            lines=3,
                            value="Write a creative opening sentence for a story about a robot looking for a friend."
                        )
                    with gr.Row():
                        temp_slider1 = gr.Slider(
                            minimum=0.1,
                            maximum=0.4,
                            value=0.2,
                            step=0.1,
                            label="Low Temperature (More Focused & Consistent)"
                        )
                        temp_slider2 = gr.Slider(
                            minimum=0.7,
                            maximum=1.0,
                            value=0.9,
                            step=0.1,
                            label="High Temperature (More Creative & Varied)"
                        )
                    with gr.Row():
                        generate_temp = gr.Button("Generate Both Responses")
                    with gr.Row():
                        focused_output = gr.Textbox(
                            label="Focused Output (Low Temperature)",
                            lines=5
                        )
                        creative_output = gr.Textbox(
                            label="Creative Output (High Temperature)",
                            lines=5
                        )

                with gr.Tab("System Prompts"):
                    gr.Markdown("### See how system prompts shape model behaviour")
                    with gr.Row():
                        system_input = gr.Textbox(
                            label="Enter your prompt",
                            placeholder="Type your question or prompt here...",
                            lines=3,
                            value="Explain what a database index is."
                        )
                    with gr.Row():
                        system_prompt_dropdown = gr.Dropdown(
                            choices=[
                                "You are a helpful assistant providing accurate, concise answers.",
                                "You are a data scientist explaining technical concepts with precision and examples.",
                                "You are a creative storyteller who uses vivid metaphors and analogies.",
                                "You are a critical reviewer who evaluates information carefully and points out limitations.",
                                "You are a friendly teacher explaining concepts to someone learning for the first time."
                            ],
                            label="Choose System Prompt",
                            value="You are a helpful assistant providing accurate, concise answers."
                        )
                    with gr.Row():
                        generate_system = gr.Button("Generate Response")
                    with gr.Row():
                        system_output = gr.Textbox(label="Output", lines=6)

                with gr.Tab("Prompting Techniques"):
                    gr.Markdown(
                        "### Compare Zero-Shot, Few-Shot, and Chain-of-Thought\n"
                        "- **Zero-shot:** Direct question without examples\n"
                        "- **Few-shot:** You should provide similar examples to guide the response\n"
                        "- **Chain-of-thought:** Asks model to break down reasoning step-by-step\n"
                    )
                    with gr.Row():
                        shot_input = gr.Textbox(
                            label="Enter your task",
                            placeholder="Enter a task that requires reasoning...",
                            lines=3,
                            value="Classify the sentiment: 'The product works okay but customer service was terrible.'"
                        )
                    with gr.Row():
                        approach_type = gr.Radio(
                            ["zero-shot", "few-shot", "chain-of-thought"],
                            label="Select Prompting Technique",
                            value="zero-shot"
                        )
                    with gr.Row():
                        generate_shot = gr.Button("Generate Response")
                    with gr.Row():
                        shot_output = gr.Textbox(label="Output", lines=8)

                with gr.Tab("Combining Techniques"):
                    gr.Markdown("### Experiment with combining multiple techniques")
                    with gr.Row():
                        combo_input = gr.Textbox(
                            label="Enter your task",
                            placeholder="Enter a complex task...",
                            lines=3,
                            value="Analyse this review and suggest improvements: 'App crashes sometimes but has good features.'"
                        )
                    with gr.Row():
                        combo_system = gr.Dropdown(
                            choices=[
                                "None (default)",
                                "You are a product analyst providing structured feedback.",
                                "You are a UX researcher focused on user experience.",
                            ],
                            label="System Prompt (optional)",
                            value="None (default)"
                        )
                    with gr.Row():
                        combo_examples = gr.Checkbox(
                            label="Include few-shot examples",
                            value=False
                        )
                        combo_cot = gr.Checkbox(
                            label="Use chain-of-thought reasoning",
                            value=False
                        )
                    with gr.Row():
                        combo_temp = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.5,
                            step=0.1,
                            label="Temperature"
                        )
                    with gr.Row():
                        generate_combo = gr.Button("Generate Response")
                    with gr.Row():
                        combo_output = gr.Textbox(label="Output", lines=8)
                        combo_info = gr.Textbox(label="Techniques Applied", lines=4)

            # Week 4 event wiring; the generate_* callbacks come from
            # model_functions.
            generate_temp.click(
                lambda x, t1, t2: [
                    generate_with_temperature(x, t1),
                    generate_with_temperature(x, t2)
                ],
                inputs=[temp_input, temp_slider1, temp_slider2],
                outputs=[focused_output, creative_output]
            )
            generate_system.click(
                generate_with_system_prompt,
                inputs=[system_input, system_prompt_dropdown],
                outputs=system_output
            )
            generate_shot.click(
                generate_with_examples,
                inputs=[shot_input, approach_type],
                outputs=shot_output
            )
            generate_combo.click(
                generate_combined_techniques,
                inputs=[combo_input, combo_system, combo_examples, combo_cot, combo_temp],
                outputs=[combo_output, combo_info]
            )

        with gr.Tab("Week 5: Advanced Prompting"):
            gr.Markdown("# Advanced Prompt Engineering Techniques")
            gr.Markdown("Explore sophisticated prompting strategies and visualise reasoning patterns")

            with gr.Tabs() as week5_tabs:
                with gr.Tab("Tree of Thought Explorer"):
                    gr.Markdown(
                        "### Visualise Multi-Path Reasoning\n"
                        "The model will break down your problem into multiple approaches, "
                        "evaluate each one, and select the best path.\n"
                    )
                    with gr.Row():
                        tot_input = gr.Textbox(
                            label="Enter a problem to solve",
                            placeholder="e.g., How can we improve user engagement on a mobile app?",
                            lines=3,
                            value="How should a startup decide between building a mobile app or a web application first?"
                        )
                    with gr.Row():
                        generate_tot = gr.Button("Generate Tree of Thought", variant="primary")
                    with gr.Row():
                        tot_output = gr.Textbox(
                            label="Reasoning Process",
                            lines=12
                        )
                    with gr.Row():
                        tot_visualization = gr.HTML(
                            label="Tree Visualisation"
                        )

                with gr.Tab("Self-Consistency Testing"):
                    gr.Markdown(
                        "### Test Response Consistency\n"
                        "Run the same prompt multiple times to identify consistent "
                        "patterns and areas of uncertainty.\n"
                    )
                    with gr.Row():
                        consistency_input = gr.Textbox(
                            label="Enter your prompt",
                            placeholder="Ask a question that requires reasoning...",
                            lines=3,
                            value="What are the three most important factors in choosing a database system?"
                        )
                    with gr.Row():
                        num_runs = gr.Slider(
                            minimum=3,
                            maximum=5,
                            value=3,
                            step=1,
                            label="Number of generations"
                        )
                        consistency_temp = gr.Slider(
                            minimum=0.3,
                            maximum=0.9,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )
                    with gr.Row():
                        generate_consistency = gr.Button("Generate Multiple Responses", variant="primary")
                    with gr.Row():
                        consistency_analysis = gr.Textbox(
                            label="Analysis Guide",
                            lines=4
                        )
                    with gr.Row():
                        consistency_output1 = gr.Textbox(label="Response 1", lines=5)
                        consistency_output2 = gr.Textbox(label="Response 2", lines=5)
                    with gr.Row():
                        consistency_output3 = gr.Textbox(label="Response 3", lines=5)
                        consistency_output4 = gr.Textbox(label="Response 4 (if selected)", lines=5, visible=True)
                    with gr.Row():
                        consistency_output5 = gr.Textbox(label="Response 5 (if selected)", lines=5, visible=True)

                with gr.Tab("Prompt Structure Comparison"):
                    gr.Markdown(
                        "### Compare Structural Strategies\n"
                        "Test how different prompt structures affect model attention "
                        "and output quality.\n"
                    )
                    with gr.Row():
                        structure_input = gr.Textbox(
                            label="Enter your task",
                            placeholder="Enter a task or question...",
                            lines=3,
                            value=""
                        )
                    with gr.Row():
                        gr.Markdown("### Select ONE structure to test:")
                    with gr.Row():
                        structure_radio = gr.Radio(
                            choices=[
                                "Baseline (no special structure)",
                                "Front-loading (critical instruction first)",
                                "Delimiter strategy (section separation)",
                                "Sandwich technique (instruction at start and end)"
                            ],
                            label="Prompt Structure",
                            value="Baseline (no special structure)"
                        )
                    with gr.Row():
                        generate_structure = gr.Button("Generate Response", variant="primary")
                    with gr.Row():
                        structure_output = gr.Textbox(
                            label="Response",
                            lines=8
                        )
                        structure_info = gr.Textbox(
                            label="Structure Information",
                            lines=8
                        )

            # Week 5 Event Handlers
            def handle_tot(task):
                """Return (reasoning text, tree SVG) from generate_tot_response."""
                text_output, svg_output = generate_tot_response(task)
                return text_output, svg_output

            def handle_consistency(prompt, runs, temp):
                """Return the analysis followed by exactly five response slots.

                Unused slots are filled with empty strings. A copy of the
                response list is padded so the list returned by
                generate_self_consistency is not mutated.
                """
                responses, analysis = generate_self_consistency(prompt, int(runs), temp)
                padded = list(responses)
                padded.extend([""] * (5 - len(padded)))
                return analysis, padded[0], padded[1], padded[2], padded[3], padded[4]

            def handle_structure(task, structure_choice):
                """Translate the radio choice into flags for compare_prompt_structures."""
                use_frontload = "Front-loading" in structure_choice
                use_delimiters = "Delimiter" in structure_choice
                use_sandwich = "Sandwich" in structure_choice
                output, info = compare_prompt_structures(task, use_frontload, use_delimiters, use_sandwich)
                return output, info

            generate_tot.click(
                handle_tot,
                inputs=[tot_input],
                outputs=[tot_output, tot_visualization]
            )
            generate_consistency.click(
                handle_consistency,
                inputs=[consistency_input, num_runs, consistency_temp],
                outputs=[consistency_analysis, consistency_output1, consistency_output2,
                         consistency_output3, consistency_output4, consistency_output5]
            )
            generate_structure.click(
                handle_structure,
                inputs=[structure_input, structure_radio],
                outputs=[structure_output, structure_info]
            )

        # with gr.Tab("Week 8: Error Detection"):
        #     # Week 8 content here
        #     pass

        with gr.Tab("Assignment 1"):
            gr.Markdown("# Assignment 1: Prompting Strategy Evaluation")
            gr.Markdown(
                "Test different prompting strategies for your chosen NLP task. "
                "Remember: You need 3 documents, with 2 different strategies "
                "tested per document (6 total experiments).\n"
            )
            with gr.Row():
                assignment_task = gr.Dropdown(
                    choices=["Sentiment Analysis", "Summarisation"],
                    label="Select NLP Task",
                    value="Sentiment Analysis"
                )
            with gr.Row():
                with gr.Column():
                    assignment_text = gr.Textbox(
                        label="Enter Text",
                        placeholder="Paste your document text here...",
                        lines=6
                    )
                with gr.Column():
                    assignment_file = gr.File(
                        label="OR Upload a File (TXT or PDF)",
                        file_types=[".txt", ".pdf"],
                        type="binary"
                    )

            gr.Markdown("### Select Your Prompting Strategy")
            with gr.Row():
                strategy_type = gr.Radio(
                    choices=[
                        "Direct (no special technique)",
                        "Chain-of-thought (step-by-step reasoning)",
                        "Role-based (uses system prompt)",
                        "Combined (role + chain-of-thought)"
                    ],
                    label="Prompting Strategy",
                    value="Direct (no special technique)",
                    info="Choose how the model should approach the task"
                )
            with gr.Row():
                system_role = gr.Dropdown(
                    choices=[
                        "None",
                        "Technical analyst",
                        "Creative assistant"
                    ],
                    label="System Role (for role-based strategies)",
                    value="None",
                    info="Only applies if you selected a role-based strategy"
                )
            with gr.Row():
                assignment_temp = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.5,
                    step=0.1,
                    label="Temperature (0.1 = focused, 1.0 = creative)"
                )
            with gr.Row():
                generate_assignment = gr.Button("Generate Response", variant="primary")
            with gr.Row():
                assignment_output = gr.Textbox(
                    label="Model Output",
                    lines=12
                )
            with gr.Row():
                assignment_info = gr.Textbox(
                    label="Strategy Applied",
                    lines=3,
                    info="Documents which settings were used for this experiment"
                )

            # handle_assignment_experiment is provided by model_functions.
            generate_assignment.click(
                handle_assignment_experiment,
                inputs=[assignment_text, assignment_file, assignment_task,
                        strategy_type, system_role, assignment_temp],
                outputs=[assignment_output, assignment_info]
            )

demo.launch()