Spaces:
Sleeping
Sleeping
| # Configuration file for PerplexityViewer | |
# Model identifiers offered by default, grouped under the two model-type
# keys "decoder" and "encoder".
# NOTE(review): the "org/name" entries look like Hugging Face Hub ids — confirm.
DEFAULT_MODELS = dict(
    decoder=[
        "gpt2",
        "distilgpt2",
        "microsoft/DialoGPT-small",
        "microsoft/DialoGPT-medium",
        "openai-gpt",
    ],
    encoder=[
        "bert-base-uncased",
        "bert-base-cased",
        "distilbert-base-uncased",
        "roberta-base",
        "albert-base-v2",
        "UMCU/CardioMedRoBERTa.nl",
        "UMCU/CardioBERTa_base.nl",
        "UMCU/CardioBERTa.nl_clinical",
        "UMCU/CardioDeBERTa.nl",
        "UMCU/CardioDeBERTa.nl_clinical",
        "CLTL/MedRoBERTa.nl",
        "DTAI-KULeuven/robbert-2023-dutch-base",
        "DTAI-KULeuven/robbert-2023-dutch-large",
    ],
)
# Model settings: a length limit, a dtype name and a device-map mode.
# NOTE(review): the key names suggest these are forwarded to tokenization /
# model loading rather than to display code — confirm against the caller.
MODEL_SETTINGS = dict(
    max_length=512,
    torch_dtype="float16",
    device_map="auto",
)
# Visualization settings: display cap, colour palette, band thresholds and
# displaCy options.
VIZ_SETTINGS = dict(
    # Perplexity values are capped at this number for visualization.
    max_perplexity_display=50.0,
    color_scheme=dict(
        # RGB components for each perplexity band.
        low_perplexity=dict(r=46, g=204, b=113),  # green: low perplexity (confident)
        medium_perplexity=dict(r=241, g=196, b=15),  # yellow: medium perplexity
        high_perplexity=dict(r=231, g=76, b=60),  # red: high perplexity (uncertain)
        background_alpha=0.7,  # background transparency
        border_alpha=0.9,  # border transparency
    ),
    # NOTE(review): 0.3 / 0.7 look like fractions of a normalized scale rather
    # than raw perplexity values — confirm against the code that reads them.
    thresholds=dict(
        low_threshold=0.3,  # below this counts as low perplexity (green)
        high_threshold=0.7,  # above this counts as high perplexity (red)
    ),
    displacy_options=dict(
        ents=["PP"],
        # Empty here; presumably populated elsewhere — TODO confirm.
        colors={},
    ),
)
# Processing settings: numerical guard plus masking / sampling defaults and
# their allowed bounds.
PROCESSING_SETTINGS = dict(
    epsilon=1e-10,  # tiny constant used to avoid log(0)
    default_mask_probability=0.15,
    min_mask_probability=0.05,
    max_mask_probability=0.5,
    default_min_samples=10,
    min_samples_range=(5, 50),
)
# UI settings: theme name, page title, markdown description and the list of
# pre-filled examples.
UI_SETTINGS = {
    "theme": "soft",
    # NOTE(review): the leading "π" may be a mis-decoded emoji — confirm the
    # intended glyph before changing it.
    "title": "π Perplexity Viewer",
    "description": """
Visualize per-token perplexity using color gradients.
- **Red**: High perplexity (model is uncertain)
- **Green**: Low perplexity (model is confident)
Choose between decoder models (like GPT) for true perplexity or encoder models (like BERT) for pseudo-perplexity via MLM.
""",
    # Each row is (text, model, type, mask_prob, min_samples); the
    # comprehension expands rows into the dicts stored under "examples".
    "examples": [
        {
            "text": sample_text,
            "model": model_name,
            "type": model_kind,
            "mask_prob": mask_prob,
            "min_samples": min_samples,
        }
        for sample_text, model_name, model_kind, mask_prob, min_samples in (
            ("The quick brown fox jumps over the lazy dog.", "gpt2", "decoder", 0.15, 10),
            ("The capital of France is Paris.", "bert-base-uncased", "encoder", 0.15, 10),
            (
                "Quantum entanglement defies classical physics intuition completely.",
                "distilgpt2",
                "decoder",
                0.15,
                10,
            ),
            (
                "Machine learning requires large datasets for training.",
                "distilbert-base-uncased",
                "encoder",
                0.2,
                15,
            ),
            (
                "Artificial intelligence transforms modern computing paradigms.",
                "bert-base-uncased",
                "encoder",
                0.1,
                20,
            ),
        )
    ],
}
# User-facing error messages keyed by error kind. Some templates contain
# {model_name} / {error} placeholders — presumably filled in with str.format
# by the caller.
ERROR_MESSAGES = dict(
    empty_text="Please enter some text to analyze.",
    model_load_error="Error loading model {model_name}: {error}",
    processing_error="Error processing text: {error}",
    no_tokens_masked="No tokens were masked during MLM processing.",
    invalid_model_type="Invalid model type. Must be 'encoder' or 'decoder'.",
)