# Configuration for Clinical Trial Matching Pipeline
#
# Edit the values below to set your default models and trial database.
# Models will auto-load on application startup.

# ============================================================================
# MODEL PATHS - Set your default models here
# ============================================================================
# Each value is either a local model path or a HuggingFace ID.
# Set a value to None to skip auto-loading that model.

MODEL_CONFIG = {
    # TinyBERT tagger for extracting relevant excerpts.
    # e.g., "prajjwal1/bert-tiny" or "./auto-tiny-bert-tagger"
    "tagger": "kenlkehl/mmai-tiny-bert-tagger",

    # Sentence transformer for embedding patient summaries and trials.
    # e.g., "Qwen/Qwen3-Embedding-0.6B" or "./reranker_round2.model"
    "embedder": "kenlkehl/mmai-trialspace",

    # Large language model for patient history summarization.
    # e.g., "microsoft/Phi-3-mini-4k-instruct" or "openai/gpt-oss-120b"
    "llm": "kenlkehl/mmai-oncoreasoning-3b",
    # Alternative LLM, kept commented out so only one "llm" entry is active:
    # "llm": "meta-llama/Llama-3.2-1B-Instruct",

    # ModernBERT classifier for eligibility prediction.
    # e.g., "answerdotai/ModernBERT-large" or "./modernbert-trial-checker"
    "trial_checker": "kenlkehl/mmai-trial-checker",

    # ModernBERT classifier for boilerplate exclusion prediction.
    # e.g., "answerdotai/ModernBERT-large" or "./modernbert-boilerplate-checker"
    "boilerplate_checker": "kenlkehl/mmai-boilerplate-checker",
}

# Example configuration with base models:
# MODEL_CONFIG = {
#     "tagger": "prajjwal1/bert-tiny",
#     "embedder": "Qwen/Qwen3-Embedding-0.6B",
#     "llm": "microsoft/Phi-3-mini-4k-instruct",
#     "trial_checker": "answerdotai/ModernBERT-large",
#     "boilerplate_checker": "answerdotai/ModernBERT-large",
# }

# Example configuration with fine-tuned models:
# MODEL_CONFIG = {
#     "tagger": "./auto-tiny-bert-tagger",
#     "embedder": "./reranker_round2.model",
#     "llm": "/data/models/gpt-oss-120b",
#     "trial_checker": "./modernbert-trial-checker",
#     "boilerplate_checker": "./modernbert-boilerplate-checker",
# }

# ============================================================================
# DEFAULT TRIAL DATABASE
# ============================================================================
# Path to default trial database CSV/Excel file.
# Will auto-load and embed when embedder model is ready.
# Set to None to disable auto-loading.
# e.g., "./my_trials.csv" or "./sample_trials.csv"
DEFAULT_TRIAL_DB = "trial_space_lineitems.csv"

# NOTE(review): presumably the location (path or name prefix) of precomputed
# trial embeddings -- the consumer is not visible in this file; confirm
# against the application code before changing.
PREEMBEDDED_TRIALS = "trial_embeddings"

# ============================================================================
# USAGE NOTES
# ============================================================================
#
# 1. Set the model paths above to your preferred models
# 2. Optionally set DEFAULT_TRIAL_DB to your trial database file
# 3. Save this file
# 4. Run: python trial_matching_app.py
# 5. Models will load automatically on startup
#
# You can still manually load different models through the web interface
# if you need to switch models during a session.
#