Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import GPT2LMHeadModel, GPT2Tokenizer | |
| import torch | |
# Page-level configuration. Kept as the first Streamlit call in the script.
st.set_page_config(
    page_title="NeoProtein Designer",
    page_icon="🧬",
)

# Title followed by a short introduction linking to the model card.
INTRO_MARKDOWN = """
### Design novel protein sequences with unique binding sites
*Using the [NeoProtein-GPT](https://huggingface.co/ayyuce/NeoProtein-GPT) model*
"""
st.title("🧬 NeoProtein-GPT Protein Designer")
st.markdown(INTRO_MARKDOWN)
# Generation parameters, collected in the sidebar. The four names assigned
# here are module-level values read later by generate_sequences().
with st.sidebar:
    st.header("Parameters")

    # Motif text that is embedded into the generation prompt.
    binding_motif = st.text_input(
        "Binding site motif (e.g., AXXC):",
        help="Use X for wildcard positions",
    )

    # Sliders: (min, max, default) spelled out by keyword.
    seq_length = st.slider(
        "Sequence length",
        min_value=50,
        max_value=500,
        value=150,
    )
    temperature = st.slider(
        "Temperature (creativity)",
        min_value=0.1,
        max_value=2.0,
        value=1.0,
    )
    num_sequences = st.slider(
        "Number of sequences",
        min_value=1,
        max_value=5,
        value=3,
    )
@st.cache_resource
def load_model():
    """Load the NeoProtein-GPT model and tokenizer once per server process.

    Streamlit re-executes this script on every widget interaction, so the
    loader is wrapped in ``st.cache_resource``: the first call downloads
    (or reads from the local Hugging Face cache) and instantiates the
    objects, and later reruns reuse them instead of loading again.

    The previous ``force_download=True`` forced a fresh download of the
    weights on every call and has been removed, together with the
    deprecated ``resume_download`` flag, the redundant
    ``local_files_only=False`` default, and ``trust_remote_code=True``,
    which is not needed when loading through the built-in GPT-2 classes.

    Returns:
        tuple: ``(model, tokenizer)`` as a ``GPT2LMHeadModel`` and a
        ``GPT2Tokenizer`` loaded from the ``ayyuce/NeoProtein-GPT`` repo.
    """
    repo_id = "ayyuce/NeoProtein-GPT"
    model = GPT2LMHeadModel.from_pretrained(repo_id)
    tokenizer = GPT2Tokenizer.from_pretrained(repo_id)
    return model, tokenizer


# Shared by generate_sequences(); cheap on reruns thanks to the cache above.
model, tokenizer = load_model()
def generate_sequences():
    """Sample protein sequences conditioned on the sidebar's binding motif.

    Reads the module-level widget values ``binding_motif``, ``seq_length``,
    ``temperature`` and ``num_sequences`` and uses the module-level
    ``model`` and ``tokenizer``. ``seq_length`` is applied as the maximum
    number of newly generated tokens after the prompt.

    Returns:
        list[str]: One decoded continuation per requested sequence, with
        the prompt tokens removed. An empty list is returned when no motif
        was entered or when generation fails; in both cases the problem is
        reported to the user through ``st.error``.
    """
    # Surrounding whitespace is not part of the motif; a whitespace-only
    # entry is treated the same as an empty one.
    motif = binding_motif.strip()
    if not motif:
        st.error("Please enter a binding motif")
        # Always return a list so callers get one consistent type.
        return []

    prompt = f"<start>BindingMotif:{motif}<start>Seq:"
    try:
        inputs = tokenizer(prompt, return_tensors="pt")
        input_length = inputs.input_ids.shape[1]
        outputs = model.generate(
            inputs.input_ids,
            # Pass the mask explicitly: the pad id is set to the EOS id
            # below, so generate() cannot infer the mask from the ids.
            attention_mask=inputs.attention_mask,
            # Same budget as max_length=input_length + seq_length.
            max_new_tokens=seq_length,
            temperature=temperature,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            num_return_sequences=num_sequences,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Each output row starts with the prompt; keep only what follows.
        return [
            tokenizer.decode(output[input_length:], skip_special_tokens=True)
            for output in outputs
        ]
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing
        # the page.
        st.error(f"Generation failed: {e}")
        return []
# The button is True only on the rerun triggered by its click.
generate_clicked = st.button("Generate Protein Sequences")
if generate_clicked:
    with st.spinner("Designing novel proteins..."):
        sequences = generate_sequences()

    # A falsy result means nothing to show; generate_sequences() has
    # already reported the reason to the user.
    if sequences:
        st.subheader("Generated Sequences")
        for number, seq in enumerate(sequences, start=1):
            st.markdown(f"""
**Sequence #{number}**
```fasta
{seq}
```
""")
# Static usage instructions rendered below the results area.
USAGE_MARKDOWN = """
### How to use:
1. Enter your target binding motif using single-letter amino acid codes
2. Adjust parameters in the sidebar
3. Click the generate button
4. Results will appear in FASTA format
**Example motifs:**
- `GHXXXH` for histidine-rich motifs
- `CXXC` for disulfide bond motifs
- `DE` for acidic patches
"""
st.markdown(USAGE_MARKDOWN)