ManMenGon committed on
Commit
ccd1e65
·
verified ·
1 Parent(s): e493cfd

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -58
app.py CHANGED
@@ -1,58 +1,56 @@
1
-
2
- import gradio as gr
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
- import torch
5
-
6
- # Cargar el modelo solo una vez
7
- model = AutoModelForCausalLM.from_pretrained("nferruz/ProtGPT2")
8
- tokenizer = AutoTokenizer.from_pretrained("nferruz/ProtGPT2")
9
- tokenizer.pad_token = tokenizer.eos_token
10
-
11
- # Traducción entre moléculas
12
- def transcode_phrase(phrase, src, dst):
13
- if src == dst:
14
- return "⚠️ Source and target are the same."
15
- if src == "DNA" and dst == "RNA":
16
- return phrase.replace("~d:", ":r:").replace("Exon", "Ex").replace("Intr", "removed")
17
- elif src == "RNA" and dst == "Protein":
18
- return phrase.replace(":r:", "^p:").replace("Ex1", "Dom(Kin)").replace("Ex2", "Mot(NLS)")
19
- elif src == "Protein" and dst == "DNA":
20
- return phrase.replace("^p:", "~d:").replace("Dom(Kin)", "Exon1").replace("Mot(NLS)", "Exon2")
21
- else:
22
- return "❌ Translation not implemented."
23
-
24
- # Generar proteína a partir de frase
25
- semillas = {
26
- "^p:Dom(Kin)-Mot(NLS)*AcK@147=Localize(Nucleus)": "MKKK",
27
- "^p:Mot(NLS)-Mot(PEST)*P@120": "MKSP",
28
- "^p:Dom(ZnF)-Mot(NLS)*UbK@42": "MKHG",
29
- }
30
-
31
- def generar_desde_frase(frase):
32
- semilla = semillas.get(frase, "MKKK")
33
- inputs = tokenizer(semilla, return_tensors="pt", padding=True)
34
- outputs = model.generate(**inputs, max_length=200, do_sample=True, top_k=950, temperature=1.5, num_return_sequences=1)
35
- secuencia = tokenizer.decode(outputs[0], skip_special_tokens=True)
36
- return f"🧪 Seed: {semilla}"
37
- 🧬 Generated Protein:
38
- {secuencia}"
39
-
40
- # Interfaz Gradio
41
- with gr.Blocks() as demo:
42
- with gr.Tab("Phrase → Protein"):
43
- gr.Markdown("### Generate Protein Sequence from GeneForgeLang Phrase")
44
- input_frase = gr.Textbox(label="Input Phrase")
45
- output_prot = gr.Textbox(label="Generated Protein")
46
- boton_gen = gr.Button("Generate")
47
- boton_gen.click(fn=generar_desde_frase, inputs=input_frase, outputs=output_prot)
48
-
49
- with gr.Tab("Transcode Across Molecules"):
50
- gr.Markdown("### Convert between DNA, RNA, and Protein symbolic phrases")
51
- input_phrase = gr.Textbox(label="Input GeneForgeLang Phrase")
52
- src_select = gr.Radio(choices=["DNA", "RNA", "Protein"], label="Translate From", value="DNA")
53
- dst_select = gr.Radio(choices=["DNA", "RNA", "Protein"], label="Translate To", value="RNA")
54
- output = gr.Textbox(label="Translated Phrase")
55
- trans_btn = gr.Button("Translate")
56
- trans_btn.click(fn=transcode_phrase, inputs=[input_phrase, src_select, dst_select], outputs=output)
57
-
58
- demo.launch()
 
1
+
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import torch
5
+
6
# Load the tokenizer and the model a single time, at import, so that every
# request handled by the app reuses the same objects.
_CHECKPOINT = "nferruz/ProtGPT2"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
10
+
11
# Translation between molecule types
def transcode_phrase(phrase, src, dst):
    """Translate a GeneForgeLang phrase from one molecule notation to another.

    Only the steps DNA -> RNA, RNA -> Protein and Protein -> DNA are
    implemented; each one is a fixed series of textual substitutions.

    Args:
        phrase: The phrase to translate. ``None`` is treated as an empty
            string.
        src: Source molecule type ("DNA", "RNA" or "Protein").
        dst: Target molecule type ("DNA", "RNA" or "Protein").

    Returns:
        The translated phrase, or a user-facing message when ``src`` equals
        ``dst`` or when the requested pair is not implemented.
    """
    import re  # local import keeps this block self-contained

    if src == dst:
        return "⚠️ Source and target are the same."

    # Guard against a missing value so the substitutions below cannot raise.
    phrase = phrase or ""

    if src == "DNA" and dst == "RNA":
        return phrase.replace("~d:", ":r:").replace("Exon", "Ex").replace("Intr", "removed")

    if src == "RNA" and dst == "Protein":
        translated = phrase.replace(":r:", "^p:")
        # Only rewrite the exact labels Ex1 / Ex2. A plain substring replace
        # would also hit the prefix of Ex10, Ex21, ... and corrupt them
        # (e.g. "Ex10" -> "Dom(Kin)0").
        translated = re.sub(r"Ex1(?!\d)", "Dom(Kin)", translated)
        return re.sub(r"Ex2(?!\d)", "Mot(NLS)", translated)

    if src == "Protein" and dst == "DNA":
        return phrase.replace("^p:", "~d:").replace("Dom(Kin)", "Exon1").replace("Mot(NLS)", "Exon2")

    return "❌ Translation not implemented."
23
+
24
# Generate a protein from a phrase

# Seed used when the phrase is not one of the known keys below.
_DEFAULT_SEED = "MKKK"

# Known GeneForgeLang protein phrases mapped to the amino-acid prefix that is
# fed to the model as the start of the generated sequence.
semillas = {
    "^p:Dom(Kin)-Mot(NLS)*AcK@147=Localize(Nucleus)": "MKKK",
    "^p:Mot(NLS)-Mot(PEST)*P@120": "MKSP",
    "^p:Dom(ZnF)-Mot(NLS)*UbK@42": "MKHG",
}


def generar_desde_frase(frase):
    """Generate a protein sequence seeded from a GeneForgeLang phrase.

    The phrase is looked up in ``semillas`` to pick a seed; unknown phrases
    fall back to the default seed. The seed is then extended by sampling from
    the module-level ``model`` using the module-level ``tokenizer``.

    Args:
        frase: The phrase typed by the user. Surrounding whitespace is
            ignored and ``None`` is treated as an empty phrase.

    Returns:
        A multi-line string showing the seed that was used and the generated
        sequence.
    """
    # Normalise the lookup key: a stray space or newline from the text box
    # would otherwise miss the table and silently select the default seed.
    clave = (frase or "").strip()
    semilla = semillas.get(clave, _DEFAULT_SEED)

    inputs = tokenizer(semilla, return_tensors="pt", padding=True)
    outputs = model.generate(
        **inputs,
        max_length=200,
        do_sample=True,
        top_k=950,
        temperature=1.5,
        num_return_sequences=1,
        # Pass the padding id explicitly instead of relying on generate()
        # to pick a fallback on its own.
        pad_token_id=tokenizer.pad_token_id,
    )
    secuencia = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return f"🧪 Seed: {semilla}\n🧬 Generated Protein:\n{secuencia}"
37
+
38
# Gradio interface
_MOLECULE_TYPES = ("DNA", "RNA", "Protein")

with gr.Blocks() as demo:
    # First tab: phrase in, generated protein sequence out.
    with gr.Tab("Phrase → Protein"):
        gr.Markdown("### Generate Protein Sequence from GeneForgeLang Phrase")
        input_frase = gr.Textbox(label="Input Phrase")
        output_prot = gr.Textbox(label="Generated Protein")
        boton_gen = gr.Button("Generate")
        boton_gen.click(
            fn=generar_desde_frase,
            inputs=input_frase,
            outputs=output_prot,
        )

    # Second tab: rewrite a phrase from one molecule notation into another.
    with gr.Tab("Transcode Across Molecules"):
        gr.Markdown("### Convert between DNA, RNA, and Protein symbolic phrases")
        input_phrase = gr.Textbox(label="Input GeneForgeLang Phrase")
        src_select = gr.Radio(
            choices=list(_MOLECULE_TYPES),
            label="Translate From",
            value="DNA",
        )
        dst_select = gr.Radio(
            choices=list(_MOLECULE_TYPES),
            label="Translate To",
            value="RNA",
        )
        output = gr.Textbox(label="Translated Phrase")
        trans_btn = gr.Button("Translate")
        trans_btn.click(
            fn=transcode_phrase,
            inputs=[input_phrase, src_select, dst_select],
            outputs=output,
        )

# Start the web server as soon as the module is run.
demo.launch()