|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import re |
|
|
|
import torch |
|
from transformers import pipeline |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
|
|
|
# --- Static page header -------------------------------------------------
# App title, tagline, and a latency warning (inference runs on CPU — see
# the pipeline setup below — so generation is slow).
st.title('Booba')

st.subheader("Commencez la phrase, l'algorithme la termine.")

st.write("Note : la génération du texte prend ~ 5 minutes")
|
|
|
|
|
|
|
|
|
# Input form: batches the text field and the submit button together so the
# expensive generation step only runs when the user explicitly clicks.
with st.form("my_form"):
    # Seed text for the language model, pre-filled with a French example.
    text = st.text_input(
        "Début de la phrase :",
        "C'était Noël dans la famille",
    )

    # True only on the rerun triggered by the button click.
    submitted = st.form_submit_button("Générer la suite")
|
|
|
|
|
# Streamlit re-executes this whole script on every widget interaction, so
# without caching the tokenizer and the ~1 GB of model weights would be
# re-instantiated on every rerun. @st.cache_resource loads them exactly
# once per server process and hands back the same pipeline object after.
@st.cache_resource
def _load_pipeline():
    """Build and return the cached text-generation pipeline.

    Combines the base Bloom-560m tokenizer with the fine-tuned
    dan-vdb/BoobaAI causal-LM weights, running on CPU.
    """
    model_checkpoint = "bigscience/bloom-560m"
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
    model = AutoModelForCausalLM.from_pretrained("dan-vdb/BoobaAI")
    device = torch.device("cpu")
    return pipeline(
        "text-generation", model=model, tokenizer=tokenizer, device=device
    )


pipe = _load_pipeline()
|
|
|
|
|
|
|
# Generation only runs on the rerun triggered by the form's submit button.
if submitted:
    # Greedy-ish decode up to 200 tokens; repetition_penalty discourages
    # the model from looping on the same phrases.
    outputs = pipe(
        text,
        num_return_sequences=1,
        max_length=200,
        repetition_penalty=2.0,
    )
    text = outputs[0]["generated_text"]

    # Insert a line break before every " X" (space followed by a capital
    # letter) to loosely split the raw output into verse-like lines.
    text = re.sub(r"( [A-Z])", r"\n\1", text)

    st.write(text)