File size: 1,182 Bytes
2302a09
 
 
5e23aa5
2302a09
 
 
 
 
 
246399c
2302a09
 
 
 
 
 
 
 
246399c
2302a09
 
 
 
 
 
 
246399c
2302a09
 
 
 
 
 
 
 
 
 
 
5e23aa5
6680b8d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import streamlit as st
import pandas as pd
import numpy as np
import re

import torch
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer


# Hugging Face Hub identifiers. The tokenizer is taken from the BLOOM-560m
# checkpoint while the weights come from the "dan-vdb/BoobaAI" repository.
# NOTE(review): presumably BoobaAI is a fine-tune of bloom-560m that reuses
# its vocabulary -- confirm against the model card.
TOKENIZER_CHECKPOINT = "bigscience/bloom-560m"
MODEL_CHECKPOINT = "dan-vdb/BoobaAI"


def _cache_resource(func):
    """Wrap *func* with the resource cache offered by the installed Streamlit.

    Newer releases expose ``st.cache_resource``; older ones expose
    ``st.experimental_singleton``. If neither is present the function is
    returned unchanged, i.e. it simply runs on every call.
    """
    decorator = getattr(st, "cache_resource", None) or getattr(
        st, "experimental_singleton", None
    )
    return decorator(func) if decorator is not None else func


@_cache_resource
def _load_pipeline():
    """Build the text-generation pipeline (tokenizer + causal LM) on CPU.

    Streamlit re-executes this script from the top on every interaction, so
    without caching the tokenizer and model would be instantiated again on
    each rerun. Caching the pipeline keeps one instance alive across reruns.
    """
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)
    model = AutoModelForCausalLM.from_pretrained(MODEL_CHECKPOINT)
    # Inference is pinned to the CPU.
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device=torch.device("cpu"),
    )


st.title('Booba')
st.subheader("Commencez la phrase, l'algorithme la termine.")
st.write("Note : la génération du texte prend ~ 5 minutes")


with st.form("my_form"):
    text = st.text_input("Début de la phrase :", "C'était Noël dans la famille")

    # Every form must have a submit button.
    submitted = st.form_submit_button("Générer la suite")

    if submitted:
        # The model is only needed once the user actually asks for a
        # completion; the cached loader makes later submissions reuse it.
        pipe = _load_pipeline()
        generated = pipe(
            text,
            num_return_sequences=1,
            max_length=200,
            repetition_penalty=2.0,
        )[0]["generated_text"]
        # Insert a Markdown hard line break (two spaces + newline) before
        # every space followed by an ASCII capital letter, so each such
        # segment is rendered on its own line.
        generated = re.sub(r"( [A-Z])", r"  \n\1", generated)
        st.write(generated)