File size: 2,724 Bytes
05ec8c2
 
 
 
 
 
 
ef64635
dcb539c
 
05ec8c2
 
 
 
 
 
 
 
dcb539c
5ebf111
98ca599
ad62bbb
bf92c11
 
05ec8c2
 
 
 
109caed
05ec8c2
 
 
 
 
 
 
 
b743d9d
 
 
 
 
 
 
 
 
 
05ec8c2
 
 
b743d9d
05ec8c2
 
 
 
 
b743d9d
05ec8c2
 
 
b743d9d
05ec8c2
 
 
 
b743d9d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import streamlit as st
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
from transformers.activations import get_activation
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import AutoTokenizer, AutoModel
from transformers import GPTNeoXForCausalLM, GPTNeoXTokenizerFast


st.title('GPT2:')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@st.cache(allow_output_mutation=True)
def get_model():
    #tokenizer = AutoTokenizer.from_pretrained("BigSalmon/MASKGPT2")
    #model = AutoModelForCausalLM.from_pretrained("BigSalmon/MASKGPT2")
    #tokenizer = GPTNeoXTokenizerFast.from_pretrained("CarperAI/FIM-NeoX-1.3B")
    #model = GPTNeoXForCausalLM.from_pretrained("BigSalmon/FormalInformalConcise-FIM-NeoX-1.3B")
    tokenizer = AutoTokenizer.from_pretrained("BigSalmon/FamilyFeud")
    model = AutoModelForCausalLM.from_pretrained("BigSalmon/FamilyFeud")
    return model, tokenizer
    
model, tokenizer = get_model()

g = """***

original: sports teams are profitable for owners. [MASK], their valuations experience a dramatic uptick. 
infill: sports teams are profitable for owners. ( accumulating vast sums / stockpiling treasure / realizing benefits / cashing in / registering robust financials / scoring on balance sheets ), their valuations experience a dramatic uptick. 

***

original:"""

def prefix_format(sentence):
  words = sentence.split()
  if "[MASK]" in sentence:
    words2 = words.index("[MASK]")
    #print(words2)
    output = ("<|SUF|> " + ' '.join(words[words2+1:]) + " <|PRE|> " + ' '.join(words[:words2]) + " <|MID|>")
    st.write(output)
  else:
    st.write("Add [MASK] to sentence")
    
with st.form(key='my_form'):
    prompt = st.text_area(label='Enter sentence', value=g)
    submit_button = st.form_submit_button(label='Submit')
    submit_button6 = st.form_submit_button(label='Turn Into Infill Format. Just add [MASK] where you want it infilled')
    if submit_button:
      with torch.no_grad():
        text = tokenizer.encode(prompt)
        myinput, past_key_values = torch.tensor([text]), None
        myinput = myinput
        myinput= myinput
        logits, past_key_values = model(myinput, past_key_values = past_key_values, return_dict=False)
        logits = logits[0,-1]
        probabilities = torch.nn.functional.softmax(logits)
        best_logits, best_indices = logits.topk(250)
        best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]
        text.append(best_indices[0].item())
        best_probabilities = probabilities[best_indices].tolist()
        words = []              
        st.write(best_words)
    if submit_button6:
        prefix_format(prompt)