Spaces:
Runtime error
Runtime error
import numpy as np | |
import os | |
import streamlit as st | |
import sys | |
import urllib | |
import json | |
import torch | |
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config | |
def generate(tokenizer, model, text, features):
    """Sample comments for a video title and render them with Streamlit.

    The title is wrapped in the prompt
    ``<|startoftext|><|titlestart|>TITLE<|titleend|><|authornamebegin|>`` and
    the model is sampled one sequence at a time. A sample is accepted only if
    the decoded text contains both ``<|authornamebegin|>`` and a following
    ``<|authornameend|>``; accepted samples are written via ``st.markdown`` as
    ``**author**: comment``. Every decoded sample is also echoed to stderr.

    Args:
        tokenizer: Callable tokenizer that returns an object with
            ``input_ids`` and provides ``decode``.
        model: Object exposing ``generate(...)`` with Hugging Face style
            sampling keyword arguments.
        text: Video title inserted into the prompt.
        features: Mapping with the slider values:
            ``num`` (how many comments to show), ``t`` (temperature in
            hundredths), ``top_p`` (nucleus threshold in percent) and
            ``max_length`` (passed through as ``max_length``).

    Returns:
        None. Output is produced through Streamlit.
    """
    wanted = features['num']
    if wanted <= 0:
        # Nothing requested: skip tokenization and sampling entirely.
        return

    prompt = "<|startoftext|><|titlestart|>{}<|titleend|><|authornamebegin|>".format(text)
    generated = tokenizer(prompt, return_tensors="pt").input_ids

    # Both sliders are integer scales. The sampler needs a strictly positive
    # temperature, so a slider value of 0 is clamped to a small positive value.
    temperature = max(features['t'] / 100.0, 0.01)
    # The slider is 0-100 (percent); top_p is a probability mass, so convert it
    # to a fraction the same way the temperature is converted.
    top_p = features['top_p'] / 100.0

    # Bound the number of sampling attempts: rejected samples do not advance
    # `count`, so without a cap a model that never emits the author markers
    # would keep this loop running forever.
    max_attempts = wanted * 10
    attempts = 0
    count = 0
    while count < wanted and attempts < max_attempts:
        attempts += 1
        sample_outputs = model.generate(
            generated,
            do_sample=True,
            top_k=50,
            max_length=features['max_length'],
            top_p=top_p,
            temperature=temperature,
            num_return_sequences=1,
        )
        decoded = tokenizer.decode(sample_outputs[0], skip_special_tokens=False)
        print(decoded, file=sys.stderr)

        if '<|authornamebegin|>' not in decoded:
            continue
        raw = decoded.split('<|authornamebegin|>')[-1]
        if '<|authornameend|>' not in raw:
            continue

        # The author is the segment just before the last end marker; the
        # comment body is everything after it.
        end_name = raw.split('<|authornameend|>')
        author = end_name[-2]
        comment = end_name[-1]
        count += 1
        st.markdown(
            '**' + author.strip() + '**: '
            + comment.replace('<|endoftext|>', '').replace('<|pad|>', '').strip()
        )

    if count < wanted:
        st.warning(
            f"Only {count} of {wanted} comments could be generated "
            f"after {attempts} attempts."
        )
def load_model():
    """Create the tokenizer and load the GPT-2 model from local files.

    The tokenizer is the ``gpt2-medium`` tokenizer extended with the
    start/end/pad tokens and the title/author marker tokens used in the
    prompts. The model is built from ``./config.json`` and its weights are
    read from ``./pytorch_model.bin`` onto the CPU.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is in evaluation mode.
    """
    additional_special_tokens = [
        '<|titlestart|>',
        '<|titleend|>',
        '<|authornamebegin|>',
        '<|authornameend|>',
    ]
    tokenizer = GPT2Tokenizer.from_pretrained(
        'gpt2-medium',
        bos_token='<|startoftext|>',
        eos_token='<|endoftext|>',
        pad_token='<|pad|>',
        additional_special_tokens=additional_special_tokens,
    )

    config = GPT2Config.from_json_file('./config.json')
    model = GPT2LMHeadModel(config)

    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source.
    state_dict = torch.load('./pytorch_model.bin', map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)

    # A module constructed directly starts in training mode; switch to eval so
    # dropout is disabled while generating.
    model.eval()
    return tokenizer, model
def main():
    """Render the Streamlit page: sidebar controls, URL form, and output.

    Reads the sampling settings from sidebar sliders, asks for a YouTube URL
    in a form, looks up the video title through YouTube's oEmbed endpoint,
    shows the title and the video, and then calls ``generate`` with the title
    and the slider values. Lookup failures are reported with ``st.error``
    instead of raising.
    """
    # `import urllib` alone does not guarantee the submodules are loaded, so
    # import the pieces that are used explicitly.
    from urllib.parse import urlencode
    from urllib.request import urlopen

    # Streamlit re-executes the script on every widget interaction; cache the
    # loaded model across reruns when this Streamlit version supports it.
    cache_resource = getattr(st, "cache_resource", None)
    cached_loader = cache_resource(load_model) if cache_resource is not None else load_model
    tokenizer, model = cached_loader()

    st.title("YouTube comments generating project")
    st.header('YouTube comments generator')

    st.sidebar.title("Features")
    # Insert user-controlled values from sliders into the feature vector.
    features = {
        "num": st.sidebar.slider("Количество комментариев", 0, 20, 1, 1),
        # Minimum is 1 (not 0): the value is divided by 100 downstream and the
        # sampler needs a strictly positive temperature.
        "t": st.sidebar.slider("Температура", 1, 300, 180, 1),
        "top_p": st.sidebar.slider("Top-p", 0, 100, 95, 5),
        "max_length": st.sidebar.slider("Максимальная длина комментария", 0, 300, 100, 5),
    }

    st.sidebar.title("Note")
    st.sidebar.write(
        """
        Изменяя значения, можно получить различные выводы модели
        """
    )
    st.sidebar.write(
        """
        Значение температуры делится на 100
        """
    )
    st.sidebar.caption(f"Streamlit version `{st.__version__}`")

    with st.form(key='my_form'):
        url = st.text_input('Введите url видео на YouTube')
        st.form_submit_button('Готово!')

    if not url:
        return

    query_string = urlencode({"format": "json", "url": url})
    oembed_url = "https://www.youtube.com/oembed" + "?" + query_string
    try:
        # The timeout keeps a stalled request from hanging the page.
        with urlopen(oembed_url, timeout=10) as response:
            response_text = response.read()
        data = json.loads(response_text.decode())
        title = data['title']
    except (OSError, ValueError, KeyError) as err:
        # OSError covers URL/HTTP errors and timeouts; ValueError covers
        # undecodable or non-JSON responses; KeyError a missing title.
        st.error(f"Could not fetch the video title: {err}")
        return

    st.write('Video Title: ' + title)
    st.video(url)
    generate(tokenizer, model, title, features)
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()