# Import libraries
import streamlit as st
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Load the text data
with open('shakespeare.txt', 'r', encoding='utf-8') as f:  # Read the text file
    text = f.read()
vocab = sorted(set(text)) # Get the unique characters in the text
char2idx = {c: i for i, c in enumerate(vocab)} # Map characters to indices
idx2char = np.array(vocab) # Map indices to characters
text_as_int = np.array([char2idx[c] for c in text]) # Convert text to integers
# Create training examples and targets
seq_length = 100 # Length of the input sequence
examples_per_epoch = len(text) // (seq_length + 1) # Number of examples per epoch
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int) # Create a dataset from the text
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True) # Create batches of sequences
def split_input_target(chunk):  # Split each sequence into an input and a target
    input_text = chunk[:-1]  # Input is the sequence except the last character
    target_text = chunk[1:]  # Target is the sequence except the first character
    return input_text, target_text
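# For example, the chunk "Hello" yields input "Hell" and target "ello",
# so at every position the model is trained to predict the next character.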
dataset = sequences.map(split_input_target) # Apply the function to the dataset
# Shuffle and batch the dataset
BATCH_SIZE = 64 # Batch size
BUFFER_SIZE = 10000 # Buffer size for shuffling
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True) # Shuffle and batch the dataset
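# Each dataset element is now a pair of [BATCH_SIZE, seq_length] integer tensors (input, target).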
# Define the model
vocab_size = len(vocab) # Size of the vocabulary
embedding_dim = 256 # Dimension of the embedding layer
rnn_units = 1024 # Number of units in the RNN layer
model = keras.Sequential([
    layers.Embedding(vocab_size, embedding_dim),  # Embedding layer (no fixed batch size, so generation can run with a batch of one)
    layers.GRU(rnn_units, return_sequences=True),  # GRU layer returning its output at every time step
    layers.Dense(vocab_size)  # Dense layer producing logits over the vocabulary
])
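# The model maps a batch of character-id sequences of shape [batch, seq_len]
# to per-step logits of shape [batch, seq_len, vocab_size].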
# Define the loss function
def loss(labels, logits):  # Sparse categorical cross-entropy computed on raw logits
    return keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
# Compile the model
model.compile(optimizer='adam', loss=loss)
# Define a function to generate text
def generate_text(model, start_string):  # Generate text from a seed string
    num_generate = 50  # Number of characters to generate
    input_ids = [char2idx[s] for s in start_string]  # Convert the start string to character ids
    text_generated = []  # Empty list to store the generated characters
    temperature = 1.0  # Temperature parameter to control the randomness
    for i in range(num_generate):  # Loop over the number of characters to generate
        input_eval = tf.expand_dims(input_ids, 0)  # Batch of one sequence holding the full context so far
        predictions = model(input_eval)  # Logits with shape [1, len(input_ids), vocab_size]
        predictions = tf.squeeze(predictions, 0)  # Remove the batch dimension
        predictions = predictions / temperature  # Divide by temperature to increase or decrease randomness
        predicted_id = int(tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy())  # Sample the next id from the last time step
        input_ids.append(predicted_id)  # Extend the context with the predicted id
        text_generated.append(idx2char[predicted_id])  # Append the predicted character to the generated text
    return start_string + ''.join(text_generated)  # Return the start string plus the generated text
# Train the model
EPOCHS = 1 # Number of epochs to train
for epoch in range(EPOCHS):  # Loop over the epochs
    print(f'Epoch {epoch + 1}')
    model.fit(dataset, epochs=1)  # Fit the model on the dataset for one epoch
start_string = 'ROMEO: ' # Define a start string to generate text from
print(generate_text(model, start_string)) # Print the generated text
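# Streamlit is imported at the top but otherwise unused; the lines below are a
# minimal, illustrative sketch of how the generated sample could be surfaced in
# the Space's UI. The page title and widget label are assumptions, not an
# established interface.
st.title('Shakespeare character RNN')  # Page title (assumed wording)
seed = st.text_input('Seed text', value=start_string)  # Let the user choose a seed string
seed = ''.join(c for c in seed if c in char2idx)  # Drop characters the model has never seen
if seed:
    st.text(generate_text(model, seed))  # Display a sample generated from the seed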