# Import libraries
import streamlit as st
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Load the text data
with open('shakespeare.txt', 'r', encoding='utf-8') as f:  # Read the text file
    text = f.read()
vocab = sorted(set(text)) # Get the unique characters in the text
char2idx = {c: i for i, c in enumerate(vocab)} # Map characters to indices
idx2char = np.array(vocab) # Map indices to characters
text_as_int = np.array([char2idx[c] for c in text]) # Convert text to integers
# Create training examples and targets
seq_length = 100 # Length of the input sequence
examples_per_epoch = len(text) // (seq_length + 1) # Number of examples per epoch
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int) # Create a dataset from the text
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True) # Create batches of sequences
def split_input_target(chunk):  # Split each sequence into an input and a target
    input_text = chunk[:-1]  # Input is the sequence except the last character
    target_text = chunk[1:]  # Target is the sequence except the first character
    return input_text, target_text
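# For example, the chunk "Hello" yields input "Hell" and target "ello",
# so at every position the model is trained to predict the next character.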
dataset = sequences.map(split_input_target) # Apply the function to the dataset
# Shuffle and batch the dataset
BATCH_SIZE = 64 # Batch size
BUFFER_SIZE = 10000 # Buffer size for shuffling
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True) # Shuffle and batch the dataset
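# Each dataset element is now a pair of [BATCH_SIZE, seq_length] integer tensors (input, target).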
# Define the model
vocab_size = len(vocab) # Size of the vocabulary
embedding_dim = 256 # Dimension of the embedding layer
rnn_units = 1024 # Number of units in the RNN layer
model = keras.Sequential([
    layers.Embedding(vocab_size, embedding_dim),  # Embedding layer (no fixed batch size, so generation can run with a batch of one)
    layers.GRU(rnn_units, return_sequences=True),  # GRU layer returning its output at every time step
    layers.Dense(vocab_size)  # Dense layer producing logits over the vocabulary
])
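# The model maps a batch of character-id sequences of shape [batch, seq_len]
# to per-step logits of shape [batch, seq_len, vocab_size].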
# Define the loss function
def loss(labels, logits):  # Sparse categorical cross-entropy computed on raw logits
    return keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
# Compile the model
model.compile(optimizer='adam', loss=loss)
# Define a function to generate text
def generate_text(model, start_string):  # Generate text from a seed string
    num_generate = 50  # Number of characters to generate
    input_ids = [char2idx[s] for s in start_string]  # Convert the start string to character ids
    text_generated = []  # Empty list to store the generated characters
    temperature = 1.0  # Temperature parameter to control the randomness
    for i in range(num_generate):  # Loop over the number of characters to generate
        input_eval = tf.expand_dims(input_ids, 0)  # Batch of one sequence holding the full context so far
        predictions = model(input_eval)  # Logits with shape [1, len(input_ids), vocab_size]
        predictions = tf.squeeze(predictions, 0)  # Remove the batch dimension
        predictions = predictions / temperature  # Divide by temperature to increase or decrease randomness
        predicted_id = int(tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy())  # Sample the next id from the last time step
        input_ids.append(predicted_id)  # Extend the context with the predicted id
        text_generated.append(idx2char[predicted_id])  # Append the predicted character to the generated text
    return start_string + ''.join(text_generated)  # Return the start string plus the generated text
# Train the model
EPOCHS = 1 # Number of epochs to train
for epoch in range(EPOCHS):  # Loop over the epochs
    print(f'Epoch {epoch + 1}')
    model.fit(dataset, epochs=1)  # Fit the model on the dataset for one epoch
start_string = 'ROMEO: ' # Define a start string to generate text from
print(generate_text(model, start_string)) # Print the generated text
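# Streamlit is imported at the top but otherwise unused; the lines below are a
# minimal, illustrative sketch of how the generated sample could be surfaced in
# the Space's UI. The page title and widget label are assumptions, not an
# established interface.
st.title('Shakespeare character RNN')  # Page title (assumed wording)
seed = st.text_input('Seed text', value=start_string)  # Let the user choose a seed string
seed = ''.join(c for c in seed if c in char2idx)  # Drop characters the model has never seen
if seed:
    st.text(generate_text(model, seed))  # Display a sample generated from the seed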