Jabrain committed
Commit 97fea5d · 1 Parent(s): af67342

Update app.py

Files changed (1)
app.py +91 -81
app.py CHANGED
@@ -1,84 +1,94 @@
- import streamlit as st
  # Import libraries
  import numpy as np
- import tensorflow as tf
- from tensorflow import keras
- from tensorflow.keras import layers
-
- # Load the text data
- text = open('shakespeare.txt', 'r').read() # Read the text file
- vocab = sorted(set(text)) # Get the unique characters in the text
- char2idx = {c: i for i, c in enumerate(vocab)} # Map characters to indices
- idx2char = np.array(vocab) # Map indices to characters
- text_as_int = np.array([char2idx[c] for c in text]) # Convert text to integers
-
- # Create training examples and targets
- seq_length = 100 # Length of the input sequence
- examples_per_epoch = len(text) // (seq_length + 1) # Number of examples per epoch
- char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int) # Create a dataset from the text
- sequences = char_dataset.batch(seq_length + 1, drop_remainder=True) # Create batches of sequences
-
- def split_input_target(chunk): # Define a function to split the input and target
-     input_text = chunk[:-1] # Input is the sequence except the last character
-     target_text = chunk[1:] # Target is the sequence except the first character
-     return input_text, target_text
-
- dataset = sequences.map(split_input_target) # Apply the function to the dataset
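Note on `split_input_target`: each (input, target) pair is the same sequence shifted by one position, so the model learns next-character prediction at every step. A minimal sketch of the shift on a toy chunk:

chunk = list("Hello")
input_text, target_text = chunk[:-1], chunk[1:]
# input_text  == ['H', 'e', 'l', 'l']
# target_text == ['e', 'l', 'l', 'o']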
-
- # Shuffle and batch the dataset
- BATCH_SIZE = 1 # Batch size
- BUFFER_SIZE = 10000 # Buffer size for shuffling
- dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True) # Shuffle and batch the dataset
-
- # Define the model
- vocab_size = len(vocab) # Size of the vocabulary
- embedding_dim = 256 # Dimension of the embedding layer
- rnn_units = 1024 # Number of units in the RNN layer
-
- model = keras.Sequential([
-     layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[BATCH_SIZE, None]), # Embedding layer
-     layers.GRU(rnn_units, return_sequences=True, stateful=True), # GRU layer
-     layers.Dense(vocab_size) # Dense layer with vocab_size units
- ])
-
- # Define the loss function
- def loss(labels, logits):
-     return keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
-
- # Compile the model
- model.compile(optimizer='adam', loss=loss)
-
- # Define a function to generate text
- def generate_text(model, start_string):
-     num_generate = 50 # Number of characters to generate
-     input_eval = [char2idx[s] for s in start_string] # Convert the start string to numbers
-     input_eval = tf.expand_dims(input_eval, 0) # Expand the dimension for batch size
-     text_generated = [] # Empty list to store the generated text
-
-     temperature = 1.0 # Temperature parameter to control the randomness
-
-     model.reset_states() # Reset the states of the model
-
-     for i in range(num_generate): # Loop over the number of characters to generate
-         predictions = model(input_eval) # Get the predictions from the model
-         predictions = tf.squeeze(predictions, 0) # Remove the batch dimension
-
-         predictions = predictions / temperature # Divide by temperature to increase or decrease randomness
-         predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy() # Sample from the predictions
-
-         input_eval = tf.expand_dims([predicted_id], 0) # Update the input with the predicted id
-
-         text_generated.append(idx2char[predicted_id]) # Append the predicted character to the generated text
-
-     return (start_string + ''.join(text_generated)) # Return the start string and the generated text
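The division by `temperature` rescales the logits before `tf.random.categorical` samples from them: values below 1 sharpen the distribution toward the argmax, values above 1 flatten it toward uniform. A standalone NumPy sketch of the effect (illustrative logits, not taken from the model):

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

logits = np.array([2.0, 1.0, 0.5])
print(softmax(logits / 0.5))  # low temperature: sharper, near-greedy
print(softmax(logits / 2.0))  # high temperature: flatter, more random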
-
- # Train the model
- EPOCHS = 1 # Number of epochs to train
-
- for epoch in range(EPOCHS): # Loop over the epochs
-     print(f'Epoch {epoch + 1}')
-     model.fit(dataset, epochs=1) # Fit the model on the dataset for one epoch
-
- start_string = 'ROMEO: ' # Define a start string to generate text from
-
- print(generate_text(model, start_string)) # Print the generated text
  # Import libraries
+ import streamlit as st
+ import gradio as gr
+ import torch
+ import transformers
+ import librosa
+ import cv2
  import numpy as np
+
+ # Load models
+ text_model = transformers.pipeline("text-generation")
+ audio_model = transformers.Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+ audio_tokenizer = transformers.Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
+ image_model = transformers.pipeline("image-classification")
+ video_model = transformers.VideoClassificationPipeline(model="facebook/mmf-vit-base-16", feature_extractor="facebook/mmf-vit-base-16")
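`VideoClassificationPipeline` is normally obtained through the `pipeline()` factory rather than instantiated directly, and "facebook/mmf-vit-base-16" does not appear to be a published checkpoint, so this load will likely fail. A hedged fallback sketch, assuming a transformers version that ships the video-classification task (the library then picks its own default checkpoint):

try:
    video_model = transformers.pipeline("video-classification")  # default model chosen by transformers
except Exception as err:
    video_model = None  # video features disabled if no suitable model is available
    print(f"video-classification pipeline unavailable: {err}")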
+
+ # Define functions for processing inputs and outputs
+ def text_to_text(input):
+     output = text_model(input, max_length=50)
+     return output[0]["generated_text"]
+
+ def text_to_audio(input):
+     output = text_model(input, max_length=50)
+     output = gr.outputs.Audio.from_str(output[0]["generated_text"])
+     return output
+
+ def text_to_image(input):
+     output = text_model(input, max_length=50)
+     output = gr.outputs.Image.from_str(output[0]["generated_text"])
+     return output
+
+ def text_to_video(input):
+     output = text_model(input, max_length=50)
+     output = gr.outputs.Video.from_str(output[0]["generated_text"])
+     return output
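Gradio's `gr.outputs.Audio`, `gr.outputs.Image`, and `gr.outputs.Video` are output-component declarations; a `from_str` constructor does not appear in Gradio's API, and generated text cannot become audio, an image, or video by string conversion alone: an actual cross-modal model is needed as the bridge. A sketch of one such bridge for `text_to_audio`, assuming a transformers version whose `pipeline()` supports the "text-to-speech" task (checkpoint choice left to the library):

tts = transformers.pipeline("text-to-speech")  # assumption: TTS pipeline available in this transformers version

def text_to_audio(prompt):
    text = text_model(prompt, max_length=50)[0]["generated_text"]
    speech = tts(text)  # returns {"audio": np.ndarray, "sampling_rate": int}
    # a (sample_rate, waveform) tuple is a format Gradio audio components accept
    return speech["sampling_rate"], np.squeeze(speech["audio"])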
+
+ def audio_to_text(input):
+     input = librosa.load(input)[0]
+     input = torch.from_numpy(input).unsqueeze(0)
+     logits = audio_model(input).logits
+     predicted_ids = torch.argmax(logits, dim=-1)
+     output = audio_tokenizer.batch_decode(predicted_ids)[0]
+     return output
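One caveat here: "facebook/wav2vec2-base-960h" expects 16 kHz mono input, while `librosa.load` resamples to 22 050 Hz by default, so transcription quality will suffer. A corrected sketch that resamples explicitly and skips gradient tracking for inference:

def audio_to_text(path):
    waveform, _ = librosa.load(path, sr=16000)  # resample to the 16 kHz the model was trained on
    inputs = torch.from_numpy(waveform).unsqueeze(0)
    with torch.no_grad():
        logits = audio_model(inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return audio_tokenizer.batch_decode(predicted_ids)[0]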
+
+ def audio_to_audio(input):
+     return input
+
+ def audio_to_image(input):
+     input = librosa.load(input)[0]
+     input = torch.from_numpy(input).unsqueeze(0)
+     logits = audio_model(input).logits
+     predicted_ids = torch.argmax(logits, dim=-1)
+     output = audio_tokenizer.batch_decode(predicted_ids)[0]
+     output = gr.outputs.Image.from_str(output)
+     return output
+
+ def audio_to_video(input):
+     input = librosa.load(input)[0]
+     input = torch.from_numpy(input).unsqueeze(0)
+     logits = audio_model(input).logits
+     predicted_ids = torch.argmax(logits, dim=-1)
+     output = audio_tokenizer.batch_decode(predicted_ids)[0]
+     output = gr.outputs.Video.from_str(output)
+     return output
+
+ def image_to_text(input):
+     input = cv2.imread(input)
+     input = cv2.cvtColor(input, cv2.COLOR_BGR2RGB)
+     input = np.expand_dims(input, axis=0)
+     output = image_model(input)
+     return output[0]["label"]
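The cv2 preprocessing above is likely unnecessary, and the batched NumPy array will probably be rejected: transformers' image-classification pipeline accepts a file path, URL, or PIL image directly and handles loading, resizing, and normalization itself. A simpler sketch:

def image_to_text(path):
    # the pipeline does its own image loading and preprocessing
    return image_model(path)[0]["label"]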
+
+ def image_to_audio(input):
+     input = cv2.imread(input)
+     input = cv2.cvtColor(input, cv2.COLOR_BGR2RGB)
+     input = np.expand_dims(input, axis=0)
+     output = image_model(input)
+     output = gr.outputs.Audio.from_str(output[0]["label"])
+     return output
+
+ def image_to_image(input):
+     return input
+
+ def image_to_video(input):
+     input = cv2.imread(input)
+     input = cv2.cvtColor(input, cv2.COLOR_BGR2RGB)
+     input = np.expand_dims(input, axis=0)
+     output = image_model(input)
+     output = gr.outputs.Video.from_str(output[0]["label"])
+     return output
+
+ def video_to_text(input):
+     input = cv2.VideoCapture(input)
+     frames = []