Spaces:

eaglelandsonce
/

TensorFlowClass

Sleeping

App Files Files Community

eaglelandsonce committed on Jul 11, 2024

Commit

79ac5ce

verified ·

1 Parent(s): ef623be

Create 21_NLP.py

Browse files

Files changed (1) hide show

pages/21_NLP.py +98 -0

pages/21_NLP.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import streamlit as st
+import tensorflow as tf
+from transformers import BertTokenizer, TFBertForSequenceClassification
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+# Load the IMDb dataset
+from datasets import load_dataset
+# Load dataset
+dataset = load_dataset("imdb")
+# Split dataset into training and testing
+train_data, test_data = train_test_split(dataset['train'].to_pandas(), test_size=0.2)
+# Initialize the tokenizer
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+# Tokenization and padding
+max_length = 128
+def tokenize_and_pad(text):
+    tokens = tokenizer.encode_plus(
+        text,
+        max_length=max_length,
+        padding='max_length',
+        truncation=True,
+        return_tensors='tf'
+    )
+    return tokens['input_ids'], tokens['attention_mask']
+# Preprocess the dataset
+def preprocess_data(data):
+    input_ids = []
+    attention_masks = []
+    labels = []
+    for review, label in zip(data['text'], data['label']):
+        ids, mask = tokenize_and_pad(review)
+        input_ids.append(ids)
+        attention_masks.append(mask)
+        labels.append(label)
+    return np.array(input_ids), np.array(attention_masks), np.array(labels)
+X_train_ids, X_train_mask, y_train = preprocess_data(train_data)
+X_test_ids, X_test_mask, y_test = preprocess_data(test_data)
+# Load the pre-trained BERT model
+model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
+# Build the Keras model
+input_ids = tf.keras.Input(shape=(max_length,), dtype=tf.int32, name="input_ids")
+attention_mask = tf.keras.Input(shape=(max_length,), dtype=tf.int32, name="attention_mask")
+bert_outputs = model(input_ids, attention_mask=attention_mask)
+outputs = tf.keras.layers.Dense(1, activation='sigmoid')(bert_outputs.logits)
+model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=outputs)
+model.summary()
+# Compile the model
+model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5),
+              loss='binary_crossentropy',
+              metrics=['accuracy'])
+# Train the model
+history = model.fit(
+    [X_train_ids, X_train_mask],
+    y_train,
+    validation_split=0.1,
+    epochs=3,
+    batch_size=32
+)
+# Evaluate the model
+loss, accuracy = model.evaluate([X_test_ids, X_test_mask], y_test)
+st.write(f'Test Accuracy: {accuracy}')
+# Plot training & validation accuracy values
+st.subheader("Training and Validation Accuracy")
+fig, ax = plt.subplots()
+ax.plot(history.history['accuracy'], label='Training Accuracy')
+ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
+ax.set_xlabel('Epoch')
+ax.set_ylabel('Accuracy')
+ax.legend()
+st.pyplot(fig)
+st.subheader("Training and Validation Loss")
+fig, ax = plt.subplots()
+ax.plot(history.history['loss'], label='Training Loss')
+ax.plot(history.history['val_loss'], label='Validation Loss')
+ax.set_xlabel('Epoch')
+ax.set_ylabel('Loss')
+ax.legend()
+st.pyplot(fig)