# File size: 4,923 Bytes
#
# 1895c5d

# IMPORTS
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import matplotlib.pyplot as plt
import time

# LOAD DATA
df = pd.read_csv('C:/My Projects/MorseH Model/morse_data.csv')

# ENCODE CHARACTERS AND MORSE CODE
# Replace every character with the integer class id assigned by LabelEncoder.
label_encoder = LabelEncoder()
df['Character'] = label_encoder.fit_transform(df['Character'])

# Symbol table: dot -> 0, dash -> 1, space -> 2 (2 is also the padding id).
morse_dict = {'.': 0, '-': 1, ' ': 2}


def _morse_to_ids(code):
    """Return the integer ids for the symbols of one Morse code string."""
    return [morse_dict[symbol] for symbol in code]


df['Morse Code Enc'] = df['Morse Code'].map(_morse_to_ids)

# Right-pad every id sequence with 2 so all rows share the longest length.
max_length = df['Morse Code Enc'].map(len).max()
padded_codes = pad_sequences(
    df['Morse Code Enc'],
    maxlen=max_length,
    padding='post',
    value=2,
)
df['Morse Code Enc'] = padded_codes.tolist()

# PREPARE FEATURES AND LABELS
# X holds one character id per row; y holds the padded Morse id sequence.
X = torch.tensor(df['Character'].values, dtype=torch.long)
y = torch.tensor(df['Morse Code Enc'].tolist(), dtype=torch.long)

# MODEL DEFINITION
class MorseHModel(nn.Module):
    """Feed-forward network mapping a character id to Morse symbol logits.

    The character id is embedded, passed through one hidden ReLU layer and
    projected to ``max_length * output_size`` values, which are reshaped so
    that every sequence position gets its own vector of class logits.

    Args:
        input_size: Number of rows in the embedding table (distinct ids).
        output_size: Number of classes predicted at each position.
        max_length: Number of positions in the predicted sequence.
        embedding_dim: Width of the embedding vectors (default 16).
        hidden_size: Width of the hidden layer (default 32).
    """

    def __init__(self, input_size, output_size, max_length,
                 embedding_dim=16, hidden_size=32):
        super().__init__()
        # NOTE: the attribute keeps its original (misspelled) name on purpose:
        # it is part of the state_dict keys of weights saved by this script.
        self.emmbedding = nn.Embedding(input_size, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size * max_length)
        self.output_size = output_size
        self.max_length = max_length

    def forward(self, x):
        """Return logits of shape ``(-1, max_length, output_size)`` for ids ``x``."""
        # Read the width from the layer itself so it can never drift from
        # the value the embedding was built with.
        x = self.emmbedding(x).view(-1, self.emmbedding.embedding_dim)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x.view(-1, self.max_length, self.output_size)

# One embedding row per known character; three output classes per position.
output_size = 3
input_size = len(label_encoder.classes_)
model = MorseHModel(
    input_size=input_size,
    output_size=output_size,
    max_length=max_length,
)

# Reuse previously saved weights when the file exists; otherwise remember
# that the model still has to be trained.
try:
    saved_state = torch.load('morse_model_weights.pth', weights_only=True)
except FileNotFoundError:
    not_pretrained = True
    print("Pre-trained weights not found, starting training from scratch.")
else:
    model.load_state_dict(saved_state)
    not_pretrained = False

# CREATE DATALOADER
# Shuffled mini-batches of 16 (character id, Morse id sequence) pairs.
dataset = TensorDataset(X, y)
data_loader = DataLoader(dataset, shuffle=True, batch_size=16)

# LOSS FUNCTION AND OPTIMIZER
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# TRAINING LOOP
# Runs only when no saved weights were loaded (not_pretrained is True).
num_epochs = 20
if not_pretrained:
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for inputs, targets in data_loader:
            optimizer.zero_grad()

            # Flatten (batch, max_length, output_size) logits and
            # (batch, max_length) targets so that CrossEntropyLoss scores
            # every sequence position as an independent classification.
            logits = model(inputs).view(-1, output_size)
            loss = criterion(logits, targets.view(-1))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(data_loader):.4f}")

    # MODEL EVALUATION
    # Position-wise accuracy on the first rows of the training data.
    model.eval()
    # Clamp to the dataset size so datasets with fewer than 10 rows do not
    # raise IndexError.
    sample_size = min(10, len(X))
    correct_predictions = 0
    total_elements = 0

    if sample_size:
        with torch.no_grad():
            eval_targets = y[:sample_size]
            # One batched forward pass; argmax over the class dimension
            # picks the most likely symbol id at each position.
            predicted = model(X[:sample_size]).argmax(dim=2)

            total_elements = eval_targets.numel()
            correct_predictions = (predicted == eval_targets).sum().item()

        accuracy = 100 * correct_predictions / total_elements
        print(f"Accuracy on sample of training set: {accuracy:.2f}%")

# INFERENCE FUNCTIONS
def predict(character_index):
    """Return the model's predicted Morse symbol ids for one character index.

    The index is wrapped in a one-element batch, passed through the global
    ``model`` without gradient tracking, and the highest-scoring class at
    each sequence position of that single sample is returned.
    """
    batch = torch.tensor([character_index])
    with torch.no_grad():
        logits = model(batch)
        best_ids = logits.argmax(dim=2)
    return best_ids[0]

def decode(prediction):
    """Turn a sequence of symbol ids into a Morse string of '.' and '-'.

    Ids equal to 2 are dropped; of the remaining ids, 0 becomes '.' and
    anything else becomes '-'.
    """
    symbols = []
    for value in prediction:
        if value == 2:
            continue
        symbols.append('.' if value == 0 else '-')
    return ''.join(symbols)

def encode(word):
    """Return the label-encoder index of every character of ``word``.

    The word is upper-cased first, then each character is looked up
    individually through the global ``label_encoder``.
    """
    indices = []
    for char in word.upper():
        encoded = label_encoder.transform([char])
        indices.append(encoded[0])
    return indices

def get_morse_word(word):
    """Translate ``word`` to Morse code, one model prediction per character.

    Every character's decoded Morse string is followed by a single space,
    so the result ends with a trailing space.
    """
    return ''.join(
        decode(predict(char_index)) + ' '
        for char_index in encode(word)
    )

# SAVE MODEL
# Persist before prompting the user: a failure during inference must not
# discard weights that were just trained.
torch.save(model.state_dict(), 'morse_model_weights.pth')
torch.save(model, 'complete_model.pth')

# USER INPUT INFERENCE
# Words are translated one by one and separated by three extra spaces.
user_input = input("Type your message: ")
try:
    response = ''.join(get_morse_word(word) + '   ' for word in user_input.split())
except ValueError as err:
    # LabelEncoder.transform raises ValueError for characters it has not
    # seen during fitting (e.g. punctuation missing from the dataset).
    print("Could not translate message:", err)
else:
    print("Response: ", response)