File size: 4,923 Bytes
1895c5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# IMPORTS
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import matplotlib.pyplot as plt
import time

# LOAD DATA
# The CSV is read from a fixed absolute path; the steps below use its
# 'Character' and 'Morse Code' columns.
df = pd.read_csv('C:/My Projects/MorseH Model/morse_data.csv')

# ENCODE CHARACTERS AND MORSE CODE
# Characters are replaced by integer class ids; the fitted encoder is kept
# so the same mapping can be applied to user input later in the script.
label_encoder = LabelEncoder()
df['Character'] = label_encoder.fit_transform(df['Character'])

# Morse symbols are mapped to small integers; 2 is also the padding value.
morse_dict = {'.': 0, '-': 1, ' ': 2}


def _morse_to_ids(code):
    """Return the list of integer ids for the symbols of one Morse string."""
    return [morse_dict[symbol] for symbol in code]


df['Morse Code Enc'] = df['Morse Code'].apply(_morse_to_ids)

# Right-pad every encoded sequence with 2 up to the longest sequence length.
max_length = df['Morse Code Enc'].apply(len).max()
padded = pad_sequences(
    df['Morse Code Enc'],
    maxlen=max_length,
    padding='post',
    value=2,
)
df['Morse Code Enc'] = padded.tolist()

# PREPARE FEATURES AND LABELS
# X holds one character id per row; y holds the padded symbol ids per row.
X = torch.tensor(df['Character'].values, dtype=torch.long)
y = torch.tensor(df['Morse Code Enc'].tolist(), dtype=torch.long)

# MODEL DEFINITION
class MorseHModel(nn.Module):
    """Map a character index to per-position logits over Morse symbols.

    The network is an embedding lookup followed by two linear layers with a
    ReLU in between. The last layer emits ``output_size * max_length`` values
    that are reshaped to ``(batch, max_length, output_size)``.

    Args:
        input_size: Number of distinct character ids (embedding rows).
        output_size: Number of symbol classes predicted at each position.
        max_length: Number of sequence positions predicted per character.
        embedding_dim: Width of the character embedding (default 16).
        hidden_size: Width of the hidden linear layer (default 32).
    """

    def __init__(self, input_size, output_size, max_length, *,
                 embedding_dim=16, hidden_size=32):
        super().__init__()
        # NOTE: the attribute name keeps its original spelling on purpose.
        # state_dict keys are derived from attribute names, so renaming it
        # would make previously saved weight files fail to load.
        self.emmbedding = nn.Embedding(input_size, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size * max_length)
        self.output_size = output_size
        self.max_length = max_length

    def forward(self, x):
        """Return logits of shape ``(batch, max_length, output_size)``.

        Args:
            x: Integer tensor of character ids.
        """
        # Read the width from the layer itself so it always matches the
        # embedding that was actually constructed.
        embedded = self.emmbedding(x).view(-1, self.emmbedding.embedding_dim)
        hidden = torch.relu(self.fc1(embedded))
        logits = self.fc2(hidden)
        return logits.view(-1, self.max_length, self.output_size)

# One embedding row per known character; three symbol classes per position.
input_size = len(label_encoder.classes_)
output_size = 3
model = MorseHModel(
    input_size=input_size,
    output_size=output_size,
    max_length=max_length,
)

# Reuse saved weights when the file exists; otherwise flag that the model
# still has to be trained.
try:
    _saved_weights = torch.load('morse_model_weights.pth', weights_only=True)
except FileNotFoundError:
    not_pretrained = True
    print("Pre-trained weights not found, starting training from scratch.")
else:
    model.load_state_dict(_saved_weights)
    not_pretrained = False

# DATA PIPELINE
# Pair each character id with its padded Morse target and serve the pairs
# in shuffled mini-batches of 16.
dataset = TensorDataset(X, y)
data_loader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

# OBJECTIVE AND OPTIMIZER
# Cross-entropy over the symbol classes, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# TRAINING LOOP
# Training, and the evaluation that follows it, only runs when no saved
# weights could be loaded.
num_epochs = 20
if not_pretrained:
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for inputs, targets in data_loader:
            optimizer.zero_grad()
            outputs = model(inputs)

            # Flatten the sequence positions into the batch dimension so
            # every position is scored as its own classification example.
            targets = targets.view(-1)
            outputs = outputs.view(-1, output_size)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(data_loader):.4f}")

    # MODEL EVALUATION
    # Per-position accuracy on the first few training rows. The sample count
    # is clamped to the dataset size so a small dataset cannot cause an
    # out-of-range index.
    model.eval()
    sample_size = min(10, len(X))
    correct_predictions = 0
    total_elements = 0

    with torch.no_grad():
        for input_sample, target_sample in zip(X[:sample_size], y[:sample_size]):
            # The model expects a batch dimension, so wrap the single id.
            output = model(input_sample.unsqueeze(0))
            # Drop only the batch dimension; the position dimension is kept
            # so the comparison stays element-wise.
            predicted = torch.argmax(output, dim=2).squeeze(0)

            total_elements += target_sample.size(0)
            correct_predictions += (predicted == target_sample).sum().item()

    # Skip the report when nothing was evaluated, instead of dividing by zero.
    if total_elements:
        accuracy = 100 * correct_predictions / total_elements
        print(f"Accuracy on sample of training set: {accuracy:.2f}%")

# INFERENCE FUNCTIONS
def predict(character_index):
    """Return the most likely symbol id at each position for one character.

    The index is wrapped in a one-element batch, passed through the
    module-level ``model`` with gradients disabled, and the highest-scoring
    class along the last dimension is taken for the only batch entry.
    """
    batch = torch.tensor([character_index])
    with torch.no_grad():
        logits = model(batch)
        best = torch.max(logits, 2)
    return best.indices[0]

def decode(prediction):
    """Turn a sequence of symbol ids into a Morse string.

    Ids equal to 2 are dropped. Of the remaining ids, 0 becomes ``'.'`` and
    every other value becomes ``'-'``.
    """
    symbols = []
    for symbol_id in prediction:
        if symbol_id != 2:
            symbols.append('.' if symbol_id == 0 else '-')
    return ''.join(symbols)

def encode(word):
    """Return the encoder index of every character in the upper-cased word.

    Each character is looked up individually with the module-level
    ``label_encoder``.
    """
    indices = []
    for char in word.upper():
        indices.append(label_encoder.transform([char])[0])
    return indices

def get_morse_word(word):
    """Return the predicted Morse code for a word.

    Every character's decoded prediction is followed by a single space, so
    the result ends with a trailing space when the word is non-empty.
    """
    return ''.join(decode(predict(idx)) + ' ' for idx in encode(word))

# USER INPUT INFERENCE
# Translate each whitespace-separated word and follow it with three spaces.
user_input = input("Type your message: ")
morse_words = []
for word in user_input.split():
    morse_words.append(get_morse_word(word) + '   ')
response = ''.join(morse_words)

print("Response: ", response)
# Optional: print `response` one character at a time with a short
# time.sleep() between characters for a typewriter-style display.

# SAVE MODEL
# Store the weights alone, then the entire module object as a second file.
torch.save(model.state_dict(), 'morse_model_weights.pth')
torch.save(model, 'complete_model.pth')