IMPORTS

In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import matplotlib.pyplot as plt

LOAD DATA

In [4]:
# Load the character / Morse code table from disk.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs as-is on a machine where this exact file exists.
csv_path = 'C:/My Projects/MorseH Model/morse_data.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Character,Morse Code
0,A,.-
1,B,-...
2,C,-.-.
3,D,-..
4,E,.


Checking Data types

In [5]:
# Inspect the Python type of the first entry in each column used below.
tuple(type(df[column][0]) for column in ('Character', 'Morse Code'))

(str, str)

ENCODE THE STRINGS

In [6]:
# Fit the encoder on the raw characters, then replace the column with the
# resulting integer labels. `lb` is kept because later cells use
# `lb.classes_` to size the model and `lb.transform` to encode new text.
# NOTE: the column is overwritten in place, so re-running this cell would
# refit the encoder on the already-encoded integers.
lb = LabelEncoder().fit(df['Character'])
df['Character'] = lb.transform(df['Character'])

ENCODE THE MORSE CODES <br>
'.' -> 0, <br>
'-' -> 1, <br>
' ' -> 2  PADDING

In [7]:
# Symbol -> integer id. Id 2 (the space) is the value used for padding.
morse_dict = {
    '.': 0,
    '-': 1,
    ' ': 2,
}
# Turn every Morse string into the list of its symbol ids.
df['Morse Code Enc'] = df['Morse Code'].map(
    lambda code: [morse_dict[symbol] for symbol in code]
)
df.head()

Unnamed: 0,Character,Morse Code,Morse Code Enc
0,25,.-,"[0, 1]"
1,26,-...,"[1, 0, 0, 0]"
2,27,-.-.,"[1, 0, 1, 0]"
3,28,-..,"[1, 0, 0]"
4,29,.,[0]


In [8]:
# Length of the longest encoded sequence; every sequence is padded to this.
code_lengths = df['Morse Code Enc'].map(len)
max_length = code_lengths.max()
max_length

8

Pad every encoded Morse sequence at the end with the padding value (2) so that all sequences have the same length, `max_length`

In [9]:
# Right-pad ('post') each id sequence with 2 up to `max_length`, then store
# the result back as plain Python lists (one list per row).
padded = pad_sequences(
    df['Morse Code Enc'],
    maxlen=max_length,
    padding='post',
    value=2,
)
df['Morse Code Enc'] = padded.tolist()
df.head()

Unnamed: 0,Character,Morse Code,Morse Code Enc
0,25,.-,"[0, 1, 2, 2, 2, 2, 2, 2]"
1,26,-...,"[1, 0, 0, 0, 2, 2, 2, 2]"
2,27,-.-.,"[1, 0, 1, 0, 2, 2, 2, 2]"
3,28,-..,"[1, 0, 0, 2, 2, 2, 2, 2]"
4,29,.,"[0, 2, 2, 2, 2, 2, 2, 2]"


Taking Features and Labels

In [10]:
# Features: the integer character labels.
X = df['Character'].to_numpy()
# Targets: one padded list of symbol ids per character.
y = list(df['Morse Code Enc'])

Splitting the data the traditional way with `train_test_split` (not preferred here; scroll down to "Prepare Data" for the torch `DataLoader` approach)

In [11]:
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [12]:
# Convert the split data to tensors, using the same layout for train and test:
# inputs stay 1-D (one character index per sample) and targets keep one row
# per sample with one column per sequence position.
# The targets must NOT be flattened with `.view(-1, 1)`: that would give the
# training targets `sequence_length` times more rows than the training
# inputs, so inputs and targets could no longer be paired sample-by-sample.
X_train_tensor = torch.tensor(X_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.long)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [13]:
class MorseH_Model(nn.Module):
    """Feed-forward network mapping a character index to Morse symbol logits.

    A character index is embedded, passed through one hidden ReLU layer, and
    projected to ``max_length * output_size`` values that are reshaped so that
    each sequence position gets its own vector of ``output_size`` logits.

    Args:
        input_size: Number of distinct character indices (embedding rows).
        output_size: Number of symbol classes predicted per position.
        max_length: Number of positions in every output sequence.
        embedding_dim: Width of the character embedding (default 16).
        hidden_dim: Width of the hidden linear layer (default 32).

    The defaults reproduce the original fixed architecture, so checkpoints
    saved with the three-argument constructor still load unchanged.
    """

    def __init__(self, input_size, output_size, max_length,
                 embedding_dim=16, hidden_dim=32):
        super().__init__()
        # Embedding layer to represent each character as a vector.
        # The attribute keeps its existing (misspelled) name on purpose: it is
        # part of the state_dict keys ("emmbedding.weight") of saved weights.
        self.emmbedding = nn.Embedding(input_size, embedding_dim)

        # Linear layers: embedding -> hidden -> all positions' logits at once.
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_size * max_length)

        # Kept for reshaping the flat output into (positions, classes).
        self.output_size = output_size
        self.max_length = max_length

    def forward(self, x):
        """Return logits of shape ``(batch, max_length, output_size)``.

        Args:
            x: Integer tensor of character indices. Any extra dimensions are
                flattened into the batch dimension by the ``view`` below.
        """
        # Read the width from the layer itself rather than repeating a
        # literal, so it always matches how the embedding was built.
        x = self.emmbedding(x).view(-1, self.emmbedding.embedding_dim)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)

        return x.view(-1, self.max_length, self.output_size)

In [14]:
# Model dimensions: one embedding row per known character, three symbol
# classes per position, and one position per padded sequence element.
input_size, output_size, max_len = len(lb.classes_), 3, max_length
model = MorseH_Model(input_size=input_size, output_size=output_size, max_length=max_len)

# Restore previously saved parameters into the freshly built model.
# NOTE: 'morse_model_weights.pth' must already exist when this cell runs.
saved_state = torch.load('morse_model_weights.pth', weights_only=True)
model.load_state_dict(saved_state)
model

MorseH_Model(
  (emmbedding): Embedding(54, 16)
  (fc1): Linear(in_features=16, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=24, bias=True)
)

Prepare Data

In [15]:

# Full data set as tensors: character indices and their padded symbol ids.
# NOTE: this rebinds `X` and `y`, which held a NumPy array / list above.
X = torch.tensor(df['Character'].to_numpy(), dtype=torch.long)
y = torch.tensor(list(df['Morse Code Enc']), dtype=torch.long)

# Pair inputs with targets and serve them in shuffled mini-batches of 16.
data = TensorDataset(X, y)
loader = DataLoader(dataset=data, batch_size=16, shuffle=True)

Define Loss Function and Optimizer

In [16]:
# Cross-entropy over the symbol classes, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Training Loop

In [17]:
# num_epochs = 20
# for epoch in range(num_epochs):
#     model.train()
#     running_loss = 0.0
#     for inputs, targets in loader:
#         optimizer.zero_grad() # Reset gradients
#         outputs = model(inputs) # Forward Pass

#         # Reshape for loss calculation
#         targets = targets.view(-1)
#         outputs = outputs.view(-1, output_size)

#         loss = criterion(outputs, targets) # Calculate loss
#         loss.backward() # Backward Pass
#         optimizer.step() # Update weights

#         running_loss += loss.item()
    
#     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(loader):.4f}')

Evaluating Trained Model

In [18]:
# model.eval()  # set model to evaluation mode
# sample_size = 10
# correct = 0
# total = 0
# with torch.no_grad():
#     for i in range(sample_size):
#         input_sample = X[i].unsqueeze(0)
#         target_sample = y[i]

#         output = model(input_sample)
#         _, predicted = torch.max(output.data, 2)

#         total += target_sample.size(0)
#         correct += (predicted.squeeze()==target_sample).sum().item()

# accuracy = 100*correct/total
# print(f'Accuracy on sample of training set: {accuracy:.2f}%')

Predicting and Decoding the Predicted Output

In [19]:
def predict(char_index):
    """Return the predicted symbol id for each position of one character.

    Args:
        char_index: Integer label of the character to translate.

    Returns:
        A 1-D tensor holding, for each sequence position, the index of the
        highest-scoring class produced by the global ``model``.
    """
    batch = torch.tensor([char_index])
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(batch)
    # Pick the best class along the last (class) dimension.
    best = torch.max(logits, 2)
    # The batch holds a single character, so return its row.
    return best.indices[0]

def decode(prediction):
    """Convert a sequence of symbol ids into a Morse string.

    Every id equal to 2 (padding) is dropped; of the remaining ids, 0 becomes
    '.' and anything else becomes '-'.
    """
    symbols = []
    for value in prediction:
        if value == 2:
            # Padding carries no symbol.
            continue
        symbols.append('.' if value == 0 else '-')
    return ''.join(symbols)

In [20]:
def encode(word):
    """Return the integer label of every character in ``word``.

    The word is upper-cased first and each character is looked up with the
    fitted label encoder ``lb``, one character at a time.
    """
    indices = []
    for letter in word.upper():
        indices.append(lb.transform([letter])[0])
    return indices

Testing with Some Random Data

In [21]:
trancode_list = ["apple", "ball", "cat", "xmas-tree"]


def get_morse_word(word):
    """Translate one word into Morse code using the model.

    Each character is encoded, run through ``predict`` and decoded. Every
    character's code is followed by a single space, so the returned string
    ends with a trailing space.
    """
    return ''.join(decode(predict(ind)) + ' ' for ind in encode(word))


codes = list(map(get_morse_word, trancode_list))
codes

['.- .--. .--. .-.. . ',
 '-... .- .-.. .-.. ',
 '-.-. .- - ',
 '-..- -- .- ... -....- - .-. . . ']

Testing with long Sentences

In [22]:
# Sample sentences, including punctuation and an apostrophe.
trancode_sentences = [
    "Be yourself; everyone else is already taken.",
    "So many books so little time.",
    "Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.",
]
# Split each sentence into words on single spaces.
trancode_lists = []
for sentence in trancode_sentences:
    trancode_lists.append(sentence.split(' '))
trancode_lists

[['Be', 'yourself;', 'everyone', 'else', 'is', 'already', 'taken.'],
 ['So', 'many', 'books', 'so', 'little', 'time.'],
 ['Two',
  'things',
  'are',
  'infinite:',
  'the',
  'universe',
  'and',
  'human',
  'stupidity;',
  'and',
  "I'm",
  'not',
  'sure',
  'about',
  'the',
  'universe.']]

In [23]:
# Translate every sentence: each word's Morse code is followed by three extra
# spaces, and the words of one sentence are concatenated into one string.
get_morse_codes = [
    ''.join(get_morse_word(word) + '   ' for word in words)
    for words in trancode_lists
]
get_morse_codes

['-... .    -.-- --- ..- .-. ... . .-.. ..-. -.-.-.    . ...- . .-. -.-- --- -. .    . .-.. ... .    .. ...    .- .-.. .-. . .- -.. -.--    - .- -.- . -. .-.-.-    ',
 '... ---    -- .- -. -.--    -... --- --- -.- ...    ... ---    .-.. .. - - .-.. .    - .. -- . .-.-.-    ',
 '- .-- ---    - .... .. -. --. ...    .- .-. .    .. -. ..-. .. -. .. - . ---...    - .... .    ..- -. .. ...- . .-. ... .    .- -. -..    .... ..- -- .- -.    ... - ..- .--. .. -.. .. - -.-- -.-.-.    .- -. -..    .. .----. --    -. --- -    ... ..- .-. .    .- -... --- ..- -    - .... .    ..- -. .. ...- . .-. ... . .-.-.-    ']

### INFERENCE API

In [24]:
import time  # only needed if the optional delay at the bottom is enabled

take_input = input("Type your message: ")
# Translate word by word (split on whitespace); each word is followed by
# three extra spaces.
response = ''.join(get_morse_word(word) + '   ' for word in take_input.split())
# Emit one character at a time so a delay can be inserted between symbols.
for i in response:
    print(i, end="")
    # time.sleep(100*pow(10, -3)) FUN

- . -- .--. . .-. .- - ..- .-. .    

In [25]:
# Save the model's weights
torch.save(model.state_dict(), 'morse_model_weights.pth')

# Load the weights into a new model
model.load_state_dict(torch.load('morse_model_weights.pth', weights_only=True))

# Set the model to evaluation mode
model.eval()
# Save the entire model
torch.save(model, 'complete_model.pth')

In [26]:
# Display the module summary (layers and their sizes) of the current model.
model

MorseH_Model(
  (emmbedding): Embedding(54, 16)
  (fc1): Linear(in_features=16, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=24, bias=True)
)

In [27]:
# Save the model weights (state_dict) as pytorch_model.bin.
# `torch` is already imported in the imports cell at the top of the notebook,
# so it is not imported again here.
torch.save(model.state_dict(), "pytorch_model.bin")

To Use it later

In [28]:
# # Instantiate the model (ensure it has the same architecture)
# model = MorseH_Model(input_size=input_size, output_size=output_size, max_length=max_len)

# # Load the saved weights
# model.load_state_dict(torch.load("pytorch_model.bin"))

# # Set the model to evaluation mode if needed
# model.eval()