# token prep.py
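# The script below calls a user-supplied custom_tokenizer that is never
# defined in this file. The following is a minimal placeholder sketch
# (an assumption, not the author's tokenizer): lowercase the text and
# split on whitespace. Replace it with your own implementation.
def custom_tokenizer(text):
    return text.lower().split()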
# Load your data from a text file
with open('C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained\\New folder (3)\\questions.txt', 'r', encoding='utf-8') as f:
    data = f.read()

# Split your data into individual questions, dropping blank lines
questions = [line for line in data.split('\n') if line.strip()]

# Tokenize each question using your custom tokenizer
tokenized_questions = []
for question in questions:
    tokens = custom_tokenizer(question)
    tokenized_questions.append(tokens)
# Now you can use tokenized_questions to train your model
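# A minimal sketch of that next step (an assumption, not part of the
# original script): build a vocabulary from the tokens and map each
# question to a list of integer IDs, the form most models consume.
vocab = {'<unk>': 0}
for tokens in tokenized_questions:
    for token in tokens:
        if token not in vocab:
            vocab[token] = len(vocab)

# Encode each tokenized question as IDs, falling back to <unk> (ID 0)
# for tokens not seen when the vocabulary was built.
encoded_questions = [[vocab.get(token, 0) for token in tokens]
                     for tokens in tokenized_questions]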