# remiai3's picture
# Upload 8 files
# f9396fc verified
import json
import pandas as pd
# Convert dataset.txt (one JSON object per line) into a JSON array file.
input_file = "dataset.txt"
output_file = "dataset.json"


def parse_dataset_lines(lines):
    """Collect valid ``input``/``response`` pairs from JSON Lines text.

    Args:
        lines: Iterable of strings, each expected to hold one JSON object.

    Returns:
        A list of ``{"input": ..., "response": ...}`` dicts in input order.
        Blank lines are ignored. Lines that are not valid JSON, or that do
        not decode to an object carrying both keys, are reported on stdout
        and skipped.
    """
    entries = []
    for line in lines:
        text = line.strip()
        if not text:
            # A blank line (e.g. a trailing newline) is not a data error.
            continue
        try:
            entry = json.loads(text)
        except json.JSONDecodeError:
            print(f"Error parsing line: {text}")
            continue
        # Valid JSON may still be a list, string, number or null. Without
        # the dict check, `"input" in entry` raises TypeError for numbers
        # and null, and does substring/element matching for strings/lists.
        if isinstance(entry, dict) and "input" in entry and "response" in entry:
            entries.append({
                "input": entry["input"],
                "response": entry["response"],
            })
        else:
            print(f"Skipping invalid entry: {text}")
    return entries


def convert_jsonl_dataset(input_path, output_path):
    """Read a JSON Lines file and write its valid entries as a JSON array.

    Args:
        input_path: Path of the UTF-8 text file to read, one JSON object
            per line.
        output_path: Path of the JSON file to write (overwritten).

    Returns:
        The list of entries that was written to ``output_path``.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        entries = parse_dataset_lines(f)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(entries, f, indent=4)
    print(f"Converted {len(entries)} entries to {output_path}")
    return entries


if __name__ == "__main__":
    data = convert_jsonl_dataset(input_file, output_file)
import json
# Convert dataset.txt (a single JSON array) into a cleaned JSON array file.
input_file = "dataset.txt"
output_file = "dataset.json"


def filter_valid_entries(entries):
    """Keep only entries that are objects with ``input`` and ``response``.

    Args:
        entries: Iterable of decoded JSON values.

    Returns:
        A list of ``{"input": ..., "response": ...}`` dicts in input order.
        Every other value is reported on stdout and skipped.
    """
    valid = []
    for entry in entries:
        if isinstance(entry, dict) and "input" in entry and "response" in entry:
            valid.append({
                "input": entry["input"],
                "response": entry["response"],
            })
        else:
            print(f"Skipping invalid entry: {entry}")
    return valid


def convert_json_array_dataset(input_path, output_path):
    """Read a JSON array file and write its valid entries to a JSON file.

    Args:
        input_path: Path of the UTF-8 file holding one JSON array.
        output_path: Path of the JSON file to write (overwritten).

    Returns:
        The list of entries that was written to ``output_path``.

    Raises:
        SystemExit: With code 1 when the file is not valid JSON or its
            top-level value is not an array.
    """
    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            content = f.read()
        parsed = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"Error parsing {input_path}: {e}")
        print(f"Please check the JSON format in {input_path}")
        # `exit()` is injected by the `site` module for interactive use and
        # may be missing (python -S, frozen builds); SystemExit always works.
        raise SystemExit(1) from e

    if not isinstance(parsed, list):
        # Iterating a dict or string would "skip" every key/character and
        # write an empty dataset; a number or null would raise TypeError.
        print(
            f"Error parsing {input_path}: expected a JSON array, "
            f"got {type(parsed).__name__}"
        )
        print(f"Please check the JSON format in {input_path}")
        raise SystemExit(1)

    valid = filter_valid_entries(parsed)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(valid, f, indent=4)
    print(f"Converted {len(valid)} entries to {output_path}")
    return valid


if __name__ == "__main__":
    valid_data = convert_json_array_dataset(input_file, output_file)