"""Convert ``dataset.txt`` into a cleaned ``dataset.json``.

The input may be either a single JSON document (an array of objects, or one
object) or JSON Lines (one JSON object per line).  Every entry that is an
object with both an ``"input"`` and a ``"response"`` key is kept, reduced to
exactly those two keys, and written to the output file as a JSON array.
"""
import json
import sys

# NOTE(review): pandas is not used by any code visible here; the import is
# kept in case other parts of the file rely on it -- confirm and remove.
import pandas as pd  # noqa: F401

input_file = "dataset.txt"
output_file = "dataset.json"


def parse_json_lines(text):
    """Parse *text* as JSON Lines and return the decoded values.

    Blank lines are ignored.  A line that is not valid JSON is reported on
    stdout and skipped, so one bad line does not discard the whole dataset.
    """
    entries = []
    # Split on "\n" only: str.splitlines() would also break on characters
    # such as U+2028, which may legally appear inside a JSON string.
    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        try:
            entries.append(json.loads(line))
        except json.JSONDecodeError:
            print(f"Error parsing line: {line}")
    return entries


def load_entries(text):
    """Return the list of raw entries contained in *text*.

    The text is first parsed as one JSON document.  A top-level array is
    returned as-is and a top-level object is treated as a single entry.  If
    the text is not one valid document, it is parsed as JSON Lines instead.

    Raises:
        json.JSONDecodeError: the text is neither a valid JSON document nor
            contains a single parseable JSON line.
        ValueError: the document is valid JSON but not an array or object.
    """
    try:
        document = json.loads(text)
    except json.JSONDecodeError:
        entries = parse_json_lines(text)
        if not entries:
            # Nothing usable in either format: surface the original error.
            raise
        return entries

    if isinstance(document, list):
        return document
    if isinstance(document, dict):
        return [document]
    raise ValueError(
        f"expected a JSON array or object, got {type(document).__name__}"
    )


def select_valid(entries):
    """Return the valid entries, reduced to their ``input``/``response`` keys.

    An entry is valid when it is a dict containing both keys.  Anything else
    is reported on stdout and skipped.  The ``isinstance`` check comes first
    so that non-container values (e.g. a bare number) cannot raise
    ``TypeError`` on the ``in`` test.
    """
    valid = []
    for entry in entries:
        if isinstance(entry, dict) and "input" in entry and "response" in entry:
            valid.append({"input": entry["input"], "response": entry["response"]})
        else:
            print(f"Skipping invalid entry: {entry}")
    return valid


def main():
    """Read ``input_file``, convert it, and write ``output_file``.

    Exits with status 1 (without touching the output file) when the input
    cannot be parsed.  I/O errors such as a missing input file propagate as
    ``OSError``.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Keep the try body to the parse step only, so that unrelated errors are
    # not misreported as a JSON format problem.
    try:
        entries = load_entries(content)
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        print(f"Error parsing {input_file}: {e}")
        print(f"Please check the JSON format in {input_file}")
        sys.exit(1)

    valid_data = select_valid(entries)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(valid_data, f, indent=4)

    print(f"Converted {len(valid_data)} entries to {output_file}")


if __name__ == "__main__":
    main()