ohalkhateeb committed on
Commit
1a34c4b
·
verified ·
1 Parent(s): 6088560

Update preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +1 -1
preprocess.py CHANGED
@@ -25,7 +25,7 @@ def preprocess(legislation_dir="./legislation"):
25
  for filename in os.listdir(directory):
26
  if filename.endswith(".html"):
27
  file_path = os.path.join(directory, filename)
28
- with open(file_path, "r", encoding="utf-8") as f:
29
  soup = BeautifulSoup(f, "html.parser")
30
  text = soup.get_text(separator=" ", strip=True)
31
  documents.append(text)
 
25
  for filename in os.listdir(directory):
26
  if filename.endswith(".html"):
27
  file_path = os.path.join(directory, filename)
28
+ with open(file_path, "r", encoding="latin-1") as f:
29
  soup = BeautifulSoup(f, "html.parser")
30
  text = soup.get_text(separator=" ", strip=True)
31
  documents.append(text)