Spaces:
Sleeping
Sleeping
Update preprocess.py
Browse files
- preprocess.py +1 -1
preprocess.py
CHANGED
@@ -25,7 +25,7 @@ def preprocess(legislation_dir="./legislation"):
|
|
25 |
for filename in os.listdir(directory):
|
26 |
if filename.endswith(".html"):
|
27 |
file_path = os.path.join(directory, filename)
|
28 |
-
with open(file_path, "r", encoding="
|
29 |
soup = BeautifulSoup(f, "html.parser")
|
30 |
text = soup.get_text(separator=" ", strip=True)
|
31 |
documents.append(text)
|
|
|
25 |
for filename in os.listdir(directory):
|
26 |
if filename.endswith(".html"):
|
27 |
file_path = os.path.join(directory, filename)
|
28 |
+
with open(file_path, "r", encoding="latin-1") as f:
|
29 |
soup = BeautifulSoup(f, "html.parser")
|
30 |
text = soup.get_text(separator=" ", strip=True)
|
31 |
documents.append(text)
|