Kevin Hu
committed on
Commit
·
b80e2f3
1
Parent(s):
d73a5e2
Fix csv for TAG. (#4454)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- rag/app/tag.py +2 -2
rag/app/tag.py
CHANGED
@@ -91,14 +91,14 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
|
|
91 |
callback(0.1, "Start to parse.")
|
92 |
txt = get_text(filename, binary)
|
93 |
lines = txt.split("\n")
|
94 |
-
delimiter = "\t" if any("\t" in line for line in lines) else ","
|
95 |
|
96 |
fails = []
|
97 |
content = ""
|
98 |
res = []
|
99 |
-
reader = csv.reader(lines, delimiter=delimiter)
|
100 |
|
101 |
for i, row in enumerate(reader):
|
|
|
102 |
if len(row) != 2:
|
103 |
content += "\n" + lines[i]
|
104 |
elif len(row) == 2:
|
|
|
91 |
callback(0.1, "Start to parse.")
|
92 |
txt = get_text(filename, binary)
|
93 |
lines = txt.split("\n")
|
|
|
94 |
|
95 |
fails = []
|
96 |
content = ""
|
97 |
res = []
|
98 |
+
reader = csv.reader(lines)
|
99 |
|
100 |
for i, row in enumerate(reader):
|
101 |
+
row = [r.strip() for r in row if r.strip()]
|
102 |
if len(row) != 2:
|
103 |
content += "\n" + lines[i]
|
104 |
elif len(row) == 2:
|