Kevin Hu committed on
Commit
b80e2f3
·
1 Parent(s): d73a5e2

Fix csv for TAG. (#4454)

Browse files

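### What problem does this PR solve?

Fixes CSV parsing in `rag/app/tag.py`: the tab-versus-comma delimiter guess is removed, lines are read with the default `csv.reader` settings, and each row's cells are stripped of surrounding whitespace with empty cells dropped before checking that the row has exactly two columns.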

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed (1) hide show
  1. rag/app/tag.py +2 -2
rag/app/tag.py CHANGED
@@ -91,14 +91,14 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
91
  callback(0.1, "Start to parse.")
92
  txt = get_text(filename, binary)
93
  lines = txt.split("\n")
94
- delimiter = "\t" if any("\t" in line for line in lines) else ","
95
 
96
  fails = []
97
  content = ""
98
  res = []
99
- reader = csv.reader(lines, delimiter=delimiter)
100
 
101
  for i, row in enumerate(reader):
 
102
  if len(row) != 2:
103
  content += "\n" + lines[i]
104
  elif len(row) == 2:
 
91
  callback(0.1, "Start to parse.")
92
  txt = get_text(filename, binary)
93
  lines = txt.split("\n")
 
94
 
95
  fails = []
96
  content = ""
97
  res = []
98
+ reader = csv.reader(lines)
99
 
100
  for i, row in enumerate(reader):
101
+ row = [r.strip() for r in row if r.strip()]
102
  if len(row) != 2:
103
  content += "\n" + lines[i]
104
  elif len(row) == 2: