Kevin Hu
committed on
Commit
·
d719333
1
Parent(s):
286159b
enable 3-char words for fine-grained tokenization (#2210)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Performance Improvement
- rag/nlp/query.py +1 -1
rag/nlp/query.py
CHANGED
@@ -83,7 +83,7 @@ class EsQueryer:
|
|
83 |
), tks
|
84 |
|
85 |
def need_fine_grained_tokenize(tk):
|
86 |
-
if len(tk) < 4:
|
87 |
return False
|
88 |
if re.match(r"[0-9a-z\.\+#_\*-]+$", tk):
|
89 |
return False
|
|
|
83 |
), tks
|
84 |
|
85 |
def need_fine_grained_tokenize(tk):
|
86 |
+
if len(tk) < 3:
|
87 |
return False
|
88 |
if re.match(r"[0-9a-z\.\+#_\*-]+$", tk):
|
89 |
return False
|