balibabu committed on
Commit
f28c040
·
1 Parent(s): eb0c846

feat: Add delimiter field to naive parsing method #1909 (#1911)

Browse files

### What problem does this PR solve?

feat: Add delimiter field to naive parsing method #1909

### Type of change


- [x] New Feature (non-breaking change which adds functionality)

web/src/components/chunk-method-modal/index.tsx CHANGED
@@ -22,6 +22,7 @@ import React, { useEffect, useMemo } from 'react';
22
  import { useFetchParserListOnMount } from './hooks';
23
 
24
  import { useTranslate } from '@/hooks/common-hooks';
 
25
  import EntityTypesItem from '../entity-types-item';
26
  import LayoutRecognize from '../layout-recognize';
27
  import ParseConfiguration, {
@@ -268,7 +269,12 @@ const ChunkMethodModal: React.FC<IProps> = ({
268
  }
269
  </Form.Item>
270
  )}
271
- {showMaxTokenNumber && <MaxTokenNumber></MaxTokenNumber>}
 
 
 
 
 
272
  {showRaptorParseConfiguration(selectedTag) && (
273
  <ParseConfiguration></ParseConfiguration>
274
  )}
 
22
  import { useFetchParserListOnMount } from './hooks';
23
 
24
  import { useTranslate } from '@/hooks/common-hooks';
25
+ import Delimiter from '../delimiter';
26
  import EntityTypesItem from '../entity-types-item';
27
  import LayoutRecognize from '../layout-recognize';
28
  import ParseConfiguration, {
 
269
  }
270
  </Form.Item>
271
  )}
272
+ {showMaxTokenNumber && (
273
+ <>
274
+ <MaxTokenNumber></MaxTokenNumber>
275
+ <Delimiter></Delimiter>
276
+ </>
277
+ )}
278
  {showRaptorParseConfiguration(selectedTag) && (
279
  <ParseConfiguration></ParseConfiguration>
280
  )}
web/src/components/delimiter.tsx ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Form, Input } from 'antd';
2
+ import { useTranslation } from 'react-i18next';
3
+
4
/**
 * Controlled-input props for `DelimiterInput`.
 *
 * The component is rendered without explicit props inside a `Form.Item`,
 * so both fields are optional and are expected to be injected by the form.
 */
interface IProps {
  /** Current delimiter string; may contain real newline characters. */
  value?: string | undefined;
  /** Receives the edited delimiter with visible `\n` pairs turned into real newlines. */
  onChange?: (val: string | undefined) => void;
}

/**
 * Single-line text input for editing a delimiter string.
 *
 * Real newline characters in `value` are displayed as the two visible
 * characters `\n`; on every edit the visible `\n` pairs in the typed text are
 * converted back to real newlines before `onChange` is invoked.
 */
const DelimiterInput = ({ value, onChange }: IProps) => {
  // Display form: newline character -> backslash followed by "n".
  // Stays undefined when no value has been provided.
  const displayed = value?.replace(/\n/g, '\\n');

  // Stored form: backslash followed by "n" -> newline character.
  const emitChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    if (!onChange) {
      return;
    }
    onChange(event.target.value.replace(/\\n/g, '\n'));
  };

  return <Input value={displayed} onChange={emitChange} />;
};
18
+
19
/**
 * Required form field bound to `parser_config.delimiter`.
 *
 * Wraps `DelimiterInput` in a `Form.Item` whose label comes from the
 * `knowledgeDetails.delimiter` translation key and which is pre-filled with a
 * default set of delimiter characters.
 */
function Delimiter() {
  const { t } = useTranslation();

  // Nested field path inside the form values object.
  const fieldPath = ['parser_config', 'delimiter'];
  // The field must not be left empty.
  const validation = [{ required: true }];
  // Default delimiters; the leading `\\n` is a backslash followed by "n".
  const defaultDelimiters = `\\n!?;。;!?`;

  return (
    <Form.Item
      name={fieldPath}
      label={t('knowledgeDetails.delimiter')}
      initialValue={defaultDelimiters}
      rules={validation}
    >
      <DelimiterInput />
    </Form.Item>
  );
}
33
+
34
+ export default Delimiter;
web/src/locales/en.ts CHANGED
@@ -148,6 +148,7 @@ export default {
148
  rerankTip: `If it's empty. It uses embeddings of query and chunks to compuste vector cosine similarity. Otherwise, it uses rerank score in place of vector cosine similarity.`,
149
  topK: 'Top-K',
150
  topKTip: `K chunks will be fed into rerank models.`,
 
151
  },
152
  knowledgeConfiguration: {
153
  titleDescription:
 
148
  rerankTip: `If it's empty. It uses embeddings of query and chunks to compuste vector cosine similarity. Otherwise, it uses rerank score in place of vector cosine similarity.`,
149
  topK: 'Top-K',
150
  topKTip: `K chunks will be fed into rerank models.`,
151
+ delimiter: `Delimiter`,
152
  },
153
  knowledgeConfiguration: {
154
  titleDescription:
web/src/locales/zh-traditional.ts CHANGED
@@ -144,6 +144,7 @@ export default {
144
  rerankTip: `如果是空的。它使用查詢和塊的嵌入來構成矢量餘弦相似性。否則,它使用rerank評分代替矢量餘弦相似性。`,
145
  topK: 'Top-K',
146
  topKTip: `K塊將被送入Rerank型號。`,
 
147
  },
148
  knowledgeConfiguration: {
149
  titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',
 
144
  rerankTip: `如果是空的。它使用查詢和塊的嵌入來構成矢量餘弦相似性。否則,它使用rerank評分代替矢量餘弦相似性。`,
145
  topK: 'Top-K',
146
  topKTip: `K塊將被送入Rerank型號。`,
147
+ delimiter: `分段標識符`,
148
  },
149
  knowledgeConfiguration: {
150
  titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',
web/src/locales/zh.ts CHANGED
@@ -145,6 +145,7 @@ export default {
145
  rerankTip: `如果是空的。它使用查询和块的嵌入来构成矢量余弦相似性。否则,它使用rerank评分代替矢量余弦相似性。`,
146
  topK: 'Top-K',
147
  topKTip: `K块将被送入Rerank型号。`,
 
148
  },
149
  knowledgeConfiguration: {
150
  titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',
 
145
  rerankTip: `如果是空的。它使用查询和块的嵌入来构成矢量余弦相似性。否则,它使用rerank评分代替矢量余弦相似性。`,
146
  topK: 'Top-K',
147
  topKTip: `K块将被送入Rerank型号。`,
148
+ delimiter: `分段标识符`,
149
  },
150
  knowledgeConfiguration: {
151
  titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',
web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import EntityTypesItem from '@/components/entity-types-item';
2
  import LayoutRecognize from '@/components/layout-recognize';
3
  import MaxTokenNumber from '@/components/max-token-number';
@@ -111,6 +112,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
111
  {parserId === 'naive' && (
112
  <>
113
  <MaxTokenNumber></MaxTokenNumber>
 
114
  <LayoutRecognize></LayoutRecognize>
115
  </>
116
  )}
 
1
+ import Delimiter from '@/components/delimiter';
2
  import EntityTypesItem from '@/components/entity-types-item';
3
  import LayoutRecognize from '@/components/layout-recognize';
4
  import MaxTokenNumber from '@/components/max-token-number';
 
112
  {parserId === 'naive' && (
113
  <>
114
  <MaxTokenNumber></MaxTokenNumber>
115
+ <Delimiter></Delimiter>
116
  <LayoutRecognize></LayoutRecognize>
117
  </>
118
  )}