balibabu committed on
Commit
4b36338
·
1 Parent(s): eb7da20

Feat: Add tag_kwd parameter to chunk configuration modal #4368 (#4414)

Browse files

### What problem does this PR solve?

Feat: Add tag_kwd parameter to chunk configuration modal #4368

### Type of change


- [x] New Feature (non-breaking change which adds functionality)

web/src/components/parse-configuration/index.tsx CHANGED
@@ -26,6 +26,12 @@ export const showRaptorParseConfiguration = (parserId: string) => {
26
  return !excludedParseMethods.includes(parserId);
27
  };
28
 
 
 
 
 
 
 
29
  // The three types "table", "resume" and "one" do not display this configuration.
30
  const ParseConfiguration = () => {
31
  const form = Form.useFormInstance();
 
26
  return !excludedParseMethods.includes(parserId);
27
  };
28
 
29
+ export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];
30
+
31
+ export const showTagItems = (parserId: string) => {
32
+ return !excludedTagParseMethods.includes(parserId);
33
+ };
34
+
35
  // The three types "table", "resume" and "one" do not display this configuration.
36
  const ParseConfiguration = () => {
37
  const form = Form.useFormInstance();
web/src/locales/en.ts CHANGED
@@ -325,6 +325,19 @@ The above is the content you need to summarize.`,
325
  searchTags: 'Search tags',
326
  tagCloud: 'Cloud',
327
  tagTable: 'Table',
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  },
329
  chunk: {
330
  chunk: 'Chunk',
 
325
  searchTags: 'Search tags',
326
  tagCloud: 'Cloud',
327
  tagTable: 'Table',
328
+ tagSet: 'Tag set',
329
+ tagSetTip: `
330
+ <p> Selecting 'Tag' knowledge bases helps to tag every chunk. </p>
331
+ <p>Queries to those chunks will also carry tags.</p>
332
+ This procedure will improve precision of retrieval by adding more information to the dataset, especially when there's a large set of chunks.
333
+ <p>Difference between tags and keywords:</p>
334
+ <ul>
335
+ <li>Tags form a closed set that is defined and managed by the user, while keywords form an open set.</li>
336
+ <li>You need to upload tag sets with samples prior to use.</li>
337
+ <li>Keywords are generated by an LLM, which is expensive and time-consuming.</li>
338
+ </ul>
339
+ `,
340
+ topnTags: 'Top-N Tags',
341
  },
342
  chunk: {
343
  chunk: 'Chunk',
web/src/locales/zh-traditional.ts CHANGED
@@ -309,6 +309,19 @@ export default {
309
  searchTags: '搜尋標籤',
310
  tagCloud: '雲端',
311
  tagTable: '表',
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  },
313
  chunk: {
314
  chunk: '解析塊',
 
309
  searchTags: '搜尋標籤',
310
  tagCloud: '雲端',
311
  tagTable: '表',
312
+ tagSet: '標籤庫',
313
+ topnTags: 'Top-N 標籤',
314
+ tagSetTip: `
315
+ <p> 選擇「標籤」知識庫有助於標記每個區塊。 </p>
316
+ <p>對這些區塊的查詢也將帶有標籤。</p>
317
+ 此過程將透過向資料集添加更多資訊來提高檢索精度,特別是當存在大量區塊時。
318
+ <p>標籤和關鍵字的差異:</p>
319
+ <ul>
320
+ <li>標籤是一個閉集,由使用者定義和操作,而關鍵字是一個開集。</li>
321
+ <li>您需要在使用前上傳包含範例的標籤集。</li>
322
+ <li>關鍵字由 LLM 生成,既昂貴又耗時。</li>
323
+ </ul>
324
+ `,
325
  },
326
  chunk: {
327
  chunk: '解析塊',
web/src/locales/zh.ts CHANGED
@@ -326,6 +326,19 @@ export default {
326
  searchTags: '搜索标签',
327
  tagCloud: '云',
328
  tagTable: '表',
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  },
330
  chunk: {
331
  chunk: '解析块',
 
326
  searchTags: '搜索标签',
327
  tagCloud: '云',
328
  tagTable: '表',
329
+ tagSet: '标签库',
330
+ topnTags: 'Top-N 标签',
331
+ tagSetTip: `
332
+ <p> 选择“标签”知识库有助于标记每个块。 </p>
333
+ <p>对这些块的查询也将带有标签。 </p>
334
+ 此过程将通过向数据集添加更多信息来提高检索的准确性,尤其是在存在大量块的情况下。
335
+ <p>标签和关键字之间的区别:</p>
336
+ <ul>
337
+ <li>标签是一个由用户定义和操作的封闭集,而关键字是一个开放集。 </li>
338
+ <li>您需要在使用前上传带有样本的标签集。 </li>
339
+ <li>关键字由 LLM 生成,这既昂贵又耗时。 </li>
340
+ </ul>
341
+ `,
342
  },
343
  chunk: {
344
  chunk: '解析块',
web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx CHANGED
@@ -13,6 +13,7 @@ type FieldType = {
13
  interface kFProps {
14
  doc_id: string;
15
  chunkId: string | undefined;
 
16
  }
17
 
18
  const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
@@ -21,15 +22,19 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
21
  hideModal,
22
  onOk,
23
  loading,
 
24
  }) => {
25
  const [form] = Form.useForm();
26
  const [checked, setChecked] = useState(false);
27
  const [keywords, setKeywords] = useState<string[]>([]);
28
  const [question, setQuestion] = useState<string[]>([]);
 
29
  const { removeChunk } = useDeleteChunkByIds();
30
  const { data } = useFetchChunk(chunkId);
31
  const { t } = useTranslation();
32
 
 
 
33
  useEffect(() => {
34
  if (data?.code === 0) {
35
  const {
@@ -37,16 +42,19 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
37
  important_kwd = [],
38
  available_int,
39
  question_kwd = [],
 
40
  } = data.data;
41
  form.setFieldsValue({ content: content_with_weight });
42
  setKeywords(important_kwd);
43
  setQuestion(question_kwd);
 
44
  setChecked(available_int !== 0);
45
  }
46
 
47
  if (!chunkId) {
48
  setKeywords([]);
49
  setQuestion([]);
 
50
  form.setFieldsValue({ content: undefined });
51
  }
52
  }, [data, form, chunkId]);
@@ -58,6 +66,7 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
58
  content: values.content,
59
  keywords, // keywords
60
  question_kwd: question,
 
61
  available_int: checked ? 1 : 0, // available_int
62
  });
63
  } catch (errorInfo) {
@@ -105,6 +114,12 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
105
  </div>
106
  <EditTag tags={question} setTags={setQuestion} />
107
  </section>
 
 
 
 
 
 
108
  {chunkId && (
109
  <section>
110
  <Divider></Divider>
 
13
  interface kFProps {
14
  doc_id: string;
15
  chunkId: string | undefined;
16
+ parserId: string;
17
  }
18
 
19
  const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
 
22
  hideModal,
23
  onOk,
24
  loading,
25
+ parserId,
26
  }) => {
27
  const [form] = Form.useForm();
28
  const [checked, setChecked] = useState(false);
29
  const [keywords, setKeywords] = useState<string[]>([]);
30
  const [question, setQuestion] = useState<string[]>([]);
31
+ const [tagKeyWords, setTagKeyWords] = useState<string[]>([]);
32
  const { removeChunk } = useDeleteChunkByIds();
33
  const { data } = useFetchChunk(chunkId);
34
  const { t } = useTranslation();
35
 
36
+ const isTagParser = parserId === 'tag';
37
+
38
  useEffect(() => {
39
  if (data?.code === 0) {
40
  const {
 
42
  important_kwd = [],
43
  available_int,
44
  question_kwd = [],
45
+ tag_kwd = [],
46
  } = data.data;
47
  form.setFieldsValue({ content: content_with_weight });
48
  setKeywords(important_kwd);
49
  setQuestion(question_kwd);
50
+ setTagKeyWords(tag_kwd);
51
  setChecked(available_int !== 0);
52
  }
53
 
54
  if (!chunkId) {
55
  setKeywords([]);
56
  setQuestion([]);
57
+ setTagKeyWords([]);
58
  form.setFieldsValue({ content: undefined });
59
  }
60
  }, [data, form, chunkId]);
 
66
  content: values.content,
67
  keywords, // keywords
68
  question_kwd: question,
69
+ tag_kwd: tagKeyWords,
70
  available_int: checked ? 1 : 0, // available_int
71
  });
72
  } catch (errorInfo) {
 
114
  </div>
115
  <EditTag tags={question} setTags={setQuestion} />
116
  </section>
117
+ {isTagParser && (
118
+ <section className="mt-4">
119
+ <p className="mb-2">{t('knowledgeConfiguration.tagName')} </p>
120
+ <EditTag tags={tagKeyWords} setTags={setTagKeyWords} />
121
+ </section>
122
+ )}
123
  {chunkId && (
124
  <section>
125
  <Divider></Divider>
web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts CHANGED
@@ -100,11 +100,13 @@ export const useUpdateChunk = () => {
100
  keywords,
101
  available_int,
102
  question_kwd,
 
103
  }: {
104
  content: string;
105
  keywords: string;
106
  available_int: number;
107
  question_kwd: string;
 
108
  }) => {
109
  const code = await createChunk({
110
  content_with_weight: content,
@@ -113,6 +115,7 @@ export const useUpdateChunk = () => {
113
  important_kwd: keywords, // keywords
114
  available_int,
115
  question_kwd,
 
116
  });
117
 
118
  if (code === 0) {
 
100
  keywords,
101
  available_int,
102
  question_kwd,
103
+ tag_kwd,
104
  }: {
105
  content: string;
106
  keywords: string;
107
  available_int: number;
108
  question_kwd: string;
109
+ tag_kwd: string;
110
  }) => {
111
  const code = await createChunk({
112
  content_with_weight: content,
 
115
  important_kwd: keywords, // keywords
116
  available_int,
117
  question_kwd,
118
+ tag_kwd,
119
  });
120
 
121
  if (code === 0) {
web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx CHANGED
@@ -193,6 +193,7 @@ const Chunk = () => {
193
  visible={chunkUpdatingVisible}
194
  loading={chunkUpdatingLoading}
195
  onOk={onChunkUpdatingOk}
 
196
  />
197
  )}
198
  <KnowledgeGraphModal></KnowledgeGraphModal>
 
193
  visible={chunkUpdatingVisible}
194
  loading={chunkUpdatingLoading}
195
  onOk={onChunkUpdatingOk}
196
+ parserId={documentInfo.parser_id}
197
  />
198
  )}
199
  <KnowledgeGraphModal></KnowledgeGraphModal>
web/src/pages/add-knowledge/components/knowledge-file/parsing-action-cell/index.tsx CHANGED
@@ -79,7 +79,7 @@ const ParsingActionCell = ({
79
  <Dropdown
80
  menu={{ items: chunkItems }}
81
  trigger={['click']}
82
- disabled={isRunning}
83
  >
84
  <Button type="text" className={styles.iconButton}>
85
  <ToolOutlined size={20} />
 
79
  <Dropdown
80
  menu={{ items: chunkItems }}
81
  trigger={['click']}
82
+ disabled={isRunning || record.parser_id === 'tag'}
83
  >
84
  <Button type="text" className={styles.iconButton}>
85
  <ToolOutlined size={20} />
web/src/pages/add-knowledge/components/knowledge-setting/category-panel.tsx CHANGED
@@ -9,7 +9,7 @@ import styles from './index.less';
9
  import { TagTabs } from './tag-tabs';
10
  import { ImageMap } from './utils';
11
 
12
- const { Title, Text } = Typography;
13
 
14
  const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
15
  const parserList = useSelectParserList();
@@ -37,15 +37,15 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
37
  <section className={styles.categoryPanelWrapper}>
38
  {imageList.length > 0 ? (
39
  <>
40
- <Title level={5} className={styles.topTitle}>
41
  {`"${item.title}" ${t('methodTitle')}`}
42
- </Title>
43
  <p
44
  dangerouslySetInnerHTML={{
45
  __html: DOMPurify.sanitize(item.description),
46
  }}
47
  ></p>
48
- <Title level={5}>{`"${item.title}" ${t('methodExamples')}`}</Title>
49
  <Text>{t('methodExamplesDescription')}</Text>
50
  <Row gutter={[10, 10]} className={styles.imageRow}>
51
  {imageList.map((x) => (
@@ -58,9 +58,9 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
58
  </Col>
59
  ))}
60
  </Row>
61
- <Title level={5}>
62
  {item.title} {t('dialogueExamplesTitle')}
63
- </Title>
64
  <Divider></Divider>
65
  </>
66
  ) : (
 
9
  import { TagTabs } from './tag-tabs';
10
  import { ImageMap } from './utils';
11
 
12
+ const { Text } = Typography;
13
 
14
  const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
15
  const parserList = useSelectParserList();
 
37
  <section className={styles.categoryPanelWrapper}>
38
  {imageList.length > 0 ? (
39
  <>
40
+ <h5 className="font-semibold text-base mt-0 mb-1">
41
  {`"${item.title}" ${t('methodTitle')}`}
42
+ </h5>
43
  <p
44
  dangerouslySetInnerHTML={{
45
  __html: DOMPurify.sanitize(item.description),
46
  }}
47
  ></p>
48
+ <h5 className="font-semibold text-base mt-4 mb-1">{`"${item.title}" ${t('methodExamples')}`}</h5>
49
  <Text>{t('methodExamplesDescription')}</Text>
50
  <Row gutter={[10, 10]} className={styles.imageRow}>
51
  {imageList.map((x) => (
 
58
  </Col>
59
  ))}
60
  </Row>
61
+ <h5 className="font-semibold text-base mt-4 mb-1">
62
  {item.title} {t('dialogueExamplesTitle')}
63
+ </h5>
64
  <Divider></Divider>
65
  </>
66
  ) : (
web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx CHANGED
@@ -11,6 +11,7 @@ import MaxTokenNumber from '@/components/max-token-number';
11
  import PageRank from '@/components/page-rank';
12
  import ParseConfiguration, {
13
  showRaptorParseConfiguration,
 
14
  } from '@/components/parse-configuration';
15
  import { useTranslate } from '@/hooks/common-hooks';
16
  import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
@@ -23,6 +24,7 @@ import {
23
  useSubmitKnowledgeConfiguration,
24
  } from './hooks';
25
  import styles from './index.less';
 
26
 
27
  const { Option } = Select;
28
 
@@ -146,6 +148,8 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
146
  {showRaptorParseConfiguration(parserId) && (
147
  <ParseConfiguration></ParseConfiguration>
148
  )}
 
 
149
  </>
150
  );
151
  }}
 
11
  import PageRank from '@/components/page-rank';
12
  import ParseConfiguration, {
13
  showRaptorParseConfiguration,
14
+ showTagItems,
15
  } from '@/components/parse-configuration';
16
  import { useTranslate } from '@/hooks/common-hooks';
17
  import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
 
24
  useSubmitKnowledgeConfiguration,
25
  } from './hooks';
26
  import styles from './index.less';
27
+ import { TagItems } from './tag-item';
28
 
29
  const { Option } = Select;
30
 
 
148
  {showRaptorParseConfiguration(parserId) && (
149
  <ParseConfiguration></ParseConfiguration>
150
  )}
151
+
152
+ {showTagItems(parserId) && <TagItems></TagItems>}
153
  </>
154
  );
155
  }}
web/src/pages/add-knowledge/components/knowledge-setting/tag-item.tsx ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useFetchKnowledgeList } from '@/hooks/knowledge-hooks';
2
+ import { UserOutlined } from '@ant-design/icons';
3
+ import {
4
+ Avatar,
5
+ Divider,
6
+ Flex,
7
+ Form,
8
+ InputNumber,
9
+ Select,
10
+ Slider,
11
+ Space,
12
+ } from 'antd';
13
+ import DOMPurify from 'dompurify';
14
+ import { useTranslation } from 'react-i18next';
15
+
16
+ export const TagSetItem = () => {
17
+ const { t } = useTranslation();
18
+
19
+ const { list: knowledgeList } = useFetchKnowledgeList(true);
20
+
21
+ const knowledgeOptions = knowledgeList
22
+ .filter((x) => x.parser_id === 'tag')
23
+ .map((x) => ({
24
+ label: (
25
+ <Space>
26
+ <Avatar size={20} icon={<UserOutlined />} src={x.avatar} />
27
+ {x.name}
28
+ </Space>
29
+ ),
30
+ value: x.id,
31
+ }));
32
+
33
+ return (
34
+ <Form.Item
35
+ label={t('knowledgeConfiguration.tagSet')}
36
+ name={['parser_config', 'tag_kb_ids']}
37
+ tooltip={
38
+ <div
39
+ dangerouslySetInnerHTML={{
40
+ __html: DOMPurify.sanitize(t('knowledgeConfiguration.tagSetTip')),
41
+ }}
42
+ ></div>
43
+ }
44
+ rules={[
45
+ {
46
+ message: t('chat.knowledgeBasesMessage'),
47
+ type: 'array',
48
+ },
49
+ ]}
50
+ >
51
+ <Select
52
+ mode="multiple"
53
+ options={knowledgeOptions}
54
+ placeholder={t('chat.knowledgeBasesMessage')}
55
+ ></Select>
56
+ </Form.Item>
57
+ );
58
+ };
59
+
60
+ export const TopNTagsItem = () => {
61
+ const { t } = useTranslation();
62
+
63
+ return (
64
+ <Form.Item label={t('knowledgeConfiguration.topnTags')}>
65
+ <Flex gap={20} align="center">
66
+ <Flex flex={1}>
67
+ <Form.Item
68
+ name={['parser_config', 'topn_tags']}
69
+ noStyle
70
+ initialValue={3}
71
+ >
72
+ <Slider max={10} min={1} style={{ width: '100%' }} />
73
+ </Form.Item>
74
+ </Flex>
75
+ <Form.Item name={['parser_config', 'topn_tags']} noStyle>
76
+ <InputNumber max={10} min={1} />
77
+ </Form.Item>
78
+ </Flex>
79
+ </Form.Item>
80
+ );
81
+ };
82
+
83
+ export function TagItems() {
84
+ return (
85
+ <>
86
+ <Divider />
87
+ <TagSetItem></TagSetItem>
88
+ <Form.Item noStyle dependencies={[['parser_config', 'tag_kb_ids']]}>
89
+ {({ getFieldValue }) => {
90
+ const ids: string[] = getFieldValue(['parser_config', 'tag_kb_ids']);
91
+
92
+ return (
93
+ Array.isArray(ids) &&
94
+ ids.length > 0 && <TopNTagsItem></TopNTagsItem>
95
+ );
96
+ }}
97
+ </Form.Item>
98
+ <Divider />
99
+ </>
100
+ );
101
+ }
web/src/pages/add-knowledge/components/knowledge-setting/tag-table/index.tsx CHANGED
@@ -108,7 +108,7 @@ export function TagTable() {
108
  variant="ghost"
109
  onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
110
  >
111
- {t('knowledgeConfiguration.tag')}
112
  <ArrowUpDown />
113
  </Button>
114
  );
 
108
  variant="ghost"
109
  onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
110
  >
111
+ {t('knowledgeConfiguration.tagName')}
112
  <ArrowUpDown />
113
  </Button>
114
  );
web/src/pages/add-knowledge/components/knowledge-setting/tag-word-cloud.tsx CHANGED
@@ -1,12 +1,23 @@
1
  import { useFetchTagList } from '@/hooks/knowledge-hooks';
2
  import { Chart } from '@antv/g2';
3
- import { useCallback, useEffect, useRef } from 'react';
 
4
 
5
  export function TagWordCloud() {
6
  const domRef = useRef<HTMLDivElement>(null);
7
  let chartRef = useRef<Chart>();
8
  const { list } = useFetchTagList();
9
 
 
 
 
 
 
 
 
 
 
 
10
  const renderWordCloud = useCallback(() => {
11
  if (domRef.current) {
12
  chartRef.current = new Chart({ container: domRef.current });
@@ -14,19 +25,30 @@ export function TagWordCloud() {
14
  chartRef.current.options({
15
  type: 'wordCloud',
16
  autoFit: true,
17
- layout: { fontSize: [20, 100] },
 
 
 
 
 
 
 
 
18
  data: {
19
  type: 'inline',
20
- value: list.map((x) => ({ text: x[0], value: x[1], name: x[0] })),
21
  },
22
  encode: { color: 'text' },
23
  legend: false,
24
- tooltip: false,
 
 
 
25
  });
26
 
27
  chartRef.current.render();
28
  }
29
- }, [list]);
30
 
31
  useEffect(() => {
32
  renderWordCloud();
 
1
  import { useFetchTagList } from '@/hooks/knowledge-hooks';
2
  import { Chart } from '@antv/g2';
3
+ import { sumBy } from 'lodash';
4
+ import { useCallback, useEffect, useMemo, useRef } from 'react';
5
 
6
  export function TagWordCloud() {
7
  const domRef = useRef<HTMLDivElement>(null);
8
  let chartRef = useRef<Chart>();
9
  const { list } = useFetchTagList();
10
 
11
+ const { list: tagList } = useMemo(() => {
12
+ const nextList = list.sort((a, b) => b[1] - a[1]).slice(0, 256);
13
+
14
+ return {
15
+ list: nextList.map((x) => ({ text: x[0], value: x[1], name: x[0] })),
16
+ sumValue: sumBy(nextList, (x: [string, number]) => x[1]),
17
+ length: nextList.length,
18
+ };
19
+ }, [list]);
20
+
21
  const renderWordCloud = useCallback(() => {
22
  if (domRef.current) {
23
  chartRef.current = new Chart({ container: domRef.current });
 
25
  chartRef.current.options({
26
  type: 'wordCloud',
27
  autoFit: true,
28
+ layout: {
29
+ fontSize: [20, 100],
30
+ // fontSize: (d: any) => {
31
+ // if (d.value) {
32
+ // return (d.value / sumValue) * 100 * (length / 10);
33
+ // }
34
+ // return 0;
35
+ // },
36
+ },
37
  data: {
38
  type: 'inline',
39
+ value: tagList,
40
  },
41
  encode: { color: 'text' },
42
  legend: false,
43
+ tooltip: {
44
+ title: 'name', // title
45
+ items: ['value'], // data item
46
+ },
47
  });
48
 
49
  chartRef.current.render();
50
  }
51
+ }, [tagList]);
52
 
53
  useEffect(() => {
54
  renderWordCloud();