balibabu committed on
Commit
c0be17f
·
1 Parent(s): c78c46b

Feat: Add description for tag parsing method #4368 (#4402)

Browse files

### What problem does this PR solve?

Feat: Add description for tag parsing method #4368

### Type of change


- [x] New Feature (non-breaking change which adds functionality)

web/src/assets/svg/chunk-method/tag-01.svg ADDED
web/src/assets/svg/chunk-method/tag-02.svg ADDED
web/src/locales/en.ts CHANGED
@@ -286,6 +286,16 @@ export default {
286
  <p>This approach chunks files using the 'naive'/'General' method. It splits a document into segments and then combines adjacent segments until the token count exceeds the threshold specified by 'Chunk token number', at which point a chunk is created.</p>
287
  <p>The chunks are then fed to the LLM to extract entities and relationships for a knowledge graph and a mind map.</p>
288
  <p>Ensure that you set the <b>Entity types</b>.</p>`,
 
 
 
 
 
 
 
 
 
 
289
  useRaptor: 'Use RAPTOR to enhance retrieval',
290
  useRaptorTip:
291
  'Recursive Abstractive Processing for Tree-Organized Retrieval, see https://huggingface.co/papers/2401.18059 for more information.',
@@ -310,9 +320,11 @@ The above is the content you need to summarize.`,
310
  vietnamese: 'Vietnamese',
311
  pageRank: 'Page rank',
312
  pageRankTip: `This increases the relevance score of the knowledge base. Its value will be added to the relevance score of all retrieved chunks from this knowledge base. Useful when you are searching within multiple knowledge bases and wanting to assign a higher pagerank score to a specific one.`,
313
- tag: 'Tag',
314
  frequency: 'Frequency',
315
  searchTags: 'Search tags',
 
 
316
  },
317
  chunk: {
318
  chunk: 'Chunk',
 
286
  <p>This approach chunks files using the 'naive'/'General' method. It splits a document into segments and then combines adjacent segments until the token count exceeds the threshold specified by 'Chunk token number', at which point a chunk is created.</p>
287
  <p>The chunks are then fed to the LLM to extract entities and relationships for a knowledge graph and a mind map.</p>
288
  <p>Ensure that you set the <b>Entity types</b>.</p>`,
289
+ tag: `<p>A knowledge base using 'Tag' as a chunking method is supposed to be used by other knowledge bases to add tags to their chunks; queries to those knowledge bases will also carry tags.</p>
290
+ <p>A knowledge base using 'Tag' as a chunking method is <b>NOT</b> supposed to be involved in the RAG procedure.</p>
291
+ <p>The chunks in this knowledge base are examples of tags, which demonstrate the entire tag set and the relevance between chunk and tags.</p>
292
+
293
+ <p>This chunk method supports <b>EXCEL</b> and <b>CSV/TXT</b> file formats.</p>
294
+ <p>If a file is in <b>Excel</b> format, it should contain two columns without headers: one for content and the other for tags, with the content column preceding the tags column. Multiple sheets are acceptable, provided the columns are properly structured.</p>
295
+ <p>If a file is in <b>CSV/TXT</b> format, it must be UTF-8 encoded with TAB as the delimiter to separate content and tags.</p>
296
+ <p>In the tags column, tags are separated by English <b>commas</b>.</p>
297
+ <i>Lines of text that fail to follow the above rules will be ignored, and each pair will be considered a distinct chunk.</i>
298
+ `,
299
  useRaptor: 'Use RAPTOR to enhance retrieval',
300
  useRaptorTip:
301
  'Recursive Abstractive Processing for Tree-Organized Retrieval, see https://huggingface.co/papers/2401.18059 for more information.',
 
320
  vietnamese: 'Vietnamese',
321
  pageRank: 'Page rank',
322
  pageRankTip: `This increases the relevance score of the knowledge base. Its value will be added to the relevance score of all retrieved chunks from this knowledge base. Useful when you are searching within multiple knowledge bases and wanting to assign a higher pagerank score to a specific one.`,
323
+ tagName: 'Tag',
324
  frequency: 'Frequency',
325
  searchTags: 'Search tags',
326
+ tagCloud: 'Cloud',
327
+ tagTable: 'Table',
328
  },
329
  chunk: {
330
  chunk: 'Chunk',
web/src/locales/zh-traditional.ts CHANGED
@@ -271,6 +271,16 @@ export default {
271
  <p>接下來,區塊將傳送到LLM以提取知識圖譜和思維導圖的節點和關係。
272
 
273
  <p>請注意您需要指定的條目類型。</p></p>`,
 
 
 
 
 
 
 
 
 
 
274
  useRaptor: '使用RAPTOR文件增強策略',
275
  useRaptorTip: '請參考 https://huggingface.co/papers/2401.18059',
276
  prompt: '提示詞',
@@ -294,9 +304,11 @@ export default {
294
  pageRank: '頁面排名',
295
  pageRankTip: `這用來提高相關性分數。所有檢索到的區塊的相關性得分將加上該數字。
296
  當您想要先搜尋給定的知識庫時,請設定比其他人更高的 pagerank 分數。`,
297
- tag: '標籤',
298
  frequency: '頻次',
299
  searchTags: '搜尋標籤',
 
 
300
  },
301
  chunk: {
302
  chunk: '解析塊',
 
271
  <p>接下來,區塊將傳送到LLM以提取知識圖譜和思維導圖的節點和關係。
272
 
273
  <p>請注意您需要指定的條目類型。</p></p>`,
274
+ tag: `<p>使用「標籤」作為分塊方法的知識庫應該被其他知識庫用來將標籤加入其區塊中,查詢也將帶有標籤。</p>
275
+ <p>使用「標籤」作為分塊方法的知識庫<b>不</b>應該參與 RAG 過程。</p>
276
+ <p>本知識庫中的區塊是標籤的範例,展示了整個標籤集以及區塊與標籤之間的相關性。</p>
277
+
278
+ <p>此區塊方法支援<b>EXCEL</b>和<b>CSV/TXT</b>檔案格式。</p>
279
+ <p>如果檔案採用 <b>Excel</b> 格式,則應包含兩列,不含標題:一列用於內容,另一列用於標籤,內容列位於標籤列之前。只要列的結構正確,多個工作表也是可以接受的。</p>
280
+ <p>如果檔案為<b>CSV/TXT</b>格式,則必須採用UTF-8編碼,並以TAB作為分隔符號來分隔內容和標籤。</p>
281
+ <p>標籤欄中,標籤之間有英文<b>逗號</b>。</p>
282
+ <i>不符合上述規則的文字行將被忽略,並且每一對將被視為一個不同的區塊。</i>
283
+ `,
284
  useRaptor: '使用RAPTOR文件增強策略',
285
  useRaptorTip: '請參考 https://huggingface.co/papers/2401.18059',
286
  prompt: '提示詞',
 
304
  pageRank: '頁面排名',
305
  pageRankTip: `這用來提高相關性分數。所有檢索到的區塊的相關性得分將加上該數字。
306
  當您想要先搜尋給定的知識庫時,請設定比其他人更高的 pagerank 分數。`,
307
+ tagName: '標籤',
308
  frequency: '頻次',
309
  searchTags: '搜尋標籤',
310
+ tagCloud: '雲端',
311
+ tagTable: '表',
312
  },
313
  chunk: {
314
  chunk: '解析塊',
web/src/locales/zh.ts CHANGED
@@ -288,6 +288,16 @@ export default {
288
  <p>接下来,将分块传输到 LLM 以提取知识图谱和思维导图的节点和关系。</p>
289
 
290
  注意您需要指定的条目类型。</p>`,
 
 
 
 
 
 
 
 
 
 
291
  useRaptor: '使用召回增强RAPTOR策略',
292
  useRaptorTip: '请参考 https://huggingface.co/papers/2401.18059',
293
  prompt: '提示词',
@@ -311,9 +321,11 @@ export default {
311
  pageRank: '页面排名',
312
  pageRankTip: `这用于提高相关性得分。所有检索到的块的相关性得分将加上此数字。
313
  当您想首先搜索给定的知识库时,请设置比其他知识库更高的 pagerank 得分。`,
314
- tag: '标签',
315
  frequency: '频次',
316
  searchTags: '搜索标签',
 
 
317
  },
318
  chunk: {
319
  chunk: '解析块',
 
288
  <p>接下来,将分块传输到 LLM 以提取知识图谱和思维导图的节点和关系。</p>
289
 
290
  注意您需要指定的条目类型。</p>`,
291
+ tag: `<p>使用“标签”作为分块方法的知识库应该被其他知识库使用,以将标签添加到其块中,对这些块的查询也将带有标签。</p>
292
+ <p>使用“标签”作为分块方法的知识库<b>不</b>应该参与 RAG 过程。</p>
293
+ <p>此知识库中的块是标签的示例,它们演示了整个标签集以及块和标签之间的相关性。</p>
294
+
295
+ <p>此块方法支持<b>EXCEL</b>和<b>CSV/TXT</b>文件格式。</p>
296
+ <p>如果文件为<b>Excel</b>格式,则它应该包含两列无标题:一列用于内容,另一列用于标签,内容列位于标签列之前。可以接受多个工作表,只要列结构正确即可。</p>
297
+ <p>如果文件为 <b>CSV/TXT</b> 格式,则必须使用 UTF-8 编码并以 TAB 作为分隔符来分隔内容和标签。</p>
298
+ <p>在标签列中,标签之间使用英文 <b>逗号</b>。</p>
299
+ <i>不符合上述规则的文本行将被忽略,并且每对文本将被视为一个不同的块。</i>
300
+ `,
301
  useRaptor: '使用召回增强RAPTOR策略',
302
  useRaptorTip: '请参考 https://huggingface.co/papers/2401.18059',
303
  prompt: '提示词',
 
321
  pageRank: '页面排名',
322
  pageRankTip: `这用于提高相关性得分。所有检索到的块的相关性得分将加上此数字。
323
  当您想首先搜索给定的知识库时,请设置比其他知识库更高的 pagerank 得分。`,
324
+ tagName: '标签',
325
  frequency: '频次',
326
  searchTags: '搜索标签',
327
+ tagCloud: '云',
328
+ tagTable: '表',
329
  },
330
  chunk: {
331
  chunk: '解析块',
web/src/pages/add-knowledge/components/knowledge-setting/tag-tabs.tsx CHANGED
@@ -1,20 +1,40 @@
1
- import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
 
 
 
 
2
  import { TagTable } from './tag-table';
3
  import { TagWordCloud } from './tag-word-cloud';
4
 
 
 
 
 
 
 
 
 
 
 
5
  export function TagTabs() {
 
 
 
 
 
 
 
 
 
 
6
  return (
7
- <Tabs defaultValue="account" className="mt-4">
8
- <TabsList>
9
- <TabsTrigger value="account">Word cloud</TabsTrigger>
10
- <TabsTrigger value="password">Table</TabsTrigger>
11
- </TabsList>
12
- <TabsContent value="account">
13
- <TagWordCloud></TagWordCloud>
14
- </TabsContent>
15
- <TabsContent value="password">
16
- <TagTable></TagTable>
17
- </TabsContent>
18
- </Tabs>
19
  );
20
  }
 
1
+ import { Segmented } from 'antd';
2
+ import { SegmentedLabeledOption } from 'antd/es/segmented';
3
+ import { upperFirst } from 'lodash';
4
+ import { useState } from 'react';
5
+ import { useTranslation } from 'react-i18next';
6
  import { TagTable } from './tag-table';
7
  import { TagWordCloud } from './tag-word-cloud';
8
 
9
+ enum TagType {
10
+ Cloud = 'cloud',
11
+ Table = 'table',
12
+ }
13
+
14
+ const TagContentMap = {
15
+ [TagType.Cloud]: <TagWordCloud></TagWordCloud>,
16
+ [TagType.Table]: <TagTable></TagTable>,
17
+ };
18
+
19
  export function TagTabs() {
20
+ const [value, setValue] = useState<TagType>(TagType.Cloud);
21
+ const { t } = useTranslation();
22
+
23
+ const options: SegmentedLabeledOption[] = [TagType.Cloud, TagType.Table].map(
24
+ (x) => ({
25
+ label: t(`knowledgeConfiguration.tag${upperFirst(x)}`),
26
+ value: x,
27
+ }),
28
+ );
29
+
30
  return (
31
+ <section className="mt-4">
32
+ <Segmented
33
+ value={value}
34
+ options={options}
35
+ onChange={(val) => setValue(val as TagType)}
36
+ />
37
+ {TagContentMap[value]}
38
+ </section>
 
 
 
 
39
  );
40
  }
web/src/pages/add-knowledge/components/knowledge-setting/utils.ts CHANGED
@@ -16,4 +16,5 @@ export const ImageMap = {
16
  table: getImageName('table', 2),
17
  one: getImageName('one', 2),
18
  knowledge_graph: getImageName('knowledge-graph', 2),
 
19
  };
 
16
  table: getImageName('table', 2),
17
  one: getImageName('one', 2),
18
  knowledge_graph: getImageName('knowledge-graph', 2),
19
+ tag: getImageName('tag', 2),
20
  };
web/src/pages/flow/form/generate-form/dynamic-parameters.tsx CHANGED
@@ -1,9 +1,10 @@
1
  import { EditableCell, EditableRow } from '@/components/editable-cell';
2
  import { useTranslate } from '@/hooks/common-hooks';
 
3
  import { DeleteOutlined } from '@ant-design/icons';
4
  import { Button, Flex, Select, Table, TableProps } from 'antd';
5
  import { useBuildComponentIdSelectOptions } from '../../hooks/use-get-begin-query';
6
- import { IGenerateParameter, RAGFlowNodeType } from '../../interface';
7
  import { useHandleOperateParameters } from './hooks';
8
 
9
  import styles from './index.less';
@@ -46,7 +47,7 @@ const DynamicParameters = ({ node }: IProps) => {
46
  }),
47
  },
48
  {
49
- title: t('componentId'),
50
  dataIndex: 'component_id',
51
  key: 'component_id',
52
  align: 'center',
 
1
  import { EditableCell, EditableRow } from '@/components/editable-cell';
2
  import { useTranslate } from '@/hooks/common-hooks';
3
+ import { RAGFlowNodeType } from '@/interfaces/database/flow';
4
  import { DeleteOutlined } from '@ant-design/icons';
5
  import { Button, Flex, Select, Table, TableProps } from 'antd';
6
  import { useBuildComponentIdSelectOptions } from '../../hooks/use-get-begin-query';
7
+ import { IGenerateParameter } from '../../interface';
8
  import { useHandleOperateParameters } from './hooks';
9
 
10
  import styles from './index.less';
 
47
  }),
48
  },
49
  {
50
+ title: t('value'),
51
  dataIndex: 'component_id',
52
  key: 'component_id',
53
  align: 'center',