|
from enum import Enum |
|
from typing import Literal, Optional |
|
|
|
from pydantic import BaseModel |
|
|
|
|
|
class SegmentUpdateEntity(BaseModel):
    """Fields of a segment update; only ``content`` is required."""

    # Required: there is no default, so validation fails when it is missing.
    content: str
    # The remaining fields fall back to None when the caller omits them.
    answer: Optional[str] = None
    keywords: Optional[list[str]] = None
    enabled: Optional[bool] = None
|
|
|
|
|
class ParentMode(str, Enum):
    """String-valued enumeration of the two parent modes.

    Mixing in ``str`` makes every member compare equal to its raw value,
    e.g. ``ParentMode.FULL_DOC == "full-doc"``.
    """

    FULL_DOC = "full-doc"
    PARAGRAPH = "paragraph"
|
|
|
|
|
class NotionIcon(BaseModel):
    """Icon of a Notion page: a required ``type`` plus optional ``url`` / ``emoji``."""

    type: str
    # NOTE(review): nothing in this model ties `type` to which of the two
    # optional fields below is populated; both default to None.
    url: Optional[str] = None
    emoji: Optional[str] = None
|
|
|
|
|
class NotionPage(BaseModel):
    """A single Notion page reference: id, name, optional icon and a type string."""

    page_id: str
    page_name: str
    # The icon may be absent; it then stays None.
    page_icon: Optional[NotionIcon] = None
    # Free-form string; the set of accepted values is not constrained here.
    type: str
|
|
|
|
|
class NotionInfo(BaseModel):
    """Notion pages grouped under one workspace id."""

    workspace_id: str
    # Required; the model itself does not forbid an empty list.
    pages: list[NotionPage]
|
|
|
|
|
class WebsiteInfo(BaseModel):
    """Website source description: provider, job id and the list of URLs."""

    provider: str
    job_id: str
    urls: list[str]
    # Enabled unless the caller explicitly passes False.
    only_main_content: bool = True
|
|
|
|
|
class FileInfo(BaseModel):
    """Container for a list of file ids."""

    # Required list of string identifiers.
    file_ids: list[str]
|
|
|
|
|
class InfoList(BaseModel):
    """Data-source selector together with the optional per-source payloads.

    ``data_source_type`` must be one of ``"upload_file"``, ``"notion_import"``
    or ``"website_crawl"``. Each payload field is optional and defaults to None.
    """

    data_source_type: Literal["upload_file", "notion_import", "website_crawl"]
    # NOTE(review): no validator in this model checks that the payload
    # matching `data_source_type` is actually supplied.
    notion_info_list: Optional[list[NotionInfo]] = None
    # Despite the `_list` suffix, these two hold a single object, not a list.
    file_info_list: Optional[FileInfo] = None
    website_info_list: Optional[WebsiteInfo] = None
|
|
|
|
|
class DataSource(BaseModel):
    """Wrapper whose single required field is an ``InfoList``."""

    info_list: InfoList
|
|
|
|
|
class PreProcessingRule(BaseModel):
    """One pre-processing rule: a string id and an on/off flag, both required."""

    # Plain string; the set of valid ids is not constrained by this model.
    id: str
    enabled: bool
|
|
|
|
|
class Segmentation(BaseModel):
    """Segmentation settings: separator, token limit and chunk overlap."""

    # Defaults to a single newline character.
    separator: str = "\n"
    # The only required field; no bounds are enforced by this model.
    max_tokens: int
    # Defaults to zero when omitted.
    chunk_overlap: int = 0
|
|
|
|
|
class Rule(BaseModel):
    """Optional rule components; every field defaults to None."""

    pre_processing_rules: Optional[list[PreProcessingRule]] = None
    segmentation: Optional[Segmentation] = None
    # Accepts the same two strings that ParentMode defines as its values.
    parent_mode: Optional[Literal["full-doc", "paragraph"]] = None
    # Uses the same Segmentation model as `segmentation` above.
    subchunk_segmentation: Optional[Segmentation] = None
|
|
|
|
|
class ProcessRule(BaseModel):
    """Processing mode plus an optional ``Rule``.

    ``mode`` must be ``"automatic"``, ``"custom"`` or ``"hierarchical"``.
    """

    mode: Literal["automatic", "custom", "hierarchical"]
    # NOTE(review): this model does not require `rules` for any particular
    # mode; it is None whenever the caller leaves it out.
    rules: Optional[Rule] = None
|
|
|
|
|
class RerankingModel(BaseModel):
    """Provider name and model name of a reranking model; both optional."""

    reranking_provider_name: Optional[str] = None
    reranking_model_name: Optional[str] = None
|
|
|
|
|
class RetrievalModel(BaseModel):
    """Retrieval settings: search method, reranking, top-k and score threshold.

    ``search_method`` must be ``"hybrid_search"``, ``"semantic_search"`` or
    ``"full_text_search"``.
    """

    search_method: Literal["hybrid_search", "semantic_search", "full_text_search"]
    # Required flag; the accompanying model below stays optional.
    reranking_enable: bool
    reranking_model: Optional[RerankingModel] = None
    top_k: int
    # Required flag; the threshold value itself may be left as None.
    # NOTE(review): no validator links the flag to the presence of a value.
    score_threshold_enabled: bool
    score_threshold: Optional[float] = None
|
|
|
|
|
class MetaDataConfig(BaseModel):
    """Document type together with a metadata dictionary; both required."""

    doc_type: str
    # Unparameterised dict: key and value types are not constrained here.
    doc_metadata: dict
|
|
|
|
|
class KnowledgeConfig(BaseModel):
    """Top-level knowledge configuration.

    ``indexing_technique`` is the only required field and must be
    ``"high_quality"`` or ``"economy"``; every other field has a default.
    """

    original_document_id: Optional[str] = None
    # True unless the caller explicitly passes False.
    duplicate: bool = True
    indexing_technique: Literal["high_quality", "economy"]

    # Nested configuration objects, each None when not supplied.
    data_source: Optional[DataSource] = None
    process_rule: Optional[ProcessRule] = None
    retrieval_model: Optional[RetrievalModel] = None

    # Plain strings with fixed defaults; accepted values are not constrained
    # by this model.
    doc_form: str = "text_model"
    doc_language: str = "English"

    # NOTE(review): both embedding fields are optional even when
    # indexing_technique is "high_quality"; any such requirement would have
    # to be enforced by the caller.
    embedding_model: Optional[str] = None
    embedding_model_provider: Optional[str] = None

    name: Optional[str] = None
    metadata: Optional[MetaDataConfig] = None
|
|
|
|
|
class SegmentUpdateArgs(BaseModel):
    """Arguments of a segment update; every field may be omitted."""

    # Unlike SegmentUpdateEntity, `content` is optional here.
    content: Optional[str] = None
    answer: Optional[str] = None
    keywords: Optional[list[str]] = None
    # Off unless the caller explicitly passes True.
    regenerate_child_chunks: bool = False
    enabled: Optional[bool] = None
|
|
|
|
|
class ChildChunkUpdateArgs(BaseModel):
    """Arguments of a child-chunk update: optional ``id``, required ``content``."""

    # presumably None denotes a chunk that does not exist yet — TODO confirm
    # against the caller; this model only makes the field optional.
    id: Optional[str] = None
    content: str
|
|