Spaces:

retopara
/

ragflow

Build error

File size: 9,513 Bytes

3079197
484e5ab
3079197
 
 
 
 
 
 
 
 
 
 
 
 
484e5ab
e32ef75
c037a22
9bf75d4
 
 
 
3079197
 
 
 
 
 
 
 
 
 
 
 
3198faf
 
 
484e5ab
 
e32ef75
 
3198faf
 
6be3dd5
 
 
e32ef75
 
 
 
 
c127ae4
 
 
484e5ab
c1bdfb8
6be3dd5
 
 
 
 
79ada0b
 
e32ef75
 
 
484e5ab
e32ef75
ba51460
e32ef75
 
 
 
 
 
c037a22
 
e32ef75
 
484e5ab
 
ba51460
e32ef75
c037a22
cd3f5c7
c037a22
cd3f5c7
801a3c1
 
 
 
 
c037a22
 
801a3c1
ba51460
484e5ab
e32ef75
 
 
0c30cc9
484e5ab
c037a22
 
 
 
 
 
484e5ab
e32ef75
 
 
e06e08c
 
 
e32ef75
 
 
 
 
0c30cc9
e32ef75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c037a22
 
e32ef75
 
 
2587709
c037a22
 
 
 
 
 
 
e32ef75
 
db713d9
 
 
 
 
 
 
 
 
 
e32ef75
 
41c7a59
e32ef75
 
 
79ada0b
 
 
 
2436df2
 
 
 
e32ef75
 
 
886ae57
79ada0b
 
 
e32ef75
 
 
 
886ae57
79ada0b
 
 
e32ef75
 
c037a22
 
 
 
 
 
 
 
e32ef75
 
79ada0b
 
 
 
e32ef75
 
 
 
028fe40
79ada0b
 
 
e32ef75
028fe40

#
#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
from api.db.services.user_service import TenantService
from api.settings import database_logger
from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel
from api.db import LLMType
from api.db.db_models import DB, UserTenant
from api.db.db_models import LLMFactories, LLM, TenantLLM
from api.db.services.common_service import CommonService


class LLMFactoriesService(CommonService):
    model = LLMFactories


class LLMService(CommonService):
    model = LLM


class TenantLLMService(CommonService):
    model = TenantLLM

    @classmethod
    @DB.connection_context()
    def get_api_key(cls, tenant_id, model_name):
        objs = cls.query(tenant_id=tenant_id, llm_name=model_name)
        if not objs:
            return
        return objs[0]

    @classmethod
    @DB.connection_context()
    def get_my_llms(cls, tenant_id):
        fields = [
            cls.model.llm_factory,
            LLMFactories.logo,
            LLMFactories.tags,
            cls.model.model_type,
            cls.model.llm_name,
            cls.model.used_tokens
        ]
        objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where(
            cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts()

        return list(objs)

    @classmethod
    @DB.connection_context()
    def model_instance(cls, tenant_id, llm_type,

                       llm_name=None, lang="Chinese"):
        e, tenant = TenantService.get_by_id(tenant_id)
        if not e:
            raise LookupError("Tenant not found")

        if llm_type == LLMType.EMBEDDING.value:
            mdlnm = tenant.embd_id if not llm_name else llm_name
        elif llm_type == LLMType.SPEECH2TEXT.value:
            mdlnm = tenant.asr_id
        elif llm_type == LLMType.IMAGE2TEXT.value:
            mdlnm = tenant.img2txt_id
        elif llm_type == LLMType.CHAT.value:
            mdlnm = tenant.llm_id if not llm_name else llm_name
        elif llm_type == LLMType.RERANK:
            mdlnm = tenant.rerank_id if not llm_name else llm_name
        else:
            assert False, "LLM type error"

        model_config = cls.get_api_key(tenant_id, mdlnm)
        if model_config: model_config = model_config.to_dict()
        if not model_config:
            if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
                llm = LLMService.query(llm_name=llm_name if llm_name else mdlnm)
                if llm and llm[0].fid in ["Youdao", "FastEmbed", "BAAI"]:
                    model_config = {"llm_factory": llm[0].fid, "api_key":"", "llm_name": llm_name if llm_name else mdlnm, "api_base": ""}
            if not model_config:
                if llm_name == "flag-embedding":
                    model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "",
                                "llm_name": llm_name, "api_base": ""}
                else:
                    if not mdlnm:
                        raise LookupError(f"Type of {llm_type} model is not set.")
                    raise LookupError("Model({}) not authorized".format(mdlnm))

        if llm_type == LLMType.EMBEDDING.value:
            if model_config["llm_factory"] not in EmbeddingModel:
                return
            return EmbeddingModel[model_config["llm_factory"]](
                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])

        if llm_type == LLMType.RERANK:
            if model_config["llm_factory"] not in RerankModel:
                return
            return RerankModel[model_config["llm_factory"]](
                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])

        if llm_type == LLMType.IMAGE2TEXT.value:
            if model_config["llm_factory"] not in CvModel:
                return
            return CvModel[model_config["llm_factory"]](
                model_config["api_key"], model_config["llm_name"], lang,
                base_url=model_config["api_base"]
            )

        if llm_type == LLMType.CHAT.value:
            if model_config["llm_factory"] not in ChatModel:
                return
            return ChatModel[model_config["llm_factory"]](
                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])

    @classmethod
    @DB.connection_context()
    def increase_usage(cls, tenant_id, llm_type, used_tokens, llm_name=None):
        e, tenant = TenantService.get_by_id(tenant_id)
        if not e:
            raise LookupError("Tenant not found")

        if llm_type == LLMType.EMBEDDING.value:
            mdlnm = tenant.embd_id
        elif llm_type == LLMType.SPEECH2TEXT.value:
            mdlnm = tenant.asr_id
        elif llm_type == LLMType.IMAGE2TEXT.value:
            mdlnm = tenant.img2txt_id
        elif llm_type == LLMType.CHAT.value:
            mdlnm = tenant.llm_id if not llm_name else llm_name
        elif llm_type == LLMType.RERANK:
            mdlnm = tenant.llm_id if not llm_name else llm_name
        else:
            assert False, "LLM type error"

        num = 0
        try:
            for u in cls.query(tenant_id = tenant_id, llm_name=mdlnm):
                num += cls.model.update(used_tokens = u.used_tokens + used_tokens)\
                    .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
                    .execute()
        except Exception as e:
            pass
        return num

    @classmethod
    @DB.connection_context()
    def get_openai_models(cls):
        objs = cls.model.select().where(
            (cls.model.llm_factory == "OpenAI"),
            ~(cls.model.llm_name == "text-embedding-3-small"),
            ~(cls.model.llm_name == "text-embedding-3-large")
        ).dicts()
        return list(objs)


class LLMBundle(object):
    def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"):
        self.tenant_id = tenant_id
        self.llm_type = llm_type
        self.llm_name = llm_name
        self.mdl = TenantLLMService.model_instance(
            tenant_id, llm_type, llm_name, lang=lang)
        assert self.mdl, "Can't find mole for {}/{}/{}".format(
            tenant_id, llm_type, llm_name)
        self.max_length = 512
        for lm in LLMService.query(llm_name=llm_name):
            self.max_length = lm.max_tokens
            break

    def encode(self, texts: list, batch_size=32):
        emd, used_tokens = self.mdl.encode(texts, batch_size)
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens):
            database_logger.error(
                "Can't update token usage for {}/EMBEDDING".format(self.tenant_id))
        return emd, used_tokens

    def encode_queries(self, query: str):
        emd, used_tokens = self.mdl.encode_queries(query)
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens):
            database_logger.error(
                "Can't update token usage for {}/EMBEDDING".format(self.tenant_id))
        return emd, used_tokens

    def similarity(self, query: str, texts: list):
        sim, used_tokens = self.mdl.similarity(query, texts)
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens):
            database_logger.error(
                "Can't update token usage for {}/RERANK".format(self.tenant_id))
        return sim, used_tokens

    def describe(self, image, max_tokens=300):
        txt, used_tokens = self.mdl.describe(image, max_tokens)
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens):
            database_logger.error(
                "Can't update token usage for {}/IMAGE2TEXT".format(self.tenant_id))
        return txt

    def chat(self, system, history, gen_conf):
        txt, used_tokens = self.mdl.chat(system, history, gen_conf)
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens, self.llm_name):
            database_logger.error(
                "Can't update token usage for {}/CHAT".format(self.tenant_id))
        return txt

    def chat_streamly(self, system, history, gen_conf):
        for txt in self.mdl.chat_streamly(system, history, gen_conf):
            if isinstance(txt, int):
                if not TenantLLMService.increase_usage(
                        self.tenant_id, self.llm_type, txt, self.llm_name):
                    database_logger.error(
                        "Can't update token usage for {}/CHAT".format(self.tenant_id))
                return
            yield txt