Spaces:

weiwei1392
/

paper_generate

Runtime error

File size: 6,732 Bytes

from typing import Optional, List
# from langchain.llms.utils import enforce_stop_tokens
# import torch
import requests
# import logging
# from transformers import AutoTokenizer, AutoModel, AutoConfig
# logging.basicConfig(filename='chat_log.txt', level=logging.INFO)

# Device identifier; in the visible part of this file it is only referenced
# by the commented-out local model loading / cache clearing code.
DEVICE: str = 'cuda'
# NOTE(review): hard-coded credential-like value that is not referenced by any
# code visible in this file -- consider loading it from configuration instead.
FORWARD_KEY: str = "fk198719-Pmvv22OqZiovaxRq6YxCzkTcd6UVVX5O0"


# def torch_gc():
#     if torch.cuda.is_available():
#         with torch.cuda.device(DEVICE):
#             torch.cuda.empty_cache()
#             torch.cuda.ipc_collect()


class ChatGLM:
    """Client for a ChatGLM model served over HTTP.

    Calling an instance POSTs the prompt, the trimmed chat history and the
    sampling settings as JSON to ``URL`` and returns the ``'response'`` field
    of the JSON reply.
    """

    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9
    tokenizer: object = None  # forwarded as-is under the 'tokenizers' key
    model: object = None  # not used by this class
    history_len: int = 10  # maximum number of trailing history turns sent
    history = []  # not read or written by this class
    URL = 'http://183.131.3.48:9200'
    HEADERS = {'Content-Type': 'application/json'}
    # (connect, read) timeout in seconds. ``requests`` applies no timeout by
    # default, so without it an unreachable endpoint would block forever.
    TIMEOUT = (10, 600)

    @property
    def _llm_type(self) -> str:
        """Return the identifier of this LLM wrapper."""
        return "ChatGLM"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        """Send ``prompt`` to the remote endpoint and return its answer.

        Args:
            prompt: Text sent under the ``'prompt'`` key.
            history: Optional list of turns. Turns whose first element is
                ``None`` are dropped and only the last ``history_len`` turns
                are sent. A falsy value is forwarded unchanged.
            stop: Accepted for interface compatibility; currently ignored.

        Returns:
            The ``'response'`` field of the endpoint's JSON reply.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            requests.Timeout: If the endpoint does not answer within
                ``TIMEOUT``.
        """
        if history:
            # Drop turns without a first element (the system message).
            history = [turn for turn in history if turn[0] is not None]
            # ``history[-0:]`` would be the whole list, so a non-positive
            # ``history_len`` is handled explicitly (as LocalChatGLM does).
            history = history[-self.history_len:] if self.history_len > 0 else []

        payload = {
            'tokenizers': self.tokenizer,
            'prompt': prompt,
            'history': history,
            'top_p': self.top_p,
            'max_length': self.max_length,
            'temperature': self.temperature,
        }

        reply = requests.post(self.URL, headers=self.HEADERS, json=payload,
                              timeout=self.TIMEOUT)
        # Surface HTTP failures directly instead of as a KeyError below.
        reply.raise_for_status()
        return reply.json()['response']


class LocalChatGLM:
    """Wrapper around an in-process chat model.

    ``model`` must expose ``chat(tokenizer, prompt, history=..., max_length=...,
    temperature=...)`` returning a ``(response, ...)`` pair; ``model`` and
    ``tokenizer`` have to be assigned before the instance is called.
    """

    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9  # not forwarded to ``model.chat`` by this class
    tokenizer: object = None
    model: object = None
    history_len: int = 10  # maximum number of trailing history turns passed on
    history = []  # replaced (never mutated in place) on each call

    @property
    def _llm_type(self) -> str:
        """Return the identifier of this LLM wrapper."""
        return "ChatGLM"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        """Run ``prompt`` through ``self.model.chat`` and return the response.

        Args:
            prompt: Text passed to the model. The part after the last
                ``'question:\\n'`` marker is what gets recorded in
                ``self.history``.
            history: Optional list of previous turns; ``None`` means no
                history. Only the last ``history_len`` turns are passed to the
                model, and none at all when ``history_len`` is not positive.
            stop: Accepted for interface compatibility; currently ignored.

        Returns:
            The first element of the pair returned by ``self.model.chat``.
        """
        # ``None`` instead of a mutable ``[]`` default; both mean "no history".
        if history is None:
            history = []
        recent_turns = history[-self.history_len:] if self.history_len > 0 else []

        response, _ = self.model.chat(
            self.tokenizer,
            prompt,
            history=recent_turns,
            max_length=self.max_length,
            temperature=self.temperature,
        )

        question = prompt.split('question:\n')[-1]
        # Rebinding (not appending) keeps the class-level list untouched.
        self.history = self.history + [[question, response]]
        return response

    # @classmethod
    # def load_model(cls,
    #                model_name_or_path: str = "THUDM/chatglm-6b"):
    #     tokenizer = AutoTokenizer.from_pretrained(
    #         model_name_or_path,
    #         trust_remote_code=True
    #     )
    #     if torch.cuda.is_available() and DEVICE.lower().startswith("cuda"):
    #         model = (
    #             AutoModel.from_pretrained(
    #                 model_name_or_path,
    #                 trust_remote_code=True)
    #             .half()
    #             .cuda()
    #         )
    #     else:
    #        model = (
    #             AutoModel.from_pretrained(
    #                 model_name_or_path,
    #                 trust_remote_code=True)
    #             .float()
    #             .to(DEVICE)
    #         )
    #     llm = cls()
    #     llm.tokenizer = tokenizer
    #     llm.model = model
    #     return llm


class OpenAI3:
    """Client for the chat-completions endpoint at ``URL`` using ``MODEL_NAME``.

    Calling an instance sends the prompt as a single user message and returns
    the content of the first choice in the JSON reply.
    """

    max_length: int = 10000  # not sent to the endpoint by this class
    temperature: float = 0.2
    top_p = 0.9  # not sent to the endpoint by this class
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []  # not read or written by this class
    # NOTE(review): the bearer token is hard-coded here; consider loading it
    # from configuration instead of keeping it in source.
    HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-3.5-turbo"
    # (connect, read) timeout in seconds. ``requests`` applies no timeout by
    # default, so without it an unreachable endpoint would block forever.
    TIMEOUT = (10, 600)

    @property
    def _llm_type(self) -> str:
        """Return the identifier of this LLM wrapper."""
        return "OPENAI3"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        """Send ``prompt`` as one user message and return the model's answer.

        Args:
            prompt: Content of the single ``user`` message.
            history: Accepted for interface compatibility; currently ignored.
            stop: Accepted for interface compatibility; currently ignored.

        Returns:
            ``choices[0].message.content`` of the JSON reply.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            requests.Timeout: If the endpoint does not answer within
                ``TIMEOUT``.
        """
        payload = {
            "model": self.MODEL_NAME,
            "messages": [{"role": "user", "content": prompt}],
            'temperature': self.temperature,
        }
        reply = requests.post(self.URL, headers=self.HEADERS, json=payload,
                              timeout=self.TIMEOUT)
        # Surface HTTP failures directly instead of as a KeyError on 'choices'.
        reply.raise_for_status()
        return reply.json()['choices'][0]['message']['content']


class OpenAI4:
    """Client for the chat-completions endpoint at ``URL`` using ``MODEL_NAME``.

    Calling an instance sends the prompt as a single user message and returns
    the content of the first choice in the JSON reply.
    """

    max_length: int = 10000  # not sent to the endpoint by this class
    temperature: float = 0.2
    top_p = 0.9  # not sent to the endpoint by this class
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []  # not read or written by this class
    # NOTE(review): the bearer token is hard-coded here; consider loading it
    # from configuration instead of keeping it in source.
    HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-4"
    # (connect, read) timeout in seconds. ``requests`` applies no timeout by
    # default, so without it an unreachable endpoint would block forever.
    TIMEOUT = (10, 600)

    @property
    def _llm_type(self) -> str:
        """Return the identifier of this LLM wrapper."""
        return "OPENAI4"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        """Send ``prompt`` as one user message and return the model's answer.

        Args:
            prompt: Content of the single ``user`` message.
            history: Accepted for interface compatibility; currently ignored.
            stop: Accepted for interface compatibility; currently ignored.

        Returns:
            ``choices[0].message.content`` of the JSON reply.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            requests.Timeout: If the endpoint does not answer within
                ``TIMEOUT``.
        """
        payload = {
            "model": self.MODEL_NAME,
            "messages": [{"role": "user", "content": prompt}],
            'temperature': self.temperature,
        }
        reply = requests.post(self.URL, headers=self.HEADERS, json=payload,
                              timeout=self.TIMEOUT)
        # Surface HTTP failures directly instead of as a KeyError on 'choices'.
        reply.raise_for_status()
        return reply.json()['choices'][0]['message']['content']