Spaces:

retopara
/

ragflow

Build error

App Files Community

H committed on Jul 17, 2024

Commit

1fa0527

1 Parent(s): 69a7c76

Fix web search and template max tokens (#1564)

Browse files

### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed (6)

graph/canvas.py +1 -0
graph/component/baidu.py +5 -3
graph/component/duckduckgo.py +5 -2
graph/component/generate.py +2 -2
graph/component/wikipedia.py +3 -3
graph/templates/websearch_assistant.json +3 -6

graph/canvas.py CHANGED Viewed

@@ -188,6 +188,7 @@ class Canvas(ABC):
         def prepare2run(cpns):
             nonlocal ran, ans
             for c in cpns:
                 cpn = self.components[c]["obj"]
                 if cpn.component_name == "Answer":
                     self.answer.append(c)

         def prepare2run(cpns):
             nonlocal ran, ans
             for c in cpns:
+                if self.path[-1] and c == self.path[-1][-1]: continue
                 cpn = self.components[c]["obj"]
                 if cpn.component_name == "Answer":
                     self.answer.append(c)

graph/component/baidu.py CHANGED Viewed

@@ -43,7 +43,7 @@ class Baidu(ComponentBase, ABC):
         ans = self.get_input()
         ans = " - ".join(ans["content"]) if "content" in ans else ""
         if not ans:
-            return Baidu.be_output(self._param.no)
         url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
         headers = {
@@ -56,8 +56,10 @@ class Baidu(ComponentBase, ABC):
         baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a>    ' + body)} for url, title, body in zip(url_res, title_res, body_res)]
         del body_res, url_res, title_res
-        df = pd.DataFrame(baidu_res)
-        print(df, ":::::::::::::::::::::::::::::::::")
         return df

         ans = self.get_input()
         ans = " - ".join(ans["content"]) if "content" in ans else ""
         if not ans:
+            return Baidu.be_output("")
         url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n)
         headers = {
         baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a>    ' + body)} for url, title, body in zip(url_res, title_res, body_res)]
         del body_res, url_res, title_res
+        if not baidu_res:
+            return Baidu.be_output("")
+        df = pd.DataFrame(baidu_res)
+        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
         return df

graph/component/duckduckgo.py CHANGED Viewed

@@ -44,7 +44,7 @@ class DuckDuckGo(ComponentBase, ABC):
         ans = self.get_input()
         ans = " - ".join(ans["content"]) if "content" in ans else ""
         if not ans:
-            return DuckDuckGo.be_output(self._param.no)
         if self._param.channel == "text":
             with DDGS() as ddgs:
@@ -57,6 +57,9 @@ class DuckDuckGo(ComponentBase, ABC):
                 duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a>    ' + i["body"]} for i in
                             ddgs.news(ans, max_results=self._param.top_n)]
         df = pd.DataFrame(duck_res)
-        print(df, ":::::::::::::::::::::::::::::::::")
         return df

         ans = self.get_input()
         ans = " - ".join(ans["content"]) if "content" in ans else ""
         if not ans:
+            return DuckDuckGo.be_output("")
         if self._param.channel == "text":
             with DDGS() as ddgs:
                 duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a>    ' + i["body"]} for i in
                             ddgs.news(ans, max_results=self._param.top_n)]
+       if not duck_res:
+           return DuckDuckGo.be_output("")
         df = pd.DataFrame(duck_res)
+        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
         return df

graph/component/generate.py CHANGED Viewed

@@ -72,14 +72,14 @@ class Generate(ComponentBase):
         prompt = self._param.prompt
         retrieval_res = self.get_input()
-        input = "\n- ".join(retrieval_res["content"]) if "content" in retrieval_res else ""
         for para in self._param.parameters:
             cpn = self._canvas.get_component(para["component_id"])["obj"]
             _, out = cpn.output(allow_partial=False)
             if "content" not in out.columns:
                 kwargs[para["key"]] = "Nothing"
             else:
-                kwargs[para["key"]] = "\n - ".join(out["content"])
         kwargs["input"] = input
         for n, v in kwargs.items():

         prompt = self._param.prompt
         retrieval_res = self.get_input()
+        input = ("  - " + "\n  - ".join(retrieval_res["content"])) if "content" in retrieval_res else ""
         for para in self._param.parameters:
             cpn = self._canvas.get_component(para["component_id"])["obj"]
             _, out = cpn.output(allow_partial=False)
             if "content" not in out.columns:
                 kwargs[para["key"]] = "Nothing"
             else:
+                kwargs[para["key"]] = "  - " + "\n  - ".join(out["content"])
         kwargs["input"] = input
         for n, v in kwargs.items():

graph/component/wikipedia.py CHANGED Viewed

@@ -30,7 +30,7 @@ class WikipediaParam(ComponentParamBase):
     def __init__(self):
         super().__init__()
         self.top_n = 10
-        self.language = 'en'
     def check(self):
         self.check_positive_integer(self.top_n, "Top N")
@@ -49,7 +49,7 @@ class Wikipedia(ComponentBase, ABC):
         ans = self.get_input()
         ans = " - ".join(ans["content"]) if "content" in ans else ""
         if not ans:
-            return Wikipedia.be_output(self._param.no)
         wiki_res = []
         wikipedia.set_lang(self._param.language)
@@ -63,7 +63,7 @@ class Wikipedia(ComponentBase, ABC):
                 pass
         if not wiki_res:
-            return Wikipedia.be_output(self._param.no)
         df = pd.DataFrame(wiki_res)
         if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")

     def __init__(self):
         super().__init__()
         self.top_n = 10
+        self.language = "en"
     def check(self):
         self.check_positive_integer(self.top_n, "Top N")
         ans = self.get_input()
         ans = " - ".join(ans["content"]) if "content" in ans else ""
         if not ans:
+            return Wikipedia.be_output("")
         wiki_res = []
         wikipedia.set_lang(self._param.language)
                 pass
         if not wiki_res:
+            return Wikipedia.be_output("")
         df = pd.DataFrame(wiki_res)
         if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")

graph/templates/websearch_assistant.json CHANGED Viewed

@@ -59,7 +59,6 @@
                             "cite": true,
                             "frequency_penalty": 0.7,
                             "llm_id": "deepseek-chat",
-                            "max_tokens": 2048,
                             "message_history_window_size": 12,
                             "parameters": [
                                 {
@@ -108,7 +107,7 @@
                             "frequencyPenaltyEnabled": true,
                             "frequency_penalty": 0.7,
                             "llm_id": "deepseek-chat",
-                            "maxTokensEnabled": false,
                             "max_tokens": 256,
                             "parameter": "Precise",
                             "presencePenaltyEnabled": true,
@@ -366,7 +365,7 @@
                                 "frequencyPenaltyEnabled": true,
                                 "frequency_penalty": 0.7,
                                 "llm_id": "deepseek-chat",
-                                "maxTokensEnabled": false,
                                 "max_tokens": 256,
                                 "parameter": "Precise",
                                 "presencePenaltyEnabled": true,
@@ -510,8 +509,6 @@
                                 "frequencyPenaltyEnabled": true,
                                 "frequency_penalty": 0.7,
                                 "llm_id": "deepseek-chat",
-                                "maxTokensEnabled": true,
-                                "max_tokens": 2048,
                                 "message_history_window_size": 12,
                                 "parameter": "Precise",
                                 "parameters": [
@@ -538,7 +535,7 @@
                                 ],
                                 "presencePenaltyEnabled": true,
                                 "presence_penalty": 0.4,
-                                "prompt": "Role: You are an intelligent assistant. \nTask: Chat with user. Answer the question based on the provided content from: Knowledge Base, Wikipedia, Duckduckgo, Baidu.\nRequirements:\n  - Answer should be in markdown format.\n - Summarize and label the sources of the cited content separately: (Knowledge Base, Wikipedia, Duckduckgo, Baidu).\n  - Attach URL links to the content which is quoted from Wikipedia, DuckDuckGo or Baidu.\n  - Do not make thing up when there's no relevant information to user's question. \n\n## Knowledge base content\n {kb_input}\n\n\n## Wikipedia content\n{wikipedia}\n\n\n## Duckduckgo content\n{duckduckgo}\n\n\n## Baidu content\n{baidu}",
                                 "temperature": 0.1,
                                 "temperatureEnabled": true,
                                 "topPEnabled": true,

                             "cite": true,
                             "frequency_penalty": 0.7,
                             "llm_id": "deepseek-chat",
                             "message_history_window_size": 12,
                             "parameters": [
                                 {
                             "frequencyPenaltyEnabled": true,
                             "frequency_penalty": 0.7,
                             "llm_id": "deepseek-chat",
+                            "maxTokensEnabled": true,
                             "max_tokens": 256,
                             "parameter": "Precise",
                             "presencePenaltyEnabled": true,
                                 "frequencyPenaltyEnabled": true,
                                 "frequency_penalty": 0.7,
                                 "llm_id": "deepseek-chat",
+                                "maxTokensEnabled": true,
                                 "max_tokens": 256,
                                 "parameter": "Precise",
                                 "presencePenaltyEnabled": true,
                                 "frequencyPenaltyEnabled": true,
                                 "frequency_penalty": 0.7,
                                 "llm_id": "deepseek-chat",
                                 "message_history_window_size": 12,
                                 "parameter": "Precise",
                                 "parameters": [
                                 ],
                                 "presencePenaltyEnabled": true,
                                 "presence_penalty": 0.4,
+                                "prompt": "Role: You are an intelligent assistant. \nTask: Chat with user. Answer the question based on the provided content from: Knowledge Base, Wikipedia, Duckduckgo, Baidu.\nRequirements:\n  - Answer should be in markdown format.\n - Answer should include all sources(Knowledge Base, Wikipedia, Duckduckgo, Baidu) as long as they are relevant, and label the sources of the cited content separately.\n  - Attach URL links to the content which is quoted from Wikipedia, DuckDuckGo or Baidu.\n  - Do not make thing up when there's no relevant information to user's question. \n\n## Knowledge base content\n {kb_input}\n\n\n## Wikipedia content\n{wikipedia}\n\n\n## Duckduckgo content\n{duckduckgo}\n\n\n## Baidu content\n{baidu}",
                                 "temperature": 0.1,
                                 "temperatureEnabled": true,
                                 "topPEnabled": true,