ragflow / graphrag /utils.py
jinhai-2012's picture
Update license (#2086)
a7642c6
raw
history blame
1.84 kB
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""
Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
import html
import re
from collections.abc import Callable
from typing import Any
# Signature of an error-handler callback: it receives an optional exception,
# an optional string and an optional dict, and returns nothing.
# NOTE(review): the string is presumably a message or stack trace and the dict
# extra details about the failure -- confirm against the callers.
ErrorHandlerFn = Callable[[BaseException | None, str | None, dict | None], None]
def perform_variable_replacements(
input: str, history: list[dict]=[], variables: dict | None ={}
) -> str:
"""Perform variable replacements on the input string and in a chat log."""
result = input
def replace_all(input: str) -> str:
result = input
if variables:
for entry in variables:
result = result.replace(f"{{{entry}}}", variables[entry])
return result
result = replace_all(result)
for i in range(len(history)):
entry = history[i]
if entry.get("role") == "system":
history[i]["content"] = replace_all(entry.get("content") or "")
return result
def clean_str(input: Any) -> str:
    """Strip, HTML-unescape and scrub a string of quotes and control characters.

    String input is trimmed of surrounding whitespace, has its HTML entities
    decoded, and then loses every double quote and every character in the
    ranges ``\\x00-\\x1f`` and ``\\x7f-\\x9f``. Anything that is not a ``str``
    is handed back unchanged.
    """
    if isinstance(input, str):
        unescaped = html.unescape(input.strip())
        # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
        return re.sub(r"[\"\x00-\x1f\x7f-\x9f]", "", unescaped)

    # Non-string values pass straight through.
    return input
def dict_has_keys_with_types(
    data: dict, expected_fields: list[tuple[str, type]]
) -> bool:
    """Check that ``data`` holds every expected key with a value of the expected type.

    Each ``(key, type)`` pair in ``expected_fields`` must be satisfied: the key
    has to be present in ``data`` and its value has to pass ``isinstance``
    against the given type. An empty ``expected_fields`` yields ``True``.
    """
    # all() short-circuits on the first missing key or mismatched type.
    return all(
        key in data and isinstance(data[key], expected_type)
        for key, expected_type in expected_fields
    )