Commit b580d80 · Parent(s): ac8a60b
to create RAGAs result with triad of metrics
Changed files:
- .gitignore +2 -2
- archive/{requirements.txt → dependencies/requirements_backup.txt} +0 -0
- archive/dependencies/requirements_llama-index==0.9.24.txt +259 -0
- archive/{test.py → experiments/test.py} +0 -0
- archive/{init_setup.py → model_evaluation/init_setup.py} +0 -0
- archive/{main.py → model_evaluation/main.py} +0 -0
- archive/model_evaluation/main_new.py +180 -0
- archive/model_evaluation/utils.py +160 -0
- archive/model_evaluation/utils_new.py +95 -0
- database/mock_qna_source.csv +2 -2
- evaluate_model.py +83 -0
- models/trulens_eval.sqlite +3 -0
- notebooks/002_persisted-embedding-model.ipynb +1 -0
- pages/1_Leaderboard.py +1 -1
- pages/2_Evaluations.py +2 -2
- pages/3_app.py +4 -6
- qna_prompting.py +11 -6
- raw_documents/eval_answers.txt +2 -2
- raw_documents/eval_questions.txt +2 -2
- raw_documents/qna.txt +2 -2
- requirements.txt +5 -4
- streamlit_app.py +1 -1
- utils.py +3 -72
.gitignore
CHANGED
@@ -3,10 +3,10 @@
 .streamlit/
 results/
 
-*.sqlite
 data/
 
 notebooks/test_model
 screenshot_questions/
 
-# ux/
+# ux/
+# *.sqlite
archive/{requirements.txt → dependencies/requirements_backup.txt}
RENAMED
File without changes
archive/dependencies/requirements_llama-index==0.9.24.txt
ADDED
@@ -0,0 +1,259 @@
+aiohttp==3.9.1
+aiosignal==1.3.1
+alembic==1.13.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.2.0
+appnope==0.1.3
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asgiref==3.7.2
+asttokens==2.4.1
+async-lru==2.0.4
+async-timeout==4.0.3
+attrs==23.2.0
+Babel==2.14.0
+backoff==2.2.1
+bcrypt==4.1.2
+beautifulsoup4==4.12.3
+bleach==6.1.0
+blinker==1.7.0
+bs4==0.0.2
+build==1.0.3
+cachetools==5.3.2
+certifi==2023.11.17
+cffi==1.16.0
+charset-normalizer==3.3.2
+chroma-hnswlib==0.7.3
+chromadb==0.4.22
+click==8.1.7
+coloredlogs==15.0.1
+comm==0.2.0
+contourpy==1.2.0
+cycler==0.12.1
+dataclasses-json==0.6.3
+debugpy==1.8.0
+decorator==5.1.1
+defusedxml==0.7.1
+Deprecated==1.2.14
+dill==0.3.7
+dirtyjson==1.0.8
+distro==1.9.0
+entrypoints==0.4
+exceptiongroup==1.2.0
+executing==2.0.1
+Faker==22.0.0
+fastapi==0.109.0
+fastjsonschema==2.19.1
+favicon==0.7.0
+filelock==3.13.1
+flatbuffers==23.5.26
+fonttools==4.47.0
+fqdn==1.5.1
+frozendict==2.4.0
+frozenlist==1.4.1
+fsspec==2023.12.2
+gitdb==4.0.11
+GitPython==3.1.40
+google-auth==2.27.0
+googleapis-common-protos==1.62.0
+greenlet==3.0.3
+grpcio==1.60.0
+h11==0.14.0
+htbuilder==0.6.2
+httpcore==1.0.2
+httptools==0.6.1
+httpx==0.26.0
+huggingface-hub==0.20.1
+humanfriendly==10.0
+humanize==4.9.0
+idna==3.6
+importlib-metadata==6.11.0
+importlib-resources==6.1.1
+ipykernel==6.28.0
+ipython==8.18.1
+ipywidgets==8.1.1
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.2
+joblib==1.3.2
+json5==0.9.14
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.20.0
+jsonschema-specifications==2023.12.1
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.9.0
+jupyter-lsp==2.2.1
+jupyter_client==8.6.0
+jupyter_core==5.6.1
+jupyter_server==2.12.1
+jupyter_server_terminals==0.5.1
+jupyterlab==4.0.10
+jupyterlab-widgets==3.0.9
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.25.2
+kiwisolver==1.4.5
+kubernetes==29.0.0
+langchain==0.0.354
+langchain-community==0.0.8
+langchain-core==0.1.23
+langsmith==0.0.87
+llama-index==0.9.24
+lxml==5.1.0
+Mako==1.3.0
+Markdown==3.5.1
+markdown-it-py==3.0.0
+markdownlit==0.0.7
+MarkupSafe==2.1.3
+marshmallow==3.20.1
+matplotlib==3.8.2
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+merkle-json==1.0.0
+millify==0.1.1
+mistune==3.0.2
+mmh3==4.1.0
+monotonic==1.6
+more-itertools==10.1.0
+mpmath==1.3.0
+multidict==6.0.4
+munch==4.0.0
+mypy-extensions==1.0.0
+nbclient==0.9.0
+nbconvert==7.14.0
+nbformat==5.9.2
+nest-asyncio==1.5.8
+networkx==3.2.1
+nltk==3.8.1
+notebook==7.0.6
+notebook_shim==0.2.3
+numpy==1.26.2
+oauthlib==3.2.2
+onnxruntime==1.17.0
+openai==1.6.1
+opentelemetry-api==1.22.0
+opentelemetry-exporter-otlp-proto-common==1.22.0
+opentelemetry-exporter-otlp-proto-grpc==1.22.0
+opentelemetry-instrumentation==0.43b0
+opentelemetry-instrumentation-asgi==0.43b0
+opentelemetry-instrumentation-fastapi==0.43b0
+opentelemetry-proto==1.22.0
+opentelemetry-sdk==1.22.0
+opentelemetry-semantic-conventions==0.43b0
+opentelemetry-util-http==0.43b0
+overrides==7.4.0
+packaging==23.2
+pandas==2.1.4
+pandocfilters==1.5.0
+parso==0.8.3
+pexpect==4.9.0
+pillow==10.2.0
+platformdirs==4.1.0
+posthog==3.3.3
+prometheus-client==0.19.0
+prompt-toolkit==3.0.43
+protobuf==4.25.1
+psutil==5.9.7
+ptyprocess==0.7.0
+pulsar-client==3.4.0
+pure-eval==0.2.2
+pyarrow==14.0.2
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pycparser==2.21
+pydantic==2.5.3
+pydantic_core==2.14.6
+pydeck==0.8.1b0
+Pygments==2.17.2
+pymdown-extensions==10.7
+PyMuPDF==1.23.22
+PyMuPDFb==1.23.22
+pyparsing==3.1.1
+pypdf==4.0.1
+PyPika==0.48.9
+pyproject_hooks==1.0.0
+python-dateutil==2.8.2
+python-decouple==3.8
+python-dotenv==1.0.0
+python-json-logger==2.0.7
+pytz==2023.3.post1
+PyYAML==6.0.1
+pyzmq==25.1.2
+qtconsole==5.5.1
+QtPy==2.4.1
+referencing==0.32.0
+regex==2023.12.25
+requests==2.31.0
+requests-oauthlib==1.3.1
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.0
+rpds-py==0.16.2
+rsa==4.9
+safetensors==0.4.1
+scikit-learn==1.4.0
+scipy==1.12.0
+Send2Trash==1.8.2
+sentence-transformers==2.3.0
+sentencepiece==0.1.99
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.0
+soupsieve==2.5
+SQLAlchemy==2.0.24
+st-annotated-text==4.0.1
+stack-data==0.6.3
+starlette==0.35.1
+streamlit==1.31.1
+streamlit-aggrid==0.3.4.post3
+streamlit-camera-input-live==0.2.0
+streamlit-card==1.0.0
+streamlit-embedcode==0.1.2
+streamlit-extras==0.3.6
+streamlit-faker==0.0.3
+streamlit-feedback==0.1.3
+streamlit-image-coordinates==0.1.6
+streamlit-keyup==0.2.2
+streamlit-toggle-switch==1.0.2
+streamlit-vertical-slider==2.5.5
+sympy==1.12
+tenacity==8.2.3
+terminado==0.18.0
+threadpoolctl==3.2.0
+tiktoken==0.5.2
+tinycss2==1.2.1
+tokenizers==0.15.2
+toml==0.10.2
+tomli==2.0.1
+toolz==0.12.0
+torch==2.1.2
+tornado==6.4
+tqdm==4.66.1
+traitlets==5.14.0
+transformers==4.37.2
+trulens==0.13.4
+trulens-eval==0.20.0
+typer==0.9.0
+types-python-dateutil==2.8.19.14
+typing-inspect==0.9.0
+typing_extensions==4.9.0
+tzdata==2023.4
+tzlocal==5.2
+uri-template==1.3.0
+urllib3==2.1.0
+uvicorn==0.27.0
+uvloop==0.19.0
+validators==0.22.0
+watchfiles==0.21.0
+wcwidth==0.2.12
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.7.0
+websockets==12.0
+widgetsnbextension==4.0.9
+wrapt==1.16.0
+yarl==1.9.4
+zipp==3.17.0
archive/{test.py → experiments/test.py}
RENAMED
File without changes

archive/{init_setup.py → model_evaluation/init_setup.py}
RENAMED
File without changes

archive/{main.py → model_evaluation/main.py}
RENAMED
File without changes
archive/model_evaluation/main_new.py
ADDED
@@ -0,0 +1,180 @@
+import utils
+import os
+
+import numpy as np
+import nest_asyncio
+import openai
+import chromadb
+
+from llama_index.legacy import (
+    VectorStoreIndex,
+    SimpleDirectoryReader
+)
+from llama_index.core import (
+    StorageContext,
+    Document,
+    Settings
+)
+from llama_index.vector_stores.chroma.base import ChromaVectorStore
+from llama_index.llms.openai import OpenAI
+from llama_index.embeddings.huggingface.base import HuggingFaceEmbedding
+from trulens_eval import Tru
+
+from utils import get_prebuilt_trulens_recorder
+import time
+
+nest_asyncio.apply()
+openai.api_key = utils.get_openai_api_key()
+
+def main():
+
+    if not os.path.exists("./default.sqlite"):
+
+        start_time = time.time()
+
+        llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
+        fine_tuned_path = "local:./models/fine-tuned-embeddings"
+
+        Settings.llm = llm
+        Settings.embed_model = fine_tuned_path
+
+        db = chromadb.PersistentClient(path="./models/chroma_db")
+        chroma_collection = db.get_or_create_collection("quickstart")
+
+        # assign chroma as the vector_store to the context
+        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+        storage_context = StorageContext.from_defaults(vector_store=vector_store)
+
+        # create your index
+        index = VectorStoreIndex.from_vector_store(
+            vector_store=vector_store,
+            storage_context=storage_context
+        )
+        query_engine = index.as_query_engine()
+
+        separator = "\n\n"
+        eval_questions = []
+        with open('./raw_documents/eval_questions.txt', 'r') as file:
+            content = file.read()
+
+        for question in content.split(separator):
+            print(question)
+            print(separator)
+            eval_questions.append(question.strip())
+
+        response = query_engine.query(eval_questions[0])
+        print(str(response))
+
+        tru = Tru(database_file="./models/trulens_eval.sqlite")
+        tru_recorder = get_prebuilt_trulens_recorder(query_engine,
+                                                     app_id="Direct Query Engine")
+
+        print("Sending each question to llm ..")
+        with tru_recorder as recording:
+            for question in eval_questions:
+                response = query_engine.query(question)
+
+        records, feedback = tru.get_records_and_feedback(app_ids=[])
+
+        os.makedirs("./results", exist_ok=True)
+        records.to_csv("./results/records.csv", index=False)
+
+        print(tru.db.engine.url.render_as_string(hide_password=False))
+
+        end_time = time.time()
+        time_spent_mins = (end_time - start_time) / 60
+        with open("./results/time_cost.txt", "w") as fp:
+            fp.write(f"Takes {int(time_spent_mins)} mins to create llm evaluation.")
+
+if __name__ == "__main__":
+
+    # main()
+    if False:
+        start_time = time.time()
+
+        llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
+        fine_tuned_path = "local:./models/fine-tuned-embeddings"
+
+        Settings.llm = llm
+        Settings.embed_model = fine_tuned_path
+
+        db = chromadb.PersistentClient(path="./models/chroma_db")
+        chroma_collection = db.get_or_create_collection("quickstart")
+
+        # assign chroma as the vector_store to the context
+        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+        storage_context = StorageContext.from_defaults(vector_store=vector_store)
+
+        # create your index
+        index = VectorStoreIndex.from_vector_store(
+            vector_store=vector_store,
+            storage_context=storage_context
+        )
+        query_engine = index.as_query_engine()
+
+        separator = "\n\n"
+        eval_questions = []
+        with open('./raw_documents/eval_questions.txt', 'r') as file:
+            content = file.read()
+
+        for question in content.split(separator):
+            print(question)
+            print(separator)
+            eval_questions.append(question.strip())
+
+        response = query_engine.query(eval_questions[0])
+        print(str(response))
+
+
+
+        from trulens_eval import Tru
+        tru = Tru()
+
+        documents = SimpleDirectoryReader(
+            input_files=["./raw_documents/qna.txt"]
+        ).load_data()
+        index = VectorStoreIndex.from_documents(documents)
+
+        query_engine = index.as_query_engine()
+        response = query_engine.query("Which is not a government healthcare philosophy?")
+        print(response)
+
+        from trulens_eval.feedback.provider.openai import OpenAI
+        openai = OpenAI()
+
+        # select context to be used in feedback. the location of context is app specific.
+        from trulens_eval.app import App
+        context = App.select_context(query_engine)
+
+        from trulens_eval import Feedback
+
+        # Define a groundedness feedback function
+        from trulens_eval.feedback import Groundedness
+        grounded = Groundedness(groundedness_provider=OpenAI())
+        f_groundedness = (
+            Feedback(grounded.groundedness_measure_with_cot_reasons)
+            .on(context.collect())  # collect context chunks into a list
+            .on_output()
+            .aggregate(grounded.grounded_statements_aggregator)
+        )
+
+        # Question/answer relevance between overall question and answer.
+        f_qa_relevance = Feedback(openai.relevance).on_input_output()
+
+        # Question/statement relevance between question and each context chunk.
+        f_qs_relevance = (
+            Feedback(openai.qs_relevance)
+            .on_input()
+            .on(context)
+            .aggregate(np.mean)
+        )
+
+        from trulens_eval import TruLlama
+        tru_query_engine_recorder = TruLlama(query_engine,
+            app_id='LlamaIndex_App1',
+            feedbacks=[f_groundedness, f_qa_relevance, f_qs_relevance])
+
+        if False:
+            # or as context manager
+            with tru_query_engine_recorder as recording:
+                query_engine.query("Which of the following is TRUE on the similarity of Means Testing and Casemix?")
archive/model_evaluation/utils.py
ADDED
@@ -0,0 +1,160 @@
+import os
+import numpy as np
+from trulens_eval import (
+    Feedback,
+    TruLlama,
+    OpenAI
+)
+
+from trulens_eval.feedback import Groundedness
+import nest_asyncio
+
+from llama_index.core import ServiceContext, VectorStoreIndex, StorageContext
+from llama_index.core import load_index_from_storage
+from llama_index.core.node_parser import HierarchicalNodeParser
+from llama_index.core.node_parser import get_leaf_nodes
+
+from llama_index.packs.auto_merging_retriever.base import AutoMergingRetrieverPack
+
+
+from llama_index.node_parser import SentenceWindowNodeParser
+from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
+from llama_index.indices.postprocessor import SentenceTransformerRerank
+from llama_index.query_engine import RetrieverQueryEngine
+
+
+nest_asyncio.apply()
+openai = OpenAI()
+
+qa_relevance = (
+    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
+    .on_input_output()
+)
+
+qs_relevance = (
+    Feedback(openai.relevance_with_cot_reasons, name = "Context Relevance")
+    .on_input()
+    .on(TruLlama.select_source_nodes().node.text)
+    .aggregate(np.mean)
+)
+
+#grounded = Groundedness(groundedness_provider=openai, summarize_provider=openai)
+grounded = Groundedness(groundedness_provider=openai)
+
+groundedness = (
+    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
+    .on(TruLlama.select_source_nodes().node.text)
+    .on_output()
+    .aggregate(grounded.grounded_statements_aggregator)
+)
+
+feedbacks = [qa_relevance, qs_relevance, groundedness]
+
+def get_openai_api_key():
+    return os.getenv("OPENAI_API_KEY")
+
+def get_trulens_recorder(query_engine, feedbacks, app_id):
+    tru_recorder = TruLlama(
+        query_engine,
+        app_id=app_id,
+        feedbacks=feedbacks
+    )
+    return tru_recorder
+
+def get_prebuilt_trulens_recorder(query_engine, app_id):
+    tru_recorder = TruLlama(
+        query_engine,
+        app_id=app_id,
+        feedbacks=feedbacks
+    )
+    return tru_recorder
+
+def build_sentence_window_index(
+    document, llm, embed_model="local:BAAI/bge-small-en-v1.5", save_dir="sentence_index"
+):
+    # create the sentence window node parser w/ default settings
+    node_parser = SentenceWindowNodeParser.from_defaults(
+        window_size=3,
+        window_metadata_key="window",
+        original_text_metadata_key="original_text",
+    )
+    sentence_context = ServiceContext.from_defaults(
+        llm=llm,
+        embed_model=embed_model,
+        node_parser=node_parser,
+    )
+    if not os.path.exists(save_dir):
+        sentence_index = VectorStoreIndex.from_documents(
+            [document], service_context=sentence_context
+        )
+        sentence_index.storage_context.persist(persist_dir=save_dir)
+    else:
+        sentence_index = load_index_from_storage(
+            StorageContext.from_defaults(persist_dir=save_dir),
+            service_context=sentence_context,
+        )
+
+    return sentence_index
+
+def get_sentence_window_query_engine(
+    sentence_index,
+    similarity_top_k=6,
+    rerank_top_n=2,
+):
+    # define postprocessors
+    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
+    rerank = SentenceTransformerRerank(
+        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
+    )
+
+    sentence_window_engine = sentence_index.as_query_engine(
+        similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
+    )
+    return sentence_window_engine
+
+def build_automerging_index(
+    documents,
+    llm,
+    embed_model="local:BAAI/bge-small-en-v1.5",
+    save_dir="merging_index",
+    chunk_sizes=None,
+):
+    chunk_sizes = chunk_sizes or [2048, 512, 128]
+    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
+    nodes = node_parser.get_nodes_from_documents(documents)
+    leaf_nodes = get_leaf_nodes(nodes)
+    merging_context = ServiceContext.from_defaults(
+        llm=llm,
+        embed_model=embed_model,
+    )
+    storage_context = StorageContext.from_defaults()
+    storage_context.docstore.add_documents(nodes)
+
+    if not os.path.exists(save_dir):
+        automerging_index = VectorStoreIndex(
+            leaf_nodes, storage_context=storage_context, service_context=merging_context
+        )
+        automerging_index.storage_context.persist(persist_dir=save_dir)
+    else:
+        automerging_index = load_index_from_storage(
+            StorageContext.from_defaults(persist_dir=save_dir),
+            service_context=merging_context,
+        )
+    return automerging_index
+
+def get_automerging_query_engine(
+    automerging_index,
+    similarity_top_k=12,
+    rerank_top_n=2,
+):
+    base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
+    retriever = AutoMergingRetriever(
+        base_retriever, automerging_index.storage_context, verbose=True
+    )
+    rerank = SentenceTransformerRerank(
+        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
+    )
+    auto_merging_engine = RetrieverQueryEngine.from_args(
+        retriever, node_postprocessors=[rerank]
+    )
+    return auto_merging_engine
archive/model_evaluation/utils_new.py
ADDED
@@ -0,0 +1,95 @@
+import os
+import numpy as np
+from trulens_eval import (
+    Feedback,
+    TruLlama,
+    OpenAI
+)
+
+from trulens_eval.feedback import Groundedness
+import nest_asyncio
+
+from llama_index.core import ServiceContext, VectorStoreIndex, StorageContext
+from llama_index.core import load_index_from_storage
+from llama_index.core.node_parser import HierarchicalNodeParser
+from llama_index.core.node_parser import get_leaf_nodes
+from llama_index.core.query_engine import RetrieverQueryEngine
+
+from llama_index.packs.auto_merging_retriever.base import AutoMergingRetrieverPack
+
+
+nest_asyncio.apply()
+openai = OpenAI()
+
+qa_relevance = (
+    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
+    .on_input_output()
+)
+
+qs_relevance = (
+    Feedback(openai.relevance_with_cot_reasons, name = "Context Relevance")
+    .on_input()
+    .on(TruLlama.select_source_nodes().node.text)
+    .aggregate(np.mean)
+)
+
+#grounded = Groundedness(groundedness_provider=openai, summarize_provider=openai)
+grounded = Groundedness(groundedness_provider=openai)
+
+groundedness = (
+    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
+    .on(TruLlama.select_source_nodes().node.text)
+    .on_output()
+    .aggregate(grounded.grounded_statements_aggregator)
+)
+
+feedbacks = [qa_relevance, qs_relevance, groundedness]
+
+def get_openai_api_key():
+    return os.getenv("OPENAI_API_KEY")
+
+def get_trulens_recorder(query_engine, feedbacks, app_id):
+    tru_recorder = TruLlama(
+        query_engine,
+        app_id=app_id,
+        feedbacks=feedbacks
+    )
+    return tru_recorder
+
+def get_prebuilt_trulens_recorder(query_engine, app_id):
+    tru_recorder = TruLlama(
+        query_engine,
+        app_id=app_id,
+        feedbacks=feedbacks
+    )
+    return tru_recorder
+
+def build_automerging_index(
+    documents,
+    llm,
+    embed_model="local:BAAI/bge-small-en-v1.5",
+    save_dir="merging_index",
+    chunk_sizes=None,
+):
+    chunk_sizes = chunk_sizes or [2048, 512, 128]
+    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
+    nodes = node_parser.get_nodes_from_documents(documents)
+    leaf_nodes = get_leaf_nodes(nodes)
+    merging_context = ServiceContext.from_defaults(
+        llm=llm,
+        embed_model=embed_model,
+    )
+    storage_context = StorageContext.from_defaults()
+    storage_context.docstore.add_documents(nodes)
+
+    if not os.path.exists(save_dir):
+        automerging_index = VectorStoreIndex(
+            leaf_nodes, storage_context=storage_context, service_context=merging_context
+        )
+        automerging_index.storage_context.persist(persist_dir=save_dir)
+    else:
+        automerging_index = load_index_from_storage(
+            StorageContext.from_defaults(persist_dir=save_dir),
+            service_context=merging_context,
+        )
+    return automerging_index
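The "triad of metrics" in the commit message corresponds to the three TruLens feedback functions defined above: Answer Relevance, Context Relevance, and Groundedness. Below is a minimal sketch of how that triad gets attached to a query engine — not part of the commit; it assumes an existing LlamaIndex `query_engine`, an `OPENAI_API_KEY` in the environment, and that this module is importable as `utils_new`:

import os
from trulens_eval import Tru
from utils_new import get_prebuilt_trulens_recorder  # the triad feedbacks are baked into this helper

assert os.getenv("OPENAI_API_KEY"), "the feedback functions call the OpenAI API"

tru = Tru(database_file="./models/trulens_eval.sqlite")
recorder = get_prebuilt_trulens_recorder(query_engine, app_id="Triad Demo")  # query_engine assumed to exist

with recorder as recording:
    query_engine.query("Which is not a government healthcare philosophy?")

# each record now carries Answer Relevance, Context Relevance and Groundedness scores
records, feedback_cols = tru.get_records_and_feedback(app_ids=[])
print(records[feedback_cols].head())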
database/mock_qna_source.csv
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c80d88333c3b9fb2a700d49113d2ba3fef7cc671c11b640168c389bef411bc05
+size 7624
evaluate_model.py
ADDED
@@ -0,0 +1,83 @@
+import os, time
+import pandas as pd
+from tqdm import tqdm
+
+import chromadb
+import openai
+from llama_index import (
+    SimpleDirectoryReader,
+    StorageContext,
+    Document,
+    VectorStoreIndex,
+    ServiceContext
+)
+
+from llama_index.vector_stores.chroma import ChromaVectorStore
+from llama_index.llms import OpenAI
+from llama_index.embeddings import HuggingFaceEmbedding
+from trulens_eval import Tru
+
+import utils
+from utils import get_prebuilt_trulens_recorder
+
+openai.api_key = utils.get_openai_api_key()
+
+def main():
+
+    start_time = time.time()
+
+    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
+    fine_tuned_path = "local:./models/fine-tuned-embeddings"
+
+    db = chromadb.PersistentClient(path="./models/chroma_db")
+    chroma_collection = db.get_or_create_collection("quickstart")
+
+    # assign chroma as the vector_store to the context
+    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+    storage_context = StorageContext.from_defaults(vector_store=vector_store)
+    service_context = ServiceContext.from_defaults(llm=llm, embed_model=fine_tuned_path)
+
+    print("Loading embeddings from vector store..")
+    index = VectorStoreIndex.from_vector_store(
+        vector_store=vector_store,
+        storage_context=storage_context,
+        service_context=service_context
+    )
+    query_engine = index.as_query_engine()
+
+    mock_qna_source = pd.read_csv("./database/mock_qna_source.csv")
+    mock_qna_source = mock_qna_source[ mock_qna_source["question"].notnull() ]
+    print("mock_qna_source.shape", mock_qna_source.shape)
+
+    with open("./raw_documents/eval_questions.txt", "r") as fp:
+        questions_content = fp.read()
+    questions_content_ls = questions_content.split("\n\n")
+
+    eval_questions = mock_qna_source["question"].tolist() + questions_content_ls
+    response = query_engine.query(eval_questions[0])
+    print(str(response))
+
+    tru = Tru(database_file="./models/trulens_eval.sqlite")
+    tru_recorder = get_prebuilt_trulens_recorder(query_engine,
+                                                 app_id="Direct Query Engine")
+
+    print("Sending each question to llm..")
+    with tru_recorder as recording:
+        for question in tqdm(eval_questions):
+            response = query_engine.query(question)
+
+    records, feedback = tru.get_records_and_feedback(app_ids=[])
+
+    os.makedirs("./results", exist_ok=True)
+    records.to_csv("./results/records.csv", index=False)
+
+    print(tru.db.engine.url.render_as_string(hide_password=False))
+
+    end_time = time.time()
+    time_spent_mins = (end_time - start_time) / 60
+    with open("./results/time_cost.txt", "w") as fp:
+        fp.write(f"Takes {int(time_spent_mins)} mins to create llm evaluation.")
+
+if __name__ == "__main__":
+
+    main()
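For orientation, a hedged sketch (not in the commit) of inspecting the triad scores once `python evaluate_model.py` has finished; the column names are assumptions based on the `Feedback(name=...)` labels defined in utils.py:

import pandas as pd

records = pd.read_csv("./results/records.csv")  # written by evaluate_model.py
# assumed column names, matching the feedback labels "Answer Relevance", "Context Relevance", "Groundedness"
triad = [c for c in ["Answer Relevance", "Context Relevance", "Groundedness"] if c in records.columns]
print(records[triad].describe())  # per-metric mean/min/max across all eval questions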
models/trulens_eval.sqlite
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6849488edfa526805c51322b217557de99ac01882a9d2a136a351a68c6b305d5
+size 2936832
notebooks/002_persisted-embedding-model.ipynb
CHANGED
@@ -236,6 +236,7 @@
 "from llama_index.core import StorageContext\n",
 "from llama_index.core import ServiceContext\n",
 "from llama_index.core import Document\n",
+"from llama_index.core import Settings\n",
 "\n",
 "from llama_index.embeddings.huggingface.base import HuggingFaceEmbedding\n",
 "from llama_index.llms.openai import OpenAI\n",
pages/1_Leaderboard.py
CHANGED
@@ -31,7 +31,7 @@ database_url = None
 
 
 def streamlit_app():
-    tru = Tru(
+    tru = Tru(database_file="./models/trulens_eval.sqlite")
     lms = tru.db
 
     # Set the title and subtitle of the app
pages/2_Evaluations.py
CHANGED
@@ -48,7 +48,7 @@ st.runtime.legacy_caching.clear_cache()
 
 add_logo_and_style_overrides()
 
-tru = Tru()
+tru = Tru(database_file="./models/trulens_eval.sqlite")
 lms = tru.db
 
 df_results, feedback_cols = lms.get_records_and_feedback([])

@@ -143,7 +143,7 @@ else:
 else:
     app = apps
 
-st.
+st.query_params["app"] = app
 
 options = st.multiselect("Filter Applications", apps, default=app)
 
pages/3_app.py
CHANGED
@@ -4,13 +4,11 @@ import os
 try:
     raw_docs_files = ", ".join(os.listdir("./raw_documents"))
     curr_directory_files = ", ".join(os.listdir("."))
+    with open("./raw_documents/eval_answers.txt", "r") as fp:
+        eval_answers = fp.read()
 
-
-    time_cost_str = fp.read()
-
-    system_update = raw_docs_files + "\n\n" + curr_directory_files + "\n\n" + time_cost_str
-
+    system_update = raw_docs_files + "\n\n" + curr_directory_files + "\n\n" + eval_answers
 except:
     system_update = "NA"
 
-st.write(f"Hello World!
+st.write(f"Hello World! Info about the app: {system_update}")
qna_prompting.py
CHANGED
@@ -22,7 +22,11 @@ class QnA_Model(BaseModel):
         description=(
             "which chapter to extract, the format of this function argumet"
             "is with `Chapter_` as prefix concatenated with chapter number"
-            "in integer. For example, `Chapter_2`, `Chapter_10`."
+            "in integer. For example, `Chapter_2`, `Chapter_10`."
+            "if no chapter number specified or user requested for random question"
+            "or user has no preference over which chapter of textbook to be tested"
+            "return `Chapter_0`"
+        )
     )
 
 def get_qna_question(chapter_n: str) -> str:

@@ -37,11 +41,12 @@ def get_qna_question(chapter_n: str) -> str:
     """
     con = sqlite3.connect(db_path)
     cur = con.cursor()
-
-
-
-
-
+
+    filter_clause = "" if chapter_n == "Chapter_0" else f"WHERE chapter='{chapter_n}'"
+    sql_string = """SELECT id, question, option_1, option_2, option_3, option_4, correct_answer
+                    FROM qna_tbl
+                 """ + filter_clause
+
     res = cur.execute(sql_string)
     result = res.fetchone()
 
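An illustrative sketch (not in the commit) of what the new `filter_clause` logic produces: `Chapter_0` falls through to an unfiltered query over `qna_tbl`, while any other chapter value adds a WHERE clause.

for chapter_n in ["Chapter_2", "Chapter_0"]:
    filter_clause = "" if chapter_n == "Chapter_0" else f"WHERE chapter='{chapter_n}'"
    sql_string = ("SELECT id, question, option_1, option_2, option_3, option_4, correct_answer "
                  "FROM qna_tbl " + filter_clause)
    print(sql_string)  # Chapter_0 prints no WHERE clause, i.e. a draw from any chapter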
raw_documents/eval_answers.txt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0ac533f41fb123fe9281d27f2a3166e997f09c37178d12f5cbbea1fedeb5026b
+size 1458
raw_documents/eval_questions.txt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7bae3f2ac0cf2fdb2f58de8ecaa8d63014a4f84aa8a839dc7ff0d4ae8eb0eb22
+size 1126
raw_documents/qna.txt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:59cc1b620ccad1393fc4311e91e538774ac76149a42bd2391af7c855895d80bc
+size 56746
requirements.txt
CHANGED
@@ -99,8 +99,8 @@ kiwisolver==1.4.5
 kubernetes==29.0.0
 langchain==0.0.354
 langchain-community==0.0.8
-langchain-core==0.1.
-langsmith==0.0.
+langchain-core==0.1.23
+langsmith==0.0.87
 llama-index==0.10.1
 llama-index-agent-openai==0.1.1
 llama-index-core==0.10.1

@@ -109,6 +109,7 @@ llama-index-embeddings-openai==0.1.1
 llama-index-legacy==0.9.48
 llama-index-llms-openai==0.1.1
 llama-index-multi-modal-llms-openai==0.1.1
+llama-index-packs-auto-merging-retriever==0.1.2
 llama-index-program-openai==0.1.1
 llama-index-question-gen-openai==0.1.1
 llama-index-readers-file==0.1.2

@@ -218,7 +219,7 @@ SQLAlchemy==2.0.24
 st-annotated-text==4.0.1
 stack-data==0.6.3
 starlette==0.35.1
-streamlit==1.
+streamlit==1.31.1
 streamlit-aggrid==0.3.4.post3
 streamlit-camera-input-live==0.2.0
 streamlit-card==1.0.0

@@ -246,7 +247,7 @@ tqdm==4.66.1
 traitlets==5.14.0
 transformers==4.37.2
 trulens==0.13.4
-trulens-eval==0.
+trulens-eval==0.22.2
 typer==0.9.0
 types-python-dateutil==2.8.19.14
 typing-inspect==0.9.0
streamlit_app.py
CHANGED
@@ -71,7 +71,7 @@ with st.sidebar:
 
     st.subheader("Models and parameters")
     selected_model = st.sidebar.selectbox("Choose an OpenAI model",
-                                          ["gpt-3.5-turbo-
+                                          ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"],
                                           key="selected_model")
     temperature = st.sidebar.slider("temperature", min_value=0.0, max_value=2.0,
                                     value=0.0, step=0.01)
utils.py
CHANGED
@@ -5,27 +5,18 @@ from trulens_eval import (
     TruLlama,
     OpenAI
 )
-
 from trulens_eval.feedback import Groundedness
-import nest_asyncio
 
 from llama_index import ServiceContext, VectorStoreIndex, StorageContext
-from llama_index.node_parser import SentenceWindowNodeParser
-from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
-from llama_index.indices.postprocessor import SentenceTransformerRerank
 from llama_index import load_index_from_storage
-
 from llama_index.node_parser import HierarchicalNodeParser
 from llama_index.node_parser import get_leaf_nodes
 from llama_index import StorageContext
-from llama_index.retrievers import AutoMergingRetriever
-from llama_index.indices.postprocessor import SentenceTransformerRerank
-from llama_index.query_engine import RetrieverQueryEngine
-
 
+import nest_asyncio
 nest_asyncio.apply()
-openai = OpenAI()
 
+openai = OpenAI()
 qa_relevance = (
     Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
     .on_input_output()

@@ -69,49 +60,6 @@ def get_prebuilt_trulens_recorder(query_engine, app_id):
     )
     return tru_recorder
 
-def build_sentence_window_index(
-    document, llm, embed_model="local:BAAI/bge-small-en-v1.5", save_dir="sentence_index"
-):
-    # create the sentence window node parser w/ default settings
-    node_parser = SentenceWindowNodeParser.from_defaults(
-        window_size=3,
-        window_metadata_key="window",
-        original_text_metadata_key="original_text",
-    )
-    sentence_context = ServiceContext.from_defaults(
-        llm=llm,
-        embed_model=embed_model,
-        node_parser=node_parser,
-    )
-    if not os.path.exists(save_dir):
-        sentence_index = VectorStoreIndex.from_documents(
-            [document], service_context=sentence_context
-        )
-        sentence_index.storage_context.persist(persist_dir=save_dir)
-    else:
-        sentence_index = load_index_from_storage(
-            StorageContext.from_defaults(persist_dir=save_dir),
-            service_context=sentence_context,
-        )
-
-    return sentence_index
-
-def get_sentence_window_query_engine(
-    sentence_index,
-    similarity_top_k=6,
-    rerank_top_n=2,
-):
-    # define postprocessors
-    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
-    rerank = SentenceTransformerRerank(
-        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
-    )
-
-    sentence_window_engine = sentence_index.as_query_engine(
-        similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
-    )
-    return sentence_window_engine
-
 def build_automerging_index(
     documents,
     llm,

@@ -140,21 +88,4 @@ def build_automerging_index(
         StorageContext.from_defaults(persist_dir=save_dir),
         service_context=merging_context,
     )
-    return automerging_index
-
-def get_automerging_query_engine(
-    automerging_index,
-    similarity_top_k=12,
-    rerank_top_n=2,
-):
-    base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
-    retriever = AutoMergingRetriever(
-        base_retriever, automerging_index.storage_context, verbose=True
-    )
-    rerank = SentenceTransformerRerank(
-        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
-    )
-    auto_merging_engine = RetrieverQueryEngine.from_args(
-        retriever, node_postprocessors=[rerank]
-    )
-    return auto_merging_engine
+    return automerging_index
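After this commit utils.py retains only the auto-merging index builder plus the recorder helpers. A minimal usage sketch under the 0.9-style llama_index imports the file still uses — the document path and model name are taken from elsewhere in this commit, the rest is an assumption:

from llama_index import SimpleDirectoryReader
from llama_index.llms import OpenAI
from utils import build_automerging_index, get_prebuilt_trulens_recorder

documents = SimpleDirectoryReader(input_files=["./raw_documents/qna.txt"]).load_data()
llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)

# builds the hierarchical auto-merging index on first run, reloads it from ./merging_index afterwards
index = build_automerging_index(documents, llm, save_dir="merging_index")
query_engine = index.as_query_engine()

# wraps the engine with the triad feedbacks so every query is scored
recorder = get_prebuilt_trulens_recorder(query_engine, app_id="Auto-merging Engine")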