Spaces:
Sleeping
Sleeping
Nikolay Banar
committed on
Commit
·
8e0ebb3
1
Parent(s):
d83ab70
Add application file
Browse files
- .gitattributes +0 -35
- README.md +0 -13
- app.py +60 -0
.gitattributes
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: LegalSearchApp
|
| 3 |
-
emoji: 👀
|
| 4 |
-
colorFrom: purple
|
| 5 |
-
colorTo: yellow
|
| 6 |
-
sdk: streamlit
|
| 7 |
-
sdk_version: 1.29.0
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
license: cc-by-nc-sa-4.0
|
| 11 |
-
---
|
| 12 |
-
|
| 13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding=utf-8
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import json
|
| 4 |
+
from search import TFIDF, GenClient
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _parse_summary(raw):
    """Return the JSON object embedded in *raw*, or None if there is none.

    The text is cut from the first '{' to the last '}' before parsing, so
    anything surrounding the object is ignored. Missing braces or invalid
    JSON yield None instead of raising.
    """
    first_index = raw.find('{')
    last_index = raw.rfind('}')
    if first_index == -1 or last_index < first_index:
        return None
    try:
        return json.loads(raw[first_index:last_index + 1])
    except json.JSONDecodeError:
        return None


def display_search_results(results, page_number, results_per_page):
    """Render one page of search results, with a relevance checkbox per hit.

    Args:
        results: Sequence of result dicts. This function reads the keys
            'link', 'text', 'Nummer' and 'summary'.
        page_number: 1-based number of the page to show.
        results_per_page: How many results make up one page.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; the checkbox is assumed to be drawn once per
    result -- confirm against the original file.
    """
    st.title("Search Results")
    start_idx = (page_number - 1) * results_per_page
    end_idx = start_idx + results_per_page

    for index, result in enumerate(results[start_idx:end_idx], start=start_idx):
        number = index + 1  # 1-based position shown to the user

        if index > 10:
            # NOTE(review): "PDPPDP" looks like a leftover placeholder; it is
            # kept unchanged here -- confirm whether it should be removed.
            st.write(f"{number}. [link](https://www.vlaanderen.be{result['link']}):\n PDPPDP \n {result['text']}")
        else:
            st.write(f"{number}. [{result['Nummer']}](https://www.vlaanderen.be{result['link']}):")
            # The summary is free text that should contain a JSON object; a
            # summary without usable JSON is skipped instead of crashing.
            summary = _parse_summary(result['summary'])
            if (
                summary is not None
                and summary.get('relevancy') == 'YES'
                and 'answer' in summary
            ):
                st.write(f"{summary['answer']}")

        # Use the same 1-based number as the heading so label and list agree.
        st.checkbox(f"Is this document {number} relevant?")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def main():
    """Show the search form and, once results exist, a paginated result list.

    Pressing "Search" runs the query through ``TFIDF.search_all`` (top 5),
    attaches a generated summary to each hit via ``GenClient.respond`` and
    stores the hits in ``st.session_state.results``. Whenever stored results
    are present they are displayed with page-size and page-number controls.
    """
    st.title("Legal Search Engine")
    query = st.text_input("Enter your search query:")

    if st.button("Search"):
        # Build the retriever and generator only when a search is requested;
        # constructing them up front would repeat that work on every call
        # of main(), even when nothing is searched.
        model = TFIDF()
        gen = GenClient()
        results = model.search_all([query], top_k=5)[0]
        for hit in results:
            hit['summary'] = gen.respond(query, hit['text'])
        st.session_state.results = results

    if "results" not in st.session_state:
        return

    results = st.session_state.results
    st.write("Search Results:")

    total = len(results)
    if total == 0:
        # No valid slider/number-input range exists for an empty list.
        st.write("No results found.")
        return

    if total > 1:
        # The default must not exceed max_value when fewer than 5 hits exist.
        results_per_page = st.slider(
            "Results per Page", min_value=1, max_value=total, value=min(5, total)
        )
    else:
        # A slider needs min_value < max_value, so a single hit gets none.
        results_per_page = 1

    # Ceiling division: cap the page picker at the number of pages, not the
    # number of results, so an empty page cannot be selected.
    total_pages = -(-total // results_per_page)
    page_number = st.number_input(
        "Page Number", min_value=1, max_value=total_pages, value=1
    )

    # Display search results for the selected page
    display_search_results(results, page_number, results_per_page)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# Script entry point: start the app only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|