Spaces:
Runtime error

dzenzzz committed on
Commit
815e080
·
verified ·
1 Parent(s): 5b19129

Update doc_searcher.py

Browse files
Files changed (1) hide show
  1. doc_searcher.py +34 -20
doc_searcher.py CHANGED
@@ -1,4 +1,5 @@
1
  from qdrant_client import QdrantClient
 
2
  from fastembed import SparseTextEmbedding, LateInteractionTextEmbedding
3
  from qdrant_client import QdrantClient, models
4
  from sentence_transformers import SentenceTransformer
@@ -13,7 +14,7 @@ class DocSearcher:
13
  self.late_interaction_model = LateInteractionTextEmbedding(LATE_INTERACTION_MODEL)
14
  self.qdrant_client = QdrantClient(QDRANT_URL,api_key=QDRANT_API_KEY,timeout=30)
15
 
16
- async def search(self, text: str):
17
 
18
  dense_query = self.dense_model.encode(text).tolist()
19
  sparse_query = next(self.sparse_model.query_embed(text))
@@ -22,39 +23,52 @@ class DocSearcher:
22
  models.Prefetch(
23
  query=dense_query,
24
  using=DENSE_MODEL,
25
- params=models.SearchParams(
26
- quantization=models.QuantizationSearchParams(
27
- rescore=False,
28
- ),
29
- ),
30
- limit=200
31
  ),
32
  models.Prefetch(
33
  query=models.SparseVector(**sparse_query.as_object()),
34
  using=SPARSE_MODEL,
35
- params=models.SearchParams(
36
- quantization=models.QuantizationSearchParams(
37
- rescore=False,
38
- ),
39
- ),
40
- limit=200
41
  )
42
  ]
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  search_result = self.qdrant_client.query_points(
45
  collection_name= self.collection_name,
46
- search_params=models.SearchParams(
47
- hnsw_ef=128,
48
- quantization=models.QuantizationSearchParams(
49
- rescore=True,
50
- ),
51
- ),
52
  prefetch=prefetch,
53
  query=models.FusionQuery(
54
  fusion=models.Fusion.RRF,
55
  ),
56
  with_payload=True,
57
- limit = 10
 
58
  ).points
59
 
60
  data = []
 
1
  from qdrant_client import QdrantClient
2
+ from qdrant_client.models import Filter, FieldCondition, MatchValue
3
  from fastembed import SparseTextEmbedding, LateInteractionTextEmbedding
4
  from qdrant_client import QdrantClient, models
5
  from sentence_transformers import SentenceTransformer
 
14
  self.late_interaction_model = LateInteractionTextEmbedding(LATE_INTERACTION_MODEL)
15
  self.qdrant_client = QdrantClient(QDRANT_URL,api_key=QDRANT_API_KEY,timeout=30)
16
 
17
+ async def search(self, text: str,type:int, law_type: str | None = None, offset: int = 0):
18
 
19
  dense_query = self.dense_model.encode(text).tolist()
20
  sparse_query = next(self.sparse_model.query_embed(text))
 
23
  models.Prefetch(
24
  query=dense_query,
25
  using=DENSE_MODEL,
26
+ limit=100
 
 
 
 
 
27
  ),
28
  models.Prefetch(
29
  query=models.SparseVector(**sparse_query.as_object()),
30
  using=SPARSE_MODEL,
31
+ limit=100
 
 
 
 
 
32
  )
33
  ]
34
 
35
+ if type == 2:
36
+ filter = None
37
+ elif type == 1 and law_type is not None:
38
+ filter = Filter(
39
+ must=[
40
+ FieldCondition(
41
+ key="tip_dokumenta",
42
+ match=MatchValue(value=type)
43
+ ),
44
+ FieldCondition(
45
+ key="vrsta_akta",
46
+ match=MatchValue(value=law_type)
47
+ ),
48
+ ],
49
+ must_not=[
50
+ FieldCondition(key="status", match=MatchValue(value="Nevažeći")),
51
+ ]
52
+ )
53
+ else:
54
+ filter = Filter(
55
+ must=[
56
+ FieldCondition(
57
+ key="tip_dokumenta",
58
+ match=MatchValue(value=type)
59
+ ),
60
+ ]
61
+ )
62
  search_result = self.qdrant_client.query_points(
63
  collection_name= self.collection_name,
64
+ query_filter=filter,
 
 
 
 
 
65
  prefetch=prefetch,
66
  query=models.FusionQuery(
67
  fusion=models.Fusion.RRF,
68
  ),
69
  with_payload=True,
70
+ limit = 10,
71
+ offset = offset
72
  ).points
73
 
74
  data = []