|
from flask import Flask,request,jsonify,Blueprint,session,render_template,Response |
|
import spacy |
|
import numpy as np |
|
from sentence_transformers import SentenceTransformer,util |
|
from elasticsearch import Elasticsearch |
|
|
|
|
|
app=Flask(__name__) |
|
|
|
|
|
|
|
def similarity(model, vertical):
    """Map a free-text vertical onto the closest supported vertical name.

    The supported names and ``vertical`` are both embedded with ``model``;
    the supported name whose embedding has the highest cosine similarity to
    the embedding of ``vertical`` is returned.

    Args:
        model: Encoder exposing ``encode(..., convert_to_tensor=True)``
            (the caller in this file passes a ``SentenceTransformer``).
        vertical: Text of the vertical to normalise.

    Returns:
        The entry of the built-in vertical list that is most similar to
        ``vertical``.
    """
    vertical_list = [
        "Mortgage",
        "Real estate",
        "Retail",
        "Insurance",
        "Human_Resource",
        "Pharmaceutical",
    ]

    word_embeddings = model.encode(vertical_list, convert_to_tensor=True)
    user_embedding = model.encode(vertical, convert_to_tensor=True)
    cosine_similarities = util.pytorch_cos_sim(user_embedding, word_embeddings)

    # Use the tensor's own argmax rather than np.argmax: NumPy would first
    # have to convert the tensor to an ndarray, which is not possible for a
    # tensor that lives on a non-CPU device. With no dim given, argmax
    # returns the index into the flattened scores, i.e. the position in
    # vertical_list because there is a single query row.
    most_similar_word_index = int(cosine_similarities.argmax())
    return vertical_list[most_similar_word_index]
|
|
|
|
|
|
|
# Models already loaded in this process, keyed by the path they were loaded
# from. Loading is expensive, so each model is read from disk only once
# instead of on every call to get_entities().
_MODEL_CACHE = {}


def _cached_model(path, loader):
    """Return the model stored at ``path``, calling ``loader(path)`` on first use."""
    # No lock is taken: two concurrent first calls may both load the model,
    # in which case the later result simply replaces the earlier one.
    if path not in _MODEL_CACHE:
        _MODEL_CACHE[path] = loader(path)
    return _MODEL_CACHE[path]


def get_entities(text):
    """Build an Elasticsearch query from the entities recognised in ``text``.

    The NER pipeline is run over ``text`` and entities labelled ``CITY``,
    ``VERTICAL``, ``ZIPCODE``, ``STATE`` and ``STATE_CODE`` are collected.
    When a label occurs more than once the last occurrence wins, except for
    ``VERTICAL`` where the shortest text is kept. A recognised vertical is
    then normalised through ``similarity``.

    Args:
        text: The user's search text.

    Returns:
        A ``(query, fields_with_values)`` tuple. ``query`` is a bool query
        with one ``match`` clause under ``must`` per field that received a
        value. ``fields_with_values`` lists those field names followed by
        ``"name"``.
    """
    ner_model = _cached_model(
        "spacy_ner_model/en_pipeline/en_pipeline-0.0.0", spacy.load
    )
    sim_model = _cached_model("sentence_transformer_model", SentenceTransformer)

    # NER label -> field name used in the query. The insertion order also
    # fixes the order of the generated clauses.
    entity_variables = {
        'CITY': 'city',
        'VERTICAL': 'vertical',
        'ZIPCODE': 'zip_code',
        'STATE': 'state',
        'STATE_CODE': 'state_code',
    }
    entity_values = dict.fromkeys(entity_variables.values())

    for ent in ner_model(text).ents:
        field = entity_variables.get(ent.label_)
        if field is None:
            continue
        current = entity_values[field]
        # For verticals keep the shortest candidate seen so far; every other
        # field is simply overwritten by the latest entity.
        if (
            field == 'vertical'
            and current is not None
            and len(ent.text) >= len(current)
        ):
            continue
        entity_values[field] = ent.text

    if entity_values['vertical']:
        entity_values['vertical'] = similarity(sim_model, entity_values['vertical'])
    else:
        entity_values['vertical'] = None

    must_clauses = []
    fields_with_values = []
    for field, value in entity_values.items():
        if value is not None:
            must_clauses.append({"match": {field: value}})
            fields_with_values.append(field)
    # "name" is always requested, whether or not any entity was found.
    fields_with_values.append("name")

    query = {
        'query': {
            "bool": {
                "must": must_clauses,
            },
        },
    }
    return query, fields_with_values
|
|
|
|
|
|
|
# Elasticsearch client shared by all requests; created on first use so that
# a new connection is not set up for every request.
_ES_CLIENT = None


def _get_es_client():
    """Return the shared Elasticsearch client, creating it on first use.

    The password is read from the ``ELASTIC_PASSWORD`` environment variable
    so that no credential is stored in the source. ``ELASTIC_URL`` and
    ``ELASTIC_USER`` may override the default endpoint and user name.

    Raises:
        RuntimeError: If ``ELASTIC_PASSWORD`` is not set.
    """
    import os

    global _ES_CLIENT
    if _ES_CLIENT is None:
        password = os.environ.get("ELASTIC_PASSWORD")
        if not password:
            raise RuntimeError(
                "ELASTIC_PASSWORD environment variable is not set"
            )
        _ES_CLIENT = Elasticsearch(
            [
                os.environ.get(
                    "ELASTIC_URL",
                    "https://elasticsearch.preprod.experience.com:443",
                )
            ],
            basic_auth=(
                os.environ.get("ELASTIC_USER", "elastic_app_user"),
                password,
            ),
            verify_certs=True,
        )
    return _ES_CLIENT


@app.route('/search', methods=["GET"])
def search():
    """Handle ``GET /search?text=...``.

    The ``text`` query parameter is turned into an Elasticsearch query by
    ``get_entities`` and run against the ``profiles_phase2`` index.

    Returns:
        A JSON array with the ``_source`` of every hit, or a JSON error
        object with status 400 when ``text`` is missing.
    """
    text = request.args.get("text")
    if text is None:
        # Without this guard the NER pipeline would be called with None.
        return jsonify({"error": "Missing required query parameter 'text'."}), 400

    query, fields_with_values = get_entities(text)
    results = _get_es_client().search(
        index='profiles_phase2',
        body=query,
        track_total_hits=True,
        source=fields_with_values,
    )
    return jsonify([hit['_source'] for hit in results['hits']['hits']])
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start Flask's built-in server when the file is executed as a script.
    # No application context is pushed by hand here: app.run() does not need
    # one, and Flask sets up the context itself while handling each request.
    app.run()