Spaces:

NikosKprl
/

Entity_Linking_Web_Application

Running

App Files Files

NikosKprl committed on Nov 10, 2024

Commit

2d2d25b

verified ·

1 Parent(s): 07c6579

Update ✨Entity Linking Application✨.py

Browse files

Files changed (1) hide show

✨Entity Linking Application✨.py +180 -176

✨Entity Linking Application✨.py CHANGED Viewed

@@ -298,198 +298,202 @@ def main_cli():
         if input_sentence_user and input_mention_user:
             # check if the mention is in the sentence
             if input_mention_user in input_sentence_user:
-                st.write("Applying Data Normalization module... (1/5)")
-                # Data Normalization
-                start_time = time.time()
-                list_with_full_names = []
-                list_with_names_to_show = []
-                if disambi == "Yes":
                     response = client.chat.completions.create(
-                        messages=[
-                            {
-                                "role": "system",
-                                "content": """
-                                            I will give you one or more labels within a sentence. Your task is as follows:
-                                            Identify each label in the sentence, and check if it is an acronym.
-                                            If the label is an acronym, respond with the full name of the acronym.
-                                            If the label is not an acronym, respond with the label exactly as it was given to you.
-                                            If a label contains multiple terms (e.g., 'phase and DIC microscopy'), treat each term within the label as a separate label.
-                                            This means you should identify and explain each part of the label individually.
-                                            Each part should be on its own line in the response.
-                                            Context-Specific Terms: If the sentence context suggests a relevant term that applies to each label (such as "study" in 'morphological, sedimentological, and stratigraphical study'), add that term to each label’s explanation.
-                                            Use context clues to determine the appropriate term to add (e.g., 'study' or 'microscopy').
-                                            Output Format: Your response should contain only the explanations, formatted as follows:
-                                            Each label or part of a label should be on a new line.
-                                            Do not include any additional text, and do not repeat the original sentence.
-                                            Example 1:
-                                            Input:
-                                            label: phase and DIC microscopy
-                                            context: Tardigrades have been extracted from samples using centrifugation with Ludox AM™ and mounted on individual microscope slides in Hoyer's medium for identification under phase and DIC microscopy.
-                                            Expected response:
-                                            phase: phase microscopy
-                                            DIC microscopy: Differential interference contrast microscopy
-                                            Example 2:
-                                            Input:
-                                            label: morphological, sedimentological, and stratigraphical study
-                                            context: This paper presents results of a morphological, sedimentological, and stratigraphical study of relict beach ridges formed on a prograded coastal barrier in Bream Bay, North Island New Zealand.
-                                            Expected response:
-                                            morphological: morphological study
-                                            sedimentological: sedimentological study
-                                            stratigraphical: stratigraphical study
-                                            IMPORTANT:
-                                            Each label, even if nested within another, should be treated as an individual item.
-                                            Each individual label or acronym should be output on a separate line.
-                                            """
-                            },
-                            {
-                                "role": "user",
-                                "content": f"label:{input_mention_user}, context:{input_sentence_user}"
-                            }
-                        ],
-                        temperature=1.0,
-                        top_p=1.0,
-                        max_tokens=1000,
-                        model=model_name
-                    )
-                    kati = response.choices[0].message.content.splitlines()
                     print(response.choices[0].message.content)
-                    for i in kati:
                         context = i.split(":")[-1].strip()
-                        original_name = i.split(":")[0].strip()
-                        list_with_full_names.append(context)
-                        list_with_names_to_show.append(original_name)
-                    name = ",".join(list_with_full_names)
-                else:
-                    name = input_mention_user
-                    list_with_full_names.append(name)
-                    list_with_names_to_show.append(name)
-                input_sentence_user = input_sentence_user.replace(input_mention_user, name)  # Changing the mention to the correct one
-                response = client.chat.completions.create(
-                        messages=[
-                            {
-                                "role": "system",
-                                "content": "Given a label or labels within a sentence, provide a brief description (2-3 sentences) explaining what the label represents, similar to how a Wikipedia entry would. Format your response as follows: label: description. I want only the description of the label, not the role in the context. Include the label in the description as well. For example: Sentiment analysis: Sentiment analysis is the use of natural language processing, text analysis, computational linguistics, and biometrics to systematically identify, extract, quantify, and study affective states and subjective information.\nText analysis: Text mining, text data mining (TDM) or text analytics is the process of deriving high-quality information from text. It involves the discovery by computer of new, previously unknown information, by automatically extracting information from different written resources.",
-                            },
-                            {
-                                "role": "user",
-                                "content": f"label:{name}, context:{input_sentence_user}"
-                            }
-                        ],
-                        temperature=1.0,
-                        top_p=1.0,
-                        max_tokens=1000,
-                        model=model_name
-                    )
-                z = response.choices[0].message.content.splitlines()
-                print(response.choices[0].message.content)
-                list_with_contexts = []
-                for i in z:
-                    context = i.split(":")[-1].strip()
-                    list_with_contexts.append(context)
                 # Candidate Retrieval & Information Gathering
                 async def big_main(mention, single, combi):
                     mention = mention.split(",")
-                    st.write("Applying Candidate Retrieval module... (2/5)")
-                    for i in mention:
-                        await mains(i, single, combi)
-                    st.write("Applying Information Gathering module... (3/5)")
-                    for i in mention:
-                        await main(i)
                 asyncio.run(big_main(name, single, combi))
                 number = 0
                 for i,j,o in zip(list_with_full_names,list_with_contexts,list_with_names_to_show):
                     number += 1
-                    st.write(f"Applying Candidate Selection module... (4/5) [{number}/{len(list_with_full_names)}]")
-                    with open(f"/home/user/app/info_extraction/{i}.json", "r") as f:
-                        json_file = json.load(f)
-                        lista = []
-                        lista_1 = []
-                        for element in json_file:
-                            qid = element.get("qid")
-                            link = f"https://www.wikidata.org/wiki/{qid}"
-                            label = element.get("label")
-                            description = element.get("description")
-                            label_emb = model.encode([label])
-                            desc_emb = model.encode([description])
-                            lista.append({link: [label_emb, desc_emb]})
-                        label_dataset_emb = model.encode([i])
-                        desc_dataset_emb = model.encode([j])
-                        for emb in lista:
-                            for k, v in emb.items():
-                                cossim_label = model.similarity(label_dataset_emb, v[0][0])
-                                desc_label = model.similarity(desc_dataset_emb, v[1][0])
-                                emb_mean = np.mean([cossim_label, desc_label])
-                                lista_1.append({k: emb_mean})
-                        sorted_data = sorted(lista_1, key=lambda x: list(x.values())[0], reverse=True)
-                        st.write(f"Applying Candidate Matching module... (4/5) [{number}/{len(list_with_full_names)}]")
-                        if sorted_data:
-                            sorted_top = sorted_data[0]
-                            for k, v in sorted_top.items():
-                                qid = k.split("/")[-1]
-                                wikidata2wikipedia = f"""
-                                    SELECT ?wikipedia
-                                    WHERE {{
-                                          ?wikipedia schema:about wd:{qid} .
-                                          ?wikipedia schema:isPartOf <https://en.wikipedia.org/> .
-                                    }}
-                                    """
-                                results = get_resultss(wikidata2wikipedia)
-                                for result in results["results"]["bindings"]:
-                                    for key, value in result.items():
-                                        wikipedia = value.get("value", "None")
-                                sparql = SPARQLWrapper("http://dbpedia.org/sparql")
-                                wikidata2dbpedia = f"""
-                                    SELECT ?dbpedia
-                                    WHERE {{
-                                          ?dbpedia owl:sameAs <http://www.wikidata.org/entity/{qid}>.
-                                    }}
-                                    """
-                                sparql.setQuery(wikidata2dbpedia)
-                                sparql.setReturnFormat(JSON)
-                                results = sparql.query().convert()
-                                for result in results["results"]["bindings"]:
-                                    dbpedia = result["dbpedia"]["value"]
-                                st.text(f"The correct entity for '{o}' is:")
-                                st.success(f"Wikipedia: {wikipedia}")
-                                st.success(f"Wikidata: {k}")
-                                st.success(f"DBpedia: {dbpedia}")
-                        else:
-                            st.warning(f"The entity: {o} is NIL.")
             else:
                 st.warning(f"The mention '{input_mention_user}' was NOT found in the sentence.")
         else:

         if input_sentence_user and input_mention_user:
             # check if the mention is in the sentence
             if input_mention_user in input_sentence_user:
+                with st.spinner("Applying Data Normalization module... (1/5)")
+                # Data Normalization
+                    start_time = time.time()
+                    list_with_full_names = []
+                    list_with_names_to_show = []
+                    if disambi == "Yes":
+                        response = client.chat.completions.create(
+                            messages=[
+                                {
+                                    "role": "system",
+                                    "content": """
+                                                I will give you one or more labels within a sentence. Your task is as follows:
+                                                Identify each label in the sentence, and check if it is an acronym.
+                                                If the label is an acronym, respond with the full name of the acronym.
+                                                If the label is not an acronym, respond with the label exactly as it was given to you.
+                                                If a label contains multiple terms (e.g., 'phase and DIC microscopy'), treat each term within the label as a separate label.
+                                                This means you should identify and explain each part of the label individually.
+                                                Each part should be on its own line in the response.
+                                                Context-Specific Terms: If the sentence context suggests a relevant term that applies to each label (such as "study" in 'morphological, sedimentological, and stratigraphical study'), add that term to each label’s explanation.
+                                                Use context clues to determine the appropriate term to add (e.g., 'study' or 'microscopy').
+                                                Output Format: Your response should contain only the explanations, formatted as follows:
+                                                Each label or part of a label should be on a new line.
+                                                Do not include any additional text, and do not repeat the original sentence.
+                                                Example 1:
+                                                Input:
+                                                label: phase and DIC microscopy
+                                                context: Tardigrades have been extracted from samples using centrifugation with Ludox AM™ and mounted on individual microscope slides in Hoyer's medium for identification under phase and DIC microscopy.
+                                                Expected response:
+                                                phase: phase microscopy
+                                                DIC microscopy: Differential interference contrast microscopy
+                                                Example 2:
+                                                Input:
+                                                label: morphological, sedimentological, and stratigraphical study
+                                                context: This paper presents results of a morphological, sedimentological, and stratigraphical study of relict beach ridges formed on a prograded coastal barrier in Bream Bay, North Island New Zealand.
+                                                Expected response:
+                                                morphological: morphological study
+                                                sedimentological: sedimentological study
+                                                stratigraphical: stratigraphical study
+                                                IMPORTANT:
+                                                Each label, even if nested within another, should be treated as an individual item.
+                                                Each individual label or acronym should be output on a separate line.
+                                                """
+                                },
+                                {
+                                    "role": "user",
+                                    "content": f"label:{input_mention_user}, context:{input_sentence_user}"
+                                }
+                            ],
+                            temperature=1.0,
+                            top_p=1.0,
+                            max_tokens=1000,
+                            model=model_name
+                        )
+                        kati = response.choices[0].message.content.splitlines()
+                        print(response.choices[0].message.content)
+                        for i in kati:
+                            context = i.split(":")[-1].strip()
+                            original_name = i.split(":")[0].strip()
+                            list_with_full_names.append(context)
+                            list_with_names_to_show.append(original_name)
+                        name = ",".join(list_with_full_names)
+                    else:
+                        name = input_mention_user
+                        list_with_full_names.append(name)
+                        list_with_names_to_show.append(name)
+                    input_sentence_user = input_sentence_user.replace(input_mention_user, name)  # Changing the mention to the correct one
                     response = client.chat.completions.create(
+                            messages=[
+                                {
+                                    "role": "system",
+                                    "content": "Given a label or labels within a sentence, provide a brief description (2-3 sentences) explaining what the label represents, similar to how a Wikipedia entry would. Format your response as follows: label: description. I want only the description of the label, not the role in the context. Include the label in the description as well. For example: Sentiment analysis: Sentiment analysis is the use of natural language processing, text analysis, computational linguistics, and biometrics to systematically identify, extract, quantify, and study affective states and subjective information.\nText analysis: Text mining, text data mining (TDM) or text analytics is the process of deriving high-quality information from text. It involves the discovery by computer of new, previously unknown information, by automatically extracting information from different written resources.",
+                                },
+                                {
+                                    "role": "user",
+                                    "content": f"label:{name}, context:{input_sentence_user}"
+                                }
+                            ],
+                            temperature=1.0,
+                            top_p=1.0,
+                            max_tokens=1000,
+                            model=model_name
+                        )
+                    z = response.choices[0].message.content.splitlines()
                     print(response.choices[0].message.content)
+                    list_with_contexts = []
+                    for i in z:
                         context = i.split(":")[-1].strip()
+                        list_with_contexts.append(context)
+                st.write("✅ Applied Data Normilzation module (1/5)")
                 # Candidate Retrieval & Information Gathering
                 async def big_main(mention, single, combi):  # coroutine covering pipeline stages 2/5 and 3/5 for every mention
                     # `mention` is passed in as a single comma-joined string (see the asyncio.run call below); split it back into individual names.
                     mention = mention.split(",")
+                    with st.spinner("Applying Candidate Retrieval module... (2/5)"):
+                        for i in mention:
+                            await mains(i, single, combi)  # awaited one name at a time; `mains` is defined outside this view - presumably the candidate retrieval step, per the spinner label
+                    st.write("✅ Applied Candidate Retrieval module (2/5)")  # emitted after the stage-2 loop has finished for all names
+                    with st.spinner("Applying Information Gathering module... (3/5)"):
+                        for i in mention:
+                            await main(i)  # `main` is defined outside this view - presumably the information gathering step, per the spinner label
+                    st.write("✅ Applied Information Gathering module (3/5)")  # emitted after the stage-3 loop has finished for all names
                 asyncio.run(big_main(name, single, combi))
                 number = 0
                 for i,j,o in zip(list_with_full_names,list_with_contexts,list_with_names_to_show):
                     number += 1
+                    with st.spinner(f"Applying Candidate Selection module... (4/5) [{number}/{len(list_with_full_names)}]")):
+                        with open(f"/home/user/app/info_extraction/{i}.json", "r") as f:
+                            json_file = json.load(f)
+                            lista = []
+                            lista_1 = []
+                            for element in json_file:
+                                qid = element.get("qid")
+                                link = f"https://www.wikidata.org/wiki/{qid}"
+                                label = element.get("label")
+                                description = element.get("description")
+                                label_emb = model.encode([label])
+                                desc_emb = model.encode([description])
+                                lista.append({link: [label_emb, desc_emb]})
+                            label_dataset_emb = model.encode([i])
+                            desc_dataset_emb = model.encode([j])
+                            for emb in lista:
+                                for k, v in emb.items():
+                                    cossim_label = model.similarity(label_dataset_emb, v[0][0])
+                                    desc_label = model.similarity(desc_dataset_emb, v[1][0])
+                                    emb_mean = np.mean([cossim_label, desc_label])
+                                    lista_1.append({k: emb_mean})
+                                    print(k)
+                            sorted_data = sorted(lista_1, key=lambda x: list(x.values())[0], reverse=True)
+                        st.write(f"✅ Applined Candidate Selection module (4/5) [{number}/{len(list_with_full_names)}]")
+                        with st.spinner(f"Applying Candidate Matching module... (5/5) [{number}/{len(list_with_full_names)}]"):
+                            if sorted_data:
+                                sorted_top = sorted_data[0]
+                                for k, v in sorted_top.items():
+                                    qid = k.split("/")[-1]
+                                    wikidata2wikipedia = f"""
+                                        SELECT ?wikipedia
+                                        WHERE {{
+                                              ?wikipedia schema:about wd:{qid} .
+                                              ?wikipedia schema:isPartOf <https://en.wikipedia.org/> .
+                                        }}
+                                        """
+                                    results = get_resultss(wikidata2wikipedia)
+                                    for result in results["results"]["bindings"]:
+                                        for key, value in result.items():
+                                            wikipedia = value.get("value", "None")
+                                    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
+                                    wikidata2dbpedia = f"""
+                                        SELECT ?dbpedia
+                                        WHERE {{
+                                              ?dbpedia owl:sameAs <http://www.wikidata.org/entity/{qid}>.
+                                        }}
+                                        """
+                                    sparql.setQuery(wikidata2dbpedia)
+                                    sparql.setReturnFormat(JSON)
+                                    results = sparql.query().convert()
+                                    for result in results["results"]["bindings"]:
+                                        dbpedia = result["dbpedia"]["value"]
+                                    st.text(f"The correct entity for '{o}' is:")
+                                    st.success(f"Wikipedia: {wikipedia}")
+                                    st.success(f"Wikidata: {k}")
+                                    st.success(f"DBpedia: {dbpedia}")
+                            else:
+                                st.warning(f"The entity: {o} is NIL.")
+                            st.write(f"✅ Applied Candidate Matching module (5/5) [{number}/{len(list_with_full_names)}]")
             else:
                 st.warning(f"The mention '{input_mention_user}' was NOT found in the sentence.")
         else: