import gradio as gr
from transformers import pipeline
from langdetect import detect
import requests
import wikipedia
import re
# Load the multilingual NER model once at import time.
# grouped_entities=True merges word-piece tokens into whole entity spans,
# so each result's 'word' field is a full name rather than subword fragments.
ner_pipeline = pipeline("ner", model="Davlan/xlm-roberta-base-ner-hrl", grouped_entities=True)
# Get Wikidata entity info
def get_wikidata_info(entity, lang="en"):
    """Look up an entity on Wikidata via its SPARQL endpoint.

    Parameters:
        entity: surface form of the entity, matched exactly against rdfs:label.
        lang: two-letter language code used for label matching and the
            wikibase label service.

    Returns:
        (label, description, coordinate, wikidata_url). On any failure —
        network error, bad JSON, unexpected result shape, or no match —
        falls back to (entity, "No description available.", "", "").
    """
    # Escape backslashes and quotes so an entity containing '"' cannot
    # terminate (or inject into) the SPARQL string literal.
    safe_entity = entity.replace("\\", "\\\\").replace('"', '\\"')
    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{safe_entity}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {"Accept": "application/sparql-results+json"}
    try:
        # Timeout keeps the UI responsive if the public endpoint hangs.
        response = requests.get(url, params={"query": query}, headers=headers, timeout=10)
        data = response.json()
        bindings = data['results']['bindings']
        if bindings:
            item = bindings[0]
            label = item.get('itemLabel', {}).get('value', entity)
            description = item.get('itemDescription', {}).get('value', 'No description available.')
            coord = item.get('coordinate', {}).get('value', '')
            wikidata_link = item.get('item', {}).get('value', '')
            return label, description, coord, wikidata_link
    except (requests.RequestException, ValueError, KeyError):
        # Best-effort lookup: any transport/parse/schema problem falls
        # through to the generic fallback instead of crashing the app.
        pass
    return entity, "No description available.", "", ""
# Get Wikipedia details
def get_wikipedia_details(entity, lang="en"):
    """Fetch the Wikipedia page URL plus a sample of categories and links.

    Parameters:
        entity: page title to resolve (auto-suggest and redirects enabled).
        lang: Wikipedia language edition to query.

    Returns:
        (url, categories, links) with at most 5 categories and 5 links;
        ("", [], []) when the page cannot be resolved (disambiguation,
        missing page, network error).
    """
    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        categories = page.categories[:5]
        links = page.links[:5]
        url = page.url
        return url, categories, links
    except Exception:
        # The wikipedia library raises several exception types
        # (DisambiguationError, PageError, HTTP errors); treat them all
        # as "no details available" rather than crashing the lookup.
        return "", [], []
# Enrich info with tags and intent
def enrich_info(summary):
    """Derive semantic tag labels and a coarse user intent from a description.

    Parameters:
        summary: free-text description (e.g. a Wikidata entity description).

    Returns:
        (related_info, intent) — a list of emoji-tag strings for every
        keyword rule that matches, and a single intent string.
    """
    # Keyword-pattern -> tag table; every matching rule contributes a tag.
    tag_rules = (
        (r'capital', "🏛️ Capital city"),
        (r'tourism|attraction', "🧳 Popular for tourism"),
        (r'population', "👥 Densely populated"),
        (r'university|education', "🎓 Educational hub"),
        (r'beach', "🏖️ Known for beaches"),
    )
    related_info = [tag for pattern, tag in tag_rules
                    if re.search(pattern, summary, re.IGNORECASE)]
    # Intent is exclusive: first matching rule wins, travel before education.
    if re.search(r'tourism|travel', summary, re.IGNORECASE):
        intent = "Looking for travel guidance"
    elif re.search(r'university|education', summary, re.IGNORECASE):
        intent = "Seeking educational info"
    else:
        intent = "General knowledge inquiry"
    return related_info, intent
# Main combined function
# NOTE(review): the HTML markup inside the f-strings below was stripped/garbled
# in the source file (literal newlines inside string literals, fused statements).
# The anchors and layout here are reconstructed to match the gr.HTML() output
# intent — confirm against the original rendering.
def ner_wikidata_lookup(text):
    """Run NER on `text` and build an HTML report for each unique entity.

    For every alphabetic entity not yet seen: fetch its Wikidata label,
    description, coordinates and URL; fetch Wikipedia URL, categories and
    links; derive semantic tags and an intent from the description; and
    append an HTML section with links (Wikidata, Wikipedia, OpenStreetMap).

    Returns the assembled HTML string, or "No named entities found." when
    no entity passed the filters.
    """
    try:
        detected_lang = detect(text)
    except Exception:
        # langdetect raises on empty/undetectable input; default to English.
        detected_lang = "en"
    entities = ner_pipeline(text)
    seen = set()
    result = f"<p>🌐 <b>Detected Language:</b> {detected_lang}</p>"
    for ent in entities:
        name = ent['word'].strip()
        # Skip duplicates and tokens with punctuation/digits (subword noise).
        if name not in seen and name.isalpha():
            seen.add(name)
            label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
            wiki_url, wiki_categories, wiki_links = get_wikipedia_details(name, lang=detected_lang)
            related_tags, detected_intent = enrich_info(desc)
            osm_link = ""
            if coord:
                try:
                    # Wikidata P625 values look like "Point(lon lat)".
                    lon, lat = coord.replace('Point(', '').replace(')', '').split(' ')
                    osm_link = (
                        f'<a href="https://www.openstreetmap.org/?mlat={lat}&mlon={lon}" '
                        f'target="_blank">📍 View on OpenStreetMap</a><br>'
                    )
                except ValueError:
                    # Unexpected coordinate format — omit the map link.
                    pass
            links = ""
            if wikidata_url:
                links += f'<a href="{wikidata_url}" target="_blank">🔗 Wikidata</a>&nbsp;&nbsp;'
            if wiki_url:
                links += f'<a href="{wiki_url}" target="_blank">📘 Wikipedia</a>'
            tags_html = (
                f"<p><b>Related Tags:</b> {' | '.join(related_tags)}</p>"
                if related_tags else ""
            )
            intent_html = f"<p><b>Intent:</b> {detected_intent}</p>"
            extra_info = ""
            if wiki_categories:
                extra_info += f"<p><b>Wikipedia Categories:</b> {', '.join(wiki_categories)}</p>"
            if wiki_links:
                extra_info += f"<p><b>Related Topics:</b> {', '.join(wiki_links)}</p>"
            result += (
                f"<h3>{label}</h3>"
                f"<p>{desc}</p>"
                f"{links}<br>{osm_link}"
                f"{tags_html}{intent_html}{extra_info}<hr>"
            )
    return result if seen else "No named entities found."

# Gradio Interface using HTML output
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(lines=4, placeholder="Type any sentence in any language..."),
    outputs=gr.HTML(),
    title="🌐 NER with Wikidata + Wikipedia + Smart Tags",
    description=(
        "Detects named entities, retrieves Wikidata descriptions, adds "
        "Wikipedia links, maps, and enriches output with semantic tags, "
        "intent detection, categories, and related topics."
    ),
)

if __name__ == "__main__":
    iface.launch()