|  |  | 
					
						
						|  | import spacy | 
					
						
						|  | from collections import Counter | 
					
						
						|  | from spacy import displacy | 
					
						
						|  | import re | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | POS_COLORS = { | 
					
						
						|  | 'ADJ': '#FFA07A', | 
					
						
						|  | 'ADP': '#98FB98', | 
					
						
						|  | 'ADV': '#87CEFA', | 
					
						
						|  | 'AUX': '#DDA0DD', | 
					
						
						|  | 'CCONJ': '#F0E68C', | 
					
						
						|  | 'DET': '#FFB6C1', | 
					
						
						|  | 'INTJ': '#FF6347', | 
					
						
						|  | 'NOUN': '#90EE90', | 
					
						
						|  | 'NUM': '#FAFAD2', | 
					
						
						|  | 'PART': '#D3D3D3', | 
					
						
						|  | 'PRON': '#FFA500', | 
					
						
						|  | 'PROPN': '#20B2AA', | 
					
						
						|  | 'SCONJ': '#DEB887', | 
					
						
						|  | 'SYM': '#7B68EE', | 
					
						
						|  | 'VERB': '#FF69B4', | 
					
						
						|  | 'X': '#A9A9A9', | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | POS_TRANSLATIONS = { | 
					
						
						|  | 'es': { | 
					
						
						|  | 'ADJ': 'Adjetivo', | 
					
						
						|  | 'ADP': 'Adposici贸n', | 
					
						
						|  | 'ADV': 'Adverbio', | 
					
						
						|  | 'AUX': 'Auxiliar', | 
					
						
						|  | 'CCONJ': 'Conjunci贸n Coordinante', | 
					
						
						|  | 'DET': 'Determinante', | 
					
						
						|  | 'INTJ': 'Interjecci贸n', | 
					
						
						|  | 'NOUN': 'Sustantivo', | 
					
						
						|  | 'NUM': 'N煤mero', | 
					
						
						|  | 'PART': 'Part铆cula', | 
					
						
						|  | 'PRON': 'Pronombre', | 
					
						
						|  | 'PROPN': 'Nombre Propio', | 
					
						
						|  | 'SCONJ': 'Conjunci贸n Subordinante', | 
					
						
						|  | 'SYM': 'S铆mbolo', | 
					
						
						|  | 'VERB': 'Verbo', | 
					
						
						|  | 'X': 'Otro', | 
					
						
						|  | }, | 
					
						
						|  | 'en': { | 
					
						
						|  | 'ADJ': 'Adjective', | 
					
						
						|  | 'ADP': 'Adposition', | 
					
						
						|  | 'ADV': 'Adverb', | 
					
						
						|  | 'AUX': 'Auxiliary', | 
					
						
						|  | 'CCONJ': 'Coordinating Conjunction', | 
					
						
						|  | 'DET': 'Determiner', | 
					
						
						|  | 'INTJ': 'Interjection', | 
					
						
						|  | 'NOUN': 'Noun', | 
					
						
						|  | 'NUM': 'Number', | 
					
						
						|  | 'PART': 'Particle', | 
					
						
						|  | 'PRON': 'Pronoun', | 
					
						
						|  | 'PROPN': 'Proper Noun', | 
					
						
						|  | 'SCONJ': 'Subordinating Conjunction', | 
					
						
						|  | 'SYM': 'Symbol', | 
					
						
						|  | 'VERB': 'Verb', | 
					
						
						|  | 'X': 'Other', | 
					
						
						|  | }, | 
					
						
						|  | 'fr': { | 
					
						
						|  | 'ADJ': 'Adjectif', | 
					
						
						|  | 'ADP': 'Adposition', | 
					
						
						|  | 'ADV': 'Adverbe', | 
					
						
						|  | 'AUX': 'Auxiliaire', | 
					
						
						|  | 'CCONJ': 'Conjonction de Coordination', | 
					
						
						|  | 'DET': 'D茅terminant', | 
					
						
						|  | 'INTJ': 'Interjection', | 
					
						
						|  | 'NOUN': 'Nom', | 
					
						
						|  | 'NUM': 'Nombre', | 
					
						
						|  | 'PART': 'Particule', | 
					
						
						|  | 'PRON': 'Pronom', | 
					
						
						|  | 'PROPN': 'Nom Propre', | 
					
						
						|  | 'SCONJ': 'Conjonction de Subordination', | 
					
						
						|  | 'SYM': 'Symbole', | 
					
						
						|  | 'VERB': 'Verbe', | 
					
						
						|  | 'X': 'Autre', | 
					
						
						|  | } | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def get_repeated_words_colors(doc): | 
					
						
						|  | word_counts = Counter(token.text.lower() for token in doc if token.pos_ != 'PUNCT') | 
					
						
						|  | repeated_words = {word: count for word, count in word_counts.items() if count > 1} | 
					
						
						|  |  | 
					
						
						|  | word_colors = {} | 
					
						
						|  | for token in doc: | 
					
						
						|  | if token.text.lower() in repeated_words: | 
					
						
						|  | word_colors[token.text.lower()] = POS_COLORS.get(token.pos_, '#FFFFFF') | 
					
						
						|  |  | 
					
						
						|  | return word_colors | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def highlight_repeated_words(doc, word_colors): | 
					
						
						|  | highlighted_text = [] | 
					
						
						|  | for token in doc: | 
					
						
						|  | if token.text.lower() in word_colors: | 
					
						
						|  | color = word_colors[token.text.lower()] | 
					
						
						|  | highlighted_text.append(f'<span style="background-color: {color};">{token.text}</span>') | 
					
						
						|  | else: | 
					
						
						|  | highlighted_text.append(token.text) | 
					
						
						|  | return ' '.join(highlighted_text) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def generate_arc_diagram(doc, lang_code): | 
					
						
						|  | sentences = list(doc.sents) | 
					
						
						|  | arc_diagrams = [] | 
					
						
						|  | for sent in sentences: | 
					
						
						|  | html = displacy.render(sent, style="dep", options={"distance": 100}) | 
					
						
						|  | html = html.replace('height="375"', 'height="200"') | 
					
						
						|  | html = re.sub(r'<svg[^>]*>', lambda m: m.group(0).replace('height="450"', 'height="300"'), html) | 
					
						
						|  | html = re.sub(r'<g [^>]*transform="translate\((\d+),(\d+)\)"', lambda m: f'<g transform="translate({m.group(1)},50)"', html) | 
					
						
						|  | arc_diagrams.append(html) | 
					
						
						|  | return arc_diagrams |