Alexis Palmer committed
Commit 679e59f · 1 Parent(s): ec37cf8

Scramble for Ckunsa, first version

Files changed (3)
  1. app.py +88 -0
  2. ckunsa.easy.filtered +61 -0
  3. util.py +79 -0
app.py ADDED
@@ -0,0 +1,88 @@
+ import gradio as gr
+ import util
+ import re
+ import random
+
+ ### load and prepare corpus
+ #corpus = util.load_raw_text(corpus_directory="map_avenue")
+
+ corpus = util.load_single_raw_text_file("ckunsa.easy.filtered")
+
+ #corpus = corpus.lower()
+ #word_regex = r"[a-z]+"
+ #def tokenize(text: str):
+ #    return re.findall(word_regex, text)
+
+ #words = tokenize(corpus)
+ words = corpus.split()
+ print(words)
+
+
+ lexicon = set()
+ for word in words:
+     lexicon.add(word)
+
+ filtered_lexicon = set()
+
+ for word in lexicon:
+     filtered_lexicon.add(word)
+     # if 4 <= len(word) <= 6:
+     #     filtered_lexicon.add(word)
+
+ print(len(filtered_lexicon))
+
+ def random_scramble(lexicon: set):
+     lexicon = list(lexicon)
+
+     word = random.choice(lexicon)
+
+     # Turn the word into a list of characters
+     word_chars = list(word)
+
+     # Shuffle those characters
+     random.shuffle(word_chars)
+
+     # Re-join the characters into a string
+     shuffled = ''.join(word_chars)
+
+     return {'shuffled': shuffled, 'original': word}
+
+
+
+ def scrambler_game(current_word, guess: str):
+     """
+     If `guess` is the correct word, return 'Correct' and pick a new word. Otherwise, return 'Incorrect'
+     Returns (correct_label, scrambled_word, current_word)
+     """
+     if guess == current_word['original']:
+         current_word = random_scramble(filtered_lexicon)
+         return ('😀 ¡Correcto! 😀', current_word['shuffled'], current_word)
+     else:
+         return ('Incorrecto 😕', current_word['shuffled'], current_word)
+
+
+ def new_word():
+     current_word = random_scramble(filtered_lexicon)
+     return ('', current_word['shuffled'], current_word)
+
+
+ with gr.Blocks(theme=gr.themes.Soft(), title="Ckunsa Buscar Palabra") as unscramble:
+     # Start with some initial word
+     current_word = gr.State(random_scramble(filtered_lexicon))
+
+     gr.Markdown("# Ckunsa Buscar Palabra")
+
+     # Notice how we set the initial value based on the State
+     scrambled_textbox = gr.Textbox(label="Crucigrama", interactive=False, value=current_word.value['shuffled'])
+
+     guess_textbox = gr.Textbox(label="Adivinar - Adivina la palabra y luego aprieta en 'enviar'")
+     guess_button = gr.Button(value="Enviar")
+
+     new_word_button = gr.Button(value="Nueva Palabra")
+
+     output_textbox = gr.Textbox(label="Resultado", interactive=False)
+
+     guess_button.click(fn=scrambler_game, inputs=[current_word, guess_textbox], outputs=[output_textbox, scrambled_textbox, current_word])
+     new_word_button.click(fn=new_word, inputs=[], outputs=[output_textbox, scrambled_textbox, current_word])
+
+ unscramble.launch(share=True)
ckunsa.easy.filtered ADDED
@@ -0,0 +1,61 @@
+
+ Ckooyo
+ Seppi
+ sutchi
+ Tchacksa
+ Sulayie
+ Selti
+ Socke
+ Ckuchir
+ Ckunsa
+ teckara
+ Lassi
+ Sali
+ Ckabal
+ Tarar
+ Nanni
+ Ckutchi
+ Salipanni
+ Ttosi
+ semma
+ Tchockbar
+ Tickne
+ Ckaari
+ Lacksi
+ tcholama
+ Mussur
+ Patta
+ MItchi
+ Packo
+ Tchocko
+ ppoya
+ mitchala
+ tchalpa
+ Tchitah
+ Ckaala
+ Suyi
+ Aulo
+ Ppanti
+ mutsima
+ tchoya
+ Ckare
+ Ckai
+ Lockma
+ Tchuinu
+ Panni
+ Haari
+ ppalama
+ Atchi
+ Tickan
+ Tchumpi
+ Aytia
+ Simma
+ Ckilir
+ Lari
+ Ckamai
+ Ckocko
+ Puluckur
+ Ckaipi
+ Ckuru
+ Athalpa
+ Licki
util.py ADDED
@@ -0,0 +1,79 @@
+ import os
+ import re
+ import unicodedata
+
+ def strip_accents(text: str) -> str:
+     """Removes accents from text."""
+     return ''.join(c for c in unicodedata.normalize('NFD', text)
+                    if unicodedata.category(c) != 'Mn')
+
+
+ def load_raw_text(corpus_directory: str, file_names=None) -> str:
+     """Loads all the text files in a directory into one large string"""
+     corpus = ""
+
+     for file_name in os.listdir(corpus_directory):
+         # Read the file as a string
+         file_path = os.path.join(corpus_directory, file_name)
+         if os.path.isdir(file_path):
+             continue
+
+         # Make sure we only read text files
+         if ".txt" not in file_name:
+             continue
+
+         with open(file_path, 'r') as file:
+             file_contents = file.read()
+             corpus += (file_contents + "\n")
+     return corpus
+
+ def load_single_raw_text_file(file_name):
+     """Loads a single text file into one large string"""
+
+     corpus = ""
+     with open(file_name, 'r') as file:
+         file_contents = file.read()
+         corpus += (file_contents + "\n")
+
+     return corpus
+
+
+ word_regex = r"[\w|\']+"
+ def tokenize(text):
+     return re.findall(word_regex, text)
+
+
+ def preprocess(text):
+     """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation."""
+     text = strip_accents(text)
+     text = text.lower()
+
+     tokens = text.split(" ")
+
+     tokens_filtered = []
+     for token in tokens:
+         # Skip any tokens with special characters
+         if re.match(r"[\w|\']+|[\.|\,|\?|\!]", token):
+             tokens_filtered.append(token)
+     return tokens_filtered
+
+
+ def pad(text: list, num_padding: int):
+     """Pads the given text, as a list of strings, with <s> characters between sentences."""
+     padded_text = []
+
+     # Add initial padding to the first sentence
+     for _ in range(num_padding):
+         padded_text.append("<s>")
+
+     for word in text:
+         padded_text.append(word)
+
+         # Every time we see an end punctuation mark, add <s> tokens to mark the start of the next sentence
+         # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
+         if word in [".", "?", "!"]:
+             for _ in range(num_padding):
+                 padded_text.append("<s>")
+
+
+     return padded_text