Alexis Palmer committed
Commit 679e59f · 1 Parent(s): ec37cf8

Scramble for Ckunsa, first version

Files changed (3)
  1. app.py +88 -0
  2. ckunsa.easy.filtered +61 -0
  3. util.py +79 -0
app.py ADDED
@@ -0,0 +1,88 @@
+ import gradio as gr
+ import util
+ import re
+ import random
+
+ ### load and prepare corpus
+ #corpus = util.load_raw_text(corpus_directory="map_avenue")
+
+ corpus = util.load_single_raw_text_file("ckunsa.easy.filtered")
+
+ #corpus = corpus.lower()
+ #word_regex = r"[a-z]+"
+ #def tokenize(text: str):
+ #    return re.findall(word_regex, text)
+
+ #words = tokenize(corpus)
+ words = corpus.split()
+ print(words)
+
+
+ lexicon = set()
+ for word in words:
+     lexicon.add(word)
+
+ filtered_lexicon = set()
+
+ for word in lexicon:
+     filtered_lexicon.add(word)
+     # if 4 <= len(word) <= 6:
+     #     filtered_lexicon.add(word)
+
+ print(len(filtered_lexicon))
+
+ def random_scramble(lexicon: set):
+     lexicon = list(lexicon)
+
+     word = random.choice(lexicon)
+
+     # Turn the word into a list of characters
+     word_chars = list(word)
+
+     # Shuffle those characters
+     random.shuffle(word_chars)
+
+     # Re-join the characters into a string
+     shuffled = ''.join(word_chars)
+
+     return {'shuffled': shuffled, 'original': word}
+
+
+
+ def scrambler_game(current_word, guess: str):
+     """
+     If `guess` is the correct word, return 'Correct' and pick a new word. Otherwise, return 'Incorrect'
+     Returns (correct_label, scrambled_word, current_word)
+     """
+     if guess == current_word['original']:
+         current_word = random_scramble(filtered_lexicon)
+         return ('😀 ¡Correcto! 😀', current_word['shuffled'], current_word)
+     else:
+         return ('Incorrecto 😕', current_word['shuffled'], current_word)
+
+
+ def new_word():
+     current_word = random_scramble(filtered_lexicon)
+     return ('', current_word['shuffled'], current_word)
+
+
+ with gr.Blocks(theme=gr.themes.Soft(), title="Ckunsa Buscar Palabra") as unscramble:
+     # Start with some initial word
+     current_word = gr.State(random_scramble(filtered_lexicon))
+
+     gr.Markdown("# Ckunsa Buscar Palabra")
+
+     # Notice how we set the initial value based on the State
+     scrambled_textbox = gr.Textbox(label="Crucigrama", interactive=False, value=current_word.value['shuffled'])
+
+     guess_textbox = gr.Textbox(label="Adivinar - Adivina la palabra y luego aprieta en 'enviar'")
+     guess_button = gr.Button(value="Enviar")
+
+     new_word_button = gr.Button(value="Nueva Palabra")
+
+     output_textbox = gr.Textbox(label="Resultado", interactive=False)
+
+     guess_button.click(fn=scrambler_game, inputs=[current_word, guess_textbox], outputs=[output_textbox, scrambled_textbox, current_word])
+     new_word_button.click(fn=new_word, inputs=[], outputs=[output_textbox, scrambled_textbox, current_word])
+
+ unscramble.launch(share=True)
ckunsa.easy.filtered ADDED
@@ -0,0 +1,61 @@
+
+ Ckooyo
+ Seppi
+ sutchi
+ Tchacksa
+ Sulayie
+ Selti
+ Socke
+ Ckuchir
+ Ckunsa
+ teckara
+ Lassi
+ Sali
+ Ckabal
+ Tarar
+ Nanni
+ Ckutchi
+ Salipanni
+ Ttosi
+ semma
+ Tchockbar
+ Tickne
+ Ckaari
+ Lacksi
+ tcholama
+ Mussur
+ Patta
+ MItchi
+ Packo
+ Tchocko
+ ppoya
+ mitchala
+ tchalpa
+ Tchitah
+ Ckaala
+ Suyi
+ Aulo
+ Ppanti
+ mutsima
+ tchoya
+ Ckare
+ Ckai
+ Lockma
+ Tchuinu
+ Panni
+ Haari
+ ppalama
+ Atchi
+ Tickan
+ Tchumpi
+ Aytia
+ Simma
+ Ckilir
+ Lari
+ Ckamai
+ Ckocko
+ Puluckur
+ Ckaipi
+ Ckuru
+ Athalpa
+ Licki
util.py ADDED
@@ -0,0 +1,79 @@
+ import os
+ import re
+ import unicodedata
+
+ def strip_accents(text: str) -> str:
+     """Removes accents from text."""
+     return ''.join(c for c in unicodedata.normalize('NFD', text)
+                    if unicodedata.category(c) != 'Mn')
+
+
+ def load_raw_text(corpus_directory: str, file_names=None) -> str:
+     """Loads all the text files in a directory into one large string"""
+     corpus = ""
+
+     for file_name in os.listdir(corpus_directory):
+         # Read the file as a string
+         file_path = os.path.join(corpus_directory, file_name)
+         if os.path.isdir(file_path):
+             continue
+
+         # Make sure we only read text files
+         if ".txt" not in file_name:
+             continue
+
+         with open(file_path, 'r') as file:
+             file_contents = file.read()
+             corpus += (file_contents + "\n")
+     return corpus
+
+ def load_single_raw_text_file(file_name):
+     """Loads a single text file into one large string"""
+
+     corpus = ""
+     with open(file_name, 'r') as file:
+         file_contents = file.read()
+         corpus += (file_contents + "\n")
+
+     return corpus
+
+
+ word_regex = r"[\w|\']+"
+ def tokenize(text):
+     return re.findall(word_regex, text)
+
+
+ def preprocess(text):
+     """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation."""
+     text = strip_accents(text)
+     text = text.lower()
+
+     tokens = text.split(" ")
+
+     tokens_filtered = []
+     for token in tokens:
+         # Skip any tokens with special characters
+         if re.match(r"[\w|\']+|[\.|\,|\?|\!]", token):
+             tokens_filtered.append(token)
+     return tokens_filtered
+
+
+ def pad(text: list, num_padding: int):
+     """Pads the given text, as a list of strings, with <s> characters between sentences."""
+     padded_text = []
+
+     # Add initial padding to the first sentence
+     for _ in range(num_padding):
+         padded_text.append("<s>")
+
+     for word in text:
+         padded_text.append(word)
+
+         # Every time we see an end punctuation mark, add <s> tokens to mark the start of the next sentence
+         # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
+         if word in [".", "?", "!"]:
+             for _ in range(num_padding):
+                 padded_text.append("<s>")
+
+
+     return padded_text