Merge pull request #13 from cclauss/flake8-fixes

Files changed (5)

scripts/calculate_coverages.py CHANGED

@@ -11,6 +11,11 @@ sys.path.insert(0, dirname(dirname(abspath(__file__))))
 from torchmoji.sentence_tokenizer import SentenceTokenizer, coverage
 IS_PYTHON2 = int(sys.version[0]) == 2
 OUTPUT_PATH = 'coverage.csv'

 from torchmoji.sentence_tokenizer import SentenceTokenizer, coverage
+try:
+    unicode        # Python 2
+except NameError:
+    unicode = str  # Python 3
 IS_PYTHON2 = int(sys.version[0]) == 2
 OUTPUT_PATH = 'coverage.csv'

scripts/convert_all_datasets.py CHANGED

@@ -14,6 +14,11 @@ from torchmoji.create_vocab import VocabBuilder
 from torchmoji.sentence_tokenizer import SentenceTokenizer, extend_vocab, coverage
 from torchmoji.tokenizer import tokenize
 IS_PYTHON2 = int(sys.version[0]) == 2
 DATASETS = [

 from torchmoji.sentence_tokenizer import SentenceTokenizer, extend_vocab, coverage
 from torchmoji.tokenizer import tokenize
+try:
+    unicode        # Python 2
+except NameError:
+    unicode = str  # Python 3
 IS_PYTHON2 = int(sys.version[0]) == 2
 DATASETS = [

torchmoji/filter_utils.py CHANGED

@@ -11,8 +11,11 @@ import numpy as np
 from torchmoji.tokenizer import RE_MENTION, RE_URL
 from torchmoji.global_variables import SPECIAL_TOKENS
-IS_PYTHON2 = int(sys.version[0]) == 2
-chr_ = unichr if IS_PYTHON2 else chr
 AtMentionRegex = re.compile(RE_MENTION)
 urlRegex = re.compile(RE_URL)
@@ -36,8 +39,8 @@ VARIATION_SELECTORS = [ '\ufe00',
                         '\ufe0f']
 # from https://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
-ALL_CHARS = (chr_(i) for i in range(sys.maxunicode))
-CONTROL_CHARS = ''.join(map(chr_, list(range(0,32)) + list(range(127,160))))
 CONTROL_CHAR_REGEX = re.compile('[%s]' % re.escape(CONTROL_CHARS))
 def is_special_token(word):

 from torchmoji.tokenizer import RE_MENTION, RE_URL
 from torchmoji.global_variables import SPECIAL_TOKENS
+try:
+    unichr        # Python 2
+except NameError:
+    unichr = chr  # Python 3
 AtMentionRegex = re.compile(RE_MENTION)
 urlRegex = re.compile(RE_URL)
                         '\ufe0f']
 # from https://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
+ALL_CHARS = (unichr(i) for i in range(sys.maxunicode))
+CONTROL_CHARS = ''.join(map(unichr, list(range(0,32)) + list(range(127,160))))
 CONTROL_CHAR_REGEX = re.compile('[%s]' % re.escape(CONTROL_CHARS))
 def is_special_token(word):

torchmoji/finetuning.py CHANGED

@@ -3,7 +3,6 @@
 """
 from __future__ import print_function
-import sys
 import uuid
 from time import sleep
 from io import open
@@ -28,8 +27,13 @@ from torchmoji.global_variables import (FINETUNING_METHODS,
 from torchmoji.tokenizer import tokenize
 from torchmoji.sentence_tokenizer import SentenceTokenizer
-IS_PYTHON2 = int(sys.version[0]) == 2
-unicode_ = unicode if IS_PYTHON2 else str
 def load_benchmark(path, vocab, extend_with=0):
     """ Loads the given benchmark dataset.
@@ -66,7 +70,7 @@ def load_benchmark(path, vocab, extend_with=0):
     # Decode data
     try:
-        texts = [unicode_(x) for x in data['texts']]
     except UnicodeDecodeError:
         texts = [x.decode('utf-8') for x in data['texts']]

 """
 from __future__ import print_function
 import uuid
 from time import sleep
 from io import open
 from torchmoji.tokenizer import tokenize
 from torchmoji.sentence_tokenizer import SentenceTokenizer
+try:
+    unicode
+    IS_PYTHON2 = True
+except NameError:
+    unicode = str
+    IS_PYTHON2 = False
 def load_benchmark(path, vocab, extend_with=0):
     """ Loads the given benchmark dataset.
     # Decode data
     try:
+        texts = [unicode(x) for x in data['texts']]
     except UnicodeDecodeError:
         texts = [x.decode('utf-8') for x in data['texts']]

torchmoji/word_generator.py CHANGED

@@ -7,7 +7,6 @@
 from __future__ import division, print_function, unicode_literals
-import sys
 import re
 import unicodedata
 import numpy as np
@@ -26,8 +25,10 @@ from torchmoji.filter_utils import (convert_linebreaks,
                                            remove_variation_selectors,
                                            separate_emojis_and_text)
-IS_PYTHON2 = int(sys.version[0]) == 2
-unicode_ = unicode if IS_PYTHON2 else str
 # Only catch retweets in the beginning of the tweet as those are the
 # automatically added ones.
@@ -68,7 +69,7 @@ class WordGenerator():
             that is not allowed.
         """
-        if not isinstance(sentence, unicode_):
             raise ValueError("All sentences should be Unicode-encoded!")
         sentence = sentence.strip().lower()

 from __future__ import division, print_function, unicode_literals
 import re
 import unicodedata
 import numpy as np
                                            remove_variation_selectors,
                                            separate_emojis_and_text)
+try:
+    unicode        # Python 2
+except NameError:
+    unicode = str  # Python 3
 # Only catch retweets in the beginning of the tweet as those are the
 # automatically added ones.
             that is not allowed.
         """
+        if not isinstance(sentence, unicode):
             raise ValueError("All sentences should be Unicode-encoded!")
         sentence = sentence.strip().lower()