Spaces:
Running
Running
| import re | |
| import sys | |
| import locale | |
| import io | |
def re_findall(pattern, string):
    """Collect the named groups of every match of ``pattern`` in ``string``.

    Returns a list with one dict per non-overlapping match, in the order
    ``re.finditer`` yields them. Each dict maps group name to the matched
    text (``None`` for a named group that did not participate). A pattern
    without named groups yields empty dicts.
    """
    matches = re.finditer(pattern, string)
    return [found.groupdict() for found in matches]
def jaccard(x1, x2, y1, y2):
    """Return the Jaccard index of the intervals (x1, x2) and (y1, y2).

    The overlap length (clamped at 0) is divided by the length of the span
    from the smallest start to the largest end. Whenever the overlap is
    non-zero that span is exactly the union of the two intervals. Returns 0
    if the span is not positive.
    """
    span = max(x2, y2) - min(x1, y1)
    if span > 0:
        overlap = max(0, min(x2, y2) - max(x1, y1))
        return overlap / span
    return 0
def regex_search(text, pattern, group=1, default=None):
    """Return one group of the first match of ``pattern`` in ``text``.

    ``group`` is passed straight to ``Match.group`` (index or name). When
    the pattern does not match anywhere in ``text``, ``default`` is
    returned instead.
    """
    found = re.search(pattern, text)
    if found is None:
        return default
    return found.group(group)
| def _windows_write_string(s, out, skip_errors=True): | |
| """ Returns True if the string was written using special methods, | |
| False if it has yet to be written out.""" | |
| # Adapted from http://stackoverflow.com/a/3259271/35070 | |
| import ctypes | |
| import ctypes.wintypes | |
| WIN_OUTPUT_IDS = { | |
| 1: -11, | |
| 2: -12, | |
| } | |
| try: | |
| fileno = out.fileno() | |
| except AttributeError: | |
| # If the output stream doesn't have a fileno, it's virtual | |
| return False | |
| except io.UnsupportedOperation: | |
| # Some strange Windows pseudo files? | |
| return False | |
| if fileno not in WIN_OUTPUT_IDS: | |
| return False | |
| GetStdHandle = ctypes.WINFUNCTYPE( | |
| ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)( | |
| ('GetStdHandle', ctypes.windll.kernel32)) | |
| h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) | |
| WriteConsoleW = ctypes.WINFUNCTYPE( | |
| ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR, | |
| ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD), | |
| ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32)) | |
| written = ctypes.wintypes.DWORD(0) | |
| GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)( | |
| ('GetFileType', ctypes.windll.kernel32)) | |
| FILE_TYPE_CHAR = 0x0002 | |
| FILE_TYPE_REMOTE = 0x8000 | |
| GetConsoleMode = ctypes.WINFUNCTYPE( | |
| ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, | |
| ctypes.POINTER(ctypes.wintypes.DWORD))( | |
| ('GetConsoleMode', ctypes.windll.kernel32)) | |
| INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value | |
| def not_a_console(handle): | |
| if handle == INVALID_HANDLE_VALUE or handle is None: | |
| return True | |
| return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) | |
| if not_a_console(h): | |
| return False | |
| def next_nonbmp_pos(s): | |
| try: | |
| return next(i for i, c in enumerate(s) if ord(c) > 0xffff) | |
| except StopIteration: | |
| return len(s) | |
| while s: | |
| count = min(next_nonbmp_pos(s), 1024) | |
| ret = WriteConsoleW( | |
| h, s, count if count else 2, ctypes.byref(written), None) | |
| if ret == 0: | |
| if skip_errors: | |
| continue | |
| else: | |
| raise OSError('Failed to write string') | |
| if not count: # We just wrote a non-BMP character | |
| assert written.value == 2 | |
| s = s[1:] | |
| else: | |
| assert written.value > 0 | |
| s = s[written.value:] | |
| return True | |
def preferredencoding():
    """Return a usable text encoding name for this system.

    Asks ``locale.getpreferredencoding()`` and checks the answer by
    encoding a short probe string with it. If the lookup or the probe
    raises, ``'utf-8'`` is returned instead.
    """
    fallback = 'utf-8'
    try:
        candidate = locale.getpreferredencoding()
        # Probe: reject names Python cannot actually encode with.
        'TEST'.encode(candidate)
    except Exception:
        return fallback
    return candidate
def safe_print(*objects, sep=' ', end='\n', out=None, encoding=None, flush=False):
    """Print ``objects`` without failing on characters the stream can't encode.

    Ensures printing to standard output can be done safely (especially on
    Windows), where emojis and other characters outside the stream's
    encoding usually cause errors.

    Args:
        *objects: Values to print; each is converted with ``str()``.
        sep: Text inserted between the converted objects.
        end: Text appended after the last object.
        out: Target stream. Defaults to ``sys.stdout``.
        encoding: Encoding used when the text has to be turned into bytes.
            When omitted, the stream's own ``encoding`` attribute is used,
            then ``preferredencoding()``.
        flush: Flush ``out`` after writing, if it supports flushing.

    Characters that cannot be represented in the chosen encoding are
    dropped (``errors='ignore'``) instead of raising.
    """
    output_string = sep.join(map(str, objects)) + end
    if out is None:
        out = sys.stdout

    # On Windows, try the console API first. It is skipped when the caller
    # asked for a specific encoding.
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(output_string, out):
            return

    mode = getattr(out, 'mode', '')
    # Some file-like objects expose a non-string ``mode``; treat as text.
    is_binary = isinstance(mode, str) and 'b' in mode

    if not is_binary and not hasattr(out, 'buffer'):
        # Plain text stream with no byte layer underneath (e.g. StringIO).
        out.write(output_string)
    else:
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = output_string.encode(enc, 'ignore')
        if is_binary:
            # A binary stream only accepts bytes; a str would raise TypeError.
            out.write(byt)
        else:
            # Push out text still pending in the text layer so the bytes
            # written straight to the underlying buffer cannot overtake it.
            if hasattr(out, 'flush'):
                out.flush()
            out.buffer.write(byt)

    if flush and hasattr(out, 'flush'):
        out.flush()