# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: https://opensource.org/license/bsd-3-clause/
import binascii
import os
import mmap
import sys
import time
import errno

from io import BytesIO

from smmap import (
    StaticWindowMapManager,
    SlidingWindowMapManager,
    SlidingWindowMapBuffer
)

# initialize our global memory manager instance
# Use it to free cached (and unused) resources.
mman = SlidingWindowMapManager()
# END handle mman

import hashlib

try:
    from struct import unpack_from
except ImportError:
    from struct import unpack, calcsize
    __calcsize_cache = dict()

    def unpack_from(fmt, data, offset=0):
        try:
            size = __calcsize_cache[fmt]
        except KeyError:
            size = calcsize(fmt)
            __calcsize_cache[fmt] = size
        # END exception handling
        return unpack(fmt, data[offset: offset + size])
    # END own unpack_from implementation

#{ Aliases

hex_to_bin = binascii.a2b_hex
bin_to_hex = binascii.b2a_hex

# errors
ENOENT = errno.ENOENT

# os shortcuts
exists = os.path.exists
mkdir = os.mkdir
chmod = os.chmod
isdir = os.path.isdir
isfile = os.path.isfile
rename = os.rename
dirname = os.path.dirname
basename = os.path.basename
join = os.path.join
read = os.read
write = os.write
close = os.close
fsync = os.fsync


def _retry(func, *args, **kwargs):
    # Wrapper around functions that are problematic on Windows. Sometimes
    # the OS or another process still holds a handle to the file.
    if sys.platform == "win32":
        for _ in range(10):
            try:
                return func(*args, **kwargs)
            except Exception:
                time.sleep(0.1)
        return func(*args, **kwargs)
    else:
        return func(*args, **kwargs)


def remove(*args, **kwargs):
    return _retry(os.remove, *args, **kwargs)


# Backwards compatibility imports
from gitdb.const import (
    NULL_BIN_SHA,
    NULL_HEX_SHA
)

#} END Aliases

#{ compatibility stuff ...


class _RandomAccessBytesIO:

    """Wrapper to provide required functionality in case memory maps cannot or may
    not be used. This is only really required in python 2.4"""
    __slots__ = '_sio'

    def __init__(self, buf=b''):
        self._sio = BytesIO(buf)

    def __getattr__(self, attr):
        return getattr(self._sio, attr)

    def __len__(self):
        return len(self.getvalue())

    def __getitem__(self, i):
        return self.getvalue()[i]

    def __getslice__(self, start, end):
        return self.getvalue()[start:end]


def byte_ord(b):
    """
    Return the integer representation of the byte string. This supports Python
    3 byte arrays as well as standard strings.
    """
    try:
        return ord(b)
    except TypeError:
        return b

#} END compatibility stuff ...

#{ Routines


def make_sha(source=b''):
    """A python2.4 workaround for the sha/hashlib module fiasco

    **Note** From the dulwich project"""
    try:
        return hashlib.sha1(source)
    except NameError:
        import sha
        sha1 = sha.sha(source)
        return sha1


def allocate_memory(size):
    """:return: a file-protocol accessible memory block of the given size"""
    if size == 0:
        return _RandomAccessBytesIO(b'')
    # END handle empty chunks gracefully

    try:
        return mmap.mmap(-1, size)  # read-write by default
    except OSError:
        # setup real memory instead
        # this of course may fail if the amount of memory is not available in
        # one chunk - would only be the case in python 2.4, being more likely on
        # 32 bit systems.
        return _RandomAccessBytesIO(b"\0" * size)
    # END handle memory allocation


def file_contents_ro(fd, stream=False, allow_mmap=True):
    """:return: read-only contents of the file represented by the file descriptor fd

    :param fd: file descriptor opened for reading
    :param stream: if False, random access is provided, otherwise the stream interface
        is provided.
    :param allow_mmap: if True, it is allowed to map the contents into memory, which
        allows large files to be handled and accessed efficiently. The file-descriptor
        will change its position if this is False"""
    try:
        if allow_mmap:
            # supports stream and random access
            try:
                return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
            except OSError:
                # python 2.4 issue, 0 wants to be the actual size
                return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
            # END handle python 2.4
    except OSError:
        pass
    # END exception handling

    # read manually
    contents = os.read(fd, os.fstat(fd).st_size)
    if stream:
        return _RandomAccessBytesIO(contents)
    return contents


def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
    """Get the file contents at filepath as fast as possible

    :return: random access compatible memory of the given filepath
    :param stream: see ``file_contents_ro``
    :param allow_mmap: see ``file_contents_ro``
    :param flags: additional flags to pass to os.open
    :raise OSError: If the file could not be opened

    **Note** for now we don't try to use O_NOATIME directly, as the right value would
    in fact need to be shared per database. It only makes a real difference for loose
    object databases anyway, and they use it with the help of the ``flags`` parameter"""
    fd = os.open(filepath, os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags)
    try:
        return file_contents_ro(fd, stream, allow_mmap)
    finally:
        close(fd)
    # END assure file is closed
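
# A minimal usage sketch (illustrative only, not part of the module's API). The path shown
# is hypothetical; the call returns an mmap or bytes object for random access, or a
# stream-like wrapper when ``stream=True`` and mapping is not possible.
#
#   data = file_contents_ro_filepath('/tmp/some_object')
#   first_byte = byte_ord(data[0])
#   streamed = file_contents_ro_filepath('/tmp/some_object', stream=True)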


def sliding_ro_buffer(filepath, flags=0):
    """
    :return: a buffer compatible object which uses our mapped memory manager internally
        ready to read the whole given filepath"""
    return SlidingWindowMapBuffer(mman.make_cursor(filepath), flags=flags)


def to_hex_sha(sha):
    """:return: hexified version of sha"""
    if len(sha) == 40:
        return sha
    return bin_to_hex(sha)


def to_bin_sha(sha):
    if len(sha) == 20:
        return sha
    return hex_to_bin(sha)
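
# A minimal conversion sketch (illustrative only): gitdb passes SHA1 values around either as
# 40-byte hex strings or 20-byte binary strings; these helpers are no-ops when the input
# already has the requested form. The value below is made up.
#
#   hexsha = b'3' * 40
#   binsha = to_bin_sha(hexsha)          # 20 bytes
#   assert to_hex_sha(binsha) == hexsha  # round-trips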

#} END routines


#{ Utilities


class LazyMixin:

    """
    Base class providing an interface to lazily retrieve attribute values upon
    first access. If slots are used, memory will only be reserved once the attribute
    is actually accessed and retrieved the first time. All future accesses will
    return the cached value as stored in the instance's dict or slot.
    """

    __slots__ = tuple()

    def __getattr__(self, attr):
        """
        Whenever an attribute is requested that we do not know, we allow it
        to be created and set. Next time the same attribute is requested, it is simply
        returned from our dict/slots."""
        self._set_cache_(attr)
        # will raise in case the cache was not created
        return object.__getattribute__(self, attr)

    def _set_cache_(self, attr):
        """
        This method should be overridden in the derived class.
        It should check whether the attribute named by attr can be created
        and cached. Do nothing if you do not know the attribute, or call your superclass.

        The derived class may create as many additional attributes as it deems
        necessary in case a git command returns more information than represented
        in the single attribute."""
        pass
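
# A minimal subclassing sketch (illustrative only). The class and attribute names are made
# up; on first access of ``size`` the mixin's __getattr__ invokes _set_cache_, which stores
# the value, and every later access is served straight from the instance dict.
#
#   class _ExampleLazy(LazyMixin):
#       def _set_cache_(self, attr):
#           if attr == 'size':
#               self.size = 42      # stand-in for an expensive computation
#           else:
#               super()._set_cache_(attr)
#
#   assert _ExampleLazy().size == 42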


class LockedFD:

    """
    This class facilitates a safe read and write operation to a file on disk.
    If we write to 'file', we obtain a lock file at 'file.lock' and write to
    that instead. If we succeed, the lock file will be renamed to overwrite
    the original file.

    When reading, we obtain a lock file as well, to prevent other writers from
    succeeding while we are reading the file.

    This type handles errors correctly in that it will assure a consistent state
    on destruction.

    **note** with this setup, parallel reading is not possible"""
    __slots__ = ("_filepath", '_fd', '_write')

    def __init__(self, filepath):
        """Initialize an instance with the given filepath"""
        self._filepath = filepath
        self._fd = None
        self._write = None          # if True, we write a file

    def __del__(self):
        # will do nothing if the file descriptor is already closed
        if self._fd is not None:
            self.rollback()

    def _lockfilepath(self):
        return "%s.lock" % self._filepath

    def open(self, write=False, stream=False):
        """
        Open the file descriptor for reading or writing, both in binary mode.

        :param write: if True, the file descriptor will be opened for writing. Otherwise
            it will be opened read-only.
        :param stream: if True, the file descriptor will be wrapped into a simple stream
            object which supports only reading or writing
        :return: fd to read from or write to. It is still maintained by this instance
            and must not be closed directly
        :raise IOError: if the lock could not be retrieved
        :raise OSError: If the actual file could not be opened for reading

        **note** must only be called once"""
        if self._write is not None:
            raise AssertionError("Called %s multiple times" % self.open)

        self._write = write

        # try to open the lock file
        binary = getattr(os, 'O_BINARY', 0)
        lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
        try:
            fd = os.open(self._lockfilepath(), lockmode, int("600", 8))
            if not write:
                os.close(fd)
            else:
                self._fd = fd
            # END handle file descriptor
        except OSError as e:
            raise OSError("Lock at %r could not be obtained" % self._lockfilepath()) from e
        # END handle lock retrieval

        # open actual file if required
        if self._fd is None:
            # we could specify exclusive here, as we obtained the lock anyway
            try:
                self._fd = os.open(self._filepath, os.O_RDONLY | binary)
            except:
                # assure we release our lockfile
                remove(self._lockfilepath())
                raise
            # END handle lockfile
        # END open descriptor for reading

        if stream:
            # need delayed import
            from gitdb.stream import FDStream
            return FDStream(self._fd)
        else:
            return self._fd
        # END handle stream

    def commit(self):
        """When done writing, call this function to commit your changes into the
        actual file.
        The file descriptor will be closed, and the lockfile handled.

        **Note** can be called multiple times"""
        self._end_writing(successful=True)

    def rollback(self):
        """Abort your operation without any changes. The file descriptor will be
        closed, and the lock released.

        **Note** can be called multiple times"""
        self._end_writing(successful=False)

    def _end_writing(self, successful=True):
        """Handle the lock according to the write mode"""
        if self._write is None:
            raise AssertionError("Cannot end operation if it wasn't started yet")

        if self._fd is None:
            return

        os.close(self._fd)
        self._fd = None

        lockfile = self._lockfilepath()
        if self._write and successful:
            # on windows, rename does not silently overwrite the existing one
            if sys.platform == "win32":
                if isfile(self._filepath):
                    remove(self._filepath)
                # END remove if exists
            # END win32 special handling
            os.rename(lockfile, self._filepath)

            # assure others can at least read the file - the tmpfile left it at rw--
            # We may also write that file, on windows that boils down to a remove-
            # protection as well
            chmod(self._filepath, int("644", 8))
        else:
            # just delete the file so far, we failed
            remove(lockfile)
        # END successful handling
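
# A minimal write-protocol sketch (illustrative only; the path is hypothetical). open()
# acquires 'file.lock', commit() renames the lock file over the target, and rollback()
# discards it; __del__ also rolls back if the descriptor was left open.
#
#   lfd = LockedFD('/tmp/some_file')
#   fd = lfd.open(write=True)
#   try:
#       write(fd, b'new contents')
#       lfd.commit()
#   except Exception:
#       lfd.rollback()
#       raise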

#} END utilities