import contextlib
import datetime
import fcntl

import py

from ezdb import escape_key


@contextlib.contextmanager
def _flocked(path):
    """Hold an exclusive ``flock`` on *path* while the ``with`` body runs.

    *path* is a ``py.path.local``.  A handle is opened purely to carry the
    lock; the caller reads and writes through the path object itself.  The
    lock is released and the handle closed even if the body raises.
    """
    fp = path.open()
    try:
        fcntl.flock(fp, fcntl.LOCK_EX)
        try:
            yield
        finally:
            fcntl.flock(fp, fcntl.LOCK_UN)
    finally:
        fp.close()


def _read_repr(path):
    """Return the Python object whose ``repr()`` is stored in *path*.

    WARNING: this uses ``eval`` because stored values include reprs such as
    ``datetime.datetime(...)`` that ``ast.literal_eval`` cannot parse.  The
    index directory must therefore only ever be writable by trusted code.
    """
    return eval(path.read())


class indexer(object):
    """ very simple index manager

        Maps field names to index objects (passed as keyword arguments) and
        dispatches index/unindex/search calls to them.
    """

    def __init__(self, **indexes):
        self.indexes = indexes

    def index(self, key, data):
        """Index every field of the mapping *data* that has an index."""
        for k in data:
            if k in self.indexes:
                self.indexes[k].index(key, data[k])

    def unindex(self, key):
        """Remove *key* from all indexes.

        A ``KeyError`` raised by an individual index is ignored so the
        remaining indexes are still processed.
        """
        for index in self.indexes.values():
            try:
                index.unindex(key)
            except KeyError:
                pass

    def search(self, key, *values, **kwargs):
        """Delegate to the ``search`` method of the index named *key*.

        Raises ``KeyError`` if no index is registered under *key*.
        """
        return self.indexes[key].search(*values, **kwargs)


def to_datetime(d):
    """Return *d* as a ``datetime.datetime``.

    A ``datetime.datetime`` is returned unchanged, a ``datetime.date`` is
    converted to midnight of that day; anything else raises ``ValueError``.
    """
    if isinstance(d, datetime.datetime):
        return d
    elif isinstance(d, datetime.date):
        return datetime.datetime(d.year, d.month, d.day)
    else:
        raise ValueError('not a datetime.date[time] object')


class abstractindex(object):
    """Base class for file-backed indexes.

    Below *path* two directories are maintained: ``data`` holds files that
    each contain the ``repr()`` of a ``{key: value}`` dict (which file is
    used is decided by ``_get_path``), and ``reverse`` holds one file per
    key containing the ``repr()`` of the value the key was indexed under.

    Subclasses implement ``search``, ``_verify`` and ``_get_path``.
    """

    def __init__(self, path):
        path = py.path.local(path)
        self.path = path.join('data')
        self.path.ensure(dir=True)
        # revpath is a dir with one file per key (for now)
        self.revpath = path.join('reverse')
        self.revpath.ensure(dir=True)

    def index(self, key, value):
        """Store *key* under *value*; a ``None`` verified value is skipped.

        NOTE(review): an entry written by an earlier ``index`` call for the
        same key under a different value is not removed here -- presumably
        callers ``unindex`` first; confirm against callers.
        """
        value = self._verify(value)
        if value is None:
            return
        path = self._get_path(value, True)
        with _flocked(path):
            data = _read_repr(path)
            data[key] = value
            path.write(repr(data))
            # remember which value the key was filed under, for unindex()
            revpath = self.revpath.join(escape_key(key))
            revpath.ensure(file=True)
            with _flocked(revpath):
                revpath.write(repr(value))

    def unindex(self, key):
        """Remove *key* from the index; a key without reverse file is a no-op.

        Raises ``KeyError`` if the reverse file exists but the data file
        does not contain *key*.
        """
        revpath = self.revpath.join(escape_key(key))
        if not revpath.check():
            return
        with _flocked(revpath):
            value = _read_repr(revpath)
        path = self._get_path(value, True)
        with _flocked(path):
            data = _read_repr(path)
            data.pop(key)
            path.write(repr(data))
            with _flocked(revpath):
                revpath.remove()

    def search(self, key, *args, **kwargs):
        raise NotImplementedError('implement in subclasses')

    def _verify(self, value):
        raise NotImplementedError('implement in subclasses')

    def _get_path(self, value, create=False):
        raise NotImplementedError('implement in subclasses')


class datetimeindex(abstractindex):
    """Index of date/datetime values, stored in one data file per day."""

    def _get_path(self, data, create=False):
        """Return the ``year/month/day`` data file for *data*.

        With *create* true a missing file is created holding an empty dict.
        """
        # path segments are handed over as strings rather than ints
        fpath = self.path.join(str(data.year), str(data.month), str(data.day))
        if create and not fpath.check(file=True):
            fpath.ensure(file=True)
            fpath.write('{}')
        return fpath

    def _verify(self, data):
        """Return *data* as a datetime, or ``None`` when *data* is ``None``.

        Raises ``ValueError`` for anything that is not a date or datetime.
        """
        if data is None:
            return
        # datetime.datetime subclasses datetime.date, so one check suffices
        if not isinstance(data, datetime.date):
            raise ValueError('can not index %s' % (data,))
        return to_datetime(data)

    def search(self, start, end=None):
        """Yield the keys whose value lies in ``[start, end]`` (inclusive).

        *start* and *end* may be dates or datetimes (dates mean midnight);
        *end* defaults to *start*.  Keys are yielded day by day, ordered by
        value within each day.
        """
        start = to_datetime(start)
        if end is None:
            end = start
        else:
            end = to_datetime(end)
        # Walk calendar days rather than adding 24h steps to `start`, so
        # the file for end's day is visited even when end's time of day is
        # earlier than start's.
        day = start.date()
        last_day = end.date()
        one_day = datetime.timedelta(1)
        while day <= last_day:
            path = self._get_path(day)
            if path.check(file=True):
                with _flocked(path):
                    data = _read_repr(path)
                items = sorted(data.items(), key=lambda item: item[1])
                for key, dt in items:
                    if dt < start:
                        continue
                    if dt > end:
                        # items are sorted, nothing later can match
                        break
                    yield key
            day += one_day


class fieldindex(abstractindex):
    """Index of arbitrary values, filed by the escaped ``repr()`` of the value."""

    def _verify(self, value):
        return value # just about anything goes ;)

    def _get_path(self, value, create=False):
        """Return the data file for *value*.

        With *create* true a missing file is created holding an empty dict.
        """
        fpath = self.path.join(escape_key(repr(value)))
        if create and not fpath.check(file=True):
            fpath.ensure(file=True)
            fpath.write('{}')
        return fpath

    def search(self, value):
        """Yield the keys that were indexed under exactly *value*."""
        path = self._get_path(value)
        if path.check(file=True):
            with _flocked(path):
                data = _read_repr(path)
            for k, v in data.items():
                # _get_path() may not return a distinct path per value,
                # so filter on the stored value as well
                if v == value:
                    yield k


class fulltextindex(object):
    """Thin wrapper around a ``pymindex`` ``mindex`` object."""

    def __init__(self, path, charset='UTF-8'):
        # imported lazily so pymindex is only required when this index is used
        from pymindex import mindex
        self.mindex = mindex.mindex(path, charset)

    def index(self, key, value):
        self.mindex.index(key, value)

    def unindex(self, key):
        self.mindex.unindex(key)

    def search(self, *words):
        """Yield the results of searching the underlying index for *words*."""
        for ret in self.mindex.search(words):
            yield ret