diff --git a/util/pilerpurge.py b/util/pilerpurge.py index c4bdc66c..6028d835 100755 --- a/util/pilerpurge.py +++ b/util/pilerpurge.py @@ -18,6 +18,7 @@ SQL_PURGE_SELECT_QUERY = "SELECT piler_id, size FROM " +\ "legal_hold))" opts = {} +default_conf = "/usr/local/etc/piler/piler.conf" def read_options(filename="", opts={}): @@ -38,9 +39,9 @@ def purge_m_files(ids=[], opts={}): if len(ids) > 0: remove_m_files(ids, opts) - if opts['dry_run'] is False: - # Set deleted=1 for aged metadata entries + # Set deleted=1 for aged metadata entries + if opts['dry_run'] is False: cursor = opts['db'].cursor() format = ", ".join(['%s'] * len(ids)) cursor.execute("UPDATE metadata SET deleted=1 WHERE piler_id IN " + @@ -48,54 +49,92 @@ def purge_m_files(ids=[], opts={}): opts['db'].commit() -def purge_attachments(ids=[], opts={}): +def purge_attachments_by_piler_id(ids=[], opts={}): format = ", ".join(['%s'] * len(ids)) cursor = opts['db'].cursor() - # Select non referenced attachments - cursor.execute("SELECT i, piler_id, attachment_id FROM v_attachment " + - "WHERE refcount=0 AND piler_id IN (%s)" % (format), ids) + cursor.execute("SELECT i, piler_id, attachment_id, refcount FROM " + + "v_attachment WHERE piler_id IN (%s)" % (format), ids) while True: rows = cursor.fetchall() if rows == (): break + else: + remove_attachment_files(rows, opts) - remove_ids = [] - # Delete attachments from filesystem - for (id, piler_id, attachment_id) in rows: +def purge_attachments_by_attachment_id(opts={}): + format = ", ".join(['%s'] * len(opts['referenced_attachments'])) + + cursor = opts['db'].cursor() + + cursor.execute("SELECT i, piler_id, attachment_id, refcount FROM " + + "v_attachment WHERE i IN (%s)" % + (format), opts['referenced_attachments']) + + while True: + rows = cursor.fetchall() + if rows == (): + break + else: + remove_attachment_files(rows, opts) + + +def remove_attachment_files(rows=(), opts={}): + remove_ids = [] + referenced_ids = [] + + if rows == (): + return + + # If refcount > 0, then save attachment.id, and handle later, + # otherwise delete the attachment from the filesystem, and + # attachment table + + for (id, piler_id, attachment_id, refcount) in rows: + if refcount == 0: remove_ids.append(id) if opts['dry_run'] is False: - unlink(get_attachment_file_path(piler_id, attachment_id, opts)) + unlink(get_attachment_file_path(piler_id, attachment_id, + opts), opts) else: print get_attachment_file_path(piler_id, attachment_id, opts) + else: + referenced_ids.append(id) - # Delete these IDs from attachment table + if remove_ids: if opts['dry_run'] is False: format = ", ".join(['%s'] * len(remove_ids)) + cursor = opts['db'].cursor() cursor.execute("DELETE FROM attachment WHERE id IN (%s)" % (format), remove_ids) opts['db'].commit() else: print remove_ids + opts['referenced_attachments'] = referenced_ids + def remove_m_files(ids=[], opts={}): for i in range(0, len(ids)): if opts['dry_run'] is False: unlink(get_m_file_path(ids[i], opts), opts) + opts['messages'] = opts['messages'] + 1 else: print get_m_file_path(ids[i], opts) def unlink(filename="", opts={}): + if opts['verbose']: + print "removing", filename + try: st = os.stat(filename) opts['purged_stored_size'] = opts['purged_stored_size'] + st.st_size - opts['count'] = opts['count'] + 1 + opts['files'] = opts['files'] + 1 os.unlink(filename) except: pass @@ -113,9 +152,12 @@ def get_attachment_file_path(piler_id='', attachment_id=0, opts={}): def main(): + if "/usr/libexec" in __file__: + default_conf = "/etc/piler/piler.conf" + parser = argparse.ArgumentParser() parser.add_argument("-c", "--config", type=str, help="piler.conf path", - default="/usr/local/etc/piler/piler.conf") + default="/etc/piler/piler.conf") parser.add_argument("-b", "--batch-size", type=int, help="batch size " + "to delete", default=1000) parser.add_argument("-d", "--dry-run", help="dry run", action='store_true') @@ -129,11 +171,14 @@ def main(): sys.exit(1) opts['dry_run'] = args.dry_run + opts['verbose'] = args.verbose opts['db'] = None - opts['count'] = 0 + opts['messages'] = 0 + opts['files'] = 0 opts['size'] = 0 opts['purged_size'] = 0 opts['purged_stored_size'] = 0 + opts['referenced_attachments'] = [] syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_MAIL) @@ -156,14 +201,19 @@ def main(): opts['purged_size'] = opts['purged_size'] + sum(size) purge_m_files(piler_id, opts) + purge_attachments_by_piler_id(piler_id, opts) - purge_attachments(piler_id, opts) + # It's possible that there's attachment duplication, thus + # refcount > 0, even though after deleting the duplicates + # (references) refcount becomes zero. + if len(opts['referenced_attachments']) > 0: + purge_attachments_by_attachment_id(opts) # Update the counter table if opts['dry_run'] is False: cursor.execute("UPDATE counter SET rcvd=rcvd-%s, size=size-%s, " + "stored_size=stored_size-%s", - (str(opts['count']), str(opts['purged_size']), + (str(opts['messages']), str(opts['purged_size']), str(opts['purged_stored_size']))) opts['db'].commit() @@ -173,9 +223,15 @@ def main(): if opts['db']: opts['db'].close() - syslog.syslog("purged " + str(opts['count']) + " files, " + - str(opts['purged_size']) + "/" + - str(opts['purged_stored_size']) + " bytes") + summary = "Purged " + str(opts['messages']) + " messages, " +\ + str(opts['files']) + " files, " +\ + str(opts['purged_size']) + "/" +\ + str(opts['purged_stored_size']) + " bytes" + + if opts['verbose']: + print summary + + syslog.syslog(summary) if __name__ == "__main__":