util: pilerpurge.py fix

Change-Id: I0480f9a1cf1477b53118b02585547d47f1ba7554
Signed-off-by: SJ <sj@acts.hu>
This commit is contained in:
SJ 2017-05-15 14:26:21 +02:00
parent c5c8c28422
commit 671c4d8078

View File

@ -18,6 +18,7 @@ SQL_PURGE_SELECT_QUERY = "SELECT piler_id, size FROM " +\
"legal_hold))" "legal_hold))"
opts = {} opts = {}
default_conf = "/usr/local/etc/piler/piler.conf"
def read_options(filename="", opts={}): def read_options(filename="", opts={}):
@ -38,9 +39,9 @@ def purge_m_files(ids=[], opts={}):
if len(ids) > 0: if len(ids) > 0:
remove_m_files(ids, opts) remove_m_files(ids, opts)
if opts['dry_run'] is False:
# Set deleted=1 for aged metadata entries # Set deleted=1 for aged metadata entries
if opts['dry_run'] is False:
cursor = opts['db'].cursor() cursor = opts['db'].cursor()
format = ", ".join(['%s'] * len(ids)) format = ", ".join(['%s'] * len(ids))
cursor.execute("UPDATE metadata SET deleted=1 WHERE piler_id IN " + cursor.execute("UPDATE metadata SET deleted=1 WHERE piler_id IN " +
@ -48,54 +49,92 @@ def purge_m_files(ids=[], opts={}):
opts['db'].commit() opts['db'].commit()
def purge_attachments(ids=[], opts={}): def purge_attachments_by_piler_id(ids=[], opts={}):
format = ", ".join(['%s'] * len(ids)) format = ", ".join(['%s'] * len(ids))
cursor = opts['db'].cursor() cursor = opts['db'].cursor()
# Select non referenced attachments cursor.execute("SELECT i, piler_id, attachment_id, refcount FROM " +
cursor.execute("SELECT i, piler_id, attachment_id FROM v_attachment " + "v_attachment WHERE piler_id IN (%s)" % (format), ids)
"WHERE refcount=0 AND piler_id IN (%s)" % (format), ids)
while True: while True:
rows = cursor.fetchall() rows = cursor.fetchall()
if rows == (): if rows == ():
break break
else:
remove_attachment_files(rows, opts)
def purge_attachments_by_attachment_id(opts={}):
format = ", ".join(['%s'] * len(opts['referenced_attachments']))
cursor = opts['db'].cursor()
cursor.execute("SELECT i, piler_id, attachment_id, refcount FROM " +
"v_attachment WHERE i IN (%s)" %
(format), opts['referenced_attachments'])
while True:
rows = cursor.fetchall()
if rows == ():
break
else:
remove_attachment_files(rows, opts)
def remove_attachment_files(rows=(), opts={}):
remove_ids = [] remove_ids = []
referenced_ids = []
# Delete attachments from filesystem if rows == ():
for (id, piler_id, attachment_id) in rows: return
# If refcount > 0, then save attachment.id, and handle later,
# otherwise delete the attachment from the filesystem, and
# attachment table
for (id, piler_id, attachment_id, refcount) in rows:
if refcount == 0:
remove_ids.append(id) remove_ids.append(id)
if opts['dry_run'] is False: if opts['dry_run'] is False:
unlink(get_attachment_file_path(piler_id, attachment_id, opts)) unlink(get_attachment_file_path(piler_id, attachment_id,
opts), opts)
else: else:
print get_attachment_file_path(piler_id, attachment_id, opts) print get_attachment_file_path(piler_id, attachment_id, opts)
else:
referenced_ids.append(id)
# Delete these IDs from attachment table if remove_ids:
if opts['dry_run'] is False: if opts['dry_run'] is False:
format = ", ".join(['%s'] * len(remove_ids)) format = ", ".join(['%s'] * len(remove_ids))
cursor = opts['db'].cursor()
cursor.execute("DELETE FROM attachment WHERE id IN (%s)" % cursor.execute("DELETE FROM attachment WHERE id IN (%s)" %
(format), remove_ids) (format), remove_ids)
opts['db'].commit() opts['db'].commit()
else: else:
print remove_ids print remove_ids
opts['referenced_attachments'] = referenced_ids
def remove_m_files(ids=[], opts={}): def remove_m_files(ids=[], opts={}):
for i in range(0, len(ids)): for i in range(0, len(ids)):
if opts['dry_run'] is False: if opts['dry_run'] is False:
unlink(get_m_file_path(ids[i], opts), opts) unlink(get_m_file_path(ids[i], opts), opts)
opts['messages'] = opts['messages'] + 1
else: else:
print get_m_file_path(ids[i], opts) print get_m_file_path(ids[i], opts)
def unlink(filename="", opts={}): def unlink(filename="", opts={}):
if opts['verbose']:
print "removing", filename
try: try:
st = os.stat(filename) st = os.stat(filename)
opts['purged_stored_size'] = opts['purged_stored_size'] + st.st_size opts['purged_stored_size'] = opts['purged_stored_size'] + st.st_size
opts['count'] = opts['count'] + 1 opts['files'] = opts['files'] + 1
os.unlink(filename) os.unlink(filename)
except: except:
pass pass
@ -113,9 +152,12 @@ def get_attachment_file_path(piler_id='', attachment_id=0, opts={}):
def main(): def main():
if "/usr/libexec" in __file__:
default_conf = "/etc/piler/piler.conf"
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", type=str, help="piler.conf path", parser.add_argument("-c", "--config", type=str, help="piler.conf path",
default="/usr/local/etc/piler/piler.conf") default="/etc/piler/piler.conf")
parser.add_argument("-b", "--batch-size", type=int, help="batch size " + parser.add_argument("-b", "--batch-size", type=int, help="batch size " +
"to delete", default=1000) "to delete", default=1000)
parser.add_argument("-d", "--dry-run", help="dry run", action='store_true') parser.add_argument("-d", "--dry-run", help="dry run", action='store_true')
@ -129,11 +171,14 @@ def main():
sys.exit(1) sys.exit(1)
opts['dry_run'] = args.dry_run opts['dry_run'] = args.dry_run
opts['verbose'] = args.verbose
opts['db'] = None opts['db'] = None
opts['count'] = 0 opts['messages'] = 0
opts['files'] = 0
opts['size'] = 0 opts['size'] = 0
opts['purged_size'] = 0 opts['purged_size'] = 0
opts['purged_stored_size'] = 0 opts['purged_stored_size'] = 0
opts['referenced_attachments'] = []
syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_MAIL) syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_MAIL)
@ -156,14 +201,19 @@ def main():
opts['purged_size'] = opts['purged_size'] + sum(size) opts['purged_size'] = opts['purged_size'] + sum(size)
purge_m_files(piler_id, opts) purge_m_files(piler_id, opts)
purge_attachments_by_piler_id(piler_id, opts)
purge_attachments(piler_id, opts) # It's possible that there's attachment duplication, thus
# refcount > 0, even though after deleting the duplicates
# (references) refcount becomes zero.
if len(opts['referenced_attachments']) > 0:
purge_attachments_by_attachment_id(opts)
# Update the counter table # Update the counter table
if opts['dry_run'] is False: if opts['dry_run'] is False:
cursor.execute("UPDATE counter SET rcvd=rcvd-%s, size=size-%s, " + cursor.execute("UPDATE counter SET rcvd=rcvd-%s, size=size-%s, " +
"stored_size=stored_size-%s", "stored_size=stored_size-%s",
(str(opts['count']), str(opts['purged_size']), (str(opts['messages']), str(opts['purged_size']),
str(opts['purged_stored_size']))) str(opts['purged_stored_size'])))
opts['db'].commit() opts['db'].commit()
@ -173,9 +223,15 @@ def main():
if opts['db']: if opts['db']:
opts['db'].close() opts['db'].close()
syslog.syslog("purged " + str(opts['count']) + " files, " + summary = "Purged " + str(opts['messages']) + " messages, " +\
str(opts['purged_size']) + "/" + str(opts['files']) + " files, " +\
str(opts['purged_stored_size']) + " bytes") str(opts['purged_size']) + "/" +\
str(opts['purged_stored_size']) + " bytes"
if opts['verbose']:
print summary
syslog.syslog(summary)
if __name__ == "__main__": if __name__ == "__main__":