"""Report how much the df "used" figure for /var/lib/mongodb grows per pipeline stage.

Usage: <script> IN_FILE [DF_OPTION]

IN_FILE is handed to the first stage (nltk_segmenter.py).  DF_OPTION, when
given, is passed to ``df`` with a leading dash (e.g. ``k`` becomes ``-k``).

Four tools are run in sequence (sentence segmentation, tokenization,
POS tagging, dependency parsing).  ``df`` is sampled before the first stage
and after every stage; the difference between consecutive samples is
reported as that stage's growth.  The unit is whatever ``df`` prints for the
chosen option.
"""
import sys
import os
from subprocess import Popen, PIPE
from uuid import uuid1

MONGODB_DIR = '/var/lib/mongodb'
LAP_PYTHON = '/projects/lap/tree/trunk/python/lap/python'
TOOLS_ROOT = '/projects/lap/tools/trunk'


def build_df_command(option=None):
    """Return the ``df`` argument list for MONGODB_DIR.

    ``option`` is the flag text without its leading dash; ``None`` or an
    empty string means "no extra flag".
    """
    cmd = ['df']
    if option:
        # The original used '-%' here, which is an incomplete format string
        # and raised ValueError whenever an option was supplied.
        cmd.extend(('-%s' % option).split())
    cmd.append(MONGODB_DIR)
    return cmd


def parse_df_used(output):
    """Return the third column of the first data row of ``df`` output as int.

    All lines after the header are joined before splitting into fields, so a
    row that ``df`` wrapped onto a second line (long device name) still
    parses.  Raises ValueError if fewer than three fields are present or the
    field is not an integer.
    """
    fields = ' '.join(output.splitlines()[1:]).split()
    if len(fields) < 3:
        raise ValueError('unexpected df output: %r' % (output,))
    return int(fields[2])


def disk_used(df_cmd):
    """Run ``df_cmd`` and return the parsed "used" figure."""
    # universal_newlines=True yields text on both Python 2 and Python 3.
    proc = Popen(df_cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    return parse_df_used(proc.communicate()[0])


def report_path(tool, run_id):
    """Return the /tmp report file path used by ``tool`` for this run."""
    return '/tmp/%s.emanuel.ws940701.%s.rpt' % (tool, run_id)


def pipeline_steps(in_file, run_id):
    """Return the ordered ``(label, argv)`` pairs for the four stages.

    Each stage reads the previous stage's report file (the first one reads
    ``in_file``) and writes its own.  Argument lists are built directly
    rather than by splitting a formatted string, so ``in_file`` may contain
    whitespace.
    """
    stages = [
        ('sentences', 'segmentation', 'nltk_segmenter', []),
        ('tokens', 'segmentation', 'repp_tokenizer', ['nltk_punkt', 'ptb']),
        ('pos-tags', 'tagging', 'hunpos',
         ['eng_wsj.model', 'nltk_punkt', 'repp']),
        ('dependencies', 'parsing', 'maltparser',
         ['nltk_punkt', 'repp', 'hunpos']),
    ]
    steps = []
    source = in_file
    for label, subdir, tool, extra in stages:
        target = report_path(tool, run_id)
        script = '%s/%s/%s.py' % (TOOLS_ROOT, subdir, tool)
        steps.append((label, [LAP_PYTHON, script, source, target] + extra))
        source = target
    return steps


def run_step(argv):
    """Run one stage to completion and return its captured stdout.

    communicate() closes the child's stdin, drains stdout and stderr
    together and waits for exit.  Reading only stdout (as the original did)
    can deadlock once the unread stderr pipe fills up, and does not
    guarantee the child has finished before df is sampled.  stderr is
    captured and discarded, as before.
    """
    proc = Popen(argv, stdout=PIPE, stderr=PIPE, stdin=PIPE,
                 universal_newlines=True)
    out, _ = proc.communicate()
    return out


def main(argv=None):
    """Run the pipeline and print the per-stage growth table.

    ``argv`` defaults to ``sys.argv[1:]``: the input file, optionally
    followed by a df option (without the dash).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit('usage: %s IN_FILE [DF_OPTION]' % sys.argv[0])
    in_file = args[0]
    df_cmd = build_df_command(args[1] if len(args) > 1 else None)
    run_id = str(uuid1())

    growth = [('0', 0)]
    current = disk_used(df_cmd)
    for label, step_argv in pipeline_steps(in_file, run_id):
        print(run_step(step_argv))
        used = disk_used(df_cmd)
        growth.append((label, used - current))
        current = used

    print("------------------------")
    print("/var/lib/mongodb growth:")
    print("------------------------")
    for job, delta in growth:
        print("%s %s" % (job, delta))


if __name__ == '__main__':
    main()