import sys
import os
from subprocess import Popen, PIPE
from uuid import uuid1

# Measure how much the MongoDB data directory grows while an input file is
# pushed through a four-stage tool chain (segmenter -> tokenizer -> tagger ->
# parser).  Usage:  <script> INPUT_FILE [DF_OPTION]
# DF_OPTION is given without its leading dash (e.g. "k" runs "df -k").

MONGODB_DIR = '/var/lib/mongodb'
LAP_PYTHON = '/projects/lap/tree/trunk/python/lap/python'
TOOLS_ROOT = '/projects/lap/tools/trunk'
TMP_PATTERN = '/tmp/%s.emanuel.ws940701.%s.rpt'


def df_command(option=''):
    """Return the argv list that runs ``df`` on the MongoDB directory.

    option -- a df flag without the leading dash ('' for plain ``df``).
    """
    argv = ['df']
    if option:
        # The original format string was '-%' (missing the 's'), which raised
        # ValueError as soon as an option was supplied.
        argv.append('-%s' % option)
    argv.append(MONGODB_DIR)
    return argv


def parse_used_blocks(df_output):
    """Return the integer in the third column of the second line of df output.

    Raises ValueError when the output has no such field or when the field is
    not a plain integer.
    """
    lines = df_output.split("\n")
    fields = lines[1].split() if len(lines) > 1 else []
    if len(fields) < 3:
        raise ValueError('unexpected df output: %r' % (df_output,))
    return int(fields[2])


def read_used_blocks(df_argv):
    """Run df with the given argv and return the parsed used-space figure."""
    proc = Popen(df_argv, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    return parse_used_blocks(proc.communicate()[0])


def pipeline_steps(in_file, run_id):
    """Return the ordered ``(label, argv)`` pairs of the tool chain.

    Each tool reads the report written by the previous one; run_id is
    embedded in the temporary report names so separate runs use separate files.
    """
    sentences = TMP_PATTERN % ('nltk_segmenter', run_id)
    tokens = TMP_PATTERN % ('repp_tokenizer', run_id)
    tags = TMP_PATTERN % ('hunpos', run_id)
    parses = TMP_PATTERN % ('maltparser', run_id)
    return [
        ('sentences',
         [LAP_PYTHON, TOOLS_ROOT + '/segmentation/nltk_segmenter.py',
          in_file, sentences]),
        ('tokens',
         [LAP_PYTHON, TOOLS_ROOT + '/segmentation/repp_tokenizer.py',
          sentences, tokens, 'nltk_punkt', 'ptb']),
        ('pos-tags',
         [LAP_PYTHON, TOOLS_ROOT + '/tagging/hunpos.py',
          tokens, tags, 'eng_wsj.model', 'nltk_punkt', 'repp']),
        ('dependencies',
         [LAP_PYTHON, TOOLS_ROOT + '/parsing/maltparser.py',
          tags, parses, 'nltk_punkt', 'repp', 'hunpos']),
    ]


def run_step(argv):
    """Run one tool to completion and print what it wrote to stdout."""
    proc = Popen(argv, stdout=PIPE, stderr=PIPE, stdin=PIPE,
                 universal_newlines=True)
    # communicate() closes stdin, drains stdout and stderr together and waits
    # for the child; reading only stdout can block forever once the child
    # fills the stderr pipe.
    out = proc.communicate()[0]
    print(out)


def measure_growth(steps, read_used, run):
    """Run every step and return ``[(label, growth)]`` with a ('0', 0) baseline.

    steps     -- iterable of (label, argv) pairs
    read_used -- zero-argument callable returning the current used figure
    run       -- callable invoked with each step's argv
    """
    growth = [('0', 0)]
    current = read_used()
    for label, argv in steps:
        run(argv)
        used = read_used()
        growth.append((label, used - current))
        current = used
    return growth


def main(argv=None):
    """Script entry point: run the pipeline and print the growth table."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: %s INPUT_FILE [DF_OPTION]' % argv[0])
    in_file = argv[1]
    df_argv = df_command(argv[2] if len(argv) > 2 else '')
    run_id = str(uuid1())

    growth = measure_growth(pipeline_steps(in_file, run_id),
                            lambda: read_used_blocks(df_argv),
                            run_step)

    print("------------------------")
    print("/var/lib/mongodb growth:")
    print("------------------------")

    for job, used in growth:
        print("%s %s" % (job, used))


if __name__ == '__main__':
    main()