"""Measure how much the 'lapstore' MongoDB database grows per pipeline stage.

Usage: <script> INPUT_FILE

The script runs four LAP tool scripts one after another (each stage reads the
receipt file written by the previous one), samples ``dbstats.storageSize``
after every stage, and finally prints the cumulative growth per stage.
"""
import sys
import os
from subprocess import Popen, PIPE
from uuid import uuid1

from lap.store import get_lapstore
from pymongo import MongoClient

# Interpreter used to launch every tool script.
LAP_PYTHON = "/projects/lap/tree/trunk/python/lap/python"
# Directory that holds the tool scripts, grouped in sub-directories.
TOOLS_ROOT = "/projects/lap/tools/trunk"
# Receipt file written by each stage: filled with (tool name, run id).
RPT_TEMPLATE = "/tmp/%s.emanuel.ws940701.%s.rpt"

# One entry per stage, in execution order:
# (label in the report, tool sub-directory, tool name, trailing arguments).
STAGES = (
    ('sentences', 'segmentation', 'nltk_segmenter', ()),
    ('tokens', 'segmentation', 'repp_tokenizer', ('nltk_punkt', 'ptb')),
    ('pos-tags', 'tagging', 'hunpos', ('eng_wsj.model', 'nltk_punkt', 'repp')),
    ('dependencies', 'parsing', 'maltparser', ('nltk_punkt', 'repp', 'hunpos')),
)


def _storage_size(db):
    """Return the current ``storageSize`` reported by ``dbstats`` as an int."""
    return int(db.command("dbstats")['storageSize'])


def _run_stage(argv):
    """Run one tool to completion and echo its standard output.

    ``communicate()`` closes the child's stdin, drains stdout and waits for
    the process, so the child can neither block on a full pipe nor on an
    stdin that never reaches EOF.  stderr is inherited from this process
    instead of being piped and left unread.
    """
    proc = Popen(argv, stdout=PIPE, stdin=PIPE)
    out, _ = proc.communicate()
    # Single-argument parenthesised print: same output as the Python 2 print
    # statement this script was written with, and still valid Python 3 syntax.
    print(out)
    if proc.returncode != 0:
        # Keep measuring the remaining stages, but make the failure visible.
        sys.stderr.write("warning: %s exited with status %d\n"
                         % (argv[1], proc.returncode))


def main():
    """Run all stages on the file named by argv[1] and print the growth table."""
    if len(sys.argv) < 2:
        sys.exit("usage: %s INPUT_FILE" % os.path.basename(sys.argv[0]))
    in_file = sys.argv[1]
    run_id = str(uuid1())  # keeps the receipt files of concurrent runs apart

    cred = get_lapstore()
    db = MongoClient(cred['host'], cred['port'])['lapstore']
    db.authenticate(cred['user'], cred['password'])

    # Each entry is (label, cumulative growth since the first measurement).
    growth = [('0', 0)]
    current = _storage_size(db)
    source = in_file
    for label, subdir, tool, extra in STAGES:
        target = RPT_TEMPLATE % (tool, run_id)
        script = "%s/%s/%s.py" % (TOOLS_ROOT, subdir, tool)
        # Build argv as a list so that an input path containing whitespace
        # is passed to the tool as a single argument.
        _run_stage([LAP_PYTHON, script, source, target] + list(extra))
        used = _storage_size(db)
        growth.append((label, used - current + growth[-1][1]))
        current = used
        source = target  # the next stage consumes this stage's receipt

    print("------------------------")
    print("/var/lib/mongodb growth:")
    print("------------------------")
    for job, total in growth:
        print("%s %s" % (job, total))


if __name__ == "__main__":
    main()