"""Dump the media and graph documents held by a ``Lafdb`` store as JSON.

Usage::

    script [--output=FILE] INPUT

INPUT is read as UTF-8 text and handed to ``Lafdb(data=...)``.  The
resulting JSON object has the keys ``nodes``, ``regions`` and ``edges``
(lists of graph documents, grouped by their ``entity`` field) and, when a
media document is found, ``media``.  The JSON is written to FILE when
``--output`` is given and to standard output otherwise.
"""
# NOTE: this deliberately shadows the builtin ``open`` with ``codecs.open``
# so that an encoding can be passed on Python 2.
from codecs import open
from getopt import getopt
import sys

import simplejson

from lap.store import Lafdb

options, args = getopt(sys.argv[1:], '', ['output='])
options = dict(options)
# ``None`` means "write to standard output".
file_name = options.get('--output')

if not args:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit('usage: %s [--output=FILE] INPUT' % sys.argv[0])

# Close the input file as soon as its content has been handed over.
with open(args[0], 'r', 'utf-8') as infile:
    db = Lafdb(data=infile.read())

dump = {'nodes': [], 'regions': [], 'edges': []}

main_collection = db.db[db.main_collection_name]

for media_type, receipt in db.info['media'].items():
    cursor = main_collection[media_type].find(
        {'receipt': receipt, 'entity': media_type})
    for document in cursor:
        del document['_id']
        # Every matching document replaces the previous one, so only the
        # last one found ends up in the dump.
        # NOTE(review): confirm that at most one media document is expected.
        dump['media'] = document

# Maps the ``entity`` value of a graph document to the list collecting it;
# documents with any other ``entity`` value are left out of the dump.
buckets = {
    'node': dump['nodes'],
    'region': dump['regions'],
    'edge': dump['edges'],
}

for receipt in db.info['annotators'].values():
    cursor = main_collection['graph'].find({'receipt': receipt})
    for item in cursor:
        del item['_id']
        bucket = buckets.get(item['entity'])
        if bucket is not None:
            bucket.append(item)

output_json = simplejson.dumps(dump, indent=4, sort_keys=True)

if file_name is not None:
    # ``with`` guarantees the file is closed even if the write fails.
    with open(file_name, 'w') as output:
        output.write(output_json)
else:
    # Same output as ``print output_json``, but valid on Python 2 and 3.
    sys.stdout.write(output_json + '\n')

# ``quit()`` is injected by ``site`` and is missing under ``python -S``;
# ``sys.exit()`` raises the same SystemExit without that dependency.
sys.exit()