import sys import rdflib import re import codecs from os.path import join, normpath from urllib import quote if len(sys.argv) < 3: print """ Usage: python split.py export_graph output_folder """ g = rdflib.Graph() print "Parsing %s..." % (sys.argv[1]) g.parse(sys.argv[1], format='n3') print "Done!" for s, p, o in g: #print s #print p, 'type:', type(p) #print o #print #print '-' * 78 #print if str(p) == "http://purl.org/linkedpolitics/vocabulary/text": print "Dumping %s..." % (s.n3(g.namespace_manager)) o = o.toPython() o = re.sub(r"\\n", "\n", o, flags=re.U) #o = re.sub(u"\xa0", " ", o, flags=re.U) # nbsp to space o = re.sub(ur"[^\n\S]", " ", o, flags=re.U) o = o.strip() f = codecs.open(join(sys.argv[2], s.n3(g.namespace_manager)) + '.txt', 'w', 'utf8') f.write(o) f.close() print 'All done :-)'