"
raise ImportError(msg)
except Exception :
e = sys.exc_info()[1]
# These are various parsing exceptions. Per spec, this is a case where
# error triples MUST be returned, i.e., the usage of rdfOutput (which switches between an HTML formatted
# return page and a graph with error triples) does not apply
err = self.options.add_error(str(e), context = name)
self.http_status = 400
self.options.processor_graph.add_http_context(err, 400)
return copyErrors(graph, self.options)
# If we got here, we have a DOM tree to operate on...
return self.graph_from_DOM(dom, graph, pgraph)
except Exception :
# Something nasty happened during the generation of the graph...
(a,b,c) = sys.exc_info()
sys.excepthook(a,b,c)
if isinstance(b, ImportError) :
self.http_status = None
else :
self.http_status = 500
if not rdfOutput : raise b
err = self.options.add_error(str(b), context = name)
self.options.processor_graph.add_http_context(err, 500)
return copyErrors(graph, self.options)
def rdf_from_sources(self, names, outputFormat = "turtle", rdfOutput = False) :
    """
    Extract the RDF content of a list of RDFa sources into one common graph and serialize that graph.
    Every source is handed, in order, to L{graph_from_source} together with the same graph instance; the
    accumulated graph is then serialized in the requested format.

    @param names: list of sources, each can be a URI, a file name, or a file-like object
    @keyword outputFormat: serialization format, passed unchanged to the graph's C{serialize} method. Can be one of
    "turtle", "n3", "xml", "pretty-xml", "nt", "json" or "json-ld". "turtle" and "n3", "xml" and "pretty-xml",
    and "json" and "json-ld" are synonyms, respectively. Note that the JSON-LD serialization works with RDFLib 3.* only.
    @keyword rdfOutput: controls what happens in case an exception is raised while processing a source. If the value
    is False, the caller is responsible for handling it; otherwise a graph is returned with an error message included
    in the processor graph
    @type rdfOutput: boolean
    @return: a serialized RDF Graph
    @rtype: string
    """
    # The graph class of the optional extras package is preferred because it gives access to the
    # various, non-standard serializations. If the extras are not installed (or cannot be set up),
    # fall back to the standard rdflib graph. Only genuine errors are caught here, so that
    # KeyboardInterrupt and SystemExit are not silently swallowed.
    try :
        from pyRdfaExtras import MyGraph
        graph = MyGraph()
    except Exception :
        graph = Graph()

    # The value of rdfOutput determines the reaction on exceptions...
    for name in names :
        self.graph_from_source(name, graph, rdfOutput)
    return graph.serialize(format=outputFormat)
def rdf_from_source(self, name, outputFormat = "turtle", rdfOutput = False) :
    """
    Extract an RDF graph from a single RDFa source and return its serialization. This is a convenience
    wrapper: the source is wrapped into a one-element list and the work is delegated to L{rdf_from_sources}.

    @param name: a URI, a file name, or a file-like object
    @keyword outputFormat: serialization format, forwarded unchanged. Can be one of "turtle", "n3", "xml",
    "pretty-xml", "nt", "json" or "json-ld". "turtle" and "n3", "xml" and "pretty-xml", and "json" and "json-ld"
    are synonyms, respectively. Note that the JSON-LD serialization works with RDFLib 3.* only.
    @keyword rdfOutput: controls what happens in case an exception is raised. If the value is False, the caller
    is responsible for handling it; otherwise a graph is returned with an error message included in the processor graph
    @type rdfOutput: boolean
    @return: a serialized RDF Graph
    @rtype: string
    """
    single_source = [name]
    serialized = self.rdf_from_sources(single_source, outputFormat, rdfOutput)
    return serialized
################################################# CGI Entry point
def processURI(uri, outputFormat, form=None) :
    """The standard processing of an RDFa uri with options in a form; used as an entry point from a CGI call.

    The call accepts extra form options (i.e., HTTP GET options) as follows. Boolean options are switched on
    by the (case insensitive) value C{true}; for the options read through the local C{_get_option} helper the
    old spelling with '-' instead of '_' is also accepted.

     - C{graph=[output|processor|output,processor|processor,output]} (or, alternatively, C{rdfagraph}) specifying which graphs are returned. Default: C{output}
     - C{space_preserve=[true|false]} : value of the C{space_preserve} processing option. Default: C{true}
     - C{rdfa_version} provides the RDFa version that should be used for distilling. The string should be of the form "1.0" or "1.1". If missing, C{None} is passed on and the processor decides
     - C{host_language=[xhtml,html,svg,atom,xml]} : the host language. Used when files are uploaded or text is added verbatim, otherwise the HTTP return header should be used. Any other value is treated as C{xml}; if the option is missing, an empty media type is passed on
     - C{embedded_rdf=[true|false]} : whether embedded turtle or RDF/XML content should be added to the output graph. Default: C{false}
     - C{vocab_expansion=[true|false]} : whether the vocabularies should be expanded through the restricted RDFS entailment. Default: C{false}
     - C{vocab_cache=[true|false]} : whether vocab caching should be performed or whether it should be ignored and vocabulary files should be picked up every time. Default: C{true}
     - C{vocab_cache_report=[true|false]} : whether vocab caching details should be reported; switching it on also switches on the output of the processor graph. Default: C{false}
     - C{vocab_cache_refresh=[true|false]} : whether vocab caches have to be regenerated every time. Default: C{false}
     - C{rdfa_lite=[true|false]} : whether warnings should be generated for non RDFa Lite attribute usage. Default: C{false}
     - C{extras}, C{extra-meta}, C{extra-openid}, C{extra-dc} : backward compatibility switches adding the corresponding transformers
     - C{forceRDFOutput} : unpublished testing flag; its mere presence forces RDF output

    In case of an error that is not turned into error triples, an HTML page describing the problem is
    returned (preceded by the C{Content-type} and C{Status} CGI headers); all request supplied values and
    the exception texts are HTML escaped before they are put into that page.

    @param uri: URI to access. Note that the C{text:} and C{uploaded:} fake URI values are treated separately; the former is for textual input (in which case a StringIO is used to get the data) and the latter is for an uploaded file, where the form gives access to the file directly.
    @param outputFormat: serialization format, handed over to the processor. It also selects the returned content type: "n3" yields C{text/rdf+n3}, "nt" and "turtle" yield C{text/turtle}, "json" and "json-ld" yield C{application/ld+json}, anything else yields C{application/rdf+xml}.
    @param form: extra call options (from the CGI call) to set up the local options. If missing, no extra option is set.
    @type form: cgi FieldStorage instance
    @return: serialized graph (or an HTML error page), preceded by the CGI headers
    @rtype: string
    """
    # cgi.escape was removed in Python 3.8; html.escape is its replacement. The fallback keeps
    # the function usable on Python 2, where the html module does not exist.
    try :
        from html import escape
    except ImportError :
        from cgi import escape
    import traceback

    # A None default avoids sharing one mutable dictionary among calls
    if form is None :
        form = {}

    def _esc(value) :
        # HTML escape anything that goes into an error page; quote=False reproduces
        # what cgi.escape used to generate
        return escape("%s" % (value,), quote=False)

    def _flag(key) :
        # True if the option is present in the form with the value 'true'
        return key in form and form.getfirst(key).lower() == "true"

    def _get_option(param, compare_value, default) :
        # In the old days '-' was used in the parameter names, the standard favours '_';
        # the old style parameters are still valid, but the new style takes precedence
        for key in (param, param.replace('_','-')) :
            if key in form :
                return form.getfirst(key).lower() == compare_value
        return default

    # Where does the content come from?
    if uri == "uploaded:" :
        source = form["uploaded"].file
        base = ""
    elif uri == "text:" :
        source = StringIO(form.getfirst("text"))
        base = ""
    else :
        source = uri
        base = uri

    if "rdfa_version" in form :
        rdfa_version = form.getfirst("rdfa_version")
    else :
        rdfa_version = None

    # Host language: HTML, XHTML, SVG, Atom, or XML
    # Note that these options should be used for the upload and inline version only in case of a form;
    # for real uris the returned content type should be used
    if "host_language" in form :
        host_language = form.getfirst("host_language").lower()
        if host_language in ("xhtml", "html", "svg", "atom") :
            media_type = getattr(MediaTypes, host_language)
        else :
            media_type = MediaTypes.xml
    else :
        media_type = ""

    check_lite = _flag("rdfa_lite")

    # The code below is left for backward compatibility only. In fact, these options are not exposed any more,
    # they are not really in use
    transformers = []
    if _flag("extras") :
        from .transform.metaname import meta_transform
        from .transform.OpenID import OpenID_transform
        from .transform.DublinCore import DC_transform
        transformers.extend([OpenID_transform, DC_transform, meta_transform])
    else :
        if _flag("extra-meta") :
            from .transform.metaname import meta_transform
            transformers.append(meta_transform)
        if _flag("extra-openid") :
            from .transform.OpenID import OpenID_transform
            transformers.append(OpenID_transform)
        if _flag("extra-dc") :
            from .transform.DublinCore import DC_transform
            transformers.append(DC_transform)

    # Note that both the 'graph' and the 'rdfagraph' form keys are used here. Reason is that
    # 'graph' was used in the previous versions, including the RDFa 1.0 processor,
    # so removing it altogether would create backward incompatibilities.
    # On the other hand, the RDFa 1.1 doc clearly refers to 'rdfagraph' as the standard key.
    requested_graphs = None
    if "graph" in form :
        requested_graphs = form.getfirst("graph").lower()
    elif "rdfagraph" in form :
        requested_graphs = form.getfirst("rdfagraph").lower()

    output_default_graph = True
    output_processor_graph = False
    if requested_graphs == "processor" :
        output_default_graph = False
        output_processor_graph = True
    elif requested_graphs in ("processor,output", "output,processor") :
        output_processor_graph = True

    embedded_rdf = _get_option("embedded_rdf", "true", False)
    space_preserve = _get_option("space_preserve", "true", True)
    vocab_cache = _get_option("vocab_cache", "true", True)
    vocab_cache_report = _get_option("vocab_cache_report", "true", False)
    refresh_vocab_cache = _get_option("vocab_cache_refresh", "true", False)
    vocab_expansion = _get_option("vocab_expansion", "true", False)
    # The cache report ends up in the processor graph, so that graph must be part of the output
    if vocab_cache_report :
        output_processor_graph = True

    options = Options(output_default_graph = output_default_graph,
                      output_processor_graph = output_processor_graph,
                      space_preserve = space_preserve,
                      transformers = transformers,
                      vocab_cache = vocab_cache,
                      vocab_cache_report = vocab_cache_report,
                      refresh_vocab_cache = refresh_vocab_cache,
                      vocab_expansion = vocab_expansion,
                      embedded_rdf = embedded_rdf,
                      check_lite = check_lite
                      )
    processor = pyRdfa(options = options, base = base, media_type = media_type, rdfa_version = rdfa_version)

    # Decide the output format; the issue is what should happen in case of a top level error like an
    # inaccessibility of the html source: should a graph be returned or an HTML page with an error message?
    # Content negotiation on the HTTP_ACCEPT header used to decide this; it is switched off, which means
    # that HTML output is never requested and the processor is always asked to return error triples.
    htmlOutput = False

    # 'forceRDFOutput' is really for testing purposes only, it is an unpublished flag to force RDF output
    # no matter what
    try :
        serialized = processor.rdf_from_source(source, outputFormat, rdfOutput = ("forceRDFOutput" in form) or not htmlOutput)
        if outputFormat == "n3" :
            content_type = 'text/rdf+n3'
        elif outputFormat == "nt" or outputFormat == "turtle" :
            content_type = 'text/turtle'
        elif outputFormat == "json-ld" or outputFormat == "json" :
            content_type = 'application/ld+json'
        else :
            content_type = 'application/rdf+xml'
        # CGI header, empty separator line, then the serialized graph
        return ('Content-Type: %s; charset=utf-8\n\n' % content_type) + serialized
    except HTTPError :
        h = sys.exc_info()[1]
        page = [
            'Content-type: text/html; charset=utf-8\nStatus: %s \n\n' % h.http_code,
            "<html>\n",
            "<head>\n",
            "<title>HTTP Error in distilling RDFa content</title>\n",
            "</head><body>\n",
            "<h1>HTTP Error in distilling RDFa content</h1>\n",
            "<p>HTTP Error: %s (%s)</p>\n" % (_esc(h.http_code), _esc(h.msg)),
            "<p>On URI: <code>'%s'</code></p>\n" % _esc(uri),
            "</body>\n",
            "</html>\n",
        ]
        return "".join(page)
    except Exception :
        # This branch should occur only if an exception is really raised, ie, if it is not turned
        # into a graph value.
        value = sys.exc_info()[1]
        page = [
            'Content-type: text/html; charset=utf-8\nStatus: %s\n\n' % processor.http_status,
            "<html>\n",
            "<head>\n",
            "<title>Exception in RDFa processing</title>\n",
            "</head><body>\n",
            "<h1>Exception in distilling RDFa</h1>\n",
            "<pre>\n",
            # The traceback may quote request data (e.g., the URI), hence the escaping
            _esc(traceback.format_exc()),
            "</pre>\n",
            "<pre>%s</pre>\n" % _esc(value),
            "<h1>Distiller request details</h1>\n",
            "<dl>\n",
        ]

        # The verbatim text is reported only if there is a non-empty one; otherwise
        # the (fake) URI is displayed
        text_value = None
        if uri == "text:" and "text" in form :
            text_value = form["text"].value
        if text_value is not None and len(text_value.strip()) != 0 :
            page.append("<dt>Text input:</dt><dd>%s</dd>\n" % _esc(text_value).replace('\n','<br/>'))
        elif uri == "uploaded:" :
            page.append("<dt>Uploaded file</dt>\n")
        else :
            page.append("<dt>URI received:</dt><dd><code>'%s'</code></dd>\n" % _esc(uri))

        if "host_language" in form :
            page.append("<dt>Media Type:</dt><dd>%s</dd>\n" % _esc(media_type))

        # Covers both the 'graph' and the 'rdfagraph' keys
        if requested_graphs is not None :
            page.append("<dt>Requested graphs:</dt><dd>%s</dd>\n" % _esc(requested_graphs))
        else :
            page.append("<dt>Requested graphs:</dt><dd>default</dd>\n")

        page.append("<dt>Output serialization format:</dt><dd>%s</dd>\n" % _esc(outputFormat))
        if "space_preserve" in form :
            page.append("<dt>Space preserve:</dt><dd>%s</dd>\n" % _esc(form["space_preserve"].value))

        page.append("</dl>\n")
        page.append("</body>\n")
        page.append("</html>\n")
        return "".join(page)