# File: NTriples.py
########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Rdf/Serializers/NTriples.py,v 1.3 2004/06/21 16:42:36 mbrown Exp $
"""
N-Triples module.
http://www.w3.org/TR/rdf-testcases/#ntriples

Copyright 2003 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""
#Regexen for NTriples
import re, urllib, cStringIO from Ft.Rdf import RDF_MS_BASE, RDF_SCHEMA_BASE from Ft.Rdf import OBJECT_TYPE_RESOURCE, OBJECT_TYPE_LITERAL, OBJECT_TYPE_UNKNOWN from Ft.Rdf import Statement
# Borrowed from Ft.Lib.Uri.  This pattern does not accept a "#fragment"
# part, so the fragment is matched separately by _FRAGMENT_REGEX.
URI_REGEX = r"(?:(?:[a-zA-Z][0-9a-zA-Z+\-\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\.\-_!~*'()%]+)?"

# Optional fragment identifier.  RDF property and datatype URIs routinely
# carry one (e.g. ...rdf-syntax-ns#type), so URI references must allow it.
_FRAGMENT_REGEX = r"(?:#[0-9a-zA-Z;/?:@&=+$,\.\-_!~*'()%]*)?"

# Printable US-ASCII, space (0x20) through tilde (0x7E) inclusive.
CHAR = r"[\x20-\x7E]"
# Blank node label.
NAME = "[A-Za-z][A-Za-z0-9]*"
# Language tag with at most one sub-tag.
LANG = r"[a-z0-9]+(\-[a-z0-9]+)?"

# <uri>
URIREF_REGEX = "<(?P<uriref>" + URI_REGEX + _FRAGMENT_REGEX + ")>"
# _:label
NODEID_REGEX = "_:(?P<name>" + NAME + ")"
# "text" or "text"@lang
LANGSTRING_REGEX = '"(?P<value>' + CHAR + '*)"(@(?P<lang>' + LANG + '))?'
# "text"^^<datatype-uri>
# The carets are escaped because a bare "^" is an anchor in a regex, and the
# datatype is wrapped in angle brackets as the N-Triples grammar requires.
# The literal text is captured as "value", the same name LANGSTRING uses.
DTSTRING_REGEX = ('"(?P<value>' + CHAR + r'*)"\^\^<(?P<dt>'
                  + URI_REGEX + _FRAGMENT_REGEX + ')>')

# Each compiled pattern matches one term at the start of the input and
# captures whatever follows it in the final group, so m.group(m.lastindex)
# is always the unparsed remainder of the line.
URIREF = re.compile("(" + URIREF_REGEX + ")(.*)")
NODEID = re.compile("(" + NODEID_REGEX + ")(.*)")
LANGSTRING = re.compile("(" + LANGSTRING_REGEX + ")(.*)")
DTSTRING = re.compile("(" + DTSTRING_REGEX + ")(.*)")
class Resource:
    """
    A parsed N-Triples resource term.

    When anon is true the term is a blank node and its label is stored in
    self.name; otherwise the term is a URI reference stored in self.uri.
    Only one of those two attributes is set on any given instance.
    """

    def __init__(self, id, anon=0):
        self.anon = anon
        if not anon:
            self.uri = id
        else:
            self.name = id

    def __repr__(self):
        # URI references print as the bare URI, blank nodes as "_:label".
        if not self.anon:
            return self.uri
        return "_:" + self.name

    __str__ = __repr__
class Literal:
    """
    A parsed N-Triples literal term.

    value is the literal text; datatype is the datatype URI, defaulting to
    RDF_SCHEMA_BASE + "Literal" for a plain (untyped) literal.
    """

    def __init__(self, value, datatype=RDF_SCHEMA_BASE+"Literal"):
        self.datatype = datatype
        self.value = value
        return

    def __repr__(self):
        # Plain literals print as the quoted value alone.
        if self.datatype == RDF_SCHEMA_BASE+"Literal":
            return repr(self.value)
        # Typed literals print as the quoted value followed by ^^datatype.
        # The full repr is used so that the closing quote is kept.
        return "%s^^%s" % (repr(self.value), self.datatype)

    __str__ = __repr__
def _parse_term(text, allow_literal=0):
    """
    Match a single N-Triples term at the start of text.

    Returns a (term, rest) pair, where term is a Resource or Literal and
    rest is the text following the term, or None when nothing matches.
    Literals are only tried when allow_literal is true.
    """
    m = URIREF.match(text)
    if m:
        return Resource(urllib.unquote(m.group("uriref"))), m.group(m.lastindex)
    m = NODEID.match(text)
    if m:
        return Resource(m.group("name"), 1), m.group(m.lastindex)
    if allow_literal:
        # Typed literals are tried first: the plain-literal pattern also
        # matches the quoted prefix of a typed literal, which would
        # silently discard the datatype.
        m = DTSTRING.match(text)
        if m:
            return Literal(m.group("value"), m.group("dt")), m.group(m.lastindex)
        m = LANGSTRING.match(text)
        if m:
            return Literal(m.group("value")), m.group(m.lastindex)
    return None


def ParseNTriples(lines):
    """
    Parse a sequence of N-Triples lines.

    Returns a list of (subject, predicate, object) tuples.  Subjects and
    predicates are Resource instances; objects are Resource or Literal
    instances.  Blank lines and lines starting with "#" (comments) are
    skipped.

    Raises ValueError when the subject, predicate or object of a line
    cannot be parsed.
    """
    triples = []
    for raw in lines:
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        terms = []
        rest = line
        for role, allow_literal in (("subject", 0), ("predicate", 0), ("object", 1)):
            parsed = _parse_term(rest.lstrip(), allow_literal)
            if parsed is None:
                raise ValueError("Unable to parse %s from N-Triples line, '%s'"
                                 % (role, line))
            term, rest = parsed
            terms.append(term)
        # NOTE(review): whatever follows the object (normally " .") is not
        # validated, matching the previous behaviour.
        triples.append(tuple(terms))
    return triples
class Serializer:
    """Serialize or deserialize a model using N-Triples."""

    def __init__(self, reify=1):
        # NOTE(review): the reify argument is accepted but ignored; the
        # attribute is always 0 and nothing in this class reads it.
        self.reify = 0
        return

    def _bnodeFor(self, term, bnodes, model):
        """
        Return the model-side identifier for a parsed Resource.

        URI references map to their URI.  Blank nodes get a fresh identifier
        from model.generateBnode() the first time a label is seen; bnodes
        (label -> identifier) makes later uses of the label reuse it.
        """
        if not term.anon:
            return term.uri
        if term.name not in bnodes:
            bnodes[term.name] = model.generateBnode()
        return bnodes[term.name]

    def _ntResource(self, uri, bnodes, model):
        """
        Return the N-Triples text for a resource held in the model.

        Blank node labels (per model.isBnodeLabel) are written as
        "_:blankN"; bnodes (label -> name) keeps the numbering stable and
        unique within one serialize() call.  Anything else is written as
        "<uri>".
        """
        if not model.isBnodeLabel(uri):
            return "<" + uri + ">"
        if uri not in bnodes:
            # Number from the mapping size so each new label gets its own name.
            bnodes[uri] = "blank" + str(len(bnodes) + 1)
        return "_:" + bnodes[uri]

    def deserialize(self, model, stream, scope=None):
        """
        Read N-Triples from stream and add the statements to model.

        Every line of stream is parsed with ParseNTriples; each triple
        becomes a Statement created with the given scope, and all of them
        are passed to model.add() in one call.  A literal whose datatype is
        not the default one uses the datatype URI as its objectType.
        """
        triples = ParseNTriples(stream.readlines())
        stmts = []
        bnodes = {}
        for subj, pred, obj in triples:
            subject = self._bnodeFor(subj, bnodes, model)
            predicate = self._bnodeFor(pred, bnodes, model)
            if isinstance(obj, Resource):
                value = self._bnodeFor(obj, bnodes, model)
                objectType = OBJECT_TYPE_RESOURCE
            elif obj.datatype == RDF_SCHEMA_BASE+"Literal":
                value = obj.value
                objectType = OBJECT_TYPE_LITERAL
            else:
                value = obj.value
                objectType = obj.datatype
            stmts.append(Statement.Statement(subject, predicate, value,
                                             scope=scope, objectType=objectType))
        model.add(stmts)
        return

    def serialize(self, model, nsMap=None, selectUri=None,
                  localResources=[], stmts=None, stream=None):
        """
        Write statements as N-Triples and return the stream.

        stmts defaults to model.statements().  When selectUri is given only
        statements whose uri equals it are written; otherwise statements
        whose uri equals RDF_SCHEMA_BASE are left out.  stream defaults to a
        new cStringIO.StringIO().  nsMap and localResources are accepted but
        not used here.
        """
        if stmts is None:
            stmts = [s for s in model.statements() if s.uri != RDF_SCHEMA_BASE]
        if selectUri:
            stmts = [s for s in stmts if s.uri == selectUri]
        else:
            stmts = [s for s in stmts if s.uri != RDF_SCHEMA_BASE]

        if stream is None:
            stream = cStringIO.StringIO()

        bnodes = {}
        for stmt in stmts:
            stream.write(self._ntResource(stmt.subject, bnodes, model))
            stream.write("\t")
            stream.write(self._ntResource(stmt.predicate, bnodes, model))
            stream.write("\t")
            # NOTE(review): literal text is written verbatim, with no
            # escaping of quotes, backslashes or control characters.
            if stmt.objectType == OBJECT_TYPE_RESOURCE:
                stream.write(self._ntResource(stmt.object, bnodes, model))
            elif stmt.objectType in [OBJECT_TYPE_LITERAL, OBJECT_TYPE_UNKNOWN]:
                stream.write('"' + stmt.object + '"')
            else:
                # Any other objectType is a datatype URI; N-Triples writes
                # it as a URI reference in angle brackets.
                stream.write('"' + stmt.object + '"^^<' + stmt.objectType + '>')
            stream.write(" .\n")
        return stream
def Test():
    """
    Round-trip a small N-Triples sample through an in-memory model.

    The sample is deserialized into a Memory-backed Model, serialized back
    to N-Triples and printed.
    """
    sample = """\
<http://www.w3.org/2001/08/rdf-test/> <http://purl.org/dc/elements/1.1/creator> "Dave Beckett" .
<http://www.w3.org/2001/08/rdf-test/> <http://purl.org/dc/elements/1.1/creator> "Jan Grant" .
<http://www.w3.org/2001/08/rdf-test/> <http://purl.org/dc/elements/1.1/publisher> _:a .
_:a <http://purl.org/dc/elements/1.1/title> "World Wide Web Consortium" .
_:a <http://purl.org/dc/elements/1.1/source> <http://www.w3.org/> ."""

    source = cStringIO.StringIO(sample)

    from Ft.Rdf.Drivers import Memory
    from Ft.Rdf import Model

    model = Model.Model(Memory.CreateDb(''))
    serializer = Serializer()
    serializer.deserialize(model, source)

    output = cStringIO.StringIO()
    serializer.serialize(model, stream=output)
    print(output.getvalue())
    return
|