Commit eba5bb56 authored by Thomas Huetter

workaround: escaping '\', '{' and '}' for dblp and swissprot

parent 03845e51
......@@ -15,7 +15,11 @@ class DBLPContentHandler(ContentHandler):
d = dict(attributes)
# Sort the attributes by their keys.
for key, value in sorted(d.items(), key = lambda element : element[0][1]):
self.bn += "{" + key[1] + "{" + value + "}}"
self.bn += "{" + key[1].translate(str.maketrans({"{": r"\{",
"}": r"\}",
"\\": r"\\"})) + "{" + value.translate(str.maketrans({"{": r"\{",
"}": r"\}",
"\\": r"\\"})) + "}}"
# Close tag.
def endElementNS(self, name, qname):
......@@ -23,7 +27,9 @@ class DBLPContentHandler(ContentHandler):
# Tag content.
def characters(self, data):
self.bn += "{" + data + "}"
self.bn += "{" + data.translate(str.maketrans({"{": r"\{",
"}": r"\}",
"\\": r"\\"})) + "}"
print("--- Loading DBLP dataset.")
......
......@@ -15,7 +15,11 @@ class SwissprotContentHandler(ContentHandler):
d = dict(attributes)
# Sort the attributes by their keys.
for key, value in sorted(d.items(), key = lambda element : element[0][1]):
self.bn += "{" + key[1] + "{" + value + "}}"
self.bn += "{" + key[1].translate(str.maketrans({"{": r"\{",
"}": r"\}",
"\\": r"\\"})) + "{" + value.translate(str.maketrans({"{": r"\{",
"}": r"\}",
"\\": r"\\"})) + "}}"
# Close tag.
def endElementNS(self, name, qname):
......@@ -23,12 +27,14 @@ class SwissprotContentHandler(ContentHandler):
# Tag content.
def characters(self, data):
self.bn += "{" + data + "}"
self.bn += "{" + data.translate(str.maketrans({"{": r"\{",
"}": r"\}",
"\\": r"\\"})) + "}"
print("--- Loading Swissprot dataset.")
swissprot_parser = etree.XMLParser(load_dtd=False, remove_blank_text=True)
swissprot_data_tree = etree.parse('uniprot_sprot.xml', dblp_parser)
swissprot_data_tree = etree.parse('uniprot_sprot.xml', swissprot_parser)
root = swissprot_data_tree.getroot()
# Output files.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment