Author: gnichols Date: 2010-12-14 16:57:55 +0000 (Tue, 14 Dec 2010) New Revision: 686
Modified: trunk/v7/documentbase.py Log: 642029 - [v7] USB SCSI tape drives cause: UnicodeEncodeError: 'ascii' codec can't encode characters in position 13-20: ordinal not in range(128)
Modified: trunk/v7/documentbase.py =================================================================== --- trunk/v7/documentbase.py 2010-12-14 16:45:33 UTC (rev 685) +++ trunk/v7/documentbase.py 2010-12-14 16:57:55 UTC (rev 686) @@ -23,6 +23,8 @@ from device import Device, HalDevice, ProcDevice, KudzuDevice, UdevDevice
class DocumentWrapper: + # non-printing ascii unacceptable to xml + asciiBadForXML = map(chr, range(9) + range(11,13) + range(14,32))
def __init__(self): self.document = None @@ -60,20 +62,19 @@ parent.appendChild(child) return child
- def setTextNode(self, element, value): - value = self.removeBadCharacters(value) + def setTextNode(self, element, value, filter=True): if value: - value = self.removeBadCharacters(value) + if filter: + value = self.removeBadCharacters(value) # is there an existing child text node? textNode = self.getTextNode(element) if textNode: textNode.data = value - return + else: + # else no existing text child note, create it + textNode = self.document.createTextNode(value) + element.appendChild(textNode)
- # else no existing text child note, create it - textNode = self.document.createTextNode(value) - element.appendChild(textNode) - def addTextNode(self, element, value): if value: if type(value) == unicode: @@ -99,6 +100,7 @@ stop = datetime.datetime(*(time.strptime(stopTime, Constants.DATETIMEFORMAT)[0:5])) return stop - start
+ def duration(self, startTime, stopTime): duration = self.durationData(startTime, stopTime) if duration.days > 0: @@ -113,32 +115,24 @@
def timeStringToTimestamp(self, theTimeString): + return theTimeString.replace(" ", "").replace(":", "").replace("-", "")
def removeBadCharacters(self, value): - if type(value) == unicode: - badUnicode = [u'\xae'] - goodValue = u"" - for uc in value: - if uc not in badUnicode: - goodValue += uc - value = goodValue.encode('ascii') - - try: - # badnumbers = [0,8,27,246] - badnumbers = range(9) # various non-printing characters - badnumbers.extend(range(14,31)) # various control characters, escape - badnumbers.extend(range(128, 256)) - badchars = "" - for i in badnumbers: - badchars += chr(i) - badchars += "|" - chars = "" - for i in range(256): - chars += chr(i) - return value.translate(chars, badchars) - except TypeError: - return value + filtered = u'' + filterStart = 0 + # for c in value: + for i in range(len(value)): + c = value[i] + if c in self.asciiBadForXML or (type(value) != unicode and ord(c) >= 128): + if self.Debugging != Constants.off: + print "skipping bad char \\x%.2x" % ord(c) + if filterStart < i: + filtered += value[filterStart:i] + filterStart = i+1 + filtered += value[filterStart:] + return filtered +
def setAttribute(self, element, attribute, value): if not value: @@ -259,3 +253,58 @@ sys.exit(1)
file.close() + + + +if __name__ == '__main__': + + """ Test class for read/write of documents to xml files """ + class XmlFileTest(DocumentBase): + def __init__(self): + dom = getDOMImplementation() + self.document = dom.createDocument(None, "documentbase-test", None) + self.topElement = self.document.documentElement + stylesheet = self.document.createProcessingInstruction("xml-stylesheet", "href=\"/v7/css/results.css\" type=\"text/css\"") + self.document.insertBefore(stylesheet, self.topElement) + def test(self, data, id): + self.topElement.setAttribute("test", data.encode("utf-8")) + textNode = self.document.createTextNode(data) + self.topElement.appendChild(textNode) + file = open("documentbase-selftest-%s.xml" % id, "w") + xml.dom.ext.PrettyPrint(self.document, file) + file.close() + file = open("documentbase-selftest-%s.xml" % id, "r") + document = xml.dom.minidom.parse(file) + file.close() + print "data parsed, written, and read from xml file" + + testData = list() + print "\nTesting ascii 0-127:" + bad = '' # str, not unicode + for i in range(128): + bad += chr(i) + sample = u'' + bad + testData.append((sample, "0-127")) + + bad = u'' # unicode + for i in range(128,256): + bad += unichr(i) + sample = u'' + bad + testData.append((sample, "128-255")) + + testData.append((u'\u1234\u20ac\u0000\u2332', "multibyte")) + + testData.append((u'some\nwords\nget\nchopped', 'regular')) + + testData.append((chr(0xc2), "ascii-value")) + + wrapper = DocumentWrapper() + wrapper.Debugging = Constants.high + + for (sample, id) in testData: + xmlFileTest = XmlFileTest() + sample = wrapper.removeBadCharacters(sample) + xmlFileTest.test(sample, id) + + +
v7-commits@lists.stg.fedorahosted.org