Author: gnichols
Date: 2010-12-14 16:57:55 +0000 (Tue, 14 Dec 2010)
New Revision: 686
Modified:
trunk/v7/documentbase.py
Log:
642029 - [v7] USB SCSI tape drives cause: UnicodeEncodeError: 'ascii' codec can't encode characters in position 13-20: ordinal not in range(128)
Modified: trunk/v7/documentbase.py
===================================================================
--- trunk/v7/documentbase.py 2010-12-14 16:45:33 UTC (rev 685)
+++ trunk/v7/documentbase.py 2010-12-14 16:57:55 UTC (rev 686)
@@ -23,6 +23,8 @@
from device import Device, HalDevice, ProcDevice, KudzuDevice, UdevDevice
class DocumentWrapper:
+ # non-printing ASCII control characters that are not acceptable in XML (tab, LF and CR are kept)
+ asciiBadForXML = map(chr, range(9) + range(11,13) + range(14,32))
def __init__(self):
self.document = None
@@ -60,20 +62,19 @@
parent.appendChild(child)
return child
- def setTextNode(self, element, value):
- value = self.removeBadCharacters(value)
+ def setTextNode(self, element, value, filter=True):
if value:
- value = self.removeBadCharacters(value)
+ if filter:
+ value = self.removeBadCharacters(value)
# is there an existing child text node?
textNode = self.getTextNode(element)
if textNode:
textNode.data = value
- return
+ else:
+ # no existing text child node, so create one
+ textNode = self.document.createTextNode(value)
+ element.appendChild(textNode)
- # else no existing text child note, create it
- textNode = self.document.createTextNode(value)
- element.appendChild(textNode)
-
def addTextNode(self, element, value):
if value:
if type(value) == unicode:
@@ -99,6 +100,7 @@
stop = datetime.datetime(*(time.strptime(stopTime, Constants.DATETIMEFORMAT)[0:5]))
return stop - start
+
def duration(self, startTime, stopTime):
duration = self.durationData(startTime, stopTime)
if duration.days > 0:
@@ -113,32 +115,24 @@
def timeStringToTimestamp(self, theTimeString):
+
return theTimeString.replace(" ", "").replace(":", "").replace("-", "")
def removeBadCharacters(self, value):
- if type(value) == unicode:
- badUnicode = [u'\xae']
- goodValue = u""
- for uc in value:
- if uc not in badUnicode:
- goodValue += uc
- value = goodValue.encode('ascii')
-
- try:
- # badnumbers = [0,8,27,246]
- badnumbers = range(9) # various non-printing characters
- badnumbers.extend(range(14,31)) # various control characters, escape
- badnumbers.extend(range(128, 256))
- badchars = ""
- for i in badnumbers:
- badchars += chr(i)
- badchars += "|"
- chars = ""
- for i in range(256):
- chars += chr(i)
- return value.translate(chars, badchars)
- except TypeError:
- return value
+ filtered = u''
+ filterStart = 0
+ # for c in value:
+ for i in range(len(value)):
+ c = value[i]
+ if c in self.asciiBadForXML or (type(value) != unicode and ord(c) >= 128):
+ if self.Debugging != Constants.off:
+ print "skipping bad char \\x%.2x" % ord(c)
+ if filterStart < i:
+ filtered += value[filterStart:i]
+ filterStart = i+1
+ filtered += value[filterStart:]
+ return filtered
+
def setAttribute(self, element, attribute, value):
if not value:
@@ -259,3 +253,58 @@
sys.exit(1)
file.close()
+
+
+
+if __name__ == '__main__':
+
+ """ Test class for read/write of documents to xml files """
+ class XmlFileTest(DocumentBase):
+ def __init__(self):
+ dom = getDOMImplementation()
+ self.document = dom.createDocument(None, "documentbase-test", None)
+ self.topElement = self.document.documentElement
+ stylesheet = self.document.createProcessingInstruction("xml-stylesheet", "href=\"/v7/css/results.css\" type=\"text/css\"")
+ self.document.insertBefore(stylesheet, self.topElement)
+ def test(self, data, id):
+ self.topElement.setAttribute("test", data.encode("utf-8"))
+ textNode = self.document.createTextNode(data)
+ self.topElement.appendChild(textNode)
+ file = open("documentbase-selftest-%s.xml" % id, "w")
+ xml.dom.ext.PrettyPrint(self.document, file)
+ file.close()
+ file = open("documentbase-selftest-%s.xml" % id, "r")
+ document = xml.dom.minidom.parse(file)
+ file.close()
+ print "data parsed, written, and read from xml file"
+
+ testData = list()
+ print "\nTesting ascii 0-127:"
+ bad = '' # str, not unicode
+ for i in range(128):
+ bad += chr(i)
+ sample = u'' + bad
+ testData.append((sample, "0-127"))
+
+ bad = u'' # unicode
+ for i in range(128,256):
+ bad += unichr(i)
+ sample = u'' + bad
+ testData.append((sample, "128-255"))
+
+ testData.append((u'\u1234\u20ac\u0000\u2332', "multibyte"))
+
+ testData.append((u'some\nwords\nget\nchopped', 'regular'))
+
+ testData.append((chr(0xc2), "ascii-value"))
+
+ wrapper = DocumentWrapper()
+ wrapper.Debugging = Constants.high
+
+ for (sample, id) in testData:
+ xmlFileTest = XmlFileTest()
+ sample = wrapper.removeBadCharacters(sample)
+ xmlFileTest.test(sample, id)
+
+
+