Module record
[hide private]
[frames] | no frames]

Source Code for Module record

   1    
   2  from baseObjects import Record 
   3  from c3errors import C3Exception 
   4  import types, utils, os, re 
   5  from Ft.Xml.Domlette import implementation, Print 
   6  from cStringIO import StringIO 
   7  from xml.sax.saxutils import escape 
   8  from PyZ3950.zmarc_relaxed import MARC 
   9  from xml.sax import ContentHandler 
  10   
  11  from utils import Context, flattenTexts 
  12   
  13  # 1 <name> <attrHash> parent predicate end 
  14  # Element 
  15  # 4 <as 1> 
  16  # Namespaced Element 
  17  # 2 <name> <startLine> 
  18  # End Element 
  19  # 5 <as 2> 
  20  # End Namespaced 
  21  # 3 <text> 
  22  # Characters 
  23  # 9 <element hash> 
  24  # Hash of locations 
  25   
  26  # Split to separate object to allow for DOM->SAX direct conversion 
  27  # by throwing events from DOM tree to handler. 
class SaxContentHandler(ContentHandler):
    """SAX handler that flattens parse events into a list of text lines.

    Each event appends one string to ``currentText``; the leading digit
    identifies the event type:

    * ``1 <name> <attrHash> <parent> <npred> <end>`` -- element start
    * ``4 <uri>, <localName>, <qname>, <attrHash> <parent> <npred> <end>``
      -- namespaced element start
    * ``2 <name> <start>`` -- element end
    * ``5 <uri>, <localName>, <qname> <start>`` -- namespaced element end
    * ``3 <text>`` -- character data
    * ``6 <prefix>, <uri>`` -- prefix mapping

    ``<parent>`` is the line index of the enclosing element start (-1 at the
    top level), ``<npred>`` is the 1-based position of the element among its
    same-named siblings, and ``<end>`` is appended to a start line once the
    matching end event has been seen.

    ``elementHash`` maps element names (and, for attributes listed in
    ``hashAttributesNames``, keys of the form ``name[@attr='value']`` and
    ``*[@attr='value']``) to lists of ``[startLine, endLine]`` pairs.
    """

    # Class-level defaults.  __init__() and reinit() rebind the mutable ones
    # on each instance, so these objects are not shared in normal use.
    currentText = []
    currentPath = []
    pathLines = []
    currentLine = -1
    recordSize = 0
    elementHash = {}
    namespaces = []
    hashAttributesNames = {}
    hashAttributes = []
    stripWS = 0
    saveElementIndexes = 1

    def __init__(self):
        """Set the configuration defaults and reset the per-document state."""
        ContentHandler.__init__(self)
        self.saveElementIndexes = 1
        self.hashAttributesNames = {}
        self.hashAttributes = []
        self.stripWS = 0
        self.reinit()

    def reinit(self):
        """Reset the per-document state; configuration attributes are kept."""
        # NOTE(review): hashAttributes (the stack of pending attribute keys)
        # is not reset here, matching the original behaviour -- confirm that
        # reinit() is never called in the middle of a document.
        self.currentText = []
        self.currentPath = []
        self.pathLines = []
        self.currentLine = -1
        self.recordSize = 0
        self.elementHash = {}
        self.elementIndexes = []
        self.namespaces = []

    def startPrefixMapping(self, pfx, uri):
        """Record a prefix mapping as a type-6 line; None prefix becomes ''."""
        self.currentLine += 1
        if pfx is None:
            pfx = ''
        self.currentText.append("6 %r, %r" % (pfx, uri))

    # We want to fwd elems to NS elem handlers with default NS?
    def startElement(self, name, attrs):
        """Record a non-namespaced element start as a type-1 line.

        Attribute values are passed through ``escape`` before being stored.
        """
        parent = self._pushElement()
        attrHash = {}
        if attrs:
            for k in attrs.keys():
                attrHash[k] = escape(attrs[k])
        npred = self._nextPredicate(name)
        self.currentText.append(
            "1 %s %s %d %d" % (name, repr(attrHash), parent, npred))
        self._saveHashAttributes(name, attrHash)

    def endElement(self, name):
        """Record a non-namespaced element end as a type-2 line."""
        self.currentLine += 1
        start = self.pathLines.pop()
        self.currentText.append("2 %s %d" % (name, start))
        self._closeElement(name, start)

    def startElementNS(self, name, qname, attrs):
        """Record a namespaced element start as a type-4 line.

        ``name`` is indexed as ``(uri, localName)``.  Attribute values are
        stored as received (not escaped).
        """
        parent = self._pushElement()
        # Copy the SAX attributes object into a plain dictionary.
        attrHash = {}
        if attrs:
            for k in attrs.keys():
                attrHash[k] = attrs[k]
        simpleName = name[1]
        npred = self._nextPredicate(simpleName)
        self.currentText.append(
            "4 %r, %r, %r, %r %d %d"
            % (name[0], simpleName, qname, attrHash, parent, npred))
        self._saveHashAttributes(simpleName, attrHash)

    def endElementNS(self, name, qname):
        """Record a namespaced element end as a type-5 line."""
        self.currentLine += 1
        start = self.pathLines.pop()
        self.currentText.append(
            "5 %r, %r, %r %d" % (name[0], name[1], qname, start))
        self._closeElement(name[1], start)

    def characters(self, text, start=0, length=-1):
        """Record character data as a type-3 line and count its words.

        Whitespace-only text is dropped when ``stripWS`` is set.  ``start``
        and ``length`` are accepted but not used.
        """
        if self.stripWS and text.isspace():
            return
        # An empty event list used to raise IndexError here; treat it as
        # "no previous line" instead.
        prev = self.currentText[-1] if self.currentText else ''
        self.currentLine += 1
        if (len(text) != 1 and len(prev) != 3 and prev.startswith("3")
                and prev[-1] not in (' ', '-')):
            # Adjacent lines of text, ensure spaces
            text = ' ' + text
        self.currentText.append("3 %s" % (text))
        self.recordSize += len(text.split())

    def ignorableWhitespace(self, ws):
        """Ignore ignorable whitespace."""
        pass

    def processingInstruction(self, target, data):
        """Ignore processing instructions."""
        pass

    def skippedEntity(self, name):
        """Ignore skipped entities."""
        pass

    def _pushElement(self):
        """Advance to a new line, push it on the path, return the parent line."""
        self.currentLine += 1
        self.pathLines.append(self.currentLine)
        if len(self.pathLines) > 1:
            return self.pathLines[-2]
        return -1

    def _nextPredicate(self, name):
        """Return the 1-based index of `name` among its same-named siblings."""
        if not self.elementIndexes:
            # No open parent yet: start a fresh top-level counter table.
            self.elementIndexes = [{}]
        siblings = self.elementIndexes[-1]
        npred = siblings.get(name, 0) + 1
        siblings[name] = npred
        # Counter table for the children of the element being opened.
        self.elementIndexes.append({})
        return npred

    def _saveHashAttributes(self, name, attrHash):
        """Queue elementHash keys for the configured attributes of `name`."""
        saveAttrs = []
        for a in self.hashAttributesNames.get(name) or ():
            if a in attrHash:
                saveAttrs.append("%s[@%s='%s']" % (name, a, attrHash[a]))
        for a in self.hashAttributesNames.get('*') or ():
            if a in attrHash:
                saveAttrs.append("*[@%s='%s']" % (a, attrHash[a]))
        if saveAttrs:
            self.hashAttributes.append((self.currentLine, saveAttrs))

    def _closeElement(self, name, start):
        """Stamp the end line on the start line and index the element span."""
        self.currentText[start] = "%s %d" % (
            self.currentText[start], self.currentLine)
        self.elementIndexes.pop()
        self.elementHash.setdefault(name, []).append(
            [start, self.currentLine])
        if self.hashAttributes and self.hashAttributes[-1][0] == start:
            for sa in self.hashAttributes.pop()[1]:
                # A fresh list per key so entries never alias each other.
                self.elementHash.setdefault(sa, []).append(
                    [start, self.currentLine])
226
class SaxToDomHandler:
    """Build a Domlette DOM document from SAX-style events.

    Elements are created through ``implementation.createDocument`` (for the
    first element seen) or ``document.createElementNS`` (for later ones) and
    attached to the element on top of ``nodeStack``.
    """

    # Class-level defaults; __init__() gives every instance its own state so
    # that handlers never share one nodeStack list.
    nodeStack = []
    document = None
    currText = ""

    def __init__(self):
        """Start with a clean, per-instance state."""
        self.initState()

    def initState(self):
        """Discard any document under construction and empty the node stack."""
        self.nodeStack = []
        self.document = None
        self.top = None

    def startElement(self, name, attribs=None):
        """Create a non-namespaced element and make it the current node.

        `attribs` is iterated for attribute names and indexed for their
        values; omitting it means no attributes.
        """
        if attribs is None:
            attribs = {}
        if not self.document:
            # First element: create the document with it as the root.
            self.document = implementation.createDocument(None, name, None)
            elem = self.document.childNodes[0]
        else:
            elem = self.document.createElementNS(None, name)
        for a in attribs:
            elem.setAttributeNS(None, a, attribs[a])
        self._attach(elem)

    def endElement(self, foo):
        """Close the current element; the name argument is not used."""
        self.nodeStack.pop()

    def characters(self, text, zero=0, length=0):
        """Append a text node to the current element.

        Whitespace-only text is collapsed to a single space.  Text received
        while no element is open is dropped.  `zero` and `length` are
        accepted but not used.
        """
        if self.nodeStack:
            if text.isspace():
                text = " "
            # Is this escape necessary?
            text = escape(text)
            d = self.document.createTextNode(text)
            self.nodeStack[-1].appendChild(d)

    def startElementNS(self, name, qname, attribs):
        """Create a namespaced element and make it the current node.

        `name` is indexed as ``(uri, localName)``; each key of `attribs` is
        indexed the same way.  `qname` is not used.
        """
        if not self.document:
            self.document = implementation.createDocument(
                name[0], name[1], None)
            elem = self.document.childNodes[0]
        else:
            elem = self.document.createElementNS(name[0], name[1])
        for a in attribs:
            elem.setAttributeNS(a[0], a[1], attribs[a])
        self._attach(elem)

    def endElementNS(self, name, qname):
        """Close the current element; the arguments are not used."""
        self.nodeStack.pop()

    def startPrefixMapping(self, pref, uri):
        """Ignore prefix mappings."""
        pass

    def getRootNode(self):
        """Return the document built so far (None if nothing was started)."""
        return self.document

    def _attach(self, elem):
        """Append `elem` under the current node (or the document) and push it."""
        if self.nodeStack:
            self.nodeStack[-1].appendChild(elem)
        else:
            self.document.appendChild(elem)
        self.nodeStack.append(elem)


s2dhandler = SaxToDomHandler()
289 -class SaxToXmlHandler:
290 xml = [] 291 currNs = 0 292 newNamespaces = {} 293
def initState(self):
    """Reset the output buffer and namespace bookkeeping to empty defaults."""
    # Output fragments collected so far.
    self.xml = list()
    # uri -> prefix table.
    self.namespaces = dict()
    self.currNs = 0
    # prefix -> uri table.
    self.newNamespaces = dict()
299
def startPrefixMapping(self, pref, uri):
    """Record a prefix binding in both the uri->prefix and prefix->uri tables."""
    self.namespaces.update({uri: pref})
    self.newNamespaces.update({pref: uri})
303
def startElement(self, name, attribs={}):
    """Append the opening tag for `name`, with its attributes, to self.xml.

    Attribute values are written between double quotes exactly as given.
    """
    rendered = ' '.join(
        ['%s="%s"' % (key, attribs[key]) for key in attribs])
    # Separate the attribute list from the tag name only when there is one.
    prefix = " " if rendered else ""
    self.xml.append("<%s%s>" % (name, prefix + rendered))
312
313 -