1
2 from baseObjects import Record
3 from c3errors import C3Exception
4 import types, utils, os, re
5 from Ft.Xml.Domlette import implementation, Print
6 from cStringIO import StringIO
7 from xml.sax.saxutils import escape
8 from PyZ3950.zmarc_relaxed import MARC
9 from xml.sax import ContentHandler
10
11 from utils import Context, flattenTexts
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
class SaxContentHandler(ContentHandler):
    """Flatten a stream of SAX events into a list of event strings.

    Every handled event appends one string to ``currentText``; the index of
    that string is the event's "line" number (``currentLine``).  The first
    character of each string identifies the event type:

    * ``1 name attrs parent npred end``  -- startElement
    * ``2 name start``                   -- endElement
    * ``3 text``                         -- characters
    * ``4 ns, local, qname, attrs parent npred end``  -- startElementNS
    * ``5 ns, local, qname start``       -- endElementNS
    * ``6 prefix, uri``                  -- startPrefixMapping

    ``parent`` is the line of the enclosing start event (-1 at the root),
    ``npred`` is the 1-based position of the element among same-named
    siblings, and ``end`` (the line of the matching end event) is appended
    to the start string once the element is closed.

    ``elementHash`` maps an element name to the ``[start, end]`` line pairs of
    its occurrences.  When ``hashAttributesNames`` maps an element name (or
    ``'*'``) to a list of attribute names, occurrences carrying one of those
    attributes are also recorded under ``name[@attr='value']`` /
    ``*[@attr='value']`` keys.
    """

    # Class-level defaults.  __init__/reinit rebind every one of these on the
    # instance, so the mutable values below are not shared once an instance
    # has been initialised.
    currentText = []
    currentPath = []
    pathLines = []
    currentLine = -1
    recordSize = 0
    elementHash = {}
    elementIndexes = []
    namespaces = []
    hashAttributesNames = {}
    hashAttributes = []
    stripWS = 0
    saveElementIndexes = 1

    def __init__(self):
        """Set the configuration defaults and reset the per-document state."""
        # Explicit base call (not super()) so this also works where
        # ContentHandler is an old-style class.
        ContentHandler.__init__(self)
        self.saveElementIndexes = 1
        self.hashAttributesNames = {}
        self.hashAttributes = []
        self.stripWS = 0
        self.reinit()

    def reinit(self):
        """Reset the per-document state so the handler can be reused."""
        self.currentText = []
        self.currentPath = []
        self.pathLines = []
        self.currentLine = -1
        self.recordSize = 0
        self.elementHash = {}
        self.elementIndexes = []
        self.namespaces = []

    # -- private helpers shared by the plain and namespace-aware events --

    def _pushElement(self):
        """Advance the line counter for a start event; return the parent line."""
        self.currentLine += 1
        self.pathLines.append(self.currentLine)
        if len(self.pathLines) > 1:
            return self.pathLines[-2]
        return -1

    def _nextSiblingIndex(self, name):
        """Return the 1-based index of `name` among its same-named siblings.

        Also pushes a fresh counter dict for the children of the new element.
        """
        if self.elementIndexes:
            counters = self.elementIndexes[-1]
            npred = counters.get(name, 0) + 1
            counters[name] = npred
        else:
            # First element seen since reinit().
            npred = 1
            self.elementIndexes = [{name: npred}]
        self.elementIndexes.append({})
        return npred

    def _saveHashAttributes(self, name, attrHash):
        """Queue the attribute-qualified hash keys for the current element."""
        saveAttrs = []
        for a in self.hashAttributesNames.get(name) or ():
            try:
                saveAttrs.append("%s[@%s='%s']" % (name, a, attrHash[a]))
            except (KeyError, TypeError, ValueError):
                # Attribute absent or not formattable: matching is
                # best-effort, so skip it.
                pass
        for a in self.hashAttributesNames.get('*') or ():
            try:
                saveAttrs.append("*[@%s='%s']" % (a, attrHash[a]))
            except (KeyError, TypeError, ValueError):
                pass
        if saveAttrs:
            self.hashAttributes.append((self.currentLine, saveAttrs))

    def _popElement(self):
        """Advance the line counter for an end event; return the start line."""
        self.currentLine += 1
        return self.pathLines.pop()

    def _closeElement(self, name, start):
        """Patch the start event with its end line and record the span."""
        self.currentText[start] = "%s %d" % (self.currentText[start],
                                             self.currentLine)
        self.elementIndexes.pop()
        self.elementHash.setdefault(name, []).append(
            [start, self.currentLine])
        if self.hashAttributes and self.hashAttributes[-1][0] == start:
            for sa in self.hashAttributes.pop()[1]:
                # A fresh list per key so the entries are never aliased.
                self.elementHash.setdefault(sa, []).append(
                    [start, self.currentLine])

    # -- SAX ContentHandler interface --

    def startPrefixMapping(self, pfx, uri):
        """Record a namespace declaration; a None prefix is stored as ''."""
        self.currentLine += 1
        if pfx is None:
            pfx = ''
        self.currentText.append("6 %r, %r" % (pfx, uri))

    def startElement(self, name, attrs):
        """Record a non-namespaced start tag; attribute values are escaped."""
        parent = self._pushElement()
        attrHash = {}
        if attrs:
            for k in attrs.keys():
                attrHash[k] = escape(attrs[k])
        npred = self._nextSiblingIndex(name)
        self.currentText.append("1 %s %s %d %d"
                                % (name, repr(attrHash), parent, npred))
        self._saveHashAttributes(name, attrHash)

    def endElement(self, name):
        """Record a non-namespaced end tag and close the element's span."""
        start = self._popElement()
        self.currentText.append("2 %s %d" % (name, start))
        self._closeElement(name, start)

    def startElementNS(self, name, qname, attrs):
        """Record a namespaced start tag; `name` is indexed as (uri, local).

        NOTE: unlike startElement, attribute values are stored unescaped and
        the attribute keys are whatever keys `attrs` exposes.
        """
        parent = self._pushElement()
        attrHash = {}
        if attrs:
            for k in attrs.keys():
                attrHash[k] = attrs[k]
        simpleName = name[1]
        npred = self._nextSiblingIndex(simpleName)
        self.currentText.append("4 %r, %r, %r, %r %d %d"
                                % (name[0], simpleName, qname, attrHash,
                                   parent, npred))
        self._saveHashAttributes(simpleName, attrHash)

    def endElementNS(self, name, qname):
        """Record a namespaced end tag; the span is hashed by local name."""
        start = self._popElement()
        self.currentText.append("5 %r, %r, %r %d"
                                % (name[0], name[1], qname, start))
        self._closeElement(name[1], start)

    def characters(self, text, start=0, length=-1):
        """Record a run of character data and add its words to recordSize.

        Whitespace-only runs are dropped when ``stripWS`` is set.  `start`
        and `length` are accepted for interface compatibility and unused.
        """
        if self.stripWS and text.isspace():
            return
        # Tolerate character data arriving before any other event.
        if self.currentText:
            prev = self.currentText[-1]
        else:
            prev = ""
        self.currentLine += 1
        # When the previous event was a multi-character text run that did not
        # end in a space or hyphen, separate this run from it with a space.
        if (len(text) != 1 and len(prev) != 3 and prev.startswith("3")
                and prev[-1] not in (' ', '-')):
            text = ' ' + text
        self.currentText.append("3 %s" % text)
        self.recordSize += len(text.split())

    def ignorableWhitespace(self, ws):
        """Ignore ignorable whitespace."""
        pass

    def processingInstruction(self, target, data):
        """Ignore processing instructions."""
        pass

    def skippedEntity(self, name):
        """Ignore skipped entities."""
        pass
226
286
287 s2dhandler = SaxToDomHandler()
288
290 xml = []
291 currNs = 0
292 newNamespaces = {}
293
299
303
305 attrs = []
306 for a in attribs:
307 attrs.append('%s="%s"' % (a, attribs[a]))
308 attribtxt = ' '.join(attrs)
309 if (attribtxt):
310 attribtxt = " " + attribtxt
311 self.xml.append("<%s%s>" % (name, attribtxt))
312