1
2 import socket, time
# Module-level side effect: every socket created after this call without an
# explicit timeout gets a 30-second default timeout.
3 socket.setdefaulttimeout(30)
4
5 from baseObjects import DocumentFactory
6 from document import StringDocument
7 from record import SaxRecord
8 from bootstrap import BSParser
9 from utils import elementType, getFirstData, flattenTexts, reader, verifyXPaths
10 import re, os, c3errors, tarfile, cStringIO, sys, gzip
11 import mimetypes, httplib, urllib, urlparse, urllib2
12 import commands, codecs, types
13 from ZSI.client import Binding
14 from PyZ3950 import zoom
15 import SRW
16 from c3errors import *
17 from ftplib import FTP
18 from GoogleSearch_services import *
19 from utils import reader
20
# Register the '.marc' filename extension with the MIME type
# 'application/marc' in the mimetypes module's mapping.
21 mimetypes.add_type('application/marc', '.marc')
22
23
24
25
26
27
28
80
81
100
101
# Class-level defaults. The enclosing class header is not visible in this
# extract.
# start:  the scanning code further down calls self.start.search(line), so a
#         usable value must provide .search(); presumably the __init__ that
#         follows (body not visible here) replaces this None -- confirm.
# endtag: closing-tag string read as self.endtag by the scanning code; while it
#         is empty, the closing tag is derived from each start match instead.
103 start = None
104 endtag = ""
105
106 - def __init__(self, session, stream, format, schema="", codec="", factory=None):
114
# NOTE(review): this extract has lost its indentation, and the `def` line that
# owns these statements is not visible. `self` and `cache` are presumably that
# function's parameters -- confirm against the full file.
#
# Purpose: read self.stream in 1024-character chunks, locate each span that
# begins at a match of self.start and ends with the closing tag, and hand the
# span on according to `cache`:
#   cache == 0 -> yield a StringDocument for the span
#   cache == 1 -> record (start offset, length) in locs
#   cache == 2 -> append a StringDocument for the span to docs
# After the loop the stream is closed and the collected results are stored on
# self.locations, self.documents and self.length.
116 docs = []
117 locs = []
118 endtag = self.endtag
# Length of the closing tag; used below to back the search position up.
119 let = len(endtag)
# Offset within the stream of the first character currently held in `line`;
# advanced whenever leading text or a finished document is dropped.
120 myTell = 0
# Text prepended to every emitted document (see the note at the reset below).
121 xpi = ""
# Working buffer of text read but not yet consumed.
122 line = ""
123 while True:
# Buffer length before this iteration's read; compared further down to detect
# that the read produced no new data.
124 ol = len(line)
125 line += self.stream.read(1024)
# Look for an XML declaration in the buffer.
126 pi = line.find("<?xml ")
127 if (pi > -1):
128
129 endpi = line.find("?>")
130 xpi = line[pi:endpi+2] + "\n"
# NOTE(review): xpi is set back to "" right after the assignment above. Whether
# this line sits inside or after the `if` cannot be recovered from this extract,
# but either way xpi is "" when it is used below, so the captured declaration is
# never prepended. Confirm whether that is intentional.
131 xpi= ""
# Find where the next document starts.
132 m = self.start.search(line)
133 if m:
134 if not self.endtag:
# No fixed closing tag configured: build one from the matched text with its
# first and last characters removed (presumably the '<' and the delimiter that
# follows the tag name -- confirm against the pattern assigned to self.start).
135 endtag = "</%s>" % m.group()[1:-1]
136 let = len(endtag)
# Drop everything before the start match and account for it in myTell.
137 s = m.start()
138 line = line[s:]
139 myTell += s
# Stream offset at which this document begins.
140 start = myTell
141 end = -1
# 0 on the first pass; afterwards the buffer length before the latest extra read.
142 strStart = 0
# Keep reading until the closing tag is found.
# NOTE(review): if the stream runs out before the closing tag appears, the extra
# read adds nothing and `end` stays -1; no exit from this loop is visible for
# that case. Confirm behaviour on truncated input.
143 while end == -1:
144 if strStart:
145
# Resume the search `let` characters before the previously searched end so a
# closing tag split across two reads is still found.
146 end = line.find(endtag, strStart-let)
147 else:
148 end = line.find(endtag)
149 if end > 0:
# Cut the complete document (closing tag included) off the front of the buffer.
150 tlen = end+len(endtag)
151 txt = line[:tlen]
152 line = line[tlen:]
153 myTell += tlen
154 if cache == 0:
155 yield StringDocument(xpi + txt, mimeType="text/xml", schema=self.schema)
156 elif cache == 1:
157 locs.append((start, tlen))
158 elif cache == 2:
159 docs.append(StringDocument(xpi + txt, mimeType="text/xml", schema=self.schema))
# Presumably the `else` of `if end > 0` (nesting lost): closing tag not in the
# buffer yet, so remember how far has been searched and read another chunk.
160 else:
161 strStart = len(line)
162 line += self.stream.read(1024)
# No start match and the read added nothing: treat the input as exhausted.
163 if len(line) == ol and not m:
164 if cache == 0:
165 self.stream.close()
# NOTE(review): raising StopIteration in a function that yields ends iteration
# on the Python 2 interpreters this file targets; under PEP 479 semantics it is
# turned into a RuntimeError instead.
166 raise StopIteration
167 else:
168 break
# Reached via the `break` above; the cache == 0 path raises before getting here.
169 self.stream.close()
170 self.locations = locs
171 self.documents = docs
# Only one of the two lists is filled for a given cache mode, so the longer one
# gives the number of documents found.
172 self.length = max(len(locs), len(docs))
173
174
176
# NOTE(review): this extract has lost its indentation, and the `def` line that
# owns these statements is not visible. `self` and `cache` are presumably that
# function's parameters -- confirm against the full file.
#
# Purpose: read self.stream in 1536-character chunks and split it into records,
# each ending with the "\x1D" character. Every record is handed on according to
# `cache`:
#   cache == 0 -> yield a StringDocument for the record
#   cache == 1 -> record (offset, length) in locs
#   cache == 2 -> append a StringDocument for the record to docs
# When the input is exhausted the stream is closed and the collected results are
# stored on self.locations, self.documents and self.length.
178 docs = []
179 locs = []
180 data = self.stream.read(1536)
# Offset within the stream of the first character currently held in `data`.
181 myTell = 0
182 while data:
# Position of the next record terminator in the buffer, or -1 if none yet.
183 rt = data.find("\x1D")
# Emit every complete record currently in the buffer.
184 while (rt > -1):
# The record text includes its terminating "\x1D".
185 txt = data[:rt+1]
186 tlen = len(txt)
187 if cache == 0:
188 yield StringDocument(txt, mimeType="application/marc")
189 elif cache == 1:
190 locs.append((myTell, tlen))
191 elif cache == 2:
192 docs.append(StringDocument(txt, mimeType="application/marc"))
# Drop the emitted record from the buffer and advance the stream offset.
193 data = data[rt+1:]
194 myTell += tlen
195 rt = data.find("\x1D")
# Buffer length before the next read; used to detect that nothing new arrived.
196 dlen = len(data)
197 data += self.stream.read(1536)
198 if (len(data) == dlen):
199
# Nothing more to read: discard any unterminated remainder, which also ends the
# outer `while data` loop.
200 data = ""
201 self.stream.close()
202 self.locations = locs
203 self.documents = docs
# Only one of the two lists is filled for a given cache mode, so the longer one
# gives the number of records found.
204 self.length = max(len(locs), len(docs))
205
206
207
208
209
210
212
213 - def __init__(self, session, stream, format, schema=None, codec=None, factory=None ):