| Trees | Index | Help |
|---|
|
|
1 from baseObjects import ResultSet, ResultSetItem, Index 2 from PyZ3950 import CQLParser 3 import math, types 4 5 import sys 6 7 from xml.sax import ContentHandler, make_parser, parseString as saxParseString, ErrorHandler, InputSource as SaxInput 8 import cStringIO as StringIO 9 from xml.sax.saxutils import escape, unescape 10 import utils 11 import cPickle 1214 items = [] 15 item = None 16 set = None 17 session = None 18 currContent = "" 19 2067 68 69 localParser = make_parser() 70 localParser.setErrorHandler(ErrorHandler()) 71 localInput = SaxInput() 72 localHandler = DeserializationHandler() 73 localParser.setContentHandler(localHandler) 74 7522 self.currContent = "" 23 self.session = session 24 self.set = set 25 self.items = [] 26 self.item = None27 3133 c = self.currContent 34 if name == "queryFreq": 35 self.set.queryFreq = long(c) 36 elif name == "queryTerm": 37 self.set.queryTerm = c 38 elif name == "termWeight": 39 self.set.termWeight = float(c) 40 elif name == "queryPositions" and c: 41 if len(c) > 1: 42 self.set.queryPositions = cPickle.loads(str(c)) 43 elif name == "item": 44 self.set.append(self.item) 45 elif name == "recStore": 46 self.item.recordStore = c 47 elif name == "id": 48 if c.isdigit(): 49 self.item.docid = long(c) 50 else: 51 self.item.docid = c 52 elif name == "weight": 53 self.item.weight = float(c) 54 elif name == "scaledWeight": 55 self.item.scaledWeight = float(c) 56 elif name == "occs": 57 self.item.occurences = long(c) 58 elif name == "database": 59 self.item.database = c 60 elif name == "proxInfo" and c: 61 if len(c) > 1: 62 self.item.proxInfo = cPickle.loads(str(c)) 63 self.currContent = ""6477 82126 12784 item = items[0] 85 item.weight = sum([x.weight for x in items]) 86 item.weight = item.weight / n 87 return item8890 for i in items: 91 i.weight = i.weight * (i.resultSet.minWeight / i.resultSet.maxWeight) 92 return self._meanWeights(items, n)9395 a.weight = a.weight * (self.minWeight / self.maxWeight) 96 if b: 97 b.weight = b.weight * (self.minWeight / self.maxWeight) 98 a.weight = (a.weight + b.weight) * 2.0 99 else: 100 a.weight = a.weight / 2.0101103 a.weight = a.weight * (self.minWeight / self.maxWeight) 104 if b: 105 b.weight = b.weight * (self.minWeight / self.maxWeight) 106 a.weight = (a.weight + b.weight) * 2.0 107 else: 108 # Leave high ranking ones high 109 rlen = len(a.resultSet._list) 110 if (( rlen > 150 and item.resultSetPosition > 100) 111 or (rlen < 150 and item.resultSetPosition > rlen/2)): 112 a.weight = a.weight / 2.0113115 # Determine which item is component set, and which item is from document set 116 # If the component's parent document's id is the same as the one in the 117 # full document list, then adjust 118 119 # Normalise min/max as above 120 # Pivot default is 0.7, but allow override 121 # (Pivot * documentScore) + ((1-pivot) * componentScore) 122 123 # If not in the list then just ((1-pivot) * componentScore) 124 125 pass129 _list = [] 130 131 id = "" 132 termid = -1 133 totalOccs = 0 134 totalRecs = 0 135 expires = 0 136 index = None 137 queryTerm = "" 138 queryFreq = 0 139 queryFragment = None 140 queryPositions = [] 141 relevancy = 0 142 maxWeight = 0 143 minWeight = 0 144 termWeight = 0.0 145 recordStore = "" 146 151153 return self._list[k]154156 return len(self._list)157159 self._list = data160162 # Turn into XML 163 xml = ['<resultSet>'] 164 xml.append('<queryTerm>%s</queryTerm><queryFreq>%s</queryFreq><queryPositions>%s</queryPositions><termWeight>%s</termWeight>' % (self.queryTerm, self.queryFreq, escape(cPickle.dumps(self.queryPositions)), self.termWeight)) 165 xml.append('<items>') 166 for item in self: 167 if type(item.docid) in types.StringTypes: 168 docid = escape(item.docid) 169 else: 170 docid = str(item.docid) 171 xml.append("<item><recStore>%s</recStore><id>%s</id><occs>%s</occs><weight>%s</weight><scaledWeight>%s</scaledWeight><proxInfo>%s</proxInfo><database>%s</database></item>" % (item.recordStore, 172 docid, 173 item.occurences, 174 item.weight, 175 item.scaledWeight, 176 escape(cPickle.dumps(item.proxInfo)), 177 item.database)) 178 xml.append('</items>') 179 xml.append('</resultSet>') 180 return ''.join(xml)181183 self._list = [] 184 localHandler.reinit(session, self) 185 localInput.setByteStream(StringIO.StringIO(data)) 186 localParser.parse(localInput) 187 return None188 189 194 198200 if (db): 201 totalDocs = db.totalRecords 202 if totalDocs == 0: 203 raise ValueErorr("No documents in database?") 204 else: 205 # Uhoh. Can't do it. (XXX Better Error) 206 raise(ValueError("Don't know database for determining relevancy")) 207 208 # William S Cooper proposes: 209 constants = [-3.7, 1.269, -0.31, 0.679, -0.0674, 0.223, 2.01] 210 211 # Ray R Larson proposes: 212 constants = [-3.7, 1.269, -0.31, 0.679, -0.021, 0.223, 4.01] 213 214 # Index Configuration proposes: 215 idx = db.protocolMaps['http://www.loc.gov/zing/srw/'].resolveIndex(session, clause) 216 if (idx): 217 for x in range(7): 218 temp = idx.get_setting(session, 'lr_constant%d' % x) 219 if (temp): 220 constants[x] = float(temp) 221 222 # Query proposes: 223 relSetUri = "info:srw/cql-context-set/2/relevance-1.0" 224 for m in cql.modifiers: 225 # Already been pinged for resolve() 226 if (m.type.prefixURI == relSetUri): 227 if m.type.value[:5] == "const": 228 try: 229 constants[int(m.type.value[5])] = float(m.value) 230 except ValueError: 231 # Invalid literal for float() 232 pass 233 except IndexError: 234 # list index out of range 235 pass 236 237 sumLogQueryFreq = 0.0 238 sumQueryFreq = 0 239 sumIDF = 0.0 240 241 # Sort rss by length 242 243 # Each rs represents one unique word in query 244 for rs in others: 245 sumLogQueryFreq += math.log(rs.queryFreq) 246 sumQueryFreq += rs.queryFreq 247 n = len(rs) 248 if n: 249 rs.idf = math.log(totalDocs / float(n)) 250 x2 = math.sqrt(sumQueryFreq) 251 252 # resultSets will be sorted by item already 253 # Step through all concurrently 254 255 tmpList = [] 256 cont = 1 257 oidxs = range(1,len(others)) 258 nors = len(others) 259 positions = [0] * nors 260 all = cql.value in ['all', 'and', '=', 'prox', 'adj'] 261 maxWeight = -1 262 minWeight = 9999999999 263 264 while cont: 265 items = [others[0][positions[0]]] 266 rspos = [0] 267 for o in oidxs: 268 nitem = others[o][positions[o]] 269 if nitem == items[0]: 270 items.append(nitem) 271 rspos.append(o) 272 elif nitem < items[0]: 273 if all: 274 # skip until equal or greater 275 positions[o] +=