##// END OF EJS Templates
Fixed search chunking bug and optimized chunk size.
marcink -
r479:149940ba celery
parent child Browse files
Show More
@@ -16,8 +16,6 b' import os'
16 16 import sys
17 17 import traceback
18 18
19
20
21 19 #to get the pylons_app import
22 20 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
23 21
@@ -50,23 +48,21 b' SCHEMA = Schema(owner=TEXT(),'
50 48 IDX_NAME = 'HG_INDEX'
51 49 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
52 50 FRAGMENTER = SimpleFragmenter(200)
53
54
55
56 51
57 52 class ResultWrapper(object):
58 53 def __init__(self, searcher, matcher, highlight_items):
59 54 self.searcher = searcher
60 55 self.matcher = matcher
61 56 self.highlight_items = highlight_items
62 self.fragment_size = 150 * 2
57 self.fragment_size = 200 / 2
63 58
64 59 @LazyProperty
65 60 def doc_ids(self):
66 61 docs_id = []
67 62 while self.matcher.is_active():
68 63 docnum = self.matcher.id()
69 docs_id.append(docnum)
64 chunks = [offsets for offsets in self.get_chunks()]
65 docs_id.append([docnum, chunks])
70 66 self.matcher.next()
71 67 return docs_id
72 68
@@ -99,18 +95,22 b' class ResultWrapper(object):'
99 95
100 96
101 97 def get_full_content(self, docid):
102 res = self.searcher.stored_fields(docid)
98 res = self.searcher.stored_fields(docid[0])
103 99 f_path = res['path'][res['path'].find(res['repository']) \
104 100 + len(res['repository']):].lstrip('/')
105 101
106 content_short = ''.join(self.get_short_content(res))
102 content_short = self.get_short_content(res, docid[1])
107 103 res.update({'content_short':content_short,
108 104 'content_short_hl':self.highlight(content_short),
109 105 'f_path':f_path})
110 106
111 107 return res
112
113 def get_short_content(self, res):
108
109 def get_short_content(self, res, chunks):
110
111 return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
112
113 def get_chunks(self):
114 114 """
115 115 Smart function that implements chunking the content
116 116 but not overlap chunks so it doesn't highlight the same
@@ -124,11 +124,11 b' class ResultWrapper(object):'
124 124 end = span.endchar or 0
125 125 start_offseted = max(0, start - self.fragment_size)
126 126 end_offseted = end + self.fragment_size
127 print start_offseted, end_offseted
127
128 128 if start_offseted < memory[-1][1]:
129 129 start_offseted = memory[-1][1]
130 130 memory.append((start_offseted, end_offseted,))
131 yield res["content"][start_offseted:end_offseted]
131 yield (start_offseted, end_offseted,)
132 132
133 133 def highlight(self, content, top=5):
134 134 hl = highlight(escape(content),
General Comments 0
You need to be logged in to leave comments. Login now