##// END OF EJS Templates
Fixed search chunking bug and optimized chunk size.
marcink -
r479:149940ba celery
parent child Browse files
Show More
@@ -1,140 +1,140 b''
1 from os.path import dirname as dn, join as jn
1 from os.path import dirname as dn, join as jn
2 from pidlock import LockHeld, DaemonLock
2 from pidlock import LockHeld, DaemonLock
3 from pylons_app.config.environment import load_environment
3 from pylons_app.config.environment import load_environment
4 from pylons_app.model.hg_model import HgModel
4 from pylons_app.model.hg_model import HgModel
5 from shutil import rmtree
5 from shutil import rmtree
6 from webhelpers.html.builder import escape
6 from webhelpers.html.builder import escape
7 from vcs.utils.lazy import LazyProperty
7 from vcs.utils.lazy import LazyProperty
8
8
9 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
9 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
10 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
10 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
11 from whoosh.index import create_in, open_dir
11 from whoosh.index import create_in, open_dir
12 from whoosh.formats import Characters
12 from whoosh.formats import Characters
13 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
13 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
14
14
15 import os
15 import os
16 import sys
16 import sys
17 import traceback
17 import traceback
18
18
19
20
21 #to get the pylons_app import
19 #to get the pylons_app import
22 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
20 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
23
21
24
22
25 #LOCATION WE KEEP THE INDEX
23 #LOCATION WE KEEP THE INDEX
26 IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index')
24 IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index')
27
25
28 #EXTENSIONS WE WANT TO INDEX CONTENT OFF
26 #EXTENSIONS WE WANT TO INDEX CONTENT OFF
29 INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
27 INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
30 'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
28 'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
31 'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
29 'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
32 'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
30 'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
33 'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
31 'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
34 'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
32 'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
35 'yaws']
33 'yaws']
36
34
37 #CUSTOM ANALYZER wordsplit + lowercase filter
35 #CUSTOM ANALYZER wordsplit + lowercase filter
38 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
36 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
39
37
40
38
41 #INDEX SCHEMA DEFINITION
39 #INDEX SCHEMA DEFINITION
42 SCHEMA = Schema(owner=TEXT(),
40 SCHEMA = Schema(owner=TEXT(),
43 repository=TEXT(stored=True),
41 repository=TEXT(stored=True),
44 path=ID(stored=True, unique=True),
42 path=ID(stored=True, unique=True),
45 content=FieldType(format=Characters(ANALYZER),
43 content=FieldType(format=Characters(ANALYZER),
46 scorable=True, stored=True),
44 scorable=True, stored=True),
47 modtime=STORED(), extension=TEXT(stored=True))
45 modtime=STORED(), extension=TEXT(stored=True))
48
46
49
47
50 IDX_NAME = 'HG_INDEX'
48 IDX_NAME = 'HG_INDEX'
51 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
49 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
52 FRAGMENTER = SimpleFragmenter(200)
50 FRAGMENTER = SimpleFragmenter(200)
53
51
54
55
56
class ResultWrapper(object):
    """Lazy wrapper around a whoosh searcher/matcher result set.

    For every matching document it records the document number together
    with pre-computed (start, end) character offsets of the highlight
    chunks, so the (potentially large) stored file content is only
    sliced when a result is actually rendered.
    """
    def __init__(self, searcher, matcher, highlight_items):
        self.searcher = searcher
        self.matcher = matcher
        self.highlight_items = highlight_items
        # Context taken on each side of a matched span when chunking;
        # half of the SimpleFragmenter(200) fragment size used for
        # highlighting, so chunks and fragments stay in step.
        self.fragment_size = 200 / 2

    @LazyProperty
    def doc_ids(self):
        """Return a list of [docnum, chunks] pairs for all matches.

        The chunk offsets must be computed here, while the matcher is
        still positioned on the given document — matcher state is
        consumed as we advance with ``next()``.
        """
        docs_id = []
        while self.matcher.is_active():
            docnum = self.matcher.id()
            chunks = [offsets for offsets in self.get_chunks()]
            docs_id.append([docnum, chunks])
            self.matcher.next()
        return docs_id

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return len(self.doc_ids)

    def __iter__(self):
        """
        Allows Iteration over results, and lazily generates content.

        *Requires* implementation of ``__getitem__`` method.
        """
        for docid in self.doc_ids:
            yield self.get_full_content(docid)

    def __getslice__(self, i, j):
        """
        Slicing of resultWrapper
        """
        slice = []
        for docid in self.doc_ids[i:j]:
            slice.append(self.get_full_content(docid))
        return slice

    def get_full_content(self, docid):
        """Build the full result dict for one [docnum, chunks] entry.

        Adds the repository-relative file path, the short (chunked)
        content and its highlighted HTML version to the stored fields.
        """
        res = self.searcher.stored_fields(docid[0])
        # strip everything up to and including the repository name to
        # get a path relative to the repository root
        f_path = res['path'][res['path'].find(res['repository']) \
            + len(res['repository']):].lstrip('/')

        content_short = self.get_short_content(res, docid[1])
        res.update({'content_short':content_short,
                    'content_short_hl':self.highlight(content_short),
                    'f_path':f_path})

        return res

    def get_short_content(self, res, chunks):
        """Join the pre-computed (start, end) chunks of the content."""
        return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])

    def get_chunks(self):
        """
        Smart function that implements chunking the content
        but does not overlap chunks so it doesn't highlight the same
        close occurrences twice.

        Yields (start, end) offset tuples into the document content,
        each match span widened by ``fragment_size`` on both sides and
        clipped against the previously yielded chunk.
        """
        memory = [(0, 0)]
        for span in self.matcher.spans():
            # span offsets may be None when positions are not stored
            start = span.startchar or 0
            end = span.endchar or 0
            start_offseted = max(0, start - self.fragment_size)
            end_offseted = end + self.fragment_size

            # clip the start against the previous chunk so the same
            # region is never emitted (and highlighted) twice
            if start_offseted < memory[-1][1]:
                start_offseted = memory[-1][1]
            memory.append((start_offseted, end_offseted,))
            yield (start_offseted, end_offseted,)

    def highlight(self, content, top=5):
        """Return *content* with search terms highlighted as HTML."""
        hl = highlight(escape(content),
                       self.highlight_items,
                       analyzer=ANALYZER,
                       fragmenter=FRAGMENTER,
                       formatter=FORMATTER,
                       top=top)
        return hl
General Comments 0
You need to be logged in to leave comments. Login now