upstream/kallithea Commit - r3818:0d22458b

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

"""

2

"""

3

rhodecode.lib.diffs

3

rhodecode.lib.diffs

4

~~~~~~~~~~~~~~~~~~~

4

~~~~~~~~~~~~~~~~~~~

5

6

Set of diffing helpers, previously part of vcs

6

Set of diffing helpers, previously part of vcs

7

8

9

:created_on: Dec 4, 2011

9

:created_on: Dec 4, 2011

10

:author: marcink

10

:author: marcink

11

12

:original copyright: 2007-2008 by Armin Ronacher

12

:original copyright: 2007-2008 by Armin Ronacher

13

:license: GPLv3, see COPYING for more details.

13

:license: GPLv3, see COPYING for more details.

14

"""

14

"""

15

# This program is free software: you can redistribute it and/or modify

15

# This program is free software: you can redistribute it and/or modify

16

# it under the terms of the GNU General Public License as published by

16

# it under the terms of the GNU General Public License as published by

17

# the Free Software Foundation, either version 3 of the License, or

17

# the Free Software Foundation, either version 3 of the License, or

18

# (at your option) any later version.

18

# (at your option) any later version.

19

#

19

#

20

# This program is distributed in the hope that it will be useful,

20

# This program is distributed in the hope that it will be useful,

21

# but WITHOUT ANY WARRANTY; without even the implied warranty of

21

# but WITHOUT ANY WARRANTY; without even the implied warranty of

22

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

22

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

23

# GNU General Public License for more details.

23

# GNU General Public License for more details.

24

#

24

#

25

# You should have received a copy of the GNU General Public License

25

# You should have received a copy of the GNU General Public License

26

# along with this program. If not, see <http://www.gnu.org/licenses/>.

26

# along with this program. If not, see <http://www.gnu.org/licenses/>.

27

28

import re

28

import re

29

import difflib

29

import difflib

30

import logging

30

import logging

31

32

from itertools import tee, imap

32

from itertools import tee, imap

33

34

from pylons.i18n.translation import _

34

from pylons.i18n.translation import _

35

36

from rhodecode.lib.vcs.exceptions import VCSError

36

from rhodecode.lib.vcs.exceptions import VCSError

37

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

37

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

38

from rhodecode.lib.vcs.backends.base import EmptyChangeset

38

from rhodecode.lib.vcs.backends.base import EmptyChangeset

39

from rhodecode.lib.helpers import escape

39

from rhodecode.lib.helpers import escape

40

from rhodecode.lib.utils2 import safe_unicode, safe_str

40

from rhodecode.lib.utils2 import safe_unicode, safe_str

41

42

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

43

44

45

def wrap_to_table(str_):
    """
    Wrap ``str_`` into a one-row ``code-difftable`` HTML table.

    ``str_`` is interpolated as-is, so callers are responsible for escaping
    it when it may contain markup.

    :param str_: text placed inside the table
    :returns: HTML snippet as a string
    """
    # NOTE(review): the rows between <table> and </tr> (including the ``%s``
    # placeholder) were missing from this copy of the file; they are restored
    # here from the upstream layout -- confirm class names against upstream.
    return '''<table class="code-difftable">
                <tr class="line no-comment">
                <td class="lineno new"></td>
                <td class="code no-comment"><pre>%s</pre></td>
                </tr>
              </table>''' % str_

52

53

54

def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Return the diff of two file nodes rendered as an HTML table, together
    with its size, the two changeset ids and the add/remove stats.

    No line diff is produced for binary files or when the cut off applies;
    a short message table is returned instead.

    :param filenode_old: old node, or None to diff against an empty file
        at the same path as ``filenode_new``
    :param filenode_new: new node
    :param cut_off_limit: None diffs unconditionally, -1 always cuts off,
        otherwise both node sizes must be below this value to be diffed
    :param ignore_whitespace: passed through to :func:`get_gitdiff`
    :param line_context: number of context lines, passed to
        :func:`get_gitdiff`
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats)``
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    within_limit = cut_off_limit != -1 and (
        cut_off_limit is None or
        (filenode_old.size < cut_off_limit and
         filenode_new.size < cut_off_limit))

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('Binary file'))
        stats = (0, 0)
        size = 0
    elif within_limit:
        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                ignore_whitespace=ignore_whitespace,
                                context=line_context)
        diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')

        diff = diff_processor.as_html(enable_comments=enable_comments)
        stats = diff_processor.stat()
        size = len(diff or '')
    else:
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))
        stats = (0, 0)
        size = 0

    if not diff:
        # an empty diff is either a submodule (never diffed) or no change
        submodules = [node for node in (filenode_new, filenode_old)
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.changeset.raw_id
    cs2 = filenode_new.changeset.raw_id

    return size, cs1, cs2, diff, stats

98

99

100

def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    The diff is obtained from ``repo.get_diff`` of the repository that
    ``filenode_new`` belongs to. An empty string is returned when either
    node is a ``SubModuleNode``.

    :param filenode_old: old node
    :param filenode_new: new node
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value means 3
    :raises VCSError: if either node is not a ``FileNode``
    """
    # make sure we pass in default context
    context = context or 3

    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % filenode.__class__)

    repo = filenode_new.changeset.repository
    # changesets without a raw_id are diffed against the empty changeset
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)

    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                                ignore_whitespace, context)
    return vcs_gitdiff

125

126

# Kinds of file-level change; used as the second element of a file's
# ``stats`` entry when there is no line diff to count.
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
CHMOD_FILENODE = 5

131

132

133

class DiffLimitExceeded(Exception):
    """Raised when the processed diff grows beyond the configured limit."""
    pass

135

136

137

class LimitedDiffContainer(object):
    """
    Iterable wrapper around a diff result, carrying the limit and the
    size reached along with it.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        """
        :param diff_limit: the configured diff size limit
        :param cur_diff_size: size that had been processed so far
        :param diff: iterable with the diff data
        """
        self.diff = diff
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size

    def __iter__(self):
        """Iterate over the items of the wrapped diff."""
        for l in self.diff:
            yield l

147

148

149

class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    # hunk header: "@@ -old_start[,old_len] +new_start[,new_len] @@ rest"
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    _newline_marker = re.compile(r'^\\ No newline at end of file')
    # Header of one file section as emitted by git. It is matched against
    # the text that follows "diff --git", hence the commented-out first line.
    _git_header_re = re.compile(r"""
        #^diff[ ]--git
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
           ^rename[ ]from[ ](?P<rename_from>\S+)\n
           ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
        (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
        (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
    """, re.VERBOSE | re.MULTILINE)
    # Same as above with the field order used for hg: mode change first,
    # similarity index and rename lines independently optional.
    _hg_header_re = re.compile(r"""
        #^diff[ ]--git
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
        (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
           ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
        (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
        (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
    """, re.VERBOSE | re.MULTILINE)

    #used for inline highlighter word split
    # Lines are HTML-escaped before they are tokenized, so the escaped
    # entities must be kept together as single tokens.
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

190

191

def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
    """
    :param diff: a text in diff format
    :param vcs: type of version control, hg or git
    :param format: format of diff passed, `udiff` or `gitdiff`
    :param diff_limit: define the size of diff that is considered "big"
        based on that parameter cut off will be triggered, set to None
        to show full diff
    :raises Exception: if ``diff`` is not a string
    """
    if not isinstance(diff, basestring):
        raise Exception('Diff must be a basestring got %s instead' % type(diff))

    self._diff = diff
    self._format = format
    self.adds = 0
    self.removes = 0
    # calculate diff size
    self.diff_size = len(diff)
    self.diff_limit = diff_limit
    # running total of characters handed to the escaper
    self.cur_diff_size = 0
    self.parsed = False
    self.parsed_diff = []
    self.vcs = vcs

    # pick the inline highlighter and the parser matching the format
    if format == 'gitdiff':
        self.differ = self._highlight_line_difflib
        self._parser = self._parse_gitdiff
    else:
        self.differ = self._highlight_line_udiff
        self._parser = self._parse_udiff

221

222

def _copy_iterator(self):

222

def _copy_iterator(self):

223

"""

223

"""

224

make a fresh copy of generator, we should not iterate thru

224

make a fresh copy of generator, we should not iterate thru

225

an original as it's needed for repeating operations on

225

an original as it's needed for repeating operations on

226

this instance of DiffProcessor

226

this instance of DiffProcessor

227

"""

227

"""

228

self.__udiff, iterator_copy = tee(self.__udiff)

228

self.__udiff, iterator_copy = tee(self.__udiff)

229

return iterator_copy

229

return iterator_copy

230

231

def _escaper(self, string):
    """
    Escaper for diff escapes special chars and checks the diff limit

    The length of every string passed in is added to ``cur_diff_size``
    before the check.

    :param string: a piece (line) of the diff
    :returns: unicode string with ``&``, ``<`` and ``>`` replaced by
        HTML entities
    :raises DiffLimitExceeded: when ``diff_limit`` is set and the
        accumulated size exceeds it
    """
    self.cur_diff_size += len(string)

    # escaper gets iterated on each .next() call and it checks if each
    # parsed line doesn't exceed the diff limit
    if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
        raise DiffLimitExceeded('Diff Limit Exceeded')

    # '&' has to go first, otherwise the entities produced for '<' and '>'
    # would get escaped again
    return safe_unicode(string).replace('&', '&amp;')\
                               .replace('<', '&lt;')\
                               .replace('>', '&gt;')

249

250

def _line_counter(self, l):
    """
    Checks each line and bumps total adds/removes for this diff

    Lines starting with ``+++`` or ``---`` are not counted.

    :param l: a single line of the diff
    :returns: the line converted with ``safe_unicode``
    """
    if l.startswith('+') and not l.startswith('+++'):
        self.adds += 1
    elif l.startswith('-') and not l.startswith('---'):
        self.removes += 1
    return safe_unicode(l)

261

262

def _highlight_line_difflib(self, line, next_):

262

def _highlight_line_difflib(self, line, next_):

263

"""

263

"""

264

Highlight inline changes in both lines.

264

Highlight inline changes in both lines.

265

"""

265

"""

266

267

if line['action'] == 'del':

267

if line['action'] == 'del':

268

old, new = line, next_

268

old, new = line, next_

269

else:

269

else:

270

old, new = next_, line

270

old, new = next_, line

271

272

oldwords = self._token_re.split(old['line'])

272

oldwords = self._token_re.split(old['line'])

273

newwords = self._token_re.split(new['line'])

273

newwords = self._token_re.split(new['line'])

274

sequence = difflib.SequenceMatcher(None, oldwords, newwords)

274

sequence = difflib.SequenceMatcher(None, oldwords, newwords)

275

276

oldfragments, newfragments = [], []

276

oldfragments, newfragments = [], []

277

for tag, i1, i2, j1, j2 in sequence.get_opcodes():

277

for tag, i1, i2, j1, j2 in sequence.get_opcodes():

278

oldfrag = ''.join(oldwords[i1:i2])

278

oldfrag = ''.join(oldwords[i1:i2])

279

newfrag = ''.join(newwords[j1:j2])

279

newfrag = ''.join(newwords[j1:j2])

280

if tag != 'equal':

280

if tag != 'equal':

281

if oldfrag:

281

if oldfrag:

282

oldfrag = '<del>%s</del>' % oldfrag

282

oldfrag = '<del>%s</del>' % oldfrag

283

if newfrag:

283

if newfrag:

284

newfrag = '<ins>%s</ins>' % newfrag

284

newfrag = '<ins>%s</ins>' % newfrag

285

oldfragments.append(oldfrag)

285

oldfragments.append(oldfrag)

286

newfragments.append(newfrag)

286

newfragments.append(newfrag)

287

288

old['line'] = "".join(oldfragments)

288

old['line'] = "".join(oldfragments)

289

new['line'] = "".join(newfragments)

289

new['line'] = "".join(newfragments)

290

291

def _highlight_line_udiff(self, line, next_):

291

def _highlight_line_udiff(self, line, next_):

292

"""

292

"""

293

Highlight inline changes in both lines.

293

Highlight inline changes in both lines.

294

"""

294

"""

295

start = 0

295

start = 0

296

limit = min(len(line['line']), len(next_['line']))

296

limit = min(len(line['line']), len(next_['line']))

297

while start < limit and line['line'][start] == next_['line'][start]:

297

while start < limit and line['line'][start] == next_['line'][start]:

298

start += 1

298

start += 1

299

end = -1

299

end = -1

300

limit -= start

300

limit -= start

301

while -end <= limit and line['line'][end] == next_['line'][end]:

301

while -end <= limit and line['line'][end] == next_['line'][end]:

302

end -= 1

302

end -= 1

303

end += 1

303

end += 1

304

if start or end:

304

if start or end:

305

def do(l):

305

def do(l):

306

last = end + len(l['line'])

306

last = end + len(l['line'])

307

if l['action'] == 'add':

307

if l['action'] == 'add':

308

tag = 'ins'

308

tag = 'ins'

309

else:

309

else:

310

tag = 'del'

310

tag = 'del'

311

l['line'] = '%s<%s>%s</%s>%s' % (

311

l['line'] = '%s<%s>%s</%s>%s' % (

312

l['line'][:start],

312

l['line'][:start],

313

tag,

313

tag,

314

l['line'][start:last],

314

l['line'][start:last],

315

tag,

315

tag,

316

l['line'][last:]

316

l['line'][last:]

317

)

317

)

318

do(line)

318

do(line)

319

do(next_)

319

do(next_)

320

321

def _get_header(self, diff_chunk):

321

def _get_header(self, diff_chunk):

322

"""

322

"""

323

parses the diff header, and returns parts, and leftover diff

323

parses the diff header, and returns parts, and leftover diff

324

parts consists of 14 elements::

324

parts consists of 14 elements::

325

326

a_path, b_path, similarity_index, rename_from, rename_to,

326

a_path, b_path, similarity_index, rename_from, rename_to,

327

old_mode, new_mode, new_file_mode, deleted_file_mode,

327

old_mode, new_mode, new_file_mode, deleted_file_mode,

328

a_blob_id, b_blob_id, b_mode, a_file, b_file

328

a_blob_id, b_blob_id, b_mode, a_file, b_file

329

330

:param diff_chunk:

330

:param diff_chunk:

331

:type diff_chunk:

331

:type diff_chunk:

332

"""

332

"""

333

334

if self.vcs == 'git':

334

if self.vcs == 'git':

335

match = self._git_header_re.match(diff_chunk)

335

match = self._git_header_re.match(diff_chunk)

336

diff = diff_chunk[match.end():]

336

diff = diff_chunk[match.end():]

337

return match.groupdict(), imap(self._escaper, diff.splitlines(1))

337

return match.groupdict(), imap(self._escaper, diff.splitlines(1))

338

elif self.vcs == 'hg':

338

elif self.vcs == 'hg':

339

match = self._hg_header_re.match(diff_chunk)

339

match = self._hg_header_re.match(diff_chunk)

340

diff = diff_chunk[match.end():]

340

diff = diff_chunk[match.end():]

341

return match.groupdict(), imap(self._escaper, diff.splitlines(1))

341

return match.groupdict(), imap(self._escaper, diff.splitlines(1))

342

else:

342

else:

343

raise Exception('VCS type %s is not supported' % self.vcs)

343

raise Exception('VCS type %s is not supported' % self.vcs)

344

345

def _clean_line(self, line, command):

345

def _clean_line(self, line, command):

346

if command in ['+', '-', ' ']:

346

if command in ['+', '-', ' ']:

347

#only modify the line if it's actually a diff thing

347

#only modify the line if it's actually a diff thing

348

line = line[1:]

348

line = line[1:]

349

return line

349

return line

350

351

def _parse_gitdiff(self, inline_diff=True):

351

def _parse_gitdiff(self, inline_diff=True):

352

_files = []

352

_files = []

353

diff_container = lambda arg: arg

353

diff_container = lambda arg: arg

354

355

##split the diff in chunks of separate --git a/file b/file chunks

355

##split the diff in chunks of separate --git a/file b/file chunks

356

for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:

356

for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:

357

binary = False

358

binary_msg = 'unknown binary'

359

head, diff = self._get_header(raw_diff)

357

head, diff = self._get_header(raw_diff)

360

358

359

op = None

360

stats = None

361

msg = None

362

361

if not head['a_file'] and head['b_file']:

363

if not head['a_file'] and head['b_file']:

362

op = 'A'

364

op = 'A'

363

elif head['a_file'] and head['b_file']:

365

elif head['a_file'] and head['b_file']:

364

op = 'M'

366

op = 'M'

365

elif head['a_file'] and not head['b_file']:

367

elif head['a_file'] and not head['b_file']:

366

op = 'D'

368

op = 'D'

367

else:

369

else:

368

#probably we're dealing with a binary file 1

369

binary = True

370

if head['deleted_file_mode']:

370

if head['deleted_file_mode']:

371

op = 'D'

371

op = 'D'

372

stats = ['b', DEL_FILENODE]

372

stats = ['b', DEL_FILENODE]

373

~~binary_~~msg = 'deleted ~~binary~~ file'

373

msg = 'deleted file'

374

elif head['new_file_mode']:

374

elif head['new_file_mode']:

375

op = 'A'

375

op = 'A'

376

stats = ['b', NEW_FILENODE]

376

stats = ['b', NEW_FILENODE]

377

~~binary_~~msg = 'new ~~binary~~ file %s' % head['new_file_mode']

377

msg = 'new file %s' % head['new_file_mode']

378

else:

378

else:

379

if head['new_mode'] and head['old_mode']:

379

if head['new_mode'] and head['old_mode']:

380

stats = ['b', CHMOD_FILENODE]

380

stats = ['b', CHMOD_FILENODE]

381

op = 'M'

381

op = 'M'

382

~~binary_~~msg = ('modified ~~binary~~ file chmod %s => %s'

382

msg = ('modified file chmod %s => %s'

383

% (head['old_mode'], head['new_mode']))

383

% (head['old_mode'], head['new_mode']))

384

elif (head['rename_from'] and head['rename_to']

384

elif (head['rename_from'] and head['rename_to']

385

and head['rename_from'] != head['rename_to']):

385

and head['rename_from'] != head['rename_to']):

386

stats = ['b', RENAMED_FILENODE]

386

stats = ['b', RENAMED_FILENODE]

387

op = 'M'

387

op = 'M'

388

~~binary_~~msg = ('file renamed from %s to %s'

388

msg = ('file renamed from %s to %s'

389

% (head['rename_from'], head['rename_to']))

389

% (head['rename_from'], head['rename_to']))

390

else:

390

else:

391

stats = ['b', MOD_FILENODE]

391

stats = ['b', MOD_FILENODE]

392

op = 'M'

392

op = 'M'

393

~~binary_~~msg = 'modified ~~binary~~ file'

393

msg = 'modified file'

394

395

if not binary:

395

if head['a_file'] or head['b_file']: # a real diff

396

try:

396

try:

397

chunks, stats = self._parse_lines(diff)

397

chunks, stats = self._parse_lines(diff)

398

except DiffLimitExceeded:

398

except DiffLimitExceeded:

399

diff_container = lambda _diff: LimitedDiffContainer(

399

diff_container = lambda _diff: LimitedDiffContainer(

400

self.diff_limit,

400

self.diff_limit,

401

self.cur_diff_size,

401

self.cur_diff_size,

402

_diff)

402

_diff)

403

break

403

break

404

else:

404

else: # GIT binary patch (or empty diff)

405

chunks = []

405

chunks = []

406

chunks.append([{

406

if not msg: # don't overwrite more important message

407

msg = 'binary diff not shown'

408

409

if msg:

410

chunks.insert(0, [{

407

'old_lineno': '',

411

'old_lineno': '',

408

'new_lineno': '',

412

'new_lineno': '',

409

'action': 'binary',

413

'action': 'binary',

410

'line': ~~binary_~~msg,

414

'line': msg,

411

}])

415

}])

412

416

413

_files.append({

417

_files.append({

414

'filename': head['b_path'],

418

'filename': head['b_path'],

415

'old_revision': head['a_blob_id'],

419

'old_revision': head['a_blob_id'],

416

'new_revision': head['b_blob_id'],

420

'new_revision': head['b_blob_id'],

417

'chunks': chunks,

421

'chunks': chunks,

418

'operation': op,

422

'operation': op,

419

'stats': stats,

423

'stats': stats,

420

})

424

})

421

425

422

sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])

426

sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])

423

427

424

if not inline_diff:

428

if not inline_diff:

425

return diff_container(sorted(_files, key=sorter))

429

return diff_container(sorted(_files, key=sorter))

426

430

427

# highlight inline changes

431

# highlight inline changes

428

for diff_data in _files:

432

for diff_data in _files:

429

for chunk in diff_data['chunks']:

433

for chunk in diff_data['chunks']:

430

lineiter = iter(chunk)

434

lineiter = iter(chunk)

431

try:

435

try:

432

while 1:

436

while 1:

433

line = lineiter.next()

437

line = lineiter.next()

434

if line['action'] not in ['unmod', 'context']:

438

if line['action'] not in ['unmod', 'context']:

435

nextline = lineiter.next()

439

nextline = lineiter.next()

436

if nextline['action'] in ['unmod', 'context'] or \

440

if nextline['action'] in ['unmod', 'context'] or \

437

nextline['action'] == line['action']:

441

nextline['action'] == line['action']:

438

continue

442

continue

439

self.differ(line, nextline)

443

self.differ(line, nextline)

440

except StopIteration:

444

except StopIteration:

441

pass

445

pass

442

446

443

return diff_container(sorted(_files, key=sorter))

447

return diff_container(sorted(_files, key=sorter))

444

448

445

def _parse_udiff(self, inline_diff=True):

449

def _parse_udiff(self, inline_diff=True):

446

raise NotImplementedError()

450

raise NotImplementedError()

447

451

448

def _parse_lines(self, diff):

452

def _parse_lines(self, diff):

449

"""

453

"""

450

Parse the diff an return data for the template.

454

Parse the diff an return data for the template.

451

"""

455

"""

452

456

453

lineiter = iter(diff)

457

lineiter = iter(diff)

454

stats = [0, 0]

458

stats = [0, 0]

455

459

456

try:

460

try:

457

chunks = []

461

chunks = []

458

line = lineiter.next()

462

line = lineiter.next()

459

463

460

while line:

464

while line:

461

lines = []

465

lines = []

462

chunks.append(lines)

466

chunks.append(lines)

463

467

464

match = self._chunk_re.match(line)

468

match = self._chunk_re.match(line)

465

469

466

if not match:

470

if not match:

467

break

471

break

468

472

469

gr = match.groups()

473

gr = match.groups()

470

(old_line, old_end,

474

(old_line, old_end,

471

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

475

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

472

old_line -= 1

476

old_line -= 1

473

new_line -= 1

477

new_line -= 1

474

478

475

context = len(gr) == 5

479

context = len(gr) == 5

476

old_end += old_line

480

old_end += old_line

477

new_end += new_line

481

new_end += new_line

478

482

479

if context:

483

if context:

480

# skip context only if it's first line

484

# skip context only if it's first line

481

if int(gr[0]) > 1:

485

if int(gr[0]) > 1:

482

lines.append({

486

lines.append({

483

'old_lineno': '...',

487

'old_lineno': '...',

484

'new_lineno': '...',

488

'new_lineno': '...',

485

'action': 'context',

489

'action': 'context',

486

'line': line,

490

'line': line,

487

})

491

})

488

492

489

line = lineiter.next()

493

line = lineiter.next()

490

494

491

while old_line < old_end or new_line < new_end:

495

while old_line < old_end or new_line < new_end:

492

command = ' '

496

command = ' '

493

if line:

497

if line:

494

command = line[0]

498

command = line[0]

495

499

496

affects_old = affects_new = False

500

affects_old = affects_new = False

497

501

498

# ignore those if we don't expect them

502

# ignore those if we don't expect them

499

if command in '#@':

503

if command in '#@':

500

continue

504

continue

501

elif command == '+':

505

elif command == '+':

502

affects_new = True

506

affects_new = True

503

action = 'add'

507

action = 'add'

504

stats[0] += 1

508

stats[0] += 1

505

elif command == '-':

509

elif command == '-':

506

affects_old = True

510

affects_old = True

507

action = 'del'

511

action = 'del'

508

stats[1] += 1

512

stats[1] += 1

509

else:

513

else:

510

affects_old = affects_new = True

514

affects_old = affects_new = True

511

action = 'unmod'

515

action = 'unmod'

512

516

513

if not self._newline_marker.match(line):

517

if not self._newline_marker.match(line):

514

old_line += affects_old

518

old_line += affects_old

515

new_line += affects_new

519

new_line += affects_new

516

lines.append({

520

lines.append({

517

'old_lineno': affects_old and old_line or '',

521

'old_lineno': affects_old and old_line or '',

518

'new_lineno': affects_new and new_line or '',

522

'new_lineno': affects_new and new_line or '',

519

'action': action,

523

'action': action,

520

'line': self._clean_line(line, command)

524

'line': self._clean_line(line, command)

521

})

525

})

522

526

523

line = lineiter.next()

527

line = lineiter.next()

524

528

525

if self._newline_marker.match(line):

529

if self._newline_marker.match(line):

526

# we need to append to lines, since this is not

530

# we need to append to lines, since this is not

527

# counted in the line specs of diff

531

# counted in the line specs of diff

528

lines.append({

532

lines.append({

529

'old_lineno': '...',

533

'old_lineno': '...',

530

'new_lineno': '...',

534

'new_lineno': '...',

531

'action': 'context',

535

'action': 'context',

532

'line': self._clean_line(line, command)

536

'line': self._clean_line(line, command)

533

})

537

})

534

538

535

except StopIteration:

539

except StopIteration:

536

pass

540

pass

537

return chunks, stats

541

return chunks, stats

538

542

539

def _safe_id(self, idstring):

543

def _safe_id(self, idstring):

540

"""Make a string safe for including in an id attribute.

544

"""Make a string safe for including in an id attribute.

541

545

542

The HTML spec says that id attributes 'must begin with

546

The HTML spec says that id attributes 'must begin with

543

a letter ([A-Za-z]) and may be followed by any number

547

a letter ([A-Za-z]) and may be followed by any number

544

of letters, digits ([0-9]), hyphens ("-"), underscores

548

of letters, digits ([0-9]), hyphens ("-"), underscores

545

("_"), colons (":"), and periods (".")'. These regexps

549

("_"), colons (":"), and periods (".")'. These regexps

546

are slightly over-zealous, in that they remove colons

550

are slightly over-zealous, in that they remove colons

547

and periods unnecessarily.

551

and periods unnecessarily.

548

552

549

Whitespace is transformed into underscores, and then

553

Whitespace is transformed into underscores, and then

550

anything which is not a hyphen or a character that

554

anything which is not a hyphen or a character that

551

matches \w (alphanumerics and underscore) is removed.

555

matches \w (alphanumerics and underscore) is removed.

552

556

553

"""

557

"""

554

# Transform all whitespace to underscore

558

# Transform all whitespace to underscore

555

idstring = re.sub(r'\s', "_", '%s' % idstring)

559

idstring = re.sub(r'\s', "_", '%s' % idstring)

556

# Remove everything that is not a hyphen or a member of \w

560

# Remove everything that is not a hyphen or a member of \w

557

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

561

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

558

return idstring

562

return idstring

559

563

560

def prepare(self, inline_diff=True):
    """
    Parse the udiff held by this instance so it can be rendered as HTML.

    The result of ``self._parser`` is cached on the instance as
    ``parsed_diff`` and the ``parsed`` flag is raised.

    :param inline_diff: forwarded unchanged to ``self._parser``
    :returns: whatever ``self._parser`` produced (the same object that is
        stored in ``self.parsed_diff``)
    """
    result = self._parser(inline_diff=inline_diff)
    # assigned left to right: the flag first, then the cached result
    self.parsed, self.parsed_diff = True, result
    return result

569

573

570

def as_raw(self, diff_lines=None):
    """
    Return the raw diff stored on this instance.

    :param diff_lines: accepted for interface compatibility; this method
        does not use it
    :returns: ``self._diff`` unchanged
    """
    raw_diff = self._diff
    return raw_diff

576

580

577

def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Return the diff as an HTML table with customizable CSS classes.

    Each change becomes one ``<tr>`` with three cells: old line number,
    new line number and the code itself. ``change['line']`` is inserted
    as-is, so it must already be safe to embed in HTML.

    :param table_class: CSS class of the ``<table>`` element
    :param line_class: CSS class put on every ``<tr>`` (the change action
        is added next to it)
    :param old_lineno_class: CSS class of the old line number cell
    :param new_lineno_class: CSS class of the new line number cell
    :param code_class: CSS class of the code cell
    :param enable_comments: when false, the code cell additionally gets
        the ``no-comment`` class
    :param parsed_lines: parsed diff to render instead of
        ``self.parsed_diff``; ignored when empty
    :returns: the HTML string, or ``None`` when no chunk was found
    """
    def _link_to_if(condition, label, url):
        """
        Generate a link if the condition is met, or just the label if not.
        """
        if condition:
            return '''<a href="%(url)s">%(label)s</a>''' % {
                'url': url,
                'label': label
            }
        return label

    if not self.parsed:
        self.prepare()

    diff_lines = self.parsed_diff
    if parsed_lines:
        diff_lines = parsed_lines

    # Loop invariant: depends only on the arguments, not on the row.
    comments = '' if enable_comments else 'no-comment'

    _html_empty = True
    _html = ['''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    }]

    for diff in diff_lines:
        # Sanitize the file name once per file instead of twice per row.
        file_id = self._safe_id(diff['filename'])
        for line in diff['chunks']:
            _html_empty = False
            for change in line:
                old_lineno = change['old_lineno']
                new_lineno = change['new_lineno']

                _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': change['action']
                })

                anchor_old = "%s_o%s" % (file_id, old_lineno)
                anchor_new = "%s_n%s" % (file_id, new_lineno)

                # Only real line numbers get an id; '' and the '...'
                # placeholder do not.
                anchor_old_id = ''
                anchor_new_id = ''
                if old_lineno != '...' and old_lineno:
                    anchor_old_id = 'id="%s"' % anchor_old
                if new_lineno != '...' and new_lineno:
                    anchor_new_id = 'id="%s"' % anchor_new

                ###########################################################
                # OLD LINE NUMBER
                ###########################################################
                _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                    'a_id': anchor_old_id,
                    'olc': old_lineno_class
                })
                # NOTE: the condition is always True, so the cell content
                # is always wrapped in a link, even for placeholders.
                _html.append(_link_to_if(True, old_lineno,
                                         '#%s' % anchor_old))
                _html.append('''</td>\n''')

                ###########################################################
                # NEW LINE NUMBER
                ###########################################################
                _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                    'a_id': anchor_new_id,
                    'nlc': new_lineno_class
                })
                _html.append(_link_to_if(True, new_lineno,
                                         '#%s' % anchor_new))
                _html.append('''</td>\n''')

                ###########################################################
                # CODE
                ###########################################################
                _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                    'cc': code_class,
                    'inc': comments
                })
                _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })
                _html.append('''\t</td>''')
                _html.append('''\n</tr>\n''')

    _html.append('''</table>''')
    if _html_empty:
        return None
    return ''.join(_html)

679

683

680

def stat(self):
    """
    Return the line statistics of this instance.

    :returns: tuple ``(self.adds, self.removes)`` - added lines first,
        removed lines second
    """
    added, removed = self.adds, self.removes
    return added, removed

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             """
                 rhodecode.lib.diffs
                 ~~~~~~~~~~~~~~~~~~~
                 Set of diffing helpers, previously part of vcs
                 :created_on: Dec 4, 2011
                 :author: marcink
                 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
                 :original copyright: 2007-2008 by Armin Ronacher
                 :license: GPLv3, see COPYING for more details.
             """
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU General Public License as published by
             # the Free Software Foundation, either version 3 of the License, or
             # (at your option) any later version.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             import re
             import difflib
             import logging
             from itertools import tee, imap
             from pylons.i18n.translation import _
             from rhodecode.lib.vcs.exceptions import VCSError
             from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
             from rhodecode.lib.vcs.backends.base import EmptyChangeset
             from rhodecode.lib.helpers import escape
             from rhodecode.lib.utils2 import safe_unicode, safe_str
             log = logging.getLogger(__name__)
             def wrap_to_table(str_):
                 """
                 Wrap ``str_`` into a one-row ``code-difftable`` HTML table.

                 The value is interpolated into the ``<pre>`` cell as given, no
                 escaping is done here.
                 """
                 template = '''<table class="code-difftable">
                             <tr class="line no-comment">
                             <td class="lineno new"></td>
                             <td class="code no-comment"><pre>%s</pre></td>
                             </tr>
                           </table>'''
                 return template % str_
             def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                              ignore_whitespace=True, line_context=3,
                              enable_comments=False):
                 """
                 Build an HTML table with the diff between two file nodes.

                 Binary files, diffs that are cut off and empty diffs are replaced by
                 a short message wrapped into the same kind of table.

                 :param filenode_old: old file node; ``None`` is replaced by an empty
                     ``FileNode`` with the path of ``filenode_new``
                 :param filenode_new: new file node
                 :param cut_off_limit: ``None`` disables the size check, ``-1`` always
                     shows the cut off message, otherwise both node sizes must be
                     below this value for the diff to be rendered
                 :param ignore_whitespace: passed to ``get_gitdiff``
                 :param line_context: number of context lines, passed to ``get_gitdiff``
                 :param enable_comments: passed to ``DiffProcessor.as_html``
                 :returns: tuple ``(size, cs1, cs2, diff, stats)``
                 """
                 if filenode_old is None:
                     filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
                 # values used by every branch that does not render a real diff
                 stats = (0, 0)
                 size = 0
                 if filenode_old.is_binary or filenode_new.is_binary:
                     diff = wrap_to_table(_('Binary file'))
                 elif cut_off_limit != -1 and (
                         cut_off_limit is None or
                         (filenode_old.size < cut_off_limit and
                          filenode_new.size < cut_off_limit)):
                     raw_diff = get_gitdiff(filenode_old, filenode_new,
                                            ignore_whitespace=ignore_whitespace,
                                            context=line_context)
                     processor = DiffProcessor(raw_diff, format='gitdiff')
                     diff = processor.as_html(enable_comments=enable_comments)
                     stats = processor.stat()
                     size = len(diff or '')
                 else:
                     diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                            'diff menu to display this diff'))
                 if not diff:
                     # nothing was rendered: either a submodule or no change at all
                     submodules = [node for node in (filenode_new, filenode_old)
                                   if isinstance(node, SubModuleNode)]
                     if submodules:
                         message = escape('Submodule %r' % submodules[0])
                     else:
                         message = _('No changes detected')
                     diff = wrap_to_table(message)
                 return (size,
                         filenode_old.changeset.raw_id,
                         filenode_new.changeset.raw_id,
                         diff,
                         stats)
             def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
                 """
                 Returns git style diff between given ``filenode_old`` and ``filenode_new``.

                 An empty string is returned when either node is a ``SubModuleNode``.

                 :param ignore_whitespace: ignore whitespaces in diff
                 :param context: number of context lines, a false value means 3
                 :raises VCSError: when one of the nodes is not a ``FileNode``
                 """
                 # make sure we pass in default context
                 if not context:
                     context = 3
                 for node in (filenode_new, filenode_old):
                     if isinstance(node, SubModuleNode):
                         return ''
                 for node in (filenode_old, filenode_new):
                     if isinstance(node, FileNode):
                         continue
                     raise VCSError("Given object should be FileNode object, not %s"
                                    % node.__class__)
                 repo = filenode_new.changeset.repository
                 # a changeset without ``raw_id`` is diffed as the empty changeset
                 old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
                 new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
                 return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                                      ignore_whitespace, context)
             # Kinds of file level change. DiffProcessor._parse_gitdiff stores one of
             # these as the second item of ``stats`` (``['b', <code>]``) for entries
             # whose diff header carries no ``---``/``+++`` file lines.
             NEW_FILENODE = 1
             DEL_FILENODE = 2
             MOD_FILENODE = 3
             RENAMED_FILENODE = 4
             CHMOD_FILENODE = 5
             class DiffLimitExceeded(Exception):
                 """Raised while escaping diff lines once the diff limit is exceeded."""
             class LimitedDiffContainer(object):
                 """
                 Iterable wrapper around parsed diff data that also carries the diff
                 limit and the diff size that were passed in by the creator.
                 """
                 def __init__(self, diff_limit, cur_diff_size, diff):
                     self.diff_limit = diff_limit
                     self.cur_diff_size = cur_diff_size
                     self.diff = diff
                 def __iter__(self):
                     # iterate over the wrapped diff entries
                     for entry in self.diff:
                         yield entry
             class DiffProcessor(object):
                 """
                 Give it a unified or git diff and it returns a list of the files that were
                 mentioned in the diff together with a dict of meta information that
                 can be used to render it in a HTML template.
                 """
                 # hunk header: "@@ -<old start>[,<old len>] +<new start>[,<new len>] @@<rest>"
                 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
                 # the "\ No newline at end of file" marker line
                 _newline_marker = re.compile(r'^\\ No newline at end of file')
                 # header of a single file diff in git flavour. It is matched against
                 # chunks produced by splitting the diff on '\ndiff --git', so a chunk
                 # starts with " a/<path> b/<path>"; the first line inside the pattern
                 # is only a comment (verbose mode)
                 _git_header_re = re.compile(r"""
                     #^diff[ ]--git
                         [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
                     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
                        ^rename[ ]from[ ](?P<rename_from>\S+)\n
                        ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
                     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
                        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
                     (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
                     (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
                     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
                         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
                     (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
                     (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
                 """, re.VERBOSE | re.MULTILINE)
                 # same header in hg flavour: the old/new mode lines are expected before
                 # the similarity index and rename lines, and the similarity index is
                 # optional independently of the rename lines
                 _hg_header_re = re.compile(r"""
                     #^diff[ ]--git
                         [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
                     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
                        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
                     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
                     (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
                        ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
                     (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
                     (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
                     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
                         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
                     (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
                     (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
                 """, re.VERBOSE | re.MULTILINE)
                 # used for inline highlighter word split; the split keeps the
                 # separators (already escaped html entities or non-word characters)
                 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
                 def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
                     """
                     :param diff:   a text in diff format
                     :param vcs: type of version controll hg or git
                     :param format: format of diff passed, `udiff` or `gitdiff`
                     :param diff_limit: define the size of diff that is considered "big"
                         based on that parameter cut off will be triggered, set to None
                         to show full diff
                     """
                     if not isinstance(diff, basestring):
                         raise Exception('Diff must be a basestring got %s instead' % type(diff))
                     self._diff = diff
                     self._format = format
                     self.adds = 0
                     self.removes = 0
                     # calculate diff size
                     self.diff_size = len(diff)
                     self.diff_limit = diff_limit
                     self.cur_diff_size = 0
                     self.parsed = False
                     self.parsed_diff = []
                     self.vcs = vcs
                     if format == 'gitdiff':
                         self.differ = self._highlight_line_difflib
                         self._parser = self._parse_gitdiff
                     else:
                         self.differ = self._highlight_line_udiff
                         self._parser = self._parse_udiff
                 def _copy_iterator(self):
                     """
                     make a fresh copy of generator, we should not iterate thru
                     an original as it's needed for repeating operations on
                     this instance of DiffProcessor
                     """
                     self.__udiff, iterator_copy = tee(self.__udiff)
                     return iterator_copy
                 def _escaper(self, string):
                     """
                     Escape the html special chars of one diff line and check the
                     diff limit.

                     The length of every line passed in is added to
                     ``self.cur_diff_size``.

                     :param string: raw diff line
                     :raises DiffLimitExceeded: when a limit is set and the accumulated
                         size is bigger than it
                     """
                     self.cur_diff_size += len(string)
                     # the escaper is called for each line pulled from the diff, so
                     # the limit is checked line by line
                     limit = self.diff_limit
                     if limit is not None and self.cur_diff_size > limit:
                         raise DiffLimitExceeded('Diff Limit Exceeded')
                     escaped = safe_unicode(string)
                     # '&' has to go first, otherwise the entities produced for
                     # '<' and '>' would be escaped again
                     for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
                         escaped = escaped.replace(char, entity)
                     return escaped
                 def _line_counter(self, l):
                     """
                     Bump ``self.adds`` / ``self.removes`` for an added or removed diff
                     line and return the line converted with ``safe_unicode``.

                     Lines starting with ``+++`` or ``---`` are not counted.

                     :param l: single diff line
                     """
                     if l.startswith('+'):
                         if not l.startswith('+++'):
                             self.adds += 1
                     elif l.startswith('-'):
                         if not l.startswith('---'):
                             self.removes += 1
                     return safe_unicode(l)
                 def _highlight_line_difflib(self, line, next_):
                     """
                     Highlight inline changes in both lines.
                     """
                     if line['action'] == 'del':
                         old, new = line, next_
                     else:
                         old, new = next_, line
                     oldwords = self._token_re.split(old['line'])
                     newwords = self._token_re.split(new['line'])
                     sequence = difflib.SequenceMatcher(None, oldwords, newwords)
                     oldfragments, newfragments = [], []
                     for tag, i1, i2, j1, j2 in sequence.get_opcodes():
                         oldfrag = ''.join(oldwords[i1:i2])
                         newfrag = ''.join(newwords[j1:j2])
                         if tag != 'equal':
                             if oldfrag:
                                 oldfrag = '<del>%s</del>' % oldfrag
                             if newfrag:
                                 newfrag = '<ins>%s</ins>' % newfrag
                         oldfragments.append(oldfrag)
                         newfragments.append(newfrag)
                     old['line'] = "".join(oldfragments)
                     new['line'] = "".join(newfragments)
                 def _highlight_line_udiff(self, line, next_):
                     """
                     Highlight inline changes in both lines.
                     """
                     start = 0
                     limit = min(len(line['line']), len(next_['line']))
                     while start < limit and line['line'][start] == next_['line'][start]:
                         start += 1
                     end = -1
                     limit -= start
                     while -end <= limit and line['line'][end] == next_['line'][end]:
                         end -= 1
                     end += 1
                     if start or end:
                         def do(l):
                             last = end + len(l['line'])
                             if l['action'] == 'add':
                                 tag = 'ins'
                             else:
                                 tag = 'del'
                             l['line'] = '%s<%s>%s</%s>%s' % (
                                 l['line'][:start],
                                 tag,
                                 l['line'][start:last],
                                 tag,
                                 l['line'][last:]
                             )
                         do(line)
                         do(next_)
                 def _get_header(self, diff_chunk):
                     """
                     parses the diff header, and returns parts, and leftover diff
                     parts consists of 14 elements::
                         a_path, b_path, similarity_index, rename_from, rename_to,
                         old_mode, new_mode, new_file_mode, deleted_file_mode,
                         a_blob_id, b_blob_id, b_mode, a_file, b_file
                     :param diff_chunk:
                     :type diff_chunk:
                     """
                     if self.vcs == 'git':
                         match = self._git_header_re.match(diff_chunk)
                         diff = diff_chunk[match.end():]
                         return match.groupdict(), imap(self._escaper, diff.splitlines(1))
                     elif self.vcs == 'hg':
                         match = self._hg_header_re.match(diff_chunk)
                         diff = diff_chunk[match.end():]
                         return match.groupdict(), imap(self._escaper, diff.splitlines(1))
                     else:
                         raise Exception('VCS type %s is not supported' % self.vcs)
                 def _clean_line(self, line, command):
                     if command in ['+', '-', ' ']:
                         #only modify the line if it's actually a diff thing
                         line = line[1:]
                     return line
                 def _parse_gitdiff(self, inline_diff=True):
                     """
                     Parse ``self._diff`` (git style diff) into a list of per file dicts.

                     Each dict has the keys ``filename``, ``old_revision``,
                     ``new_revision``, ``chunks``, ``operation`` (``A``, ``M`` or ``D``)
                     and ``stats``. The result is sorted by operation: added, modified,
                     deleted.

                     When the diff limit is exceeded while parsing a file, that file
                     and all following ones are dropped and the files parsed so far are
                     returned wrapped in a ``LimitedDiffContainer``.

                     :param inline_diff: when true, a changed line that is directly
                         followed by a changed line with a different action gets its
                         inline changes highlighted with ``self.differ``
                     """
                     _files = []
                     diff_container = lambda arg: arg
                     ##split the diff in chunks of separate --git a/file b/file chunks
                     for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
                         head, diff = self._get_header(raw_diff)
                         op = None
                         stats = None
                         msg = None
                         if not head['a_file'] and head['b_file']:
                             op = 'A'
                         elif head['a_file'] and head['b_file']:
                             op = 'M'
                         elif head['a_file'] and not head['b_file']:
                             op = 'D'
                         else:
                             # no ---/+++ lines at all: the operation has to be
                             # taken from the other header fields
                             if head['deleted_file_mode']:
                                 op = 'D'
                                 stats = ['b', DEL_FILENODE]
                                 msg = 'deleted file'
                             elif head['new_file_mode']:
                                 op = 'A'
                                 stats = ['b', NEW_FILENODE]
                                 msg = 'new file %s' % head['new_file_mode']
                             else:
                                 if head['new_mode'] and head['old_mode']:
                                     stats = ['b', CHMOD_FILENODE]
                                     op = 'M'
                                     msg = ('modified file chmod %s => %s'
                                            % (head['old_mode'], head['new_mode']))
                                 elif (head['rename_from'] and head['rename_to']
                                       and head['rename_from'] != head['rename_to']):
                                     stats = ['b', RENAMED_FILENODE]
                                     op = 'M'
                                     msg = ('file renamed from %s to %s'
                                            % (head['rename_from'], head['rename_to']))
                                 else:
                                     stats = ['b', MOD_FILENODE]
                                     op = 'M'
                                     msg = 'modified file'
                         if head['a_file'] or head['b_file']: # a real diff
                             try:
                                 chunks, stats = self._parse_lines(diff)
                             except DiffLimitExceeded:
                                 diff_container = lambda _diff: LimitedDiffContainer(
                                                             self.diff_limit,
                                                             self.cur_diff_size,
                                                             _diff)
                                 break
                         else: # GIT binary patch (or empty diff)
                             chunks = []
                             if not msg: # don't overwrite more important message
                                 msg = 'binary diff not shown'
                         if msg:
                             # the message is shown as a pseudo chunk in front of
                             # the real ones
                             chunks.insert(0, [{
                                 'old_lineno': '',
                                 'new_lineno': '',
                                 'action':     'binary',
                                 'line':       msg,
                             }])
                         _files.append({
                             'filename':         head['b_path'],
                             'old_revision':     head['a_blob_id'],
                             'new_revision':     head['b_blob_id'],
                             'chunks':           chunks,
                             'operation':        op,
                             'stats':            stats,
                         })
                     sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
                     if not inline_diff:
                         return diff_container(sorted(_files, key=sorter))
                     # highlight inline changes
                     for diff_data in _files:
                         for chunk in diff_data['chunks']:
                             lineiter = iter(chunk)
                             try:
                                 while 1:
                                     line = lineiter.next()
                                     if line['action'] not in ['unmod', 'context']:
                                         nextline = lineiter.next()
                                         # only a pair of lines with different
                                         # actions is compared
                                         if nextline['action'] in ['unmod', 'context'] or \
                                            nextline['action'] == line['action']:
                                             continue
                                         self.differ(line, nextline)
                             except StopIteration:
                                 pass
                     return diff_container(sorted(_files, key=sorter))
                 def _parse_udiff(self, inline_diff=True):
                     raise NotImplementedError()
                 def _parse_lines(self, diff):
                     """
                     Parse the diff an return data for the template.
                     """
                     lineiter = iter(diff)
                     stats = [0, 0]
                     try:
                         chunks = []
                         line = lineiter.next()
                         while line:
                             lines = []
                             chunks.append(lines)
                             match = self._chunk_re.match(line)
                             if not match:
                                 break
                             gr = match.groups()
                             (old_line, old_end,
                              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                             old_line -= 1
                             new_line -= 1
                             context = len(gr) == 5
                             old_end += old_line
                             new_end += new_line
                             if context:
                                 # skip context only if it's first line
                                 if int(gr[0]) > 1:
                                     lines.append({
                                         'old_lineno': '...',
                                         'new_lineno': '...',
                                         'action':     'context',
                                         'line':       line,
                                     })
                             line = lineiter.next()
                             while old_line < old_end or new_line < new_end:
                                 command = ' '
                                 if line:
                                     command = line[0]
                                 affects_old = affects_new = False
                                 # ignore those if we don't expect them
                                 if command in '#@':
                                     continue
                                 elif command == '+':
                                     affects_new = True
                                     action = 'add'
                                     stats[0] += 1
                                 elif command == '-':
                                     affects_old = True
                                     action = 'del'
                                     stats[1] += 1
                                 else:
                                     affects_old = affects_new = True
                                     action = 'unmod'
                                 if not self._newline_marker.match(line):
                                     old_line += affects_old
                                     new_line += affects_new
                                     lines.append({
                                         'old_lineno':   affects_old and old_line or '',
                                         'new_lineno':   affects_new and new_line or '',
                                         'action':       action,
                                         'line':         self._clean_line(line, command)
                                     })
                                 line = lineiter.next()
                                 if self._newline_marker.match(line):
                                     # we need to append to lines, since this is not
                                     # counted in the line specs of diff
                                     lines.append({
                                         'old_lineno':   '...',
                                         'new_lineno':   '...',
                                         'action':       'context',
                                         'line':         self._clean_line(line, command)
                                     })
                     except StopIteration:
                         pass
                     return chunks, stats
                 def _safe_id(self, idstring):
                     """Make a string safe for including in an id attribute.
                     The HTML spec says that id attributes 'must begin with
                     a letter ([A-Za-z]) and may be followed by any number
                     of letters, digits ([0-9]), hyphens ("-"), underscores
                     ("_"), colons (":"), and periods (".")'. These regexps
                     are slightly over-zealous, in that they remove colons
                     and periods unnecessarily.
                     Whitespace is transformed into underscores, and then
                     anything which is not a hyphen or a character that
                     matches \w (alphanumerics and underscore) is removed.
                     """
                     # Transform all whitespace to underscore
                     idstring = re.sub(r'\s', "_", '%s' % idstring)
                     # Remove everything that is not a hyphen or a member of \w
                     idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                     return idstring
                 def prepare(self, inline_diff=True):
                     """
                     Prepare the passed udiff for HTML rendering. It'l return a list
                     of dicts with diff information
                     """
                     parsed = self._parser(inline_diff=inline_diff)
                     self.parsed = True
                     self.parsed_diff = parsed
                     return parsed
                 def as_raw(self, diff_lines=None):
                     """
                     Returns raw string diff
                     """
                     return self._diff
                     #return u''.join(imap(self._line_counter, self._diff.splitlines(1)))
                 def as_html(self, table_class='code-difftable', line_class='line',
                             old_lineno_class='lineno old', new_lineno_class='lineno new',
                             code_class='code', enable_comments=False, parsed_lines=None):
                     """
                     Return given diff as html table with customized css classes
                     """
                     def _link_to_if(condition, label, url):
                         """
                         Generates a link if condition is meet or just the label if not.
                         """
                         if condition:
                             return '''<a href="%(url)s">%(label)s</a>''' % {
                                 'url': url,
                                 'label': label
                             }
                         else:
                             return label
                     if not self.parsed:
                         self.prepare()
                     diff_lines = self.parsed_diff
                     if parsed_lines:
                         diff_lines = parsed_lines
                     _html_empty = True
                     _html = []
                     _html.append('''<table class="%(table_class)s">\n''' % {
                         'table_class': table_class
                     })
                     for diff in diff_lines:
                         for line in diff['chunks']:
                             _html_empty = False
                             for change in line:
                                 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                                     'lc': line_class,
                                     'action': change['action']
                                 })
                                 anchor_old_id = ''
                                 anchor_new_id = ''
                                 anchor_old = "%(filename)s_o%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['old_lineno']
                                 }
                                 anchor_new = "%(filename)s_n%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['new_lineno']
                                 }
                                 cond_old = (change['old_lineno'] != '...' and
                                             change['old_lineno'])
                                 cond_new = (change['new_lineno'] != '...' and
                                             change['new_lineno'])
                                 if cond_old:
                                     anchor_old_id = 'id="%s"' % anchor_old
                                 if cond_new:
                                     anchor_new_id = 'id="%s"' % anchor_new
                                 ###########################################################
                                 # OLD LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                                     'a_id': anchor_old_id,
                                     'olc': old_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(True, change['old_lineno'],
                                                         '#%s' % anchor_old)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # NEW LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                                     'a_id': anchor_new_id,
                                     'nlc': new_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(True, change['new_lineno'],
                                                         '#%s' % anchor_new)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # CODE
                                 ###########################################################
                                 comments = '' if enable_comments else 'no-comment'
                                 _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                                     'cc': code_class,
                                     'inc': comments
                                 })
                                 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                                     'code': change['line']
                                 })
                                 _html.append('''\t</td>''')
                                 _html.append('''\n</tr>\n''')
                     _html.append('''</table>''')
                     if _html_empty:
                         return None
                     return ''.join(_html)
                 def stat(self):
                     """
                     Report the line counters kept on this instance.

                     :returns: two-item tuple ``(adds, removes)`` -- the added and
                         removed line counts, read from ``self.adds`` and
                         ``self.removes`` in that order.
                     """
                     added = self.adds
                     removed = self.removes
                     return (added, removed)