upstream/kallithea Commit - r3820:8df1bc51

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

"""

2

"""

3

rhodecode.lib.diffs

3

rhodecode.lib.diffs

4

~~~~~~~~~~~~~~~~~~~

4

~~~~~~~~~~~~~~~~~~~

5

6

Set of diffing helpers, previously part of vcs

6

Set of diffing helpers, previously part of vcs

7

8

9

:created_on: Dec 4, 2011

9

:created_on: Dec 4, 2011

10

:author: marcink

10

:author: marcink

11

12

:original copyright: 2007-2008 by Armin Ronacher

12

:original copyright: 2007-2008 by Armin Ronacher

13

:license: GPLv3, see COPYING for more details.

13

:license: GPLv3, see COPYING for more details.

14

"""

14

"""

15

# This program is free software: you can redistribute it and/or modify

15

# This program is free software: you can redistribute it and/or modify

16

# it under the terms of the GNU General Public License as published by

16

# it under the terms of the GNU General Public License as published by

17

# the Free Software Foundation, either version 3 of the License, or

17

# the Free Software Foundation, either version 3 of the License, or

18

# (at your option) any later version.

18

# (at your option) any later version.

19

#

19

#

20

# This program is distributed in the hope that it will be useful,

20

# This program is distributed in the hope that it will be useful,

21

# but WITHOUT ANY WARRANTY; without even the implied warranty of

21

# but WITHOUT ANY WARRANTY; without even the implied warranty of

22

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

22

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

23

# GNU General Public License for more details.

23

# GNU General Public License for more details.

24

#

24

#

25

# You should have received a copy of the GNU General Public License

25

# You should have received a copy of the GNU General Public License

26

# along with this program. If not, see <http://www.gnu.org/licenses/>.

26

# along with this program. If not, see <http://www.gnu.org/licenses/>.

27

28

import re

28

import re

29

import difflib

29

import difflib

30

import logging

30

import logging

31

32

from itertools import tee, imap

32

from itertools import tee, imap

33

34

from pylons.i18n.translation import _

34

from pylons.i18n.translation import _

35

36

from rhodecode.lib.vcs.exceptions import VCSError

36

from rhodecode.lib.vcs.exceptions import VCSError

37

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

37

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

38

from rhodecode.lib.vcs.backends.base import EmptyChangeset

38

from rhodecode.lib.vcs.backends.base import EmptyChangeset

39

from rhodecode.lib.helpers import escape

39

from rhodecode.lib.helpers import escape

40

from rhodecode.lib.utils2 import safe_unicode, safe_str

40

from rhodecode.lib.utils2 import safe_unicode, safe_str

41

42

log = logging.getLogger(__name__)

42

log = logging.getLogger(__name__)

43

44

45

def wrap_to_table(str_):

45

def wrap_to_table(str_):

46

return '''<table class="code-difftable">

46

return '''<table class="code-difftable">

47

47

48

48

49

49

50

</tr>

50

</tr>

51

</table>''' % str_

51

</table>''' % str_

52

53

54

def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Build the HTML diff table for a pair of file nodes.

    A placeholder table is produced instead of a real diff when either node
    is binary, when the size check against ``cut_off_limit`` fails, or when
    the computed diff is empty.

    :param filenode_old: old version of the file; ``None`` is replaced by an
        empty ``FileNode`` at the path of ``filenode_new``
    :param filenode_new: new version of the file
    :param cut_off_limit: size both nodes must stay below for the diff to be
        rendered; ``None`` disables the size check, while ``-1`` always
        yields the "too big" placeholder
    :param ignore_whitespace: passed on to ``get_gitdiff``
    :param line_context: number of context lines, passed on to ``get_gitdiff``
    :param enable_comments: passed on to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats)`` where ``cs1``/``cs2``
        are the raw ids of the old/new changesets
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    # values reported whenever no real diff gets rendered
    stats = (0, 0)
    size = 0

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('Binary file'))

    elif cut_off_limit != -1 and (cut_off_limit is None or
        (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):

        raw_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                  ignore_whitespace=ignore_whitespace,
                                  context=line_context)
        processor = DiffProcessor(raw_gitdiff, format='gitdiff')

        diff = processor.as_html(enable_comments=enable_comments)
        stats = processor.stat()
        # size is measured on the rendered HTML, before any fallback text
        size = len(diff or '')
    else:
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))

    if not diff:
        # an empty diff is either a submodule change or really no change
        submodules = [node for node in [filenode_new, filenode_old]
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.changeset.raw_id
    cs2 = filenode_new.changeset.raw_id

    return size, cs1, cs2, diff, stats

98

99

100

def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: ``FileNode`` holding the old version
    :param filenode_new: ``FileNode`` holding the new version; its changeset
        provides the repository the diff is requested from
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value falls back to 3
    :returns: the diff as returned by ``repo.get_diff``, or ``''`` when
        either node is a ``SubModuleNode``
    :raises VCSError: when either node is not a ``FileNode``
    """
    # make sure we pass in default context
    if not context:
        context = 3

    # submodules carry no file content to diff
    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % filenode.__class__)

    repo = filenode_new.changeset.repository
    empty_id = repo.EMPTY_CHANGESET
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', empty_id)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', empty_id)

    return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                         ignore_whitespace, context)

125

126

# Kinds of per-file change; stored as the second element of a file's
# ``stats`` entry by DiffProcessor._parse_gitdiff.
NEW_FILENODE = 1      # file was added
DEL_FILENODE = 2      # file was deleted
MOD_FILENODE = 3      # file content was modified
RENAMED_FILENODE = 4  # file was renamed
CHMOD_FILENODE = 5    # file mode was changed

131

132

133

class DiffLimitExceeded(Exception):
    """Raised when the amount of processed diff text exceeds the diff limit."""

135

136

137

class LimitedDiffContainer(object):
    """
    Iterable wrapper around a parsed diff that was cut off by the diff limit.

    Keeps the limit and the size reached next to the wrapped diff.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        """
        :param diff_limit: the limit that was exceeded
        :param cur_diff_size: amount of diff text processed when cut off
        :param diff: the (partial) parsed diff being wrapped
        """
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size
        self.diff = diff

    def __iter__(self):
        """Yield the entries of the wrapped diff in order."""
        for entry in self.diff:
            yield entry

147

148

149

class DiffProcessor(object):

149

class DiffProcessor(object):

150

"""

150

"""

151

Give it a unified or git diff and it returns a list of the files that were

151

Give it a unified or git diff and it returns a list of the files that were

152

mentioned in the diff together with a dict of meta information that

152

mentioned in the diff together with a dict of meta information that

153

can be used to render it in a HTML template.

153

can be used to render it in a HTML template.

154

"""

154

"""

155

_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

155

_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

156

_newline_marker = re.compile(r'^\\ No newline at end of file')

156

_newline_marker = re.compile(r'^\\ No newline at end of file')

157

_git_header_re = re.compile(r"""

157

_git_header_re = re.compile(r"""

158

#^diff[ ]--git

158

#^diff[ ]--git

159

[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

159

[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

160

(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n

160

(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n

161

^rename[ ]from[ ](?P<rename_from>\S+)\n

161

^rename[ ]from[ ](?P<rename_from>\S+)\n

162

^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?

162

^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?

163

(?:^old[ ]mode[ ](?P<old_mode>\d+)\n

163

(?:^old[ ]mode[ ](?P<old_mode>\d+)\n

164

^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

164

^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

165

(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

165

(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

166

(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

166

(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

167

(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

167

(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

168

\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

168

\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

169

(?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?

169

(?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?

170

(?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?

170

(?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?

171

""", re.VERBOSE | re.MULTILINE)

171

""", re.VERBOSE | re.MULTILINE)

172

_hg_header_re = re.compile(r"""

172

_hg_header_re = re.compile(r"""

173

#^diff[ ]--git

173

#^diff[ ]--git

174

[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

174

[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

175

(?:^old[ ]mode[ ](?P<old_mode>\d+)\n

175

(?:^old[ ]mode[ ](?P<old_mode>\d+)\n

176

^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

176

^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

177

(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?

177

(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?

178

(?:^rename[ ]from[ ](?P<rename_from>\S+)\n

178

(?:^rename[ ]from[ ](?P<rename_from>\S+)\n

179

^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?

179

^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?

180

(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

180

(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

181

(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

181

(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

182

(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

182

(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

183

\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

183

\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

184

(?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?

184

(?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?

185

(?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?

185

(?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?

186

""", re.VERBOSE | re.MULTILINE)

186

""", re.VERBOSE | re.MULTILINE)

187

188

#used for inline highlighter word split

188

#used for inline highlighter word split

189

_token_re = re.compile(r'()(>|<|&|\W+?)')

189

_token_re = re.compile(r'()(>|<|&|\W+?)')

190

191

def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
    """
    :param diff: a text in diff format
    :param vcs: type of version control, hg or git
    :param format: format of diff passed, `udiff` or `gitdiff`
    :param diff_limit: define the size of diff that is considered "big"
        based on that parameter cut off will be triggered, set to None
        to show full diff
    """
    if not isinstance(diff, basestring):
        raise Exception('Diff must be a basestring got %s instead' % type(diff))

    # input and configuration
    self._diff = diff
    self._format = format
    self.vcs = vcs
    self.diff_limit = diff_limit

    # counters and parse state
    self.adds = 0
    self.removes = 0
    # calculate diff size
    self.diff_size = len(diff)
    self.cur_diff_size = 0
    self.parsed = False
    self.parsed_diff = []

    # every format other than 'gitdiff' gets the udiff highlighter/parser
    if format == 'gitdiff':
        self.differ, self._parser = (self._highlight_line_difflib,
                                     self._parse_gitdiff)
    else:
        self.differ, self._parser = (self._highlight_line_udiff,
                                     self._parse_udiff)

221

222

def _copy_iterator(self):

222

def _copy_iterator(self):

223

"""

223

"""

224

make a fresh copy of generator, we should not iterate thru

224

make a fresh copy of generator, we should not iterate thru

225

an original as it's needed for repeating operations on

225

an original as it's needed for repeating operations on

226

this instance of DiffProcessor

226

this instance of DiffProcessor

227

"""

227

"""

228

self.__udiff, iterator_copy = tee(self.__udiff)

228

self.__udiff, iterator_copy = tee(self.__udiff)

229

return iterator_copy

229

return iterator_copy

230

231

def _escaper(self, string):
    """
    Escaper for diff: escapes HTML special chars and checks the diff limit

    The length of ``string`` is added to ``self.cur_diff_size`` first; the
    limit check therefore runs once for every line that gets escaped.

    :param string: one raw line of diff text
    :type string: basestring
    :returns: the line as unicode with ``&``, ``<`` and ``>`` replaced by
        their HTML entities
    :raises DiffLimitExceeded: when ``self.diff_limit`` is set and the
        accumulated size exceeds it
    """

    self.cur_diff_size += len(string)

    # escaper gets iterated on each .next() call and it checks if each
    # parsed line doesn't exceed the diff limit
    if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
        raise DiffLimitExceeded('Diff Limit Exceeded')

    # '&' must be replaced first, otherwise the ampersands introduced by
    # the other two entities would be escaped a second time
    return safe_unicode(string).replace('&', '&amp;')\
                               .replace('<', '&lt;')\
                               .replace('>', '&gt;')

249

250

def _line_counter(self, l):
    """
    Checks each line and bumps total adds/removes for this diff

    Lines starting with ``+++`` or ``---`` are not counted.

    :param l: one line of diff text
    :returns: the line passed through ``safe_unicode``
    """
    if l.startswith('+'):
        if not l.startswith('+++'):
            self.adds += 1
    elif l.startswith('-'):
        if not l.startswith('---'):
            self.removes += 1
    return safe_unicode(l)

261

262

def _highlight_line_difflib(self, line, next_):

262

def _highlight_line_difflib(self, line, next_):

263

"""

263

"""

264

Highlight inline changes in both lines.

264

Highlight inline changes in both lines.

265

"""

265

"""

266

267

if line['action'] == 'del':

267

if line['action'] == 'del':

268

old, new = line, next_

268

old, new = line, next_

269

else:

269

else:

270

old, new = next_, line

270

old, new = next_, line

271

272

oldwords = self._token_re.split(old['line'])

272

oldwords = self._token_re.split(old['line'])

273

newwords = self._token_re.split(new['line'])

273

newwords = self._token_re.split(new['line'])

274

sequence = difflib.SequenceMatcher(None, oldwords, newwords)

274

sequence = difflib.SequenceMatcher(None, oldwords, newwords)

275

276

oldfragments, newfragments = [], []

276

oldfragments, newfragments = [], []

277

for tag, i1, i2, j1, j2 in sequence.get_opcodes():

277

for tag, i1, i2, j1, j2 in sequence.get_opcodes():

278

oldfrag = ''.join(oldwords[i1:i2])

278

oldfrag = ''.join(oldwords[i1:i2])

279

newfrag = ''.join(newwords[j1:j2])

279

newfrag = ''.join(newwords[j1:j2])

280

if tag != 'equal':

280

if tag != 'equal':

281

if oldfrag:

281

if oldfrag:

282

oldfrag = '<del>%s</del>' % oldfrag

282

oldfrag = '<del>%s</del>' % oldfrag

283

if newfrag:

283

if newfrag:

284

newfrag = '<ins>%s</ins>' % newfrag

284

newfrag = '<ins>%s</ins>' % newfrag

285

oldfragments.append(oldfrag)

285

oldfragments.append(oldfrag)

286

newfragments.append(newfrag)

286

newfragments.append(newfrag)

287

288

old['line'] = "".join(oldfragments)

288

old['line'] = "".join(oldfragments)

289

new['line'] = "".join(newfragments)

289

new['line'] = "".join(newfragments)

290

291

def _highlight_line_udiff(self, line, next_):

291

def _highlight_line_udiff(self, line, next_):

292

"""

292

"""

293

Highlight inline changes in both lines.

293

Highlight inline changes in both lines.

294

"""

294

"""

295

start = 0

295

start = 0

296

limit = min(len(line['line']), len(next_['line']))

296

limit = min(len(line['line']), len(next_['line']))

297

while start < limit and line['line'][start] == next_['line'][start]:

297

while start < limit and line['line'][start] == next_['line'][start]:

298

start += 1

298

start += 1

299

end = -1

299

end = -1

300

limit -= start

300

limit -= start

301

while -end <= limit and line['line'][end] == next_['line'][end]:

301

while -end <= limit and line['line'][end] == next_['line'][end]:

302

end -= 1

302

end -= 1

303

end += 1

303

end += 1

304

if start or end:

304

if start or end:

305

def do(l):

305

def do(l):

306

last = end + len(l['line'])

306

last = end + len(l['line'])

307

if l['action'] == 'add':

307

if l['action'] == 'add':

308

tag = 'ins'

308

tag = 'ins'

309

else:

309

else:

310

tag = 'del'

310

tag = 'del'

311

l['line'] = '%s<%s>%s</%s>%s' % (

311

l['line'] = '%s<%s>%s</%s>%s' % (

312

l['line'][:start],

312

l['line'][:start],

313

tag,

313

tag,

314

l['line'][start:last],

314

l['line'][start:last],

315

tag,

315

tag,

316

l['line'][last:]

316

l['line'][last:]

317

)

317

)

318

do(line)

318

do(line)

319

do(next_)

319

do(next_)

320

321

def _get_header(self, diff_chunk):

321

def _get_header(self, diff_chunk):

322

"""

322

"""

323

parses the diff header, and returns parts, and leftover diff

323

parses the diff header, and returns parts, and leftover diff

324

parts consists of 14 elements::

324

parts consists of 14 elements::

325

326

a_path, b_path, similarity_index, rename_from, rename_to,

326

a_path, b_path, similarity_index, rename_from, rename_to,

327

old_mode, new_mode, new_file_mode, deleted_file_mode,

327

old_mode, new_mode, new_file_mode, deleted_file_mode,

328

a_blob_id, b_blob_id, b_mode, a_file, b_file

328

a_blob_id, b_blob_id, b_mode, a_file, b_file

329

330

:param diff_chunk:

330

:param diff_chunk:

331

:type diff_chunk:

331

:type diff_chunk:

332

"""

332

"""

333

334

if self.vcs == 'git':

334

if self.vcs == 'git':

335

match = self._git_header_re.match(diff_chunk)

335

match = self._git_header_re.match(diff_chunk)

336

diff = diff_chunk[match.end():]

336

diff = diff_chunk[match.end():]

337

return match.groupdict(), imap(self._escaper, diff.splitlines(1))

337

return match.groupdict(), imap(self._escaper, diff.splitlines(1))

338

elif self.vcs == 'hg':

338

elif self.vcs == 'hg':

339

match = self._hg_header_re.match(diff_chunk)

339

match = self._hg_header_re.match(diff_chunk)

340

diff = diff_chunk[match.end():]

340

diff = diff_chunk[match.end():]

341

return match.groupdict(), imap(self._escaper, diff.splitlines(1))

341

return match.groupdict(), imap(self._escaper, diff.splitlines(1))

342

else:

342

else:

343

raise Exception('VCS type %s is not supported' % self.vcs)

343

raise Exception('VCS type %s is not supported' % self.vcs)

344

345

def _clean_line(self, line, command):

345

def _clean_line(self, line, command):

346

if command in ['+', '-', ' ']:

346

if command in ['+', '-', ' ']:

347

#only modify the line if it's actually a diff thing

347

#only modify the line if it's actually a diff thing

348

line = line[1:]

348

line = line[1:]

349

return line

349

return line

350

351

def _parse_gitdiff(self, inline_diff=True):
    """
    Parse ``self._diff`` (git style diff text) into per-file entries.

    Each entry is a dict with the keys ``filename``, ``old_revision``,
    ``new_revision``, ``chunks``, ``operation`` (``'A'``, ``'M'`` or
    ``'D'``) and ``stats``. Entries are returned sorted by operation:
    added, then modified, then deleted.

    When the diff limit is hit, parsing stops at the offending file (which
    is not included) and the result is wrapped in a
    ``LimitedDiffContainer``.

    :param inline_diff: when true, run ``self.differ`` on neighbouring
        changed lines to highlight inline changes
    :returns: sorted list of file entries, or a ``LimitedDiffContainer``
        around it
    """
    parsed_files = []
    diff_container = lambda arg: arg

    ##split the diff in chunks of separate --git a/file b/file chunks
    for file_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
        head, body_lines = self._get_header(file_diff)

        op = None
        stats = None
        msgs = []

        if head['deleted_file_mode']:
            op = 'D'
            stats = ['b', DEL_FILENODE]
            msgs.append('deleted file')
        elif head['new_file_mode']:
            op = 'A'
            stats = ['b', NEW_FILENODE]
            msgs.append('new file %s' % head['new_file_mode'])
        else:
            if head['new_mode'] and head['old_mode']:
                op = 'M'
                stats = ['b', CHMOD_FILENODE]
                msgs.append('modified file chmod %s => %s'
                            % (head['old_mode'], head['new_mode']))
            if (head['rename_from'] and head['rename_to']
                    and head['rename_from'] != head['rename_to']):
                op = 'M'
                stats = ['b', RENAMED_FILENODE]  # might overwrite CHMOD_FILENODE
                msgs.append('file renamed from %s to %s'
                            % (head['rename_from'], head['rename_to']))

        if op is None:  # fall back: detect missed old style add or remove
            if not head['a_file'] and head['b_file']:
                op = 'A'
                stats = ['b', NEW_FILENODE]
                msgs.append('new file')
            elif head['a_file'] and not head['b_file']:
                op = 'D'
                stats = ['b', DEL_FILENODE]
                msgs.append('deleted file')

        if op is None:
            # nothing more specific was detected: plain modification
            op = 'M'
            stats = ['b', MOD_FILENODE]

        if head['a_file'] or head['b_file']:  # a real diff
            try:
                # the per-line stats replace the operation marker set above
                chunks, stats = self._parse_lines(body_lines)
            except DiffLimitExceeded:
                diff_container = lambda _diff: LimitedDiffContainer(
                    self.diff_limit,
                    self.cur_diff_size,
                    _diff)
                break
        else:  # GIT binary patch (or empty diff)
            chunks = []
            # or no diff because it was a rename or chmod or add/remove
            # of empty file
            msgs.append('binary diff not shown')

        if msgs:
            # the messages are shown as a leading pseudo chunk
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': 'binary',
                'line': msg,
            } for msg in msgs])

        parsed_files.append({
            'filename': head['b_path'],
            'old_revision': head['a_blob_id'],
            'new_revision': head['b_blob_id'],
            'chunks': chunks,
            'operation': op,
            'stats': stats,
        })

    operation_rank = {'A': 0, 'M': 1, 'D': 2}
    sorter = lambda info: operation_rank.get(info['operation'])

    if inline_diff:
        # highlight inline changes
        for diff_data in parsed_files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while True:
                        line = next(lineiter)
                        if line['action'] in ['unmod', 'context']:
                            continue
                        nextline = next(lineiter)
                        # only a pair of different change kinds is compared
                        if (nextline['action'] in ['unmod', 'context'] or
                                nextline['action'] == line['action']):
                            continue
                        self.differ(line, nextline)
                except StopIteration:
                    pass

    return diff_container(sorted(parsed_files, key=sorter))

451

448

452

def _parse_udiff(self, inline_diff=True):

449

def _parse_udiff(self, inline_diff=True):

453

raise NotImplementedError()

450

raise NotImplementedError()

454

451

455

def _parse_lines(self, diff):

452

def _parse_lines(self, diff):

456

"""

453

"""

457

Parse the diff an return data for the template.

454

Parse the diff an return data for the template.

458

"""

455

"""

459

456

460

lineiter = iter(diff)

457

lineiter = iter(diff)

461

stats = [0, 0]

458

stats = [0, 0]

462

459

463

try:

460

try:

464

chunks = []

461

chunks = []

465

line = lineiter.next()

462

line = lineiter.next()

466

463

467

while line:

464

while line:

468

lines = []

465

lines = []

469

chunks.append(lines)

466

chunks.append(lines)

470

467

471

match = self._chunk_re.match(line)

468

match = self._chunk_re.match(line)

472

469

473

if not match:

470

if not match:

474

break

471

break

475

472

476

gr = match.groups()

473

gr = match.groups()

477

(old_line, old_end,

474

(old_line, old_end,

478

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

475

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

479

old_line -= 1

476

old_line -= 1

480

new_line -= 1

477

new_line -= 1

481

478

482

context = len(gr) == 5

479

context = len(gr) == 5

483

old_end += old_line

480

old_end += old_line

484

new_end += new_line

481

new_end += new_line

485

482

486

if context:

483

if context:

487

# skip context only if it's first line

484

# skip context only if it's first line

488

if int(gr[0]) > 1:

485

if int(gr[0]) > 1:

489

lines.append({

486

lines.append({

490

'old_lineno': '...',

487

'old_lineno': '...',

491

'new_lineno': '...',

488

'new_lineno': '...',

492

'action': 'context',

489

'action': 'context',

493

'line': line,

490

'line': line,

494

})

491

})

495

492

496

line = lineiter.next()

493

line = lineiter.next()

497

494

498

while old_line < old_end or new_line < new_end:

495

while old_line < old_end or new_line < new_end:

499

command = ' '

496

command = ' '

500

if line:

497

if line:

501

command = line[0]

498

command = line[0]

502

499

503

affects_old = affects_new = False

500

affects_old = affects_new = False

504

501

505

# ignore those if we don't expect them

502

# ignore those if we don't expect them

506

if command in '#@':

503

if command in '#@':

507

continue

504

continue

508

elif command == '+':

505

elif command == '+':

509

affects_new = True

506

affects_new = True

510

action = 'add'

507

action = 'add'

511

stats[0] += 1

508

stats[0] += 1

512

elif command == '-':

509

elif command == '-':

513

affects_old = True

510

affects_old = True

514

action = 'del'

511

action = 'del'

515

stats[1] += 1

512

stats[1] += 1

516

else:

513

else:

517

affects_old = affects_new = True

514

affects_old = affects_new = True

518

action = 'unmod'

515

action = 'unmod'

519

516

520

if not self._newline_marker.match(line):

517

if not self._newline_marker.match(line):

521

old_line += affects_old

518

old_line += affects_old

522

new_line += affects_new

519

new_line += affects_new

523

lines.append({

520

lines.append({

524

'old_lineno': affects_old and old_line or '',

521

'old_lineno': affects_old and old_line or '',

525

'new_lineno': affects_new and new_line or '',

522

'new_lineno': affects_new and new_line or '',

526

'action': action,

523

'action': action,

527

'line': self._clean_line(line, command)

524

'line': self._clean_line(line, command)

528

})

525

})

529

526

530

line = lineiter.next()

527

line = lineiter.next()

531

528

532

if self._newline_marker.match(line):

529

if self._newline_marker.match(line):

533

# we need to append to lines, since this is not

530

# we need to append to lines, since this is not

534

# counted in the line specs of diff

531

# counted in the line specs of diff

535

lines.append({

532

lines.append({

536

'old_lineno': '...',

533

'old_lineno': '...',

537

'new_lineno': '...',

534

'new_lineno': '...',

538

'action': 'context',

535

'action': 'context',

539

'line': self._clean_line(line, command)

536

'line': self._clean_line(line, command)

540

})

537

})

541

538

542

except StopIteration:

539

except StopIteration:

543

pass

540

pass

544

return chunks, stats

541

return chunks, stats

545

542

546

def _safe_id(self, idstring):

543

def _safe_id(self, idstring):

547

"""Make a string safe for including in an id attribute.

544

"""Make a string safe for including in an id attribute.

548

545

549

The HTML spec says that id attributes 'must begin with

546

The HTML spec says that id attributes 'must begin with

550

a letter ([A-Za-z]) and may be followed by any number

547

a letter ([A-Za-z]) and may be followed by any number

551

of letters, digits ([0-9]), hyphens ("-"), underscores

548

of letters, digits ([0-9]), hyphens ("-"), underscores

552

("_"), colons (":"), and periods (".")'. These regexps

549

("_"), colons (":"), and periods (".")'. These regexps

553

are slightly over-zealous, in that they remove colons

550

are slightly over-zealous, in that they remove colons

554

and periods unnecessarily.

551

and periods unnecessarily.

555

552

556

Whitespace is transformed into underscores, and then

553

Whitespace is transformed into underscores, and then

557

anything which is not a hyphen or a character that

554

anything which is not a hyphen or a character that

558

matches \w (alphanumerics and underscore) is removed.

555

matches \w (alphanumerics and underscore) is removed.

559

556

560

"""

557

"""

561

# Transform all whitespace to underscore

558

# Transform all whitespace to underscore

562

idstring = re.sub(r'\s', "_", '%s' % idstring)

559

idstring = re.sub(r'\s', "_", '%s' % idstring)

563

# Remove everything that is not a hyphen or a member of \w

560

# Remove everything that is not a hyphen or a member of \w

564

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

561

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

565

return idstring

562

return idstring

566

563

567

def prepare(self, inline_diff=True):
    """
    Prepare the passed udiff for HTML rendering. It'll return a list
    of dicts with diff information

    :param inline_diff: passed through unchanged to ``self._parser``
    :returns: the value returned by ``self._parser``; the same object is
        stored on ``self.parsed_diff`` and ``self.parsed`` is set to True
    """
    parsed = self._parser(inline_diff=inline_diff)
    self.parsed = True
    self.parsed_diff = parsed
    return parsed

576

573

577

def as_raw(self, diff_lines=None):
    """
    Returns raw string diff

    :param diff_lines: not used by this method; kept in the signature so
        existing callers that pass it keep working
    :returns: ``self._diff`` unchanged
    """
    return self._diff

583

580

584

def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Return given diff as html table with customized css classes

    Each entry of the parsed diff must be a dict with a ``'filename'`` and
    a ``'chunks'`` key; every chunk is an iterable of change dicts with the
    keys ``'action'``, ``'old_lineno'``, ``'new_lineno'`` and ``'line'``.
    One ``<tr>`` with three ``<td>`` cells (old line number, new line
    number, code) is emitted per change.

    :param table_class: css class of the ``<table>``
    :param line_class: css class of every ``<tr>`` (the change action is
        appended as a second class)
    :param old_lineno_class: css class of the old line number cell
    :param new_lineno_class: css class of the new line number cell
    :param code_class: css class of the code cell
    :param enable_comments: when false, the code cell additionally gets
        the ``no-comment`` class
    :param parsed_lines: if truthy, rendered instead of
        ``self.parsed_diff``
    :returns: the html string, or None when no chunk was seen at all
    """
    def _link_to_if(condition, label, url):
        """
        Generates a link if condition is met or just the label if not.
        """
        if condition:
            return '''<a href="%(url)s">%(label)s</a>''' % {
                'url': url,
                'label': label
            }
        return label

    if not self.parsed:
        self.prepare()

    diff_lines = self.parsed_diff
    if parsed_lines:
        diff_lines = parsed_lines

    # does not depend on the rendered line, so compute it once
    comments = '' if enable_comments else 'no-comment'

    _html_empty = True
    _html = []
    _html.append('''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    })

    for diff in diff_lines:
        # The sanitized filename is identical for every line of a file.
        # It is computed lazily on the first rendered change so that a
        # file without changes never touches diff['filename'].
        file_id = None
        for line in diff['chunks']:
            _html_empty = False
            for change in line:
                _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': change['action']
                })
                if file_id is None:
                    file_id = self._safe_id(diff['filename'])
                anchor_old_id = ''
                anchor_new_id = ''
                anchor_old = "%(filename)s_o%(oldline_no)s" % {
                    'filename': file_id,
                    'oldline_no': change['old_lineno']
                }
                anchor_new = "%(filename)s_n%(newline_no)s" % {
                    'filename': file_id,
                    'newline_no': change['new_lineno']
                }
                # only real line numbers (not '' and not the '...'
                # placeholder) get an id attribute on their cell
                cond_old = (change['old_lineno'] != '...' and
                            change['old_lineno'])
                cond_new = (change['new_lineno'] != '...' and
                            change['new_lineno'])
                if cond_old:
                    anchor_old_id = 'id="%s"' % anchor_old
                if cond_new:
                    anchor_new_id = 'id="%s"' % anchor_new
                ###########################################################
                # OLD LINE NUMBER
                ###########################################################
                _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                    'a_id': anchor_old_id,
                    'olc': old_lineno_class
                })

                # the link is always emitted, even for empty or '...'
                # line numbers
                _html.append('''%(link)s''' % {
                    'link': _link_to_if(True, change['old_lineno'],
                                        '#%s' % anchor_old)
                })
                _html.append('''</td>\n''')
                ###########################################################
                # NEW LINE NUMBER
                ###########################################################

                _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                    'a_id': anchor_new_id,
                    'nlc': new_lineno_class
                })

                _html.append('''%(link)s''' % {
                    'link': _link_to_if(True, change['new_lineno'],
                                        '#%s' % anchor_new)
                })
                _html.append('''</td>\n''')
                ###########################################################
                # CODE
                ###########################################################
                _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                    'cc': code_class,
                    'inc': comments
                })
                # NOTE(review): change['line'] is inserted verbatim;
                # presumably it was HTML-escaped when the diff was
                # parsed -- confirm against the parser.
                _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })

                _html.append('''\t</td>''')
                _html.append('''\n</tr>\n''')
    _html.append('''</table>''')
    if _html_empty:
        return None
    return ''.join(_html)

686

683

687

def stat(self):
    """
    Returns tuple of added, and removed lines for this instance

    :returns: ``(self.adds, self.removes)``
    """
    return self.adds, self.removes

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             """
                 rhodecode.lib.diffs
                 ~~~~~~~~~~~~~~~~~~~
                 Set of diffing helpers, previously part of vcs
                 :created_on: Dec 4, 2011
                 :author: marcink
                 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
                 :original copyright: 2007-2008 by Armin Ronacher
                 :license: GPLv3, see COPYING for more details.
             """
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU General Public License as published by
             # the Free Software Foundation, either version 3 of the License, or
             # (at your option) any later version.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             import re
             import difflib
             import logging
             from itertools import tee, imap
             from pylons.i18n.translation import _
             from rhodecode.lib.vcs.exceptions import VCSError
             from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
             from rhodecode.lib.vcs.backends.base import EmptyChangeset
             from rhodecode.lib.helpers import escape
             from rhodecode.lib.utils2 import safe_unicode, safe_str
             log = logging.getLogger(__name__)
             def wrap_to_table(str_):
                 """
                 Wrap ``str_`` in a single-row ``code-difftable`` HTML table.

                 The text is interpolated as given; no HTML escaping is done here.
                 """
                 table_template = '''<table class="code-difftable">
                             <tr class="line no-comment">
                             <td class="lineno new"></td>
                             <td class="code no-comment"><pre>%s</pre></td>
                             </tr>
                           </table>'''
                 return table_template % str_
             def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                             ignore_whitespace=True, line_context=3,
                             enable_comments=False):
                 """
                 Build the HTML diff table for a pair of file nodes.

                 Binary nodes and nodes at or above ``cut_off_limit`` are not diffed;
                 a table holding an explanatory message is returned for them instead.

                 :param filenode_old: old node; ``None`` is replaced by an empty
                     ``FileNode`` at the new node's path
                 :param filenode_new: new node
                 :param cut_off_limit: ``None`` disables the size check, ``-1`` always
                     yields the "too big" message, any other value is compared against
                     both node sizes
                 :param ignore_whitespace: forwarded to ``get_gitdiff``
                 :param line_context: forwarded to ``get_gitdiff`` as ``context``
                 :param enable_comments: forwarded to ``DiffProcessor.as_html``
                 :returns: tuple ``(size, old raw_id, new raw_id, diff html, stats)``
                 """
                 if filenode_old is None:
                     filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

                 # values reported by every branch that does not run the processor
                 stats = (0, 0)
                 size = 0

                 if filenode_old.is_binary or filenode_new.is_binary:
                     diff = wrap_to_table(_('Binary file'))
                 elif cut_off_limit != -1 and (
                         cut_off_limit is None or (
                             filenode_old.size < cut_off_limit and
                             filenode_new.size < cut_off_limit)):
                     raw_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                               ignore_whitespace=ignore_whitespace,
                                               context=line_context)
                     processor = DiffProcessor(raw_gitdiff, format='gitdiff')
                     diff = processor.as_html(enable_comments=enable_comments)
                     stats = processor.stat()
                     size = len(diff or '')
                 else:
                     diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                            'diff menu to display this diff'))

                 if not diff:
                     # nothing rendered: either a submodule or an unchanged file
                     submodule_nodes = [node for node in (filenode_new, filenode_old)
                                        if isinstance(node, SubModuleNode)]
                     if submodule_nodes:
                         message = escape('Submodule %r' % submodule_nodes[0])
                     else:
                         message = _('No changes detected')
                     diff = wrap_to_table(message)

                 return (size, filenode_old.changeset.raw_id,
                         filenode_new.changeset.raw_id, diff, stats)
             def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
                 """
                 Return the git style diff between ``filenode_old`` and ``filenode_new``.

                 An empty string is returned when either node is a ``SubModuleNode``.

                 :param ignore_whitespace: ignore whitespaces in diff
                 :param context: number of context lines; a falsy value means 3
                 :raises VCSError: when a node is not a ``FileNode``
                 """
                 # make sure we pass in default context
                 if not context:
                     context = 3

                 if any(isinstance(node, SubModuleNode)
                        for node in (filenode_new, filenode_old)):
                     return ''

                 for node in (filenode_old, filenode_new):
                     if isinstance(node, FileNode):
                         continue
                     raise VCSError("Given object should be FileNode object, not %s"
                         % node.__class__)

                 repo = filenode_new.changeset.repository
                 # a changeset without raw_id is mapped to the repository's empty changeset
                 old_raw_id, new_raw_id = [
                     getattr(node.changeset, 'raw_id', repo.EMPTY_CHANGESET)
                     for node in (filenode_old, filenode_new)
                 ]
                 return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                                      ignore_whitespace, context)
             # Change-type codes. DiffProcessor._parse_gitdiff stores one of them as
             # the second item of the initial ``stats`` list it builds for each file.
             NEW_FILENODE = 1
             DEL_FILENODE = 2
             MOD_FILENODE = 3
             RENAMED_FILENODE = 4
             CHMOD_FILENODE = 5
             class DiffLimitExceeded(Exception):
                 """
                 Raised by ``DiffProcessor._escaper`` once the amount of diff text
                 processed so far exceeds the configured ``diff_limit``.
                 """
                 pass
             class LimitedDiffContainer(object):
                 """
                 Iterable wrapper around a parsed diff that was cut short, keeping the
                 limit and the size reached alongside the entries.
                 """

                 def __init__(self, diff_limit, cur_diff_size, diff):
                     self.diff_limit = diff_limit
                     self.cur_diff_size = cur_diff_size
                     self.diff = diff

                 def __iter__(self):
                     """Yield the wrapped entries one by one, in their stored order."""
                     for entry in self.diff:
                         yield entry
             class DiffProcessor(object):
                 """
                 Give it a unified or git diff and it returns a list of the files that were
                 mentioned in the diff together with a dict of meta information that
                 can be used to render it in a HTML template.
                 """
                 # Hunk header ``@@ -a[,b] +c[,d] @@ rest``: captures the four numbers
                 # (both counts are optional) and whatever follows the second ``@@``.
                 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
                 # The "\ No newline at end of file" marker line.
                 _newline_marker = re.compile(r'^\\ No newline at end of file')
                 # Header of one per-file section. It is matched against text that
                 # starts right after the ``diff --git`` prefix (the prefix itself is
                 # commented out in the pattern). Here similarity index and rename
                 # from/to form one optional group placed before the old/new mode pair.
                 _git_header_re = re.compile(r"""
                     #^diff[ ]--git
                         [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
                     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
                        ^rename[ ]from[ ](?P<rename_from>\S+)\n
                        ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
                     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
                        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
                     (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
                     (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
                     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
                         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
                     (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
                     (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
                 """, re.VERBOSE | re.MULTILINE)
                 # Same named groups as the git variant, but the old/new mode pair comes
                 # first and similarity index / rename from-to are two separate
                 # optional groups.
                 _hg_header_re = re.compile(r"""
                     #^diff[ ]--git
                         [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
                     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
                        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
                     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
                     (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
                        ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
                     (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
                     (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
                     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
                         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
                     (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
                     (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
                 """, re.VERBOSE | re.MULTILINE)
                 # Used by the inline highlighter to split a line into words; the
                 # separators (escaped entities or a non-word character) are captured
                 # so that they stay in the split result.
                 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
                 def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
                     """
                     Set up a processor for one diff text.

                     :param diff: a text in diff format; must be a ``basestring``
                     :param vcs: type of version control, ``hg`` or ``git``
                     :param format: format of diff passed, ``udiff`` or ``gitdiff``
                     :param diff_limit: define the size of diff that is considered "big";
                         based on that parameter cut off will be triggered, set to
                         ``None`` to show full diff
                     :raises Exception: when ``diff`` is not a ``basestring``
                     """
                     if not isinstance(diff, basestring):
                         raise Exception('Diff must be a basestring got %s instead' % type(diff))

                     # input and configuration
                     self._diff = diff
                     self._format = format
                     self.vcs = vcs
                     self.diff_limit = diff_limit

                     # counters; diff_size is the size of the whole diff text
                     self.adds = 0
                     self.removes = 0
                     self.diff_size = len(diff)
                     self.cur_diff_size = 0

                     # parse state, updated by prepare()
                     self.parsed = False
                     self.parsed_diff = []

                     # pick the inline highlighter and the parser matching the format
                     is_gitdiff = format == 'gitdiff'
                     self.differ = (self._highlight_line_difflib if is_gitdiff
                                    else self._highlight_line_udiff)
                     self._parser = (self._parse_gitdiff if is_gitdiff
                                     else self._parse_udiff)
                 def _copy_iterator(self):
                     """
                     make a fresh copy of generator, we should not iterate thru
                     an original as it's needed for repeating operations on
                     this instance of DiffProcessor
                     """
                     self.__udiff, iterator_copy = tee(self.__udiff)
                     return iterator_copy
                 def _escaper(self, string):
                     """
                     Escape ``&``, ``<`` and ``>`` in one piece of diff text and enforce
                     the diff limit.

                     The length of the unescaped ``string`` is added to
                     ``cur_diff_size`` before the limit is checked.

                     :param string: piece of diff text to escape
                     :raises DiffLimitExceeded: when ``diff_limit`` is set and the
                         running size went above it
                     """
                     self.cur_diff_size += len(string)

                     # the escaper runs once per consumed line, so the limit is
                     # re-checked on every line
                     limit = self.diff_limit
                     if limit is not None and self.cur_diff_size > limit:
                         raise DiffLimitExceeded('Diff Limit Exceeded')

                     escaped = safe_unicode(string)
                     # '&' has to go first, otherwise the entities would be re-escaped
                     for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
                         escaped = escaped.replace(raw, entity)
                     return escaped
                 def _line_counter(self, l):
                     """
                     Bump ``adds`` or ``removes`` for one diff line and return the line
                     converted with ``safe_unicode``.

                     Lines starting with ``+++`` or ``---`` are not counted.

                     :param l: one line of the diff
                     """
                     if l.startswith('+'):
                         if not l.startswith('+++'):
                             self.adds += 1
                     elif l.startswith('-'):
                         if not l.startswith('---'):
                             self.removes += 1
                     return safe_unicode(l)
                 def _highlight_line_difflib(self, line, next_):
                     """
                     Highlight inline changes in both lines.
                     """
                     if line['action'] == 'del':
                         old, new = line, next_
                     else:
                         old, new = next_, line
                     oldwords = self._token_re.split(old['line'])
                     newwords = self._token_re.split(new['line'])
                     sequence = difflib.SequenceMatcher(None, oldwords, newwords)
                     oldfragments, newfragments = [], []
                     for tag, i1, i2, j1, j2 in sequence.get_opcodes():
                         oldfrag = ''.join(oldwords[i1:i2])
                         newfrag = ''.join(newwords[j1:j2])
                         if tag != 'equal':
                             if oldfrag:
                                 oldfrag = '<del>%s</del>' % oldfrag
                             if newfrag:
                                 newfrag = '<ins>%s</ins>' % newfrag
                         oldfragments.append(oldfrag)
                         newfragments.append(newfrag)
                     old['line'] = "".join(oldfragments)
                     new['line'] = "".join(newfragments)
                 def _highlight_line_udiff(self, line, next_):
                     """
                     Highlight inline changes in both lines.
                     """
                     start = 0
                     limit = min(len(line['line']), len(next_['line']))
                     while start < limit and line['line'][start] == next_['line'][start]:
                         start += 1
                     end = -1
                     limit -= start
                     while -end <= limit and line['line'][end] == next_['line'][end]:
                         end -= 1
                     end += 1
                     if start or end:
                         def do(l):
                             last = end + len(l['line'])
                             if l['action'] == 'add':
                                 tag = 'ins'
                             else:
                                 tag = 'del'
                             l['line'] = '%s<%s>%s</%s>%s' % (
                                 l['line'][:start],
                                 tag,
                                 l['line'][start:last],
                                 tag,
                                 l['line'][last:]
                             )
                         do(line)
                         do(next_)
                 def _get_header(self, diff_chunk):
                     """
                     Parse the header of one per-file diff chunk.

                     Returns the named groups of the header together with a lazy
                     iterator over the remaining lines (line endings kept), each passed
                     through ``_escaper``. The groups consist of 14 elements::

                         a_path, b_path, similarity_index, rename_from, rename_to,
                         old_mode, new_mode, new_file_mode, deleted_file_mode,
                         a_blob_id, b_blob_id, b_mode, a_file, b_file

                     :param diff_chunk: text of one file's diff, starting right after
                         the ``diff --git`` prefix
                     :raises Exception: when ``self.vcs`` is neither ``git`` nor ``hg``
                     """
                     if self.vcs == 'git':
                         header_re = self._git_header_re
                     elif self.vcs == 'hg':
                         header_re = self._hg_header_re
                     else:
                         raise Exception('VCS type %s is not supported' % self.vcs)

                     match = header_re.match(diff_chunk)
                     leftover = diff_chunk[match.end():]
                     return match.groupdict(), imap(self._escaper, leftover.splitlines(1))
                 def _clean_line(self, line, command):
                     if command in ['+', '-', ' ']:
                         #only modify the line if it's actually a diff thing
                         line = line[1:]
                     return line
                 def _parse_gitdiff(self, inline_diff=True):
                     """
                     Parse ``self._diff`` as a git style diff.

                     The text is split into one chunk per ``diff --git`` section. For
                     every section the header decides the operation (``A``, ``M`` or
                     ``D``), the body is parsed with ``_parse_lines``, and messages such
                     as "new file" or "deleted file" are put in front of the chunks as
                     rows with action ``binary``.

                     When ``DiffLimitExceeded`` is raised while parsing a section, that
                     section and all following ones are left out and the result is
                     wrapped in a ``LimitedDiffContainer``.

                     :param inline_diff: when true, adjacent changed line pairs get
                         inline highlighting via ``self.differ``
                     :returns: the per-file dicts (keys ``filename``, ``old_revision``,
                         ``new_revision``, ``chunks``, ``operation``, ``stats``) sorted
                         as added, modified, deleted
                     """
                     _files = []
                     diff_container = lambda arg: arg
                     ##split the diff in chunks of separate --git a/file b/file chunks
                     for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
                         head, diff = self._get_header(raw_diff)
                         op = None
                         stats = None
                         msgs = []
                         if head['deleted_file_mode']:
                             op = 'D'
                             stats = ['b', DEL_FILENODE]
                             msgs.append('deleted file')
                         elif head['new_file_mode']:
                             op = 'A'
                             stats = ['b', NEW_FILENODE]
                             msgs.append('new file %s' % head['new_file_mode'])
                         else:
                             if head['new_mode'] and head['old_mode']:
                                 op = 'M'
                                 stats = ['b', CHMOD_FILENODE]
                                 msgs.append('modified file chmod %s => %s'
                                               % (head['old_mode'], head['new_mode']))
                             if (head['rename_from'] and head['rename_to']
                                   and head['rename_from'] != head['rename_to']):
                                 op = 'M'
                                 stats = ['b', RENAMED_FILENODE] # might overwrite CHMOD_FILENODE
                                 msgs.append('file renamed from %s to %s'
                                               % (head['rename_from'], head['rename_to']))
                             if op is None: # fall back: detect missed old style add or remove
                                 if not head['a_file'] and head['b_file']:
                                     op = 'A'
                                     stats = ['b', NEW_FILENODE]
                                     msgs.append('new file')
                                 elif head['a_file'] and not head['b_file']:
                                     op = 'D'
                                     stats = ['b', DEL_FILENODE]
                                     msgs.append('deleted file')
                             if op is None:
                                 op = 'M'
                                 stats = ['b', MOD_FILENODE]
                         if head['a_file'] or head['b_file']: # a real diff
                             try:
                                 # replaces the header based stats with [adds, removes]
                                 chunks, stats = self._parse_lines(diff)
                             except DiffLimitExceeded:
                                 diff_container = lambda _diff: LimitedDiffContainer(
                                                             self.diff_limit,
                                                             self.cur_diff_size,
                                                             _diff)
                                 break
                         else: # GIT binary patch (or empty diff)
                             chunks = []
                             msgs.append('binary diff not shown') # or no diff because it was a rename or chmod or add/remove of empty file
                         if msgs:
                             # the messages become one leading chunk of 'binary' rows
                             chunks.insert(0, [{
                                 'old_lineno': '',
                                 'new_lineno': '',
                                 'action':     'binary',
                                 'line':       msg,
                                 } for msg in msgs])
                         _files.append({
                             'filename':         head['b_path'],
                             'old_revision':     head['a_blob_id'],
                             'new_revision':     head['b_blob_id'],
                             'chunks':           chunks,
                             'operation':        op,
                             'stats':            stats,
                         })
                     sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
                     if not inline_diff:
                         return diff_container(sorted(_files, key=sorter))
                     # highlight inline changes
                     for diff_data in _files:
                         for chunk in diff_data['chunks']:
                             lineiter = iter(chunk)
                             try:
                                 while 1:
                                     line = lineiter.next()
                                     if line['action'] not in ['unmod', 'context']:
                                         nextline = lineiter.next()
                                         # only a changed line directly followed by a
                                         # changed line of the other kind is highlighted
                                         if nextline['action'] in ['unmod', 'context'] or \
                                            nextline['action'] == line['action']:
                                             continue
                                         self.differ(line, nextline)
                             except StopIteration:
                                 pass
                     return diff_container(sorted(_files, key=sorter))
                 def _parse_udiff(self, inline_diff=True):
                     """
                     Parser selected by ``__init__`` for any format other than
                     ``gitdiff``; it is not implemented and always raises
                     ``NotImplementedError``.
                     """
                     raise NotImplementedError()
                 def _parse_lines(self, diff):
                     """
                     Parse the diff and return data for the template.

                     :param diff: iterable of diff lines, positioned at the first hunk
                         header
                     :returns: tuple ``(chunks, stats)``; ``chunks`` holds one list per
                         hunk, each a list of dicts with the keys ``old_lineno``,
                         ``new_lineno``, ``action`` and ``line``; ``stats`` is
                         ``[number of added lines, number of removed lines]``
                     """
                     lineiter = iter(diff)
                     stats = [0, 0]
                     try:
                         chunks = []
                         line = lineiter.next()
                         while line:
                             lines = []
                             # the list is registered before the header check, so a
                             # non-matching line leaves an empty chunk behind
                             chunks.append(lines)
                             match = self._chunk_re.match(line)
                             if not match:
                                 break
                             gr = match.groups()
                             # a count missing from the header defaults to 1
                             (old_line, old_end,
                              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                             # counters are bumped before use, hence start one lower
                             old_line -= 1
                             new_line -= 1
                             # _chunk_re has five groups, so this is always true
                             context = len(gr) == 5
                             # turn the counts into the last line number of the hunk
                             old_end += old_line
                             new_end += new_line
                             if context:
                                 # skip context only if it's first line
                                 if int(gr[0]) > 1:
                                     lines.append({
                                         'old_lineno': '...',
                                         'new_lineno': '...',
                                         'action':     'context',
                                         'line':       line,
                                     })
                             line = lineiter.next()
                             # consume lines until both sides reached the end of the hunk
                             while old_line < old_end or new_line < new_end:
                                 command = ' '
                                 if line:
                                     command = line[0]
                                 affects_old = affects_new = False
                                 # ignore those if we don't expect them
                                 # NOTE(review): ``line`` is not advanced before this
                                 # ``continue``, so a '#' or '@' line reached here looks
                                 # like it re-enters the loop unchanged -- confirm that
                                 # this cannot spin forever on a malformed hunk
                                 if command in '#@':
                                     continue
                                 elif command == '+':
                                     affects_new = True
                                     action = 'add'
                                     stats[0] += 1
                                 elif command == '-':
                                     affects_old = True
                                     action = 'del'
                                     stats[1] += 1
                                 else:
                                     affects_old = affects_new = True
                                     action = 'unmod'
                                 # the "no newline" marker neither counts as a line nor
                                 # gets a row of its own here
                                 if not self._newline_marker.match(line):
                                     old_line += affects_old
                                     new_line += affects_new
                                     lines.append({
                                         'old_lineno':   affects_old and old_line or '',
                                         'new_lineno':   affects_new and new_line or '',
                                         'action':       action,
                                         'line':         self._clean_line(line, command)
                                     })
                                 line = lineiter.next()
                                 if self._newline_marker.match(line):
                                     # we need to append to lines, since this is not
                                     # counted in the line specs of diff
                                     # NOTE(review): ``command`` still belongs to the
                                     # previous line here, so _clean_line may strip the
                                     # marker's first character -- confirm intended
                                     lines.append({
                                         'old_lineno':   '...',
                                         'new_lineno':   '...',
                                         'action':       'context',
                                         'line':         self._clean_line(line, command)
                                     })
                     except StopIteration:
                         # running out of lines is the regular way to finish
                         pass
                     return chunks, stats
                 def _safe_id(self, idstring):
                     """Make a string safe for including in an id attribute.
                     The HTML spec says that id attributes 'must begin with
                     a letter ([A-Za-z]) and may be followed by any number
                     of letters, digits ([0-9]), hyphens ("-"), underscores
                     ("_"), colons (":"), and periods (".")'. These regexps
                     are slightly over-zealous, in that they remove colons
                     and periods unnecessarily.
                     Whitespace is transformed into underscores, and then
                     anything which is not a hyphen or a character that
                     matches \w (alphanumerics and underscore) is removed.
                     """
                     # Transform all whitespace to underscore
                     idstring = re.sub(r'\s', "_", '%s' % idstring)
                     # Remove everything that is not a hyphen or a member of \w
                     idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                     return idstring
                 def prepare(self, inline_diff=True):
                     """
                     Prepare the passed udiff for HTML rendering. It'l return a list
                     of dicts with diff information
                     """
                     parsed = self._parser(inline_diff=inline_diff)
                     self.parsed = True
                     self.parsed_diff = parsed
                     return parsed
                 def as_raw(self, diff_lines=None):
                     """
                     Returns raw string diff
                     """
                     return self._diff
                     #return u''.join(imap(self._line_counter, self._diff.splitlines(1)))
                 def as_html(self, table_class='code-difftable', line_class='line',
                             old_lineno_class='lineno old', new_lineno_class='lineno new',
                             code_class='code', enable_comments=False, parsed_lines=None):
                     """
                     Return given diff as html table with customized css classes
                     """
                     def _link_to_if(condition, label, url):
                         """
                         Generates a link if condition is meet or just the label if not.
                         """
                         if condition:
                             return '''<a href="%(url)s">%(label)s</a>''' % {
                                 'url': url,
                                 'label': label
                             }
                         else:
                             return label
                     if not self.parsed:
                         self.prepare()
                     diff_lines = self.parsed_diff
                     if parsed_lines:
                         diff_lines = parsed_lines
                     _html_empty = True
                     _html = []
                     _html.append('''<table class="%(table_class)s">\n''' % {
                         'table_class': table_class
                     })
                     for diff in diff_lines:
                         for line in diff['chunks']:
                             _html_empty = False
                             for change in line:
                                 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                                     'lc': line_class,
                                     'action': change['action']
                                 })
                                 anchor_old_id = ''
                                 anchor_new_id = ''
                                 anchor_old = "%(filename)s_o%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['old_lineno']
                                 }
                                 anchor_new = "%(filename)s_n%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['new_lineno']
                                 }
                                 cond_old = (change['old_lineno'] != '...' and
                                             change['old_lineno'])
                                 cond_new = (change['new_lineno'] != '...' and
                                             change['new_lineno'])
                                 if cond_old:
                                     anchor_old_id = 'id="%s"' % anchor_old
                                 if cond_new:
                                     anchor_new_id = 'id="%s"' % anchor_new
                                 ###########################################################
                                 # OLD LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                                     'a_id': anchor_old_id,
                                     'olc': old_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(True, change['old_lineno'],
                                                         '#%s' % anchor_old)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # NEW LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                                     'a_id': anchor_new_id,
                                     'nlc': new_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(True, change['new_lineno'],
                                                         '#%s' % anchor_new)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # CODE
                                 ###########################################################
                                 comments = '' if enable_comments else 'no-comment'
                                 _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                                     'cc': code_class,
                                     'inc': comments
                                 })
                                 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                                     'code': change['line']
                                 })
                                 _html.append('''\t</td>''')
                                 _html.append('''\n</tr>\n''')
                     _html.append('''</table>''')
                     if _html_empty:
                         return None
                     return ''.join(_html)
                 def stat(self):
                     """
                     Report the added/removed line counters of this instance.

                     :returns: two-element tuple ``(adds, removes)`` read from
                         ``self.adds`` and ``self.removes``, in that order
                     """
                     added = self.adds
                     removed = self.removes
                     return (added, removed)