upstream/kallithea Commit - r8525:d437cc48

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

# This program is free software: you can redistribute it and/or modify

2

# This program is free software: you can redistribute it and/or modify

3

# it under the terms of the GNU General Public License as published by

3

# it under the terms of the GNU General Public License as published by

4

# the Free Software Foundation, either version 3 of the License, or

4

# the Free Software Foundation, either version 3 of the License, or

5

# (at your option) any later version.

5

# (at your option) any later version.

6

#

6

#

7

# This program is distributed in the hope that it will be useful,

7

# This program is distributed in the hope that it will be useful,

8

# but WITHOUT ANY WARRANTY; without even the implied warranty of

8

# but WITHOUT ANY WARRANTY; without even the implied warranty of

9

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

9

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

10

# GNU General Public License for more details.

10

# GNU General Public License for more details.

11

#

11

#

12

# You should have received a copy of the GNU General Public License

12

# You should have received a copy of the GNU General Public License

13

# along with this program. If not, see <http://www.gnu.org/licenses/>.

13

# along with this program. If not, see <http://www.gnu.org/licenses/>.

14

"""

14

"""

15

kallithea.lib.diffs

15

kallithea.lib.diffs

16

~~~~~~~~~~~~~~~~~~~

16

~~~~~~~~~~~~~~~~~~~

17

18

Set of diffing helpers, previously part of vcs

18

Set of diffing helpers, previously part of vcs

19

20

21

This file was forked by the Kallithea project in July 2014.

21

This file was forked by the Kallithea project in July 2014.

22

Original author and date, and relevant copyright and licensing information is below:

22

Original author and date, and relevant copyright and licensing information is below:

23

:created_on: Dec 4, 2011

23

:created_on: Dec 4, 2011

24

:author: marcink

24

:author: marcink

25

26

:license: GPLv3, see LICENSE.md for more details.

26

:license: GPLv3, see LICENSE.md for more details.

27

"""

27

"""

28

import difflib

28

import difflib

29

import logging

29

import logging

30

import re

30

import re

31

32

from tg.i18n import ugettext as _

32

from tg.i18n import ugettext as _

33

34

from kallithea.lib import webutils

34

from kallithea.lib import webutils

35

from kallithea.lib.utils2 import safe_str

35

from kallithea.lib.utils2 import safe_str

36

from kallithea.lib.vcs.backends.base import EmptyChangeset

36

from kallithea.lib.vcs.backends.base import EmptyChangeset

37

from kallithea.lib.vcs.exceptions import VCSError

37

from kallithea.lib.vcs.exceptions import VCSError

38

from kallithea.lib.vcs.nodes import FileNode, SubModuleNode

38

from kallithea.lib.vcs.nodes import FileNode, SubModuleNode

39

40

41

log = logging.getLogger(__name__)

41

log = logging.getLogger(__name__)

42

43

44

def _safe_id(idstring):

44

def _safe_id(idstring):

45

r"""Make a string safe for including in an id attribute.

45

r"""Make a string safe for including in an id attribute.

46

47

The HTML spec says that id attributes 'must begin with

47

The HTML spec says that id attributes 'must begin with

48

a letter ([A-Za-z]) and may be followed by any number

48

a letter ([A-Za-z]) and may be followed by any number

49

of letters, digits ([0-9]), hyphens ("-"), underscores

49

of letters, digits ([0-9]), hyphens ("-"), underscores

50

("_"), colons (":"), and periods (".")'. These regexps

50

("_"), colons (":"), and periods (".")'. These regexps

51

are slightly over-zealous, in that they remove colons

51

are slightly over-zealous, in that they remove colons

52

and periods unnecessarily.

52

and periods unnecessarily.

53

54

Whitespace is transformed into underscores, and then

54

Whitespace is transformed into underscores, and then

55

anything which is not a hyphen or a character that

55

anything which is not a hyphen or a character that

56

matches \w (alphanumerics and underscore) is removed.

56

matches \w (alphanumerics and underscore) is removed.

57

58

"""

58

"""

59

# Transform all whitespace to underscore

59

# Transform all whitespace to underscore

60

idstring = re.sub(r'\s', "_", idstring)

60

idstring = re.sub(r'\s', "_", idstring)

61

# Remove everything that is not a hyphen or a member of \w

61

# Remove everything that is not a hyphen or a member of \w

62

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

62

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

63

return idstring

63

return idstring

64

65

66

def as_html(table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            no_lineno_class='lineno',
            code_class='code', parsed_lines=None):
    """
    Return given diff as html table with customized css classes

    :param table_class: css class of the ``<table>`` element
    :param line_class: css class put on every ``<tr>`` (next to the
        change's ``action``)
    :param old_lineno_class: css class of the old line number cell
    :param new_lineno_class: css class of the new line number cell
    :param no_lineno_class: css class of the single, two column wide cell
        used for rows that have neither an old nor a new line number
    :param code_class: css class of the cell holding the line content
    :param parsed_lines: iterable of per-file dicts with the keys
        ``filename`` and ``chunks``; each chunk is an iterable of change
        dicts with the keys ``action``, ``old_lineno``, ``new_lineno`` and
        ``line``. ``line`` is inserted into the markup as-is.
    :return: the html as a string, or None if there was no chunk at all
        (including when ``parsed_lines`` is None or empty)
    """
    if parsed_lines is None:
        # Nothing to render - behave like an empty list of files instead of
        # failing when trying to iterate over None.
        return None

    _html_empty = True
    _html = ['''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    }]

    for file_info in parsed_lines:
        count_no_lineno = 0  # counter to allow comments on lines without new/old line numbers
        # The id prefix is the same for every row of a file - compute it once.
        file_id = _safe_id(file_info['filename'])
        for chunk in file_info['chunks']:
            _html_empty = False
            for change in chunk:
                _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': change['action']
                })
                old_lineno = change['old_lineno']
                new_lineno = change['new_lineno']
                if old_lineno or new_lineno:
                    ###########################################################
                    # OLD LINE NUMBER, then NEW LINE NUMBER
                    ###########################################################
                    for marker, lineno, lineno_class in (
                        ('o', old_lineno, old_lineno_class),
                        ('n', new_lineno, new_lineno_class),
                    ):
                        anchor = "%s_%s%s" % (file_id, marker, lineno)
                        # only cells that actually have a number get an id
                        anchor_id = 'id="%s"' % anchor if lineno else ''
                        _html.append('''\t<td %(a_id)s class="%(cls)s">''' % {
                            'a_id': anchor_id,
                            'cls': lineno_class,
                        })
                        _html.append('''<a href="%(url)s" data-pseudo-content="%(label)s"></a>''' % {
                            'label': lineno,
                            'url': '#%s' % anchor,
                        })
                        _html.append('''</td>\n''')
                else:
                    ###########################################################
                    # NO LINE NUMBER
                    ###########################################################
                    anchor = "%(filename)s_%(count_no_lineno)s" % {
                        'filename': file_id,
                        'count_no_lineno': count_no_lineno,
                    }
                    count_no_lineno += 1
                    _html.append('''\t<td id="%(anchor)s" class="%(olc)s" colspan="2">''' % {
                        'anchor': anchor,
                        'olc': no_lineno_class,
                    })
                    _html.append('''</td>\n''')
                ###########################################################
                # CODE
                ###########################################################
                _html.append('''\t<td class="%(cc)s">''' % {
                    'cc': code_class,
                })
                _html.append('''\n\t\t<div class="add-bubble"><div> </div></div><pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })

                _html.append('''\t</td>''')
                _html.append('''\n</tr>\n''')
    _html.append('''</table>''')
    if _html_empty:
        return None
    return ''.join(_html)

157

158

159

def wrap_to_table(html):

159

def wrap_to_table(html):

160

"""Given a string with html, return it wrapped in a table, similar to what

160

"""Given a string with html, return it wrapped in a table, similar to what

161

~~DiffProcessor~~ returns."""

161

as_html returns."""

162

return '''\

162

return '''\

163

163

164

164

165

165

166

166

167

</tr>

167

</tr>

168

</table>''' % html

168

</table>''' % html

169

170

171

def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
                 ignore_whitespace=True, line_context=3):
    """
    Build the html table for the diff between two file nodes.

    A missing (None) ``filenode_old`` is replaced by an empty FileNode at the
    path of ``filenode_new``. Binary nodes are not diffed, and neither are
    nodes when ``diff_limit`` is -1 or when one of the node sizes is not
    below ``diff_limit``; a message wrapped in a table is returned instead.

    Returns the tuple ``(cs1, cs2, a_path, html_diff, stats, op)``: raw ids of
    the old and new changeset, the old path, the html, the stats tuple and the
    operation taken from the parsed diff (None if nothing was parsed).
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    op = None
    stats = (0, 0)
    a_path = filenode_old.path  # default, might be overridden by actual rename in diff

    if filenode_old.is_binary or filenode_new.is_binary:
        html_diff = wrap_to_table(_('Binary file'))

    elif diff_limit == -1 or (
            diff_limit is not None and
            not (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
        html_diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                    'diff menu to display this diff'))

    else:
        processor = DiffProcessor(get_gitdiff(filenode_old, filenode_new,
                                              ignore_whitespace=ignore_whitespace,
                                              context=line_context))
        parsed = processor.parsed
        if parsed:  # there should be exactly one element, for the specified file
            file_data = parsed[0]
            op = file_data['operation']
            a_path = file_data['old_filename']

        html_diff = as_html(parsed_lines=processor.parsed)
        stats = processor.stat()

    if not html_diff:
        # no markup was produced - explain why instead of showing nothing
        submodule_nodes = [node for node in (filenode_new, filenode_old)
                           if isinstance(node, SubModuleNode)]
        if submodule_nodes:
            message = webutils.escape('Submodule %r' % submodule_nodes[0])
        else:
            message = _('No changes detected')
        html_diff = wrap_to_table(message)

    return (filenode_old.changeset.raw_id, filenode_new.changeset.raw_id,
            a_path, html_diff, stats, op)

218

219

220

def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Return the git style diff between ``filenode_old`` and ``filenode_new``.

    An empty bytes string is returned if either node is a SubModuleNode.
    VCSError is raised if one of the nodes is not a FileNode. The diff itself
    is requested from the repository of the new node's changeset.
    """
    # a falsy context (None, 0) is replaced by the default of 3
    if not context:
        context = 3

    if any(isinstance(node, SubModuleNode) for node in (filenode_new, filenode_old)):
        return b''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % filenode.__class__)

    repo = filenode_new.changeset.repository
    # changesets without a raw_id attribute are treated as the empty changeset
    old_raw_id, new_raw_id = [
        getattr(node.changeset, 'raw_id', repo.EMPTY_CHANGESET)
        for node in (filenode_old, filenode_new)
    ]

    return get_diff(repo, old_raw_id, new_raw_id, filenode_new.path,
                    ignore_whitespace, context)

242

243

244

def get_diff(scm_instance, rev1, rev2, path=None, ignore_whitespace=False, context=3):
    """
    Ask ``scm_instance`` for the diff between ``rev1`` and ``rev2``.

    All arguments are passed on to ``scm_instance.get_diff``. If that raises
    MemoryError, an error message is flashed and an empty bytes string is
    returned instead.
    """
    try:
        diff = scm_instance.get_diff(rev1, rev2, path=path,
                                     ignore_whitespace=ignore_whitespace,
                                     context=context)
    except MemoryError:
        webutils.flash('MemoryError: Diff is too big', category='error')
        diff = b''
    return diff

254

255

256

NEW_FILENODE = 1

256

NEW_FILENODE = 1

257

DEL_FILENODE = 2

257

DEL_FILENODE = 2

258

MOD_FILENODE = 3

258

MOD_FILENODE = 3

259

RENAMED_FILENODE = 4

259

RENAMED_FILENODE = 4

260

COPIED_FILENODE = 5

260

COPIED_FILENODE = 5

261

CHMOD_FILENODE = 6

261

CHMOD_FILENODE = 6

262

BIN_FILENODE = 7

262

BIN_FILENODE = 7

263

264

265

class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.

    The result of parsing is available as ``parsed``; ``limited_diff`` tells
    whether files were left out because of ``diff_limit``, and ``stat()``
    gives the total number of added and removed lines.
    """
    _diff_git_re = re.compile(b'^diff --git', re.MULTILINE)

    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
        """
        :param diff: a text in diff format, as bytes
        :param vcs: type of version control hg or git
        :param diff_limit: define the size of diff that is considered "big"
            based on that parameter cut off will be triggered, set to None
            to show full diff
        :param inline_diff: also mark up changes inside lines where one
            deleted line is followed by exactly one added line
        :raises TypeError: if ``diff`` is not bytes
        """
        if not isinstance(diff, bytes):
            raise TypeError('Diff must be bytes - got %s' % type(diff))

        self._diff = memoryview(diff)
        self.adds = 0
        self.removes = 0
        self.diff_limit = diff_limit
        self.limited_diff = False
        self.vcs = vcs
        self.parsed = self._parse_gitdiff(inline_diff=inline_diff)

    def _parse_gitdiff(self, inline_diff):
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
        Might set limited_diff.
        Adds the added/deleted line counts of every parsed file to adds/removes.
        Optionally, do an extra pass and do extra markup of one-liner changes.
        """
        _files = []  # list of dicts with meta info and chunks

        # every 'diff --git' line starts the section of one file
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
        starts.append(len(self._diff))

        for start, end in zip(starts, starts[1:]):
            if self.diff_limit and end > self.diff_limit:
                # this file ends beyond the limit - leave it out
                self.limited_diff = True
                continue

            head, diff_lines = _get_header(self.vcs, self._diff[start:end])

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            if head['deleted_file_mode']:
                op = 'removed'
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = 'added'
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be cp, rename, chmod
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = 'modified'
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
                                                    % (head['old_mode'], head['new_mode']))
                # RENAME
                if (head['rename_from'] and head['rename_to']
                        and head['rename_from'] != head['rename_to']):
                    op = 'renamed'
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
                                                      % (head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = 'modified'
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
                                                     % (head['copy_from'], head['copy_to']))
            # FALL BACK: detect missed old style add or remove
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = 'added'
                    stats['binary'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = 'removed'
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

            # it's not ADD not DELETE
            if op is None:
                op = 'modified'
                stats['binary'] = True
                stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                chunks, added, deleted = _parse_lines(diff_lines)
                stats['binary'] = False
                stats['added'] = added
                stats['deleted'] = deleted
                # keep the totals reported by stat() in sync with the files
                self.adds += added
                self.removes += deleted
                # explicit mark that it's a modified file
                if op == 'modified':
                    stats['ops'][MOD_FILENODE] = 'modified file'
            else:  # Git binary patch (or empty diff)
                # Git binary patch
                if head['bin_patch']:
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
                chunks = []

            if op == 'removed' and chunks:
                # a way of seeing deleted content could perhaps be nice - but
                # not with the current UI
                chunks = []

            # first chunk: one line without line numbers for every operation
            # (except plain modification)
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': 'context',
                'line': msg,
            } for _op, msg in stats['ops'].items()
                if _op not in [MOD_FILENODE]])

            _files.append({
                'old_filename': head['a_path'],
                'filename': head['b_path'],
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'operation': op,
                'stats': stats,
            })

        if not inline_diff:
            return _files

        # highlight inline changes when one del is followed by one add
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                # running out of lines (StopIteration) ends the chunk at any point
                try:
                    peekline = next(lineiter)
                    while True:
                        # find a first del line
                        while peekline['action'] != 'del':
                            peekline = next(lineiter)
                        delline = peekline
                        peekline = next(lineiter)
                        # if not followed by add, eat all following del lines
                        if peekline['action'] != 'add':
                            while peekline['action'] == 'del':
                                peekline = next(lineiter)
                            continue
                        # found an add - make sure it is the only one
                        addline = peekline
                        try:
                            peekline = next(lineiter)
                        except StopIteration:
                            # add was last line - ok
                            _highlight_inline_diff(delline, addline)
                            raise
                        if peekline['action'] != 'add':
                            # there was only one add line - ok
                            _highlight_inline_diff(delline, addline)
                except StopIteration:
                    pass

        return _files

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes

443

444

445

_escape_re = re.compile(r'(&)|(<)|(>)|(\t)($)?|(\r)|( $)')

445

_escape_re = re.compile(r'(&)|(<)|(>)|(\t)($)?|(\r)|( $)')

446

447

448

def _escaper(diff_line):

448

def _escaper(diff_line):

449

r"""

449

r"""

450

Do HTML escaping/markup of a single diff line (including first +/- column)

450

Do HTML escaping/markup of a single diff line (including first +/- column)

451

452

>>> _escaper('foobar')

452

>>> _escaper('foobar')

453

'foobar'

453

'foobar'

454

>>> _escaper('@foo & bar')

454

>>> _escaper('@foo & bar')

455

'@foo & bar'

455

'@foo & bar'

456

>>> _escaper('+foo < bar')

456

>>> _escaper('+foo < bar')

457

'+foo < bar'

457

'+foo < bar'

458

>>> _escaper('-foo > bar')

458

>>> _escaper('-foo > bar')

459

'-foo > bar'

459

'-foo > bar'

460

>>> _escaper(' <foo>')

460

>>> _escaper(' <foo>')

461

' <foo>'

461

' <foo>'

462

>>> _escaper(' foo\tbar')

462

>>> _escaper(' foo\tbar')

463

' foo\tbar'

463

' foo\tbar'

464

>>> _escaper(' foo\rbar\r')

464

>>> _escaper(' foo\rbar\r')

465

' foobar'

465

' foobar'

466

>>> _escaper(' foo\t')

466

>>> _escaper(' foo\t')

467

' foo\t'

467

' foo\t'

468

>>> _escaper(' foo ')

468

>>> _escaper(' foo ')

469

' foo '

469

' foo '

470

>>> _escaper(' foo ')

470

>>> _escaper(' foo ')

471

' foo '

471

' foo '

472

>>> _escaper(' ')

472

>>> _escaper(' ')

473

' '

473

' '

474

>>> _escaper(' ')

474

>>> _escaper(' ')

475

' '

475

' '

476

>>> _escaper(' \t')

476

>>> _escaper(' \t')

477

' \t'

477

' \t'

478

>>> _escaper(' \t ')

478

>>> _escaper(' \t ')

479

' \t '

479

' \t '

480

>>> _escaper(' \t')

480

>>> _escaper(' \t')

481

' \t'

481

' \t'

482

>>> _escaper(' \t\t ')

482

>>> _escaper(' \t\t ')

483

' \t\t '

483

' \t\t '

484

>>> _escaper(' \t\t')

484

>>> _escaper(' \t\t')

485

' \t\t'

485

' \t\t'

486

>>> _escaper(' foo&bar<baz> ')

486

>>> _escaper(' foo&bar<baz> ')

487

' foo&bar<baz> '

487

' foo&bar<baz> '

488

"""

488

"""

489

490

def substitute(m):

490

def substitute(m):

491

groups = m.groups()

491

groups = m.groups()

492

if groups[0]:

492

if groups[0]:

493

return '&'

493

return '&'

494

if groups[1]:

494

if groups[1]:

495

return '<'

495

return '<'

496

if groups[2]:

496

if groups[2]:

497

return '>'

497

return '>'

498

if groups[3]:

498

if groups[3]:

499

if groups[4] is not None: # end of line

499

if groups[4] is not None: # end of line

500

return '\t'

500

return '\t'

501

return '\t'

501

return '\t'

502

if groups[5]:

502

if groups[5]:

503

return ''

503

return ''

504

if groups[6]:

504

if groups[6]:

505

if m.start() == 0:

505

if m.start() == 0:

506

return ' ' # first column space shouldn't make empty lines show up as trailing space

506

return ' ' # first column space shouldn't make empty lines show up as trailing space

507

return ' '

507

return ' '

508

assert False

508

assert False

509

510

return _escape_re.sub(substitute, diff_line)

510

return _escape_re.sub(substitute, diff_line)

511

512

513

_git_header_re = re.compile(br"""

513

_git_header_re = re.compile(br"""

514

^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

514

^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

515

(?:^old[ ]mode[ ](?P<old_mode>\d+)\n

515

(?:^old[ ]mode[ ](?P<old_mode>\d+)\n

516

^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

516

^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

517

(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n

517

(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n

518

^rename[ ]from[ ](?P<rename_from>.+)\n

518

^rename[ ]from[ ](?P<rename_from>.+)\n

519

^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?

519

^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?

520

(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

520

(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

521

(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

521

(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

522

(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

522

(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

523

\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

523

\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

524

(?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?

524

(?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?

525

(?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?

525

(?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?

526

(?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?

526

(?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?

527

""", re.VERBOSE | re.MULTILINE)

527

""", re.VERBOSE | re.MULTILINE)

528

529

530

_hg_header_re = re.compile(br"""

530

_hg_header_re = re.compile(br"""

531

^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

531

^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

532

(?:^old[ ]mode[ ](?P<old_mode>\d+)\n

532

(?:^old[ ]mode[ ](?P<old_mode>\d+)\n

533

^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

533

^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

534

(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?

534

(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?

535

(?:^rename[ ]from[ ](?P<rename_from>.+)\n

535

(?:^rename[ ]from[ ](?P<rename_from>.+)\n

536

^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?

536

^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?

537

(?:^copy[ ]from[ ](?P<copy_from>.+)\n

537

(?:^copy[ ]from[ ](?P<copy_from>.+)\n

538

^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?

538

^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?

539

(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

539

(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

540

(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

540

(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

541

(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

541

(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

542

\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

542

\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

543

(?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?

543

(?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?

544

(?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?

544

(?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?

545

(?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?

545

(?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?

546

""", re.VERBOSE | re.MULTILINE)

546

""", re.VERBOSE | re.MULTILINE)

547

548

549

_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')

549

_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')

550

551

552

def _get_header(vcs, diff_chunk):
    """
    Parses a Git diff for a single file (header and chunks) and returns a tuple with:

    1. A dict with meta info - one entry per named group of the header
       regexp, with unmatched groups as None and matched ones as str:

        a_path, b_path, similarity_index, rename_from, rename_to,
        old_mode, new_mode, new_file_mode, deleted_file_mode,
        a_blob_id, b_blob_id, b_mode, bin_patch, a_file, b_file
        (and copy_from, copy_to when vcs is 'hg')

    2. An iterator yielding lines with simple HTML markup.

    :param vcs: 'git' or 'hg' - selects the header regexp
    :param diff_chunk: bytes-like object (bytes or memoryview) with the diff
        of one file, starting at its "diff --git" line
    :raises Exception: if vcs is unknown, the header doesn't match, or the
        header is followed by something that is neither a hunk nor binary
        patch data
    """
    match = None
    if vcs == 'git':
        match = _git_header_re.match(diff_chunk)
    elif vcs == 'hg':
        match = _hg_header_re.match(diff_chunk)
    if match is None:
        raise Exception('diff not recognized as valid %s diff' % vcs)
    meta_info = {k: None if v is None else safe_str(v) for k, v in match.groupdict().items()}
    rest = diff_chunk[match.end():]
    if rest:
        if _header_next_check.match(rest):
            raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:match.end()])), safe_str(bytes(rest[:1000]))))
        if rest[-1:] != b'\n':
            # The diff will generally already have trailing \n (and be a memoryview). It might also be huge so we don't want to allocate it twice. But in this very rare case, we don't care.
            rest = bytes(rest) + b'\n'
    # Lazily decode and escape one line at a time; the pattern relies on every line being \n terminated.
    diff_lines = (_escaper(safe_str(m.group(1))) for m in re.finditer(br'(.*)\n', rest))
    return meta_info, diff_lines

581

582

583

_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

583

_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

584

_newline_marker = re.compile(r'^\\ No newline at end of file')

584

_newline_marker = re.compile(r'^\\ No newline at end of file')

585

586

587

def _parse_lines(diff_lines):

587

def _parse_lines(diff_lines):

588

"""

588

"""

589

Given an iterator of diff body lines, parse them and return a dict per

589

Given an iterator of diff body lines, parse them and return a dict per

590

line and added/removed totals.

590

line and added/removed totals.

591

"""

591

"""

592

added = deleted = 0

592

added = deleted = 0

593

old_line = old_end = new_line = new_end = None

593

old_line = old_end = new_line = new_end = None

594

595

chunks = []

595

chunks = []

596

try:

596

try:

597

line = next(diff_lines)

597

line = next(diff_lines)

598

599

while True:

599

while True:

600

lines = []

600

lines = []

601

chunks.append(lines)

601

chunks.append(lines)

602

603

match = _chunk_re.match(line)

603

match = _chunk_re.match(line)

604

605

if not match:

605

if not match:

606

raise Exception('error parsing diff @@ line %r' % line)

606

raise Exception('error parsing diff @@ line %r' % line)

607

608

gr = match.groups()

608

gr = match.groups()

609

(old_line, old_end,

609

(old_line, old_end,

610

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

610

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

611

old_line -= 1

611

old_line -= 1

612

new_line -= 1

612

new_line -= 1

613

614

context = len(gr) == 5

614

context = len(gr) == 5

615

old_end += old_line

615

old_end += old_line

616

new_end += new_line

616

new_end += new_line

617

618

if context:

618

if context:

619

# skip context only if it's first line

619

# skip context only if it's first line

620

if int(gr[0]) > 1:

620

if int(gr[0]) > 1:

621

lines.append({

621

lines.append({

622

'old_lineno': '',

622

'old_lineno': '',

623

'new_lineno': '',

623

'new_lineno': '',

624

'action': 'context',

624

'action': 'context',

625

'line': line,

625

'line': line,

626

})

626

})

627

628

line = next(diff_lines)

628

line = next(diff_lines)

629

630

while old_line < old_end or new_line < new_end:

630

while old_line < old_end or new_line < new_end:

631

if not line:

631

if not line:

632

raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))

632

raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))

633

634

affects_old = affects_new = False

634

affects_old = affects_new = False

635

636

command = line[0]

636

command = line[0]

637

if command == '+':

637

if command == '+':

638

affects_new = True

638

affects_new = True

639

action = 'add'

639

action = 'add'

640

added += 1

640

added += 1

641

elif command == '-':

641

elif command == '-':

642

affects_old = True

642

affects_old = True

643

action = 'del'

643

action = 'del'

644

deleted += 1

644

deleted += 1

645

elif command == ' ':

645

elif command == ' ':

646

affects_old = affects_new = True

646

affects_old = affects_new = True

647

action = 'unmod'

647

action = 'unmod'

648

else:

648

else:

649

raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))

649

raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))

650

651

if not _newline_marker.match(line):

651

if not _newline_marker.match(line):

652

old_line += affects_old

652

old_line += affects_old

653

new_line += affects_new

653

new_line += affects_new

654

lines.append({

654

lines.append({

655

'old_lineno': affects_old and old_line or '',

655

'old_lineno': affects_old and old_line or '',

656

'new_lineno': affects_new and new_line or '',

656

'new_lineno': affects_new and new_line or '',

657

'action': action,

657

'action': action,

658

'line': line[1:],

658

'line': line[1:],

659

})

659

})

660

661

line = next(diff_lines)

661

line = next(diff_lines)

662

663

if _newline_marker.match(line):

663

if _newline_marker.match(line):

664

# we need to append to lines, since this is not

664

# we need to append to lines, since this is not

665

# counted in the line specs of diff

665

# counted in the line specs of diff

666

lines.append({

666

lines.append({

667

'old_lineno': '',

667

'old_lineno': '',

668

'new_lineno': '',

668

'new_lineno': '',

669

'action': 'context',

669

'action': 'context',

670

'line': line,

670

'line': line,

671

})

671

})

672

line = next(diff_lines)

672

line = next(diff_lines)

673

if old_line > old_end:

673

if old_line > old_end:

674

raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))

674

raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))

675

if new_line > new_end:

675

if new_line > new_end:

676

raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))

676

raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))

677

except StopIteration:

677

except StopIteration:

678

pass

678

pass

679

if old_line != old_end or new_line != new_end:

679

if old_line != old_end or new_line != new_end:

680

raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))

680

raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))

681

682

return chunks, added, deleted

682

return chunks, added, deleted

683

684

# Used for inline highlighter word split, must match the substitutions in _escaper

684

# Used for inline highlighter word split, must match the substitutions in _escaper

685

_token_re = re.compile(r'()(&|<|>|\t|| |\W+?)')

685

_token_re = re.compile(r'()(&|<|>|\t|| |\W+?)')

686

687

688

def _highlight_inline_diff(old, new):

688

def _highlight_inline_diff(old, new):

689

"""

689

"""

690

Highlight simple add/remove in two lines given as info dicts. They are

690

Highlight simple add/remove in two lines given as info dicts. They are

691

modified in place and given markup with <del>/<ins>.

691

modified in place and given markup with <del>/<ins>.

692

"""

692

"""

693

assert old['action'] == 'del'

693

assert old['action'] == 'del'

694

assert new['action'] == 'add'

694

assert new['action'] == 'add'

695

696

oldwords = _token_re.split(old['line'])

696

oldwords = _token_re.split(old['line'])

697

newwords = _token_re.split(new['line'])

697

newwords = _token_re.split(new['line'])

698

sequence = difflib.SequenceMatcher(None, oldwords, newwords)

698

sequence = difflib.SequenceMatcher(None, oldwords, newwords)

699

700

oldfragments, newfragments = [], []

700

oldfragments, newfragments = [], []

701

for tag, i1, i2, j1, j2 in sequence.get_opcodes():

701

for tag, i1, i2, j1, j2 in sequence.get_opcodes():

702

oldfrag = ''.join(oldwords[i1:i2])

702

oldfrag = ''.join(oldwords[i1:i2])

703

newfrag = ''.join(newwords[j1:j2])

703

newfrag = ''.join(newwords[j1:j2])

704

if tag != 'equal':

704

if tag != 'equal':

705

if oldfrag:

705

if oldfrag:

706

oldfrag = '<del>%s</del>' % oldfrag

706

oldfrag = '<del>%s</del>' % oldfrag

707

if newfrag:

707

if newfrag:

708

newfrag = '<ins>%s</ins>' % newfrag

708

newfrag = '<ins>%s</ins>' % newfrag

709

oldfragments.append(oldfrag)

709

oldfragments.append(oldfrag)

710

newfragments.append(newfrag)

710

newfragments.append(newfrag)

711

712

old['line'] = "".join(oldfragments)

712

old['line'] = "".join(oldfragments)

713

new['line'] = "".join(newfragments)

713

new['line'] = "".join(newfragments)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU General Public License as published by
             # the Free Software Foundation, either version 3 of the License, or
             # (at your option) any later version.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             """
             kallithea.lib.diffs
             ~~~~~~~~~~~~~~~~~~~
             Set of diffing helpers, previously part of vcs
             This file was forked by the Kallithea project in July 2014.
             Original author and date, and relevant copyright and licensing information is below:
             :created_on: Dec 4, 2011
             :author: marcink
             :copyright: (c) 2013 RhodeCode GmbH, and others.
             :license: GPLv3, see LICENSE.md for more details.
             """
             import difflib
             import logging
             import re
             from tg.i18n import ugettext as _
             from kallithea.lib import webutils
             from kallithea.lib.utils2 import safe_str
             from kallithea.lib.vcs.backends.base import EmptyChangeset
             from kallithea.lib.vcs.exceptions import VCSError
             from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
             log = logging.getLogger(__name__)
             def _safe_id(idstring):
                 r"""Make a string safe for including in an id attribute.
                 The HTML spec says that id attributes 'must begin with
                 a letter ([A-Za-z]) and may be followed by any number
                 of letters, digits ([0-9]), hyphens ("-"), underscores
                 ("_"), colons (":"), and periods (".")'. These regexps
                 are slightly over-zealous, in that they remove colons
                 and periods unnecessarily.
                 Whitespace is transformed into underscores, and then
                 anything which is not a hyphen or a character that
                 matches \w (alphanumerics and underscore) is removed.
                 """
                 # Transform all whitespace to underscore
                 idstring = re.sub(r'\s', "_", idstring)
                 # Remove everything that is not a hyphen or a member of \w
                 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                 return idstring
             def as_html(table_class='code-difftable', line_class='line',
                         old_lineno_class='lineno old', new_lineno_class='lineno new',
                         no_lineno_class='lineno',
                         code_class='code', parsed_lines=None):
                 """
                 Return given diff as html table with customized css classes

                 :param parsed_lines: iterable of per-file dicts with a 'filename' and
                     a list of 'chunks'; each chunk is a list of line dicts with the
                     keys 'old_lineno', 'new_lineno', 'action' and 'line'. The 'line'
                     value is inserted verbatim, so it must already be escaped.
                     ``None`` (the default) is treated like an empty list.
                 :returns: the table markup as one string, or None if no file had
                     any chunk
                 """
                 def lineno_cell(anchor, lineno, css_class):
                     # The cell only gets an id when the line exists on this side.
                     cell_id = 'id="%s"' % anchor if lineno else ''
                     return ''.join([
                         '''\t<td %s class="%s">''' % (cell_id, css_class),
                         '''<a href="#%s" data-pseudo-content="%s"></a>''' % (anchor, lineno),
                         '''</td>\n''',
                     ])
                 _html_empty = True
                 _html = ['''<table class="%s">\n''' % table_class]
                 for file_info in parsed_lines or ():
                     count_no_lineno = 0  # counter to allow comments on lines without new/old line numbers
                     for chunk in file_info['chunks']:
                         _html_empty = False
                         for change in chunk:
                             _html.append('''<tr class="%s %s">\n''' % (line_class, change['action']))
                             filename_id = _safe_id(file_info['filename'])
                             old_lineno = change['old_lineno']
                             new_lineno = change['new_lineno']
                             if old_lineno or new_lineno:
                                 # One cell per side, anchored as <file>_o<N> / <file>_n<N>
                                 _html.append(lineno_cell('%s_o%s' % (filename_id, old_lineno),
                                                          old_lineno, old_lineno_class))
                                 _html.append(lineno_cell('%s_n%s' % (filename_id, new_lineno),
                                                          new_lineno, new_lineno_class))
                             else:
                                 # No line number on either side: one wide cell with a running per-file index
                                 _html.append('''\t<td id="%s_%s" class="%s" colspan="2">''' % (
                                     filename_id, count_no_lineno, no_lineno_class))
                                 count_no_lineno += 1
                                 _html.append('''</td>\n''')
                             # The code cell itself
                             _html.append('''\t<td class="%s">''' % code_class)
                             _html.append('''\n\t\t<div class="add-bubble"><div>&nbsp;</div></div><pre>%s</pre>\n''' % (
                                 change['line'],))
                             _html.append('''\t</td>''')
                             _html.append('''\n</tr>\n''')
                 _html.append('''</table>''')
                 if _html_empty:
                     return None
                 return ''.join(_html)
             def wrap_to_table(html):
                 """Given a string with html, return it wrapped in a table, similar to what
                 as_html returns.

                 The string is inserted verbatim into the <pre> of a single-row table.
                 """
                 return '''\
                           <table class="code-difftable">
                             <tr class="line">
                             <td class="lineno new"></td>
                             <td class="code"><pre>%s</pre></td>
                             </tr>
                           </table>''' % html
             def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
                             ignore_whitespace=True, line_context=3):
                 """
                 Build the diff between two file nodes, rendered as a html table.

                 A missing old node is replaced by an empty file at the new node's
                 path. Binary files, and files that are not below ``diff_limit``,
                 only get a short message instead of a diff.

                 :returns: tuple of old changeset raw_id, new changeset raw_id, old
                     path, html, (added, deleted) stats and the operation (None if
                     no diff was parsed)
                 """
                 if filenode_old is None:
                     filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
                 def size_allows_diff():
                     # -1 never diffs, None always diffs, otherwise both files must be below the limit
                     if diff_limit == -1:
                         return False
                     if diff_limit is None:
                         return True
                     return filenode_old.size < diff_limit and filenode_new.size < diff_limit
                 op = None
                 a_path = filenode_old.path  # default, might be overridden by actual rename in diff
                 stats = (0, 0)
                 if filenode_old.is_binary or filenode_new.is_binary:
                     html_diff = wrap_to_table(_('Binary file'))
                 elif size_allows_diff():
                     processor = DiffProcessor(get_gitdiff(filenode_old, filenode_new,
                                                           ignore_whitespace=ignore_whitespace,
                                                           context=line_context))
                     if processor.parsed:
                         # there should be exactly one element, for the specified file
                         file_entry = processor.parsed[0]
                         op = file_entry['operation']
                         a_path = file_entry['old_filename']
                     html_diff = as_html(parsed_lines=processor.parsed)
                     stats = processor.stat()
                 else:
                     html_diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                                 'diff menu to display this diff'))
                 if not html_diff:
                     # Nothing to show: either a submodule or a diff without changes
                     submodule = next((node for node in (filenode_new, filenode_old)
                                       if isinstance(node, SubModuleNode)), None)
                     if submodule is None:
                         html_diff = wrap_to_table(_('No changes detected'))
                     else:
                         html_diff = wrap_to_table(webutils.escape('Submodule %r' % submodule))
                 return (filenode_old.changeset.raw_id, filenode_new.changeset.raw_id,
                         a_path, html_diff, stats, op)
             def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
                 """
                 Return the git style diff between ``filenode_old`` and ``filenode_new``.

                 If either node is a submodule, the result is an empty bytes string.
                 Anything else that is not a FileNode raises VCSError.
                 """
                 # a falsy context falls back to the default
                 context = context or 3
                 if any(isinstance(node, SubModuleNode) for node in (filenode_new, filenode_old)):
                     return b''
                 for filenode in (filenode_old, filenode_new):
                     if isinstance(filenode, FileNode):
                         continue
                     raise VCSError("Given object should be FileNode object, not %s"
                         % filenode.__class__)
                 repo = filenode_new.changeset.repository
                 # changesets without a raw_id are diffed as the repository's empty changeset
                 old_raw_id, new_raw_id = (
                     getattr(node.changeset, 'raw_id', repo.EMPTY_CHANGESET)
                     for node in (filenode_old, filenode_new)
                 )
                 return get_diff(repo, old_raw_id, new_raw_id, filenode_new.path,
                                 ignore_whitespace, context)
             def get_diff(scm_instance, rev1, rev2, path=None, ignore_whitespace=False, context=3):
                 """
                 Thin wrapper around the ``get_diff`` of the vcs lib repository object.

                 If producing the diff runs out of memory, an error message is flashed
                 and an empty bytes string is returned instead.
                 """
                 options = {
                     'path': path,
                     'ignore_whitespace': ignore_whitespace,
                     'context': context,
                 }
                 try:
                     raw_diff = scm_instance.get_diff(rev1, rev2, **options)
                 except MemoryError:
                     webutils.flash('MemoryError: Diff is too big', category='error')
                     raw_diff = b''
                 return raw_diff
             # Codes for the kinds of change a file can undergo. DiffProcessor uses
             # them as keys of the per-file stats['ops'] dict, each mapping to a
             # human readable description of the operation.
             NEW_FILENODE = 1
             DEL_FILENODE = 2
             MOD_FILENODE = 3
             RENAMED_FILENODE = 4
             COPIED_FILENODE = 5
             CHMOD_FILENODE = 6
             BIN_FILENODE = 7
             class DiffProcessor(object):
                 """
                 Give it a unified or git diff and it returns a list of the files that were
                 mentioned in the diff together with a dict of meta information that
                 can be used to render it in a HTML template.
                 """
                 _diff_git_re = re.compile(b'^diff --git', re.MULTILINE)
                 def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
                     """
                     Store the settings and parse the diff right away; the result is
                     available as ``self.parsed``.

                     :param diff:   the diff as bytes
                     :param vcs: type of version control hg or git
                     :param diff_limit: define the size of diff that is considered "big"
                         based on that parameter cut off will be triggered, set to None
                         to show full diff
                     :param inline_diff: passed on to ``_parse_gitdiff``, which
                         documents it as an optional extra pass adding markup to
                         one-liner changes
                     :raises Exception: if ``diff`` is not a bytes object
                     """
                     if not isinstance(diff, bytes):
                         raise Exception('Diff must be bytes - got %s' % type(diff))
                     # A memoryview lets the per-file slices taken while parsing avoid copying the data.
                     self._diff = memoryview(diff)
                     self.adds = 0
                     self.removes = 0
                     self.diff_limit = diff_limit
                     # Set to True by the parser when a file is skipped because of diff_limit.
                     self.limited_diff = False
                     self.vcs = vcs
                     self.parsed = self._parse_gitdiff(inline_diff=inline_diff)
                 def _parse_gitdiff(self, inline_diff):
                     """Parse self._diff and return a list of dicts with meta info and chunks for each file.

                     self._diff is split at every match of self._diff_git_re; each span is
                     parsed with _get_header and, when the header names an a_file or b_file,
                     with _parse_lines. Exceptions raised by those helpers are not caught here.

                     Might set limited_diff: when self.diff_limit is truthy, files whose span
                     ends beyond it are skipped.

                     Optionally (when inline_diff is true), do an extra pass and do extra
                     markup of one-liner changes: a single 'del' line directly followed by a
                     single 'add' line is passed to _highlight_inline_diff.

                     Each returned dict has the keys old_filename, filename, old_revision,
                     new_revision, chunks, operation and stats.

                     NOTE(review): the added/deleted counts are only stored per file in
                     stats; this method does not update self.adds / self.removes.
                     """
                     _files = [] # list of dicts with meta info and chunks
                     # offsets where each file's diff starts, plus the total length as
                     # sentinel, so consecutive pairs delimit one file each
                     starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
                     starts.append(len(self._diff))
                     for start, end in zip(starts, starts[1:]):
                         # skip (but flag) every file that ends beyond the size limit
                         if self.diff_limit and end > self.diff_limit:
                             self.limited_diff = True
                             continue
                         head, diff_lines = _get_header(self.vcs, self._diff[start:end])
                         op = None
                         # 'binary' is set to True by every branch below and reset to False
                         # further down once the header turns out to name a_file or b_file
                         stats = {
                             'added': 0,
                             'deleted': 0,
                             'binary': False,
                             'ops': {},
                         }
                         if head['deleted_file_mode']:
                             op = 'removed'
                             stats['binary'] = True
                             stats['ops'][DEL_FILENODE] = 'deleted file'
                         elif head['new_file_mode']:
                             op = 'added'
                             stats['binary'] = True
                             stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
                         else:  # modify operation, can be cp, rename, chmod
                             # The three checks below are independent; when several apply,
                             # all messages are recorded and the last one decides op.
                             # CHMOD
                             if head['new_mode'] and head['old_mode']:
                                 op = 'modified'
                                 stats['binary'] = True
                                 stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
                                                     % (head['old_mode'], head['new_mode']))
                             # RENAME
                             if (head['rename_from'] and head['rename_to']
                                   and head['rename_from'] != head['rename_to']):
                                 op = 'renamed'
                                 stats['binary'] = True
                                 stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
                                                 % (head['rename_from'], head['rename_to']))
                             # COPY
                             # .get() because only the hg header pattern has copy_from/copy_to
                             # groups; the git header dict lacks these keys
                             if head.get('copy_from') and head.get('copy_to'):
                                 op = 'modified'
                                 stats['binary'] = True
                                 stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
                                                     % (head['copy_from'], head['copy_to']))
                             # FALL BACK: detect missed old style add or remove
                             if op is None:
                                 if not head['a_file'] and head['b_file']:
                                     op = 'added'
                                     stats['binary'] = True
                                     stats['ops'][NEW_FILENODE] = 'new file'
                                 elif head['a_file'] and not head['b_file']:
                                     op = 'removed'
                                     stats['binary'] = True
                                     stats['ops'][DEL_FILENODE] = 'deleted file'
                             # it's not ADD not DELETE
                             if op is None:
                                 op = 'modified'
                                 stats['binary'] = True
                                 stats['ops'][MOD_FILENODE] = 'modified file'
                         # a real non-binary diff
                         if head['a_file'] or head['b_file']:
                             chunks, added, deleted = _parse_lines(diff_lines)
                             stats['binary'] = False
                             stats['added'] = added
                             stats['deleted'] = deleted
                             # explicit mark that it's a modified file
                             if op == 'modified':
                                 stats['ops'][MOD_FILENODE] = 'modified file'
                         else:  # Git binary patch (or empty diff)
                             # Git binary patch
                             if head['bin_patch']:
                                 stats['ops'][BIN_FILENODE] = 'binary diff not shown'
                             chunks = []
                         if op == 'removed' and chunks:
                             # a way of seeing deleted content could perhaps be nice - but
                             # not with the current UI
                             chunks = []
                         # Prepend a chunk with one 'context' line per recorded operation
                         # message, except the plain 'modified file' one. The chunk is
                         # inserted even when it ends up empty.
                         chunks.insert(0, [{
                             'old_lineno': '',
                             'new_lineno': '',
                             'action':     'context',
                             'line':       msg,
                             } for _op, msg in stats['ops'].items()
                               if _op not in [MOD_FILENODE]])
                         _files.append({
                             'old_filename':     head['a_path'],
                             'filename':         head['b_path'],
                             'old_revision':     head['a_blob_id'],
                             'new_revision':     head['b_blob_id'],
                             'chunks':           chunks,
                             'operation':        op,
                             'stats':            stats,
                         })
                     if not inline_diff:
                         return _files
                     # highlight inline changes when one del is followed by one add
                     for diff_data in _files:
                         for chunk in diff_data['chunks']:
                             # Walk the chunk with one line of look-ahead (peekline); running
                             # out of lines anywhere raises StopIteration, which ends the
                             # scan of this chunk.
                             lineiter = iter(chunk)
                             try:
                                 peekline = next(lineiter)
                                 while True:
                                     # find a first del line
                                     while peekline['action'] != 'del':
                                         peekline = next(lineiter)
                                     delline = peekline
                                     peekline = next(lineiter)
                                     # if not followed by add, eat all following del lines
                                     if peekline['action'] != 'add':
                                         while peekline['action'] == 'del':
                                             peekline = next(lineiter)
                                         continue
                                     # found an add - make sure it is the only one
                                     addline = peekline
                                     try:
                                         peekline = next(lineiter)
                                     except StopIteration:
                                         # add was last line - ok
                                         _highlight_inline_diff(delline, addline)
                                         # re-raise so the outer handler ends this chunk
                                         raise
                                     if peekline['action'] != 'add':
                                         # there was only one add line - ok
                                         _highlight_inline_diff(delline, addline)
                             except StopIteration:
                                 pass
                     return _files
                 def stat(self):
                     """Return the ``(adds, removes)`` line counters held by this instance."""
                     added, removed = self.adds, self.removes
                     return added, removed
             _escape_re = re.compile(r'(&)|(<)|(>)|(\t)($)?|(\r)|( $)')
             def _escaper(diff_line):
                 r"""
                 Do HTML escaping/markup of a single diff line (including first +/- column)
                 >>> _escaper('foobar')
                 'foobar'
                 >>> _escaper('@foo & bar')
                 '@foo &amp; bar'
                 >>> _escaper('+foo < bar')
                 '+foo &lt; bar'
                 >>> _escaper('-foo > bar')
                 '-foo &gt; bar'
                 >>> _escaper(' <foo>')
                 ' &lt;foo&gt;'
                 >>> _escaper(' foo\tbar')
                 ' foo<u>\t</u>bar'
                 >>> _escaper(' foo\rbar\r')
                 ' foo<u class="cr"></u>bar<u class="cr"></u>'
                 >>> _escaper(' foo\t')
                 ' foo<u>\t</u><i></i>'
                 >>> _escaper(' foo ')
                 ' foo <i></i>'
                 >>> _escaper(' foo  ')
                 ' foo  <i></i>'
                 >>> _escaper(' ')
                 ' '
                 >>> _escaper('  ')
                 '  <i></i>'
                 >>> _escaper(' \t')
                 ' <u>\t</u><i></i>'
                 >>> _escaper(' \t  ')
                 ' <u>\t</u>  <i></i>'
                 >>> _escaper('   \t')
                 '   <u>\t</u><i></i>'
                 >>> _escaper(' \t\t  ')
                 ' <u>\t</u><u>\t</u>  <i></i>'
                 >>> _escaper('   \t\t')
                 '   <u>\t</u><u>\t</u><i></i>'
                 >>> _escaper(' foo&bar<baz>  ')
                 ' foo&amp;bar&lt;baz&gt;  <i></i>'
                 """
                 def substitute(m):
                     groups = m.groups()
                     if groups[0]:
                         return '&amp;'
                     if groups[1]:
                         return '&lt;'
                     if groups[2]:
                         return '&gt;'
                     if groups[3]:
                         if groups[4] is not None:  # end of line
                             return '<u>\t</u><i></i>'
                         return '<u>\t</u>'
                     if groups[5]:
                         return '<u class="cr"></u>'
                     if groups[6]:
                         if m.start() == 0:
                             return ' '  # first column space shouldn't make empty lines show up as trailing space
                         return ' <i></i>'
                     assert False
                 return _escape_re.sub(substitute, diff_line)
             # Header of one file in a Git "diff --git" diff (bytes, verbose syntax).
             # Everything after the "diff --git" line is optional. The named groups
             # become the keys of the meta info dict built by _get_header; groups
             # that did not take part in the match are None.
             _git_header_re = re.compile(br"""
                 ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
                 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
                    ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
                 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
                    ^rename[ ]from[ ](?P<rename_from>.+)\n
                    ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
                 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
                 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
                 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
                     \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
                 (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
                 (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
                 (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
             """, re.VERBOSE | re.MULTILINE)
             # Header pattern used when vcs is 'hg'. It differs from the Git pattern
             # in two ways: the "similarity index" line is optional on its own rather
             # than tied to the rename lines, and "copy from" / "copy to" lines are
             # recognised (groups copy_from and copy_to).
             _hg_header_re = re.compile(br"""
                 ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
                 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
                    ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
                 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
                 (?:^rename[ ]from[ ](?P<rename_from>.+)\n
                    ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
                 (?:^copy[ ]from[ ](?P<copy_from>.+)\n
                    ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
                 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
                 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
                 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
                     \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
                 (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
                 (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
                 (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
             """, re.VERBOSE | re.MULTILINE)
             # Zero-width sanity check on whatever follows a matched header: it
             # matches when the data starts with neither '@', 'literal ' nor
             # 'delta '. _get_header raises when this matches.
             _header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
             def _get_header(vcs, diff_chunk):
                 """
                 Split the diff for a single file into its parsed header and its body lines.

                 The header is matched with the pattern selected by vcs ('git' or 'hg').
                 Returns a tuple with:

                 1. A dict with meta info, one entry per named group of the header
                    pattern (passed through safe_str, or None when absent), such as:

                     a_path, b_path, similarity_index, rename_from, rename_to,
                     old_mode, new_mode, new_file_mode, deleted_file_mode,
                     a_blob_id, b_blob_id, b_mode, a_file, b_file

                 2. An iterator yielding the lines after the header, each one passed
                    through _escaper to get simple HTML markup.

                 Raises Exception when the header is not recognized (including an
                 unknown vcs) or when it is followed by data that starts with neither
                 '@', 'literal ' nor 'delta '.
                 """
                 if vcs == 'git':
                     header_re = _git_header_re
                 elif vcs == 'hg':
                     header_re = _hg_header_re
                 else:
                     header_re = None
                 match = None if header_re is None else header_re.match(diff_chunk)
                 if match is None:
                     raise Exception('diff not recognized as valid %s diff' % vcs)

                 meta_info = {}
                 for key, value in match.groupdict().items():
                     meta_info[key] = safe_str(value) if value is not None else None

                 header_end = match.end()
                 rest = diff_chunk[header_end:]
                 if rest:
                     if _header_next_check.match(rest):
                         header_text = safe_str(bytes(diff_chunk[:header_end]))
                         following = safe_str(bytes(rest[:1000]))
                         raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, header_text, following))
                     if rest[-1:] != b'\n':
                         # The diff will generally already end with \n (and be a
                         # memoryview). It might be huge, so copying is avoided in the
                         # common case; in this very rare case the copy doesn't matter.
                         rest = bytes(rest) + b'\n'

                 diff_lines = (
                     _escaper(safe_str(line_match.group(1)))
                     for line_match in re.finditer(br'(.*)\n', rest)
                 )
                 return meta_info, diff_lines
             _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
             _newline_marker = re.compile(r'^\\ No newline at end of file')
             def _parse_lines(diff_lines):
                 """
                 Given an iterator of diff body lines, parse them and return a dict per
                 line and added/removed totals.
                 """
                 added = deleted = 0
                 old_line = old_end = new_line = new_end = None
                 chunks = []
                 try:
                     line = next(diff_lines)
                     while True:
                         lines = []
                         chunks.append(lines)
                         match = _chunk_re.match(line)
                         if not match:
                             raise Exception('error parsing diff @@ line %r' % line)
                         gr = match.groups()
                         (old_line, old_end,
                          new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                         old_line -= 1
                         new_line -= 1
                         context = len(gr) == 5
                         old_end += old_line
                         new_end += new_line
                         if context:
                             # skip context only if it's first line
                             if int(gr[0]) > 1:
                                 lines.append({
                                     'old_lineno': '',
                                     'new_lineno': '',
                                     'action':     'context',
                                     'line':       line,
                                 })
                         line = next(diff_lines)
                         while old_line < old_end or new_line < new_end:
                             if not line:
                                 raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
                             affects_old = affects_new = False
                             command = line[0]
                             if command == '+':
                                 affects_new = True
                                 action = 'add'
                                 added += 1
                             elif command == '-':
                                 affects_old = True
                                 action = 'del'
                                 deleted += 1
                             elif command == ' ':
                                 affects_old = affects_new = True
                                 action = 'unmod'
                             else:
                                 raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
                             if not _newline_marker.match(line):
                                 old_line += affects_old
                                 new_line += affects_new
                                 lines.append({
                                     'old_lineno':   affects_old and old_line or '',
                                     'new_lineno':   affects_new and new_line or '',
                                     'action':       action,
                                     'line':         line[1:],
                                 })
                             line = next(diff_lines)
                             if _newline_marker.match(line):
                                 # we need to append to lines, since this is not
                                 # counted in the line specs of diff
                                 lines.append({
                                     'old_lineno':   '',
                                     'new_lineno':   '',
                                     'action':       'context',
                                     'line':         line,
                                 })
                                 line = next(diff_lines)
                         if old_line > old_end:
                             raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
                         if new_line > new_end:
                             raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
                 except StopIteration:
                     pass
                 if old_line != old_end or new_line != new_end:
                     raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
                 return chunks, added, deleted
             # Used for inline highlighter word split, must match the substitutions in _escaper
             _token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
             def _highlight_inline_diff(old, new):
                 """
                 Highlight simple add/remove in two lines given as info dicts. They are
                 modified in place and given markup with <del>/<ins>.
                 """
                 assert old['action'] == 'del'
                 assert new['action'] == 'add'
                 oldwords = _token_re.split(old['line'])
                 newwords = _token_re.split(new['line'])
                 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
                 oldfragments, newfragments = [], []
                 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
                     oldfrag = ''.join(oldwords[i1:i2])
                     newfrag = ''.join(newwords[j1:j2])
                     if tag != 'equal':
                         if oldfrag:
                             oldfrag = '<del>%s</del>' % oldfrag
                         if newfrag:
                             newfrag = '<ins>%s</ins>' % newfrag
                     oldfragments.append(oldfrag)
                     newfragments.append(newfrag)
                 old['line'] = "".join(oldfragments)
                 new['line'] = "".join(newfragments)