u/ewong/rhodecode-enterprise-ce-fork Commit - r679:df6d63d7

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

3

4

#

4

#

5

# This program is free software: you can redistribute it and/or modify

5

# This program is free software: you can redistribute it and/or modify

6

# it under the terms of the GNU Affero General Public License, version 3

6

# it under the terms of the GNU Affero General Public License, version 3

7

# (only), as published by the Free Software Foundation.

7

# (only), as published by the Free Software Foundation.

8

#

8

#

9

# This program is distributed in the hope that it will be useful,

9

# This program is distributed in the hope that it will be useful,

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12

# GNU General Public License for more details.

12

# GNU General Public License for more details.

13

#

13

#

14

# You should have received a copy of the GNU Affero General Public License

14

# You should have received a copy of the GNU Affero General Public License

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

16

#

16

#

17

# This program is dual-licensed. If you wish to learn more about the

17

# This program is dual-licensed. If you wish to learn more about the

18

# RhodeCode Enterprise Edition, including its added features, Support services,

18

# RhodeCode Enterprise Edition, including its added features, Support services,

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

20

21

22

"""

22

"""

23

Set of diffing helpers, previously part of vcs

23

Set of diffing helpers, previously part of vcs

24

"""

24

"""

25

26

import collections

26

import collections

27

import re

27

import re

28

import difflib

28

import difflib

29

import logging

29

import logging

30

31

from itertools import tee, imap

31

from itertools import tee, imap

32

33

from pylons.i18n.translation import _

33

from pylons.i18n.translation import _

34

35

from rhodecode.lib.vcs.exceptions import VCSError

35

from rhodecode.lib.vcs.exceptions import VCSError

36

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

36

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

37

from rhodecode.lib.vcs.backends.base import EmptyCommit

37

from rhodecode.lib.vcs.backends.base import EmptyCommit

38

from rhodecode.lib.helpers import escape

38

from rhodecode.lib.helpers import escape

39

from rhodecode.lib.utils2 import safe_unicode

39

from rhodecode.lib.utils2 import safe_unicode

40

41

log = logging.getLogger(__name__)

41

log = logging.getLogger(__name__)

42

43

# define max context, a file with more than this number of lines is unusable

44

# in browser anyway

45

MAX_CONTEXT = 1024 * 1014  # NOTE(review): 1014 may be a typo for 1024 -- confirm before changing

46

43

47

44

class OPS(object):
    """
    Single-letter codes for the operation performed on a file in a diff.
    """

    ADD = 'A'
    MOD = 'M'
    DEL = 'D'

48

52

49

53

50

def wrap_to_table(str_):
    """
    Wrap a short message into a single-row ``code-difftable`` HTML table.

    :param str_: text (or already escaped markup) placed in the code cell
    :returns: HTML string containing the table
    """
    # NOTE(review): the row markup between <table> and </tr> (including both
    # ``%s`` placeholders) was missing from the extracted source and has been
    # reconstructed -- verify the exact cells/classes against upstream.
    return '''<table class="code-difftable">
                <tr class="line no-comment">
                <td class="add-comment-line tooltip" title="%s"><span class="add-comment-content"></span></td>
                <td class="lineno new"></td>
                <td class="code no-comment"><pre>%s</pre></td>
                </tr>
              </table>''' % (_('Click to comment'), str_)

58

62

59

63

60

def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
                 show_full_diff=False, ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    returns a wrapped diff into a table, checks for cut_off_limit for file and
    whole diff and presents proper message

    :param filenode_old: old file node; ``None`` is replaced by an empty
        ``FileNode`` at the path of ``filenode_new``
    :param filenode_new: new file node
    :param diff_limit: size limit; ``-1`` forces the "too big" message,
        ``None`` skips the node size check
    :param file_limit: passed through to ``DiffProcessor``
    :param show_full_diff: passed through to ``DiffProcessor``
    :param ignore_whitespace: passed through to ``get_gitdiff``
    :param line_context: number of context lines passed to ``get_gitdiff``
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats, data)``
    """

    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyCommit())

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('Binary file'))
        stats = None
        size = 0
        data = None

    elif diff_limit != -1 and (
            diff_limit is None or
            (filenode_old.size < diff_limit and
             filenode_new.size < diff_limit)):

        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                ignore_whitespace=ignore_whitespace,
                                context=line_context)
        diff_processor = DiffProcessor(
            f_gitdiff, format='gitdiff', diff_limit=diff_limit,
            file_limit=file_limit, show_full_diff=show_full_diff)
        _parsed = diff_processor.prepare()

        diff = diff_processor.as_html(enable_comments=enable_comments)
        stats = _parsed[0]['stats'] if _parsed else None
        size = len(diff or '')
        data = _parsed[0] if _parsed else None
    else:
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))
        stats = None
        size = 0
        data = None

    if not diff:
        # a real list (not a lazy ``filter`` object) so that both the
        # truthiness test and the indexing below behave the same everywhere
        submodules = [node for node in (filenode_new, filenode_old)
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.commit.raw_id
    cs2 = filenode_new.commit.raw_id

    return size, cs1, cs2, diff, stats, data

110

114

111

115

112

def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value falls back to 3
        and values above ``MAX_CONTEXT`` are clamped to it
    :returns: result of ``repo.get_diff``, or ``''`` when either node is a
        ``SubModuleNode``
    :raises VCSError: when one of the nodes is not a ``FileNode``
    """
    # make sure we pass in default context, and protect against IntOverflow
    # when passing HUGE context
    context = min(context or 3, MAX_CONTEXT)

    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError(
                "Given object should be FileNode object, not %s"
                % filenode.__class__)

    repo = filenode_new.commit.repository
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT
    new_commit = filenode_new.commit

    vcs_gitdiff = repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
    return vcs_gitdiff

139

147

140

# keys of the per-file ``stats['ops']`` dict built while parsing a git diff
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
COPIED_FILENODE = 5
CHMOD_FILENODE = 6
BIN_FILENODE = 7

147

155

148

156

149

class LimitedDiffContainer(object):
    """
    Thin wrapper around a parsed diff that also records the diff limit and
    the diff size reached; indexing and iteration are delegated to the
    wrapped object.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff = diff
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size

    def __getitem__(self, key):
        return self.diff.__getitem__(key)

    def __iter__(self):
        for item in self.diff:
            yield item

162

170

163

171

164

class Action(object):
    """
    Contains constants for the action value of the lines in a parsed diff.
    """

    ADD = 'add'
    DELETE = 'del'
    UNMODIFIED = 'unmod'

    CONTEXT = 'context'

174

182

175

183

176

class DiffProcessor(object):

184

class DiffProcessor(object):

177

"""

185

"""

178

Give it a unified or git diff and it returns a list of the files that were

186

Give it a unified or git diff and it returns a list of the files that were

179

mentioned in the diff together with a dict of meta information that

187

mentioned in the diff together with a dict of meta information that

180

can be used to render it in a HTML template.

188

can be used to render it in a HTML template.

181

189

182

.. note:: Unicode handling

190

.. note:: Unicode handling

183

191

184

The original diffs are a byte sequence and can contain filenames

192

The original diffs are a byte sequence and can contain filenames

185

in mixed encodings. This class generally returns `unicode` objects

193

in mixed encodings. This class generally returns `unicode` objects

186

since the result is intended for presentation to the user.

194

since the result is intended for presentation to the user.

187

195

188

"""

196

"""

189

# hunk header, e.g. ``@@ -1,3 +1,4 @@ optional section text``
_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
_newline_marker = re.compile(r'^\\ No newline at end of file')

# used for inline highlighter word split; lines are HTML-escaped before
# they are split, so the escaped entities must stay single tokens
_token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

194

202

195

def __init__(self, diff, format='gitdiff', diff_limit=None,

203

def __init__(self, diff, format='gitdiff', diff_limit=None,

196

file_limit=None, show_full_diff=True):

204

file_limit=None, show_full_diff=True):

197

"""

205

"""

198

:param diff: A `Diff` object representing a diff from a vcs backend

206

:param diff: A `Diff` object representing a diff from a vcs backend

199

:param format: format of diff passed, `udiff` or `gitdiff`

207

:param format: format of diff passed, `udiff` or `gitdiff`

200

:param diff_limit: define the size of diff that is considered "big"

208

:param diff_limit: define the size of diff that is considered "big"

201

based on that parameter cut off will be triggered, set to None

209

based on that parameter cut off will be triggered, set to None

202

to show full diff

210

to show full diff

203

"""

211

"""

204

self._diff = diff

212

self._diff = diff

205

self._format = format

213

self._format = format

206

self.adds = 0

214

self.adds = 0

207

self.removes = 0

215

self.removes = 0

208

# calculate diff size

216

# calculate diff size

209

self.diff_limit = diff_limit

217

self.diff_limit = diff_limit

210

self.file_limit = file_limit

218

self.file_limit = file_limit

211

self.show_full_diff = show_full_diff

219

self.show_full_diff = show_full_diff

212

self.cur_diff_size = 0

220

self.cur_diff_size = 0

213

self.parsed = False

221

self.parsed = False

214

self.parsed_diff = []

222

self.parsed_diff = []

215

223

216

if format == 'gitdiff':

224

if format == 'gitdiff':

217

self.differ = self._highlight_line_difflib

225

self.differ = self._highlight_line_difflib

218

self._parser = self._parse_gitdiff

226

self._parser = self._parse_gitdiff

219

else:

227

else:

220

self.differ = self._highlight_line_udiff

228

self.differ = self._highlight_line_udiff

221

self._parser = self._parse_udiff

229

self._parser = self._parse_udiff

222

230

223

def _copy_iterator(self):

231

def _copy_iterator(self):

224

"""

232

"""

225

make a fresh copy of generator, we should not iterate thru

233

make a fresh copy of generator, we should not iterate thru

226

an original as it's needed for repeating operations on

234

an original as it's needed for repeating operations on

227

this instance of DiffProcessor

235

this instance of DiffProcessor

228

"""

236

"""

229

self.__udiff, iterator_copy = tee(self.__udiff)

237

self.__udiff, iterator_copy = tee(self.__udiff)

230

return iterator_copy

238

return iterator_copy

231

239

232

def _escaper(self, string):
    """
    Escaper for diff escapes special chars and checks the diff limit

    :param string: one raw line of the diff
    :returns: the line as unicode with ``&``, ``<`` and ``>`` replaced by
        their HTML entities
    :raises DiffLimitExceeded: when not in full-diff mode and the running
        diff size has grown past ``diff_limit``
    """

    self.cur_diff_size += len(string)

    # NOTE(review): ``diff_limit`` may be None; under Python 2 an int always
    # compares greater than None, so this check then always fires -- confirm
    # that this is intended.
    if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
        raise DiffLimitExceeded('Diff Limit Exceeded')

    # '&' has to be replaced first, otherwise the ampersands of the other
    # two entities would be escaped again
    return safe_unicode(string)\
        .replace('&', '&amp;')\
        .replace('<', '&lt;')\
        .replace('>', '&gt;')

248

256

249

def _line_counter(self, l):
    """
    Checks each line and bumps total adds/removes for this diff

    :param l: one line of the diff; ``+++`` / ``---`` file header lines
        are not counted
    :returns: the line converted with ``safe_unicode``
    """
    if l.startswith('+') and not l.startswith('+++'):
        self.adds += 1
    elif l.startswith('-') and not l.startswith('---'):
        self.removes += 1
    return safe_unicode(l)

260

268

261

def _highlight_line_difflib(self, line, next_):
    """
    Highlight inline changes in both lines.

    Both line dicts are modified in place: the differing fragments of the
    deleted line are wrapped in ``<del>`` and those of the other line in
    ``<ins>``.

    :param line: parsed line dict (``'action'`` and ``'line'`` keys are used)
    :param next_: the parsed line dict following ``line``
    """

    # whichever of the two is the deletion is treated as the "old" text
    if line['action'] == Action.DELETE:
        old, new = line, next_
    else:
        old, new = next_, line

    oldwords = self._token_re.split(old['line'])
    newwords = self._token_re.split(new['line'])
    sequence = difflib.SequenceMatcher(None, oldwords, newwords)

    oldfragments, newfragments = [], []
    for tag, i1, i2, j1, j2 in sequence.get_opcodes():
        oldfrag = ''.join(oldwords[i1:i2])
        newfrag = ''.join(newwords[j1:j2])
        if tag != 'equal':
            if oldfrag:
                oldfrag = '<del>%s</del>' % oldfrag
            if newfrag:
                newfrag = '<ins>%s</ins>' % newfrag
        oldfragments.append(oldfrag)
        newfragments.append(newfrag)

    old['line'] = "".join(oldfragments)
    new['line'] = "".join(newfragments)

289

297

290

def _highlight_line_udiff(self, line, next_):

298

def _highlight_line_udiff(self, line, next_):

291

"""

299

"""

292

Highlight inline changes in both lines.

300

Highlight inline changes in both lines.

293

"""

301

"""

294

start = 0

302

start = 0

295

limit = min(len(line['line']), len(next_['line']))

303

limit = min(len(line['line']), len(next_['line']))

296

while start < limit and line['line'][start] == next_['line'][start]:

304

while start < limit and line['line'][start] == next_['line'][start]:

297

start += 1

305

start += 1

298

end = -1

306

end = -1

299

limit -= start

307

limit -= start

300

while -end <= limit and line['line'][end] == next_['line'][end]:

308

while -end <= limit and line['line'][end] == next_['line'][end]:

301

end -= 1

309

end -= 1

302

end += 1

310

end += 1

303

if start or end:

311

if start or end:

304

def do(l):

312

def do(l):

305

last = end + len(l['line'])

313

last = end + len(l['line'])

306

if l['action'] == Action.ADD:

314

if l['action'] == Action.ADD:

307

tag = 'ins'

315

tag = 'ins'

308

else:

316

else:

309

tag = 'del'

317

tag = 'del'

310

l['line'] = '%s<%s>%s</%s>%s' % (

318

l['line'] = '%s<%s>%s</%s>%s' % (

311

l['line'][:start],

319

l['line'][:start],

312

tag,

320

tag,

313

l['line'][start:last],

321

l['line'][start:last],

314

tag,

322

tag,

315

l['line'][last:]

323

l['line'][last:]

316

)

324

)

317

do(line)

325

do(line)

318

do(next_)

326

do(next_)

319

327

320

def _clean_line(self, line, command):

328

def _clean_line(self, line, command):

321

if command in ['+', '-', ' ']:

329

if command in ['+', '-', ' ']:

322

# only modify the line if it's actually a diff thing

330

# only modify the line if it's actually a diff thing

323

line = line[1:]

331

line = line[1:]

324

return line

332

return line

325

333

326

def _parse_gitdiff(self, inline_diff=True):
    """
    Parse every chunk of ``self._diff`` into a per-file info dict.

    :param inline_diff: when true, adjacent changed lines of each chunk are
        passed to ``self.differ`` for inline highlighting
    :returns: the file dicts sorted by operation (added, modified, deleted);
        wrapped in a ``LimitedDiffContainer`` when a limit was exceeded for
        any file
    """
    _files = []
    diff_container = lambda arg: arg

    for chunk in self._diff.chunks():
        head = chunk.header

        # lazy on purpose: ``_escaper`` may raise DiffLimitExceeded while
        # ``_parse_lines`` consumes the lines inside the ``try`` below
        diff = (self._escaper(raw_line)
                for raw_line in chunk.diff.splitlines(1))
        raw_diff = chunk.raw
        limited_diff = False
        exceeds_limit = False

        op = None
        stats = {
            'added': 0,
            'deleted': 0,
            'binary': False,
            'ops': {},
        }

        if head['deleted_file_mode']:
            op = OPS.DEL
            stats['binary'] = True
            stats['ops'][DEL_FILENODE] = 'deleted file'

        elif head['new_file_mode']:
            op = OPS.ADD
            stats['binary'] = True
            stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
        else:  # modify operation, can be copy, rename or chmod

            # CHMOD
            if head['new_mode'] and head['old_mode']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][CHMOD_FILENODE] = (
                    'modified file chmod %s => %s' % (
                        head['old_mode'], head['new_mode']))
            # RENAME
            if head['rename_from'] != head['rename_to']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][RENAMED_FILENODE] = (
                    'file renamed from %s to %s' % (
                        head['rename_from'], head['rename_to']))
            # COPY
            if head.get('copy_from') and head.get('copy_to'):
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][COPIED_FILENODE] = (
                    'file copied from %s to %s' % (
                        head['copy_from'], head['copy_to']))

        # If our new parsed headers didn't match anything fallback to
        # old style detection
        if op is None:
            if not head['a_file'] and head['b_file']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file'

            elif head['a_file'] and not head['b_file']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

        # it's not ADD not DELETE
        if op is None:
            op = OPS.MOD
            stats['binary'] = True
            stats['ops'][MOD_FILENODE] = 'modified file'

        # a real non-binary diff
        if head['a_file'] or head['b_file']:
            try:
                raw_diff, chunks, _stats = self._parse_lines(diff)
                stats['binary'] = False
                stats['added'] = _stats[0]
                stats['deleted'] = _stats[1]
                # explicit mark that it's a modified file
                if op == OPS.MOD:
                    stats['ops'][MOD_FILENODE] = 'modified file'
                exceeds_limit = len(raw_diff) > self.file_limit

                # changed from _escaper function so we validate size of
                # each file instead of the whole diff
                # diff will hide big files but still show small ones
                # from my tests, big files are fairly safe to be parsed
                # but the browser is the bottleneck
                if not self.show_full_diff and exceeds_limit:
                    raise DiffLimitExceeded('File Limit Exceeded')

            except DiffLimitExceeded:
                diff_container = lambda _diff: \
                    LimitedDiffContainer(
                        self.diff_limit, self.cur_diff_size, _diff)

                exceeds_limit = len(raw_diff) > self.file_limit
                limited_diff = True
                chunks = []

        else:  # GIT format binary patch, or possibly empty diff
            if head['bin_patch']:
                # we have operation already extracted, but we mark simply
                # it's a diff we wont show for binary files
                stats['ops'][BIN_FILENODE] = 'binary diff hidden'
            chunks = []

        if chunks and not self.show_full_diff and op == OPS.DEL:
            # if not full diff mode show deleted file contents
            # TODO: anderson: if the view is not too big, there is no way
            # to see the content of the file
            chunks = []

        # the first chunk lists the operations as context lines
        chunks.insert(0, [{
            'old_lineno': '',
            'new_lineno': '',
            'action': Action.CONTEXT,
            'line': msg,
        } for _op, msg in stats['ops'].items()
            if _op != MOD_FILENODE])

        _files.append({
            'filename': safe_unicode(head['b_path']),
            'old_revision': head['a_blob_id'],
            'new_revision': head['b_blob_id'],
            'chunks': chunks,
            'raw_diff': safe_unicode(raw_diff),
            'operation': op,
            'stats': stats,
            'exceeds_limit': exceeds_limit,
            'is_limited_diff': limited_diff,
        })

    sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                           OPS.DEL: 2}.get(info['operation'])

    if not inline_diff:
        return diff_container(sorted(_files, key=sorter))

    # highlight inline changes
    for diff_data in _files:
        for chunk in diff_data['chunks']:
            lineiter = iter(chunk)
            try:
                while 1:
                    line = next(lineiter)
                    if line['action'] not in (
                            Action.UNMODIFIED, Action.CONTEXT):
                        nextline = next(lineiter)
                        # only a changed line directly followed by a
                        # changed line of the other kind is highlighted
                        if nextline['action'] in (
                                Action.UNMODIFIED, Action.CONTEXT) or \
                                nextline['action'] == line['action']:
                            continue
                        self.differ(line, nextline)
            except StopIteration:
                pass

    return diff_container(sorted(_files, key=sorter))

484

492

485

def _parse_udiff(self, inline_diff=True):

493

def _parse_udiff(self, inline_diff=True):

486

raise NotImplementedError()

494

raise NotImplementedError()

487

495

488

def _parse_lines(self, diff):
    """
    Parse the hunks of a diff and return data for the template.

    :param diff: iterable of diff lines. The first line is matched
        against ``self._chunk_re``; parsing stops at the first line that
        should be a hunk header but does not match.
    :return: tuple ``(raw_diff, chunks, stats)``. ``raw_diff`` is the
        collected lines joined into a single string, ``chunks`` holds one
        list of line dicts (keys ``old_lineno``, ``new_lineno``,
        ``action`` and ``line``) per hunk, and ``stats`` is the two item
        list ``[added, deleted]``.
    """

    lineiter = iter(diff)
    stats = [0, 0]
    chunks = []
    raw_diff = []

    # Running out of lines anywhere below simply ends the parsing.
    try:
        line = next(lineiter)

        while line:
            raw_diff.append(line)
            lines = []
            chunks.append(lines)

            match = self._chunk_re.match(line)

            if not match:
                break

            gr = match.groups()
            # A start/length that is missing from the header counts as 1.
            (old_line, old_end,
             new_line, new_end) = [int(x or 1) for x in gr[:-1]]
            old_line -= 1
            new_line -= 1

            context = len(gr) == 5
            old_end += old_line
            new_end += new_line

            if context:
                # skip context only if it's first line
                if int(gr[0]) > 1:
                    lines.append({
                        'old_lineno': '...',
                        'new_lineno': '...',
                        'action': Action.CONTEXT,
                        'line': line,
                    })

            line = next(lineiter)

            while old_line < old_end or new_line < new_end:
                command = ' '
                if line:
                    command = line[0]

                affects_old = affects_new = False

                # ignore those if we don't expect them
                if command in '#@':
                    # The line has to be consumed before looping again,
                    # otherwise the very same line is inspected forever.
                    line = next(lineiter)
                    continue
                elif command == '+':
                    affects_new = True
                    action = Action.ADD
                    stats[0] += 1
                elif command == '-':
                    affects_old = True
                    action = Action.DELETE
                    stats[1] += 1
                else:
                    affects_old = affects_new = True
                    action = Action.UNMODIFIED

                if not self._newline_marker.match(line):
                    old_line += affects_old
                    new_line += affects_new
                    lines.append({
                        # A side the line does not touch (or a zero line
                        # number) is rendered as an empty string.
                        'old_lineno': (
                            (old_line or '') if affects_old else ''),
                        'new_lineno': (
                            (new_line or '') if affects_new else ''),
                        'action': action,
                        'line': self._clean_line(line, command)
                    })
                    raw_diff.append(line)

                line = next(lineiter)

                if self._newline_marker.match(line):
                    # we need to append to lines, since this is not
                    # counted in the line specs of diff
                    lines.append({
                        'old_lineno': '...',
                        'new_lineno': '...',
                        'action': Action.CONTEXT,
                        'line': self._clean_line(line, command)
                    })

    except StopIteration:
        pass
    return ''.join(raw_diff), chunks, stats

581

589

582

def _safe_id(self, idstring):
    """
    Turn ``idstring`` into a token usable inside an HTML id attribute.

    The value is formatted as a string, every whitespace character is
    replaced by an underscore, every character that is neither a hyphen
    nor a regex "word" character (alphanumerics and underscore) is
    dropped, and the result is lower-cased. Colons and periods are
    removed as well, which is stricter than an id strictly requires.
    """
    # Whitespace first, so that it survives the clean-up as underscores.
    underscored = re.sub(r'\s', "_", '%s' % idstring)
    # Everything that is not a hyphen and not a word character goes away.
    cleaned = re.sub(r'(?!-)\W', "", underscored)
    return cleaned.lower()

602

610

603

def prepare(self, inline_diff=True):
    """
    Run the configured parser and remember its result on the instance.

    :param inline_diff: forwarded as keyword argument to ``self._parser``.
    :return: whatever ``self._parser`` returned; the same object is also
        stored as ``self.parsed_diff`` and ``self.parsed`` is set to True.
    """
    result = self._parser(inline_diff=inline_diff)
    self.parsed = True
    self.parsed_diff = result
    return result

613

621

614

def as_raw(self, diff_lines=None):
    """
    Return the raw diff, i.e. the ``raw`` attribute of ``self._diff``.

    :param diff_lines: accepted but not used by this implementation.
    """
    raw = self._diff.raw
    return raw

619

627

620

def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Build an HTML table for the diff, styled with the given css classes.

    ``self.prepare()`` is called first when ``self.parsed`` is falsy.
    Rows are rendered from ``parsed_lines`` when it is truthy, otherwise
    from ``self.parsed_diff``.

    :param table_class: css class of the ``<table>`` element.
    :param line_class: css class of every ``<tr>``, next to the action.
    :param old_lineno_class: css class of the old line number cell.
    :param new_lineno_class: css class of the new line number cell.
    :param code_class: css class of the code cell.
    :param enable_comments: when true, rows that are not context rows
        get the "add comment" icon; otherwise code cells are additionally
        tagged with ``no-comment``.
    :param parsed_lines: parsed diff data to render instead of
        ``self.parsed_diff``.
    :return: the markup as one string, or ``None`` when no file in the
        rendered data had a chunk.
    """
    def _link_to_if(condition, label, url):
        """
        Wrap ``label`` in a link to ``url`` when ``condition`` holds,
        otherwise hand back the bare label.
        """
        if not condition:
            return label
        # The line break and indentation inside this literal end up in
        # the emitted tag, between two attributes.
        return '''<a href="%(url)s" class="tooltip"
                        title="%(title)s">%(label)s</a>''' % {
            'title': _('Click to select line'),
            'url': url,
            'label': label
        }

    if not self.parsed:
        self.prepare()

    diff_lines = self.parsed_diff
    if parsed_lines:
        diff_lines = parsed_lines

    saw_chunk = False
    pieces = ['<table class="%s">\n' % (table_class,)]
    emit = pieces.append

    for file_diff in diff_lines:
        for chunk in file_diff['chunks']:
            # Seeing a chunk at all (even one without rows) is what
            # decides between returning markup and returning None.
            saw_chunk = True
            for change in chunk:
                action = change['action']
                emit('<tr class="%s %s">\n' % (line_class, action))

                # Anchors are "<safe filename>_o<old>" / "_n<new>".
                safe_name = self._safe_id(file_diff['filename'])
                old_no = change['old_lineno']
                new_no = change['new_lineno']
                anchor_old = '%s_o%s' % (safe_name, old_no)
                anchor_new = '%s_n%s' % (safe_name, new_no)

                # Only real line numbers (not '...' and not empty) get
                # an id attribute on their cell.
                anchor_old_id = ''
                if old_no != '...' and old_no:
                    anchor_old_id = 'id="%s"' % anchor_old
                anchor_new_id = ''
                if new_no != '...' and new_no:
                    anchor_new_id = 'id="%s"' % anchor_new

                # Context rows carry plain labels instead of links.
                linkable = action != Action.CONTEXT

                # --- comment icon cell ---
                emit('\t<td class="add-comment-line">'
                     '<span class="add-comment-content">')
                if enable_comments and linkable:
                    emit('<a href="#">'
                         '<span class="icon-comment-add"></span></a>')
                emit('</span></td>\n')

                # --- old line number cell ---
                emit('\t<td %s class="%s">' % (
                    anchor_old_id, old_lineno_class))
                old_link = _link_to_if(linkable, old_no, '#%s' % anchor_old)
                emit('%s' % (old_link,))
                emit('</td>\n')

                # --- new line number cell ---
                emit('\t<td %s class="%s">' % (
                    anchor_new_id, new_lineno_class))
                new_link = _link_to_if(linkable, new_no, '#%s' % anchor_new)
                emit('%s' % (new_link,))
                emit('</td>\n')

                # --- code cell ---
                code_classes = [code_class]
                if not enable_comments or action == Action.CONTEXT:
                    code_classes.append('no-comment')
                emit('\t<td class="%s">' % ' '.join(code_classes))
                emit('\n\t\t<pre>%s</pre>\n' % (change['line'],))
                emit('\t</td>')
                emit('\n</tr>\n')

    emit('</table>')
    if not saw_chunk:
        return None
    return ''.join(pieces)

740

748

741

def stat(self):
    """
    Return the tuple ``(self.adds, self.removes)`` of this instance.
    """
    added, removed = self.adds, self.removes
    return added, removed

746

754

747

def get_context_of_line(
        self, path, diff_line=None, context_before=3, context_after=3):
    """
    Collect the lines surrounding one diff line of the file at ``path``.

    :param path: file name as stored in the parsed diff.
    :type diff_line: :class:`DiffLineNumber`
    :param diff_line: line to look up; it has to contain ``None`` for
        one of its sides, otherwise ``ValueError`` is raised.
    :param context_before: how many chunk entries before the line are
        considered.
    :param context_after: how many chunk entries after the line are
        considered.
    :return: list of ``(action, line)`` tuples for the entries in that
        window which are diff content; the text of the last tuple is
        made to end with exactly one newline.
    """
    assert self.parsed, "DiffProcessor is not initialized."

    if None not in diff_line:
        raise ValueError(
            "Cannot specify both line numbers: {}".format(diff_line))

    chunk, idx = self._find_chunk_line_index(
        self._get_file_diff(path), diff_line)

    # The window is clamped at the start of the chunk; slicing takes
    # care of the end.
    window_start = max(idx - context_before, 0)
    window_stop = idx + context_after + 1

    line_contents = []
    for entry in chunk[window_start:window_stop]:
        if _is_diff_content(entry):
            line_contents.append(_context_line(entry))

    # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
    # Once they are fixed, we can drop this normalisation.
    if line_contents:
        last_action, last_text = line_contents[-1]
        line_contents[-1] = (last_action, last_text.rstrip('\n') + '\n')
    return line_contents

776

784

777

def find_context(self, path, context, offset=0):
    """
    Finds the given `context` inside of the diff.

    Use the parameter `offset` to specify which offset the target line has
    inside of the given `context`. This way the correct diff line will be
    returned.

    :param path: file name as stored in the parsed diff.
    :param context: sequence of ``(action, line)`` tuples, compared
        against the lines of every chunk of the file.
    :param offset: Shall be used to specify the offset of the main line
        within the given `context`.
    :return: list with one diff line number per place where the whole
        `context` was found.
    :raises ValueError: if `offset` is negative or not smaller than the
        length of `context`.
    """
    if offset < 0 or offset >= len(context):
        raise ValueError(
            "Only positive values up to the length of the context "
            "minus one are allowed.")

    matches = []
    file_diff = self._get_file_diff(path)

    for chunk in file_diff['chunks']:
        context_iter = iter(context)
        for line_idx, line in enumerate(chunk):
            try:
                if _context_line(line) == next(context_iter):
                    continue
            except StopIteration:
                # The context was used up by the preceding lines, so a
                # complete match ends right before this line.
                matches.append((line_idx, chunk))
            # Mismatch, or a match was just recorded: start over.
            context_iter = iter(context)

        # Trigger StopIteration if we had a match at the very end of the
        # chunk. The position one past the last line is len(chunk); this
        # also stays well defined for a chunk without any lines.
        try:
            next(context_iter)
        except StopIteration:
            matches.append((len(chunk), chunk))

    # Recorded indexes point just past a match; step back to the line
    # which sits at `offset` inside of it.
    effective_offset = len(context) - offset
    found_at_diff_lines = [
        _line_to_diff_line_number(chunk[idx - effective_offset])
        for idx, chunk in matches]

    return found_at_diff_lines

820

828

821

def _get_file_diff(self, path):
    """
    Return the first entry of ``self.parsed_diff`` whose ``filename``
    equals ``path``.

    :raises FileNotInDiffException: if no entry matches.
    """
    for candidate in self.parsed_diff:
        if candidate['filename'] == path:
            return candidate
    raise FileNotInDiffException("File {} not in diff".format(path))

828

836

829

def _find_chunk_line_index(self, file_diff, diff_line):
    """
    Locate ``diff_line`` inside the chunks of ``file_diff``.

    A chunk line matches when its ``old_lineno`` equals ``diff_line.old``
    or its ``new_lineno`` equals ``diff_line.new``; the first match wins.

    :return: tuple ``(chunk, index_of_the_line_in_that_chunk)``.
    :raises LineNotInDiffException: if no line matches.
    """
    for chunk in file_diff['chunks']:
        for position, entry in enumerate(chunk):
            if (entry['old_lineno'] == diff_line.old
                    or entry['new_lineno'] == diff_line.new):
                return chunk, position
    raise LineNotInDiffException(
        "The line {} is not part of the diff.".format(diff_line))

838

846

839

847

840

def _is_diff_content(line):
    """
    Tell whether the action of ``line`` is one of unmodified, add or
    delete.
    """
    content_actions = (Action.UNMODIFIED, Action.ADD, Action.DELETE)
    return line['action'] in content_actions

843

851

844

852

845

def _context_line(line):
    """
    Reduce a parsed line dict to the tuple ``(action, line text)``.
    """
    action = line['action']
    text = line['line']
    return (action, text)

847

855

848

856

849

# Pair of line numbers addressing a single line of a diff: ``old`` and
# ``new``, in that order.
DiffLineNumber = collections.namedtuple('DiffLineNumber', 'old new')

850

858

851

859

852

def _line_to_diff_line_number(line):
    """
    Build a :class:`DiffLineNumber` from a parsed line dict.

    Falsy line numbers (such as the empty string) are turned into
    ``None``.
    """
    return DiffLineNumber(
        new=line['new_lineno'] or None,
        old=line['old_lineno'] or None)

856

864

857

865

858

class FileNotInDiffException(Exception):
    """
    Signals that a requested file is not part of the diff.

    It is raised when something is looked up for a file path which the
    given diff does not contain.
    """

865

873

866

874

867

class LineNotInDiffException(Exception):
    """
    Signals that a requested line is not part of the diff.

    It is raised when a line of a file is looked up and that line does
    not occur in the given diff.
    """

874

882

875

883

876

class DiffLimitExceeded(Exception):
    """
    Exception type without any behaviour of its own.

    NOTE(review): judging by the name it signals that a diff went over a
    size limit -- the raising code is not part of this section, confirm
    there.
    """

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             # Copyright (C) 2011-2016  RhodeCode GmbH
             #
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU Affero General Public License, version 3
             # (only), as published by the Free Software Foundation.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU Affero General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             #
             # This program is dual-licensed. If you wish to learn more about the
             # RhodeCode Enterprise Edition, including its added features, Support services,
             # and proprietary license terms, please see https://rhodecode.com/licenses/
             """
             Set of diffing helpers, previously part of vcs
             """
             import collections
             import re
             import difflib
             import logging
             from itertools import tee, imap
             from pylons.i18n.translation import _
             from rhodecode.lib.vcs.exceptions import VCSError
             from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
             from rhodecode.lib.vcs.backends.base import EmptyCommit
             from rhodecode.lib.helpers import escape
             from rhodecode.lib.utils2 import safe_unicode
             log = logging.getLogger(__name__)
+            # Define the maximum diff context; a file with more than this number of
+            # lines is unusable in the browser anyway.
+            MAX_CONTEXT = 1024 * 1014
             class OPS(object):
                 """Single-letter codes for the operation a diff applies to a file."""
                 # file was added
                 ADD = 'A'
                 # file was deleted
                 DEL = 'D'
                 # file was modified
                 MOD = 'M'
             def wrap_to_table(str_):
                 """
                 Wrap ``str_`` into a one-row ``code-difftable`` HTML table.

                 ``str_`` is interpolated verbatim into the ``<pre>`` cell, no escaping
                 is applied here.
                 """
                 tooltip = _('Click to comment')
                 return '''<table class="code-difftable">
                             <tr class="line no-comment">
                             <td class="add-comment-line tooltip" title="%s"><span class="add-comment-content"></span></td>
                             <td class="lineno new"></td>
                             <td class="code no-comment"><pre>%s</pre></td>
                             </tr>
                           </table>''' % (tooltip, str_)
             def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
                              show_full_diff=False, ignore_whitespace=True, line_context=3,
                              enable_comments=False):
                 """
                 Build the HTML table for the diff between two file nodes.

                 Binary files and files over ``diff_limit`` (or any file when
                 ``diff_limit`` is ``-1``) are not diffed; a short message wrapped
                 into a table is returned instead.

                 :returns: tuple ``(size, old_raw_id, new_raw_id, diff, stats, data)``
                 """
                 if filenode_old is None:
                     filenode_old = FileNode(filenode_new.path, '', EmptyCommit())

                 # values used by every branch which renders only a message
                 stats = data = None
                 size = 0

                 if filenode_old.is_binary or filenode_new.is_binary:
                     diff = wrap_to_table(_('Binary file'))
                 elif diff_limit == -1 or not (
                         diff_limit is None or
                         (filenode_old.size < diff_limit and
                          filenode_new.size < diff_limit)):
                     diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                            'diff menu to display this diff'))
                 else:
                     gitdiff = get_gitdiff(filenode_old, filenode_new,
                                           ignore_whitespace=ignore_whitespace,
                                           context=line_context)
                     processor = DiffProcessor(
                         gitdiff, format='gitdiff', diff_limit=diff_limit,
                         file_limit=file_limit, show_full_diff=show_full_diff)
                     parsed_files = processor.prepare()
                     diff = processor.as_html(enable_comments=enable_comments)
                     size = len(diff or '')
                     if parsed_files:
                         data = parsed_files[0]
                         stats = data['stats']

                 if not diff:
                     # nothing rendered: either a submodule or an empty diff
                     submodules = [node for node in (filenode_new, filenode_old)
                                   if isinstance(node, SubModuleNode)]
                     if submodules:
                         diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
                     else:
                         diff = wrap_to_table(_('No changes detected'))

                 old_raw_id = filenode_old.commit.raw_id
                 new_raw_id = filenode_new.commit.raw_id
                 return size, old_raw_id, new_raw_id, diff, stats, data
             def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
                 """
                 Returns git style diff between given ``filenode_old`` and ``filenode_new``.

                 :param ignore_whitespace: ignore whitespaces in diff
                 :param context: number of context lines; a falsy value falls back
                     to 3 and anything above ``MAX_CONTEXT`` is capped to it
                 :returns: whatever the repository's ``get_diff`` returns, or an empty
                     string when either node is a submodule
                 :raises VCSError: when one of the nodes is not a ``FileNode``
                 """
                 # make sure we pass in default context, and protect against
                 # IntOverflow when passing HUGE context
                 context = min(context or 3, MAX_CONTEXT)

                 nodes = (filenode_old, filenode_new)
                 # submodules cannot be diffed as files
                 if any(isinstance(node, SubModuleNode) for node in nodes):
                     return ''

                 for filenode in nodes:
                     if not isinstance(filenode, FileNode):
                         raise VCSError(
                             "Given object should be FileNode object, not %s"
                             % filenode.__class__)

                 repo = filenode_new.commit.repository
                 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
                 new_commit = filenode_new.commit

                 vcs_gitdiff = repo.get_diff(
                     old_commit, new_commit, filenode_new.path,
                     ignore_whitespace, context, path1=filenode_old.path)
                 return vcs_gitdiff
             # keys of the per-file ``stats['ops']`` dict, numbered 1 through 7
             (NEW_FILENODE,
              DEL_FILENODE,
              MOD_FILENODE,
              RENAMED_FILENODE,
              COPIED_FILENODE,
              CHMOD_FILENODE,
              BIN_FILENODE) = range(1, 8)
             class LimitedDiffContainer(object):
                 """
                 Holds a parsed diff together with the limit and size recorded for it.

                 Indexing and iteration are delegated to the wrapped ``diff``.
                 """

                 def __init__(self, diff_limit, cur_diff_size, diff):
                     self.diff_limit = diff_limit
                     self.cur_diff_size = cur_diff_size
                     self.diff = diff

                 def __getitem__(self, key):
                     return self.diff[key]

                 def __iter__(self):
                     for entry in self.diff:
                         yield entry
             class Action(object):
                 """
                 Values stored under the ``action`` key of each line of a parsed diff.
                 """
                 # line present only on the new side
                 ADD = 'add'
                 # line present only on the old side
                 DELETE = 'del'
                 # line present on both sides
                 UNMODIFIED = 'unmod'
                 # informational row which is not a line of the file itself
                 CONTEXT = 'context'
             class DiffProcessor(object):
                 """
                 Give it a unified or git diff and it returns a list of the files that were
                 mentioned in the diff together with a dict of meta information that
                 can be used to render it in a HTML template.

                 .. note:: Unicode handling

                    The original diffs are a byte sequence and can contain filenames
                    in mixed encodings. This class generally returns `unicode` objects
                    since the result is intended for presentation to the user.
                 """
                 # hunk header ``@@ -old_start[,old_len] +new_start[,new_len] @@ rest``;
                 # groups: old_start, old_len, new_start, new_len, rest of the line
                 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
                 # the ``\ No newline at end of file`` marker line
                 _newline_marker = re.compile(r'^\\ No newline at end of file')
                 # used for inline highlighter word split: separators are the escaped
                 # entities ``&gt;`` ``&lt;`` ``&amp;`` or non-word characters; they are
                 # captured, so ``split`` keeps them in its result
                 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
                 def __init__(self, diff, format='gitdiff', diff_limit=None,
                              file_limit=None, show_full_diff=True):
                     """
                     :param diff: A `Diff` object representing a diff from a vcs backend
                     :param format: format of diff passed, `udiff` or `gitdiff`
                     :param diff_limit: define the size of diff that is considered "big"
                         based on that parameter cut off will be triggered, set to None
                         to show full diff
                     """
                     self._diff = diff
                     self._format = format
                     self.adds = 0
                     self.removes = 0
                     # calculate diff size
                     self.diff_limit = diff_limit
                     self.file_limit = file_limit
                     self.show_full_diff = show_full_diff
                     self.cur_diff_size = 0
                     self.parsed = False
                     self.parsed_diff = []
                     if format == 'gitdiff':
                         self.differ = self._highlight_line_difflib
                         self._parser = self._parse_gitdiff
                     else:
                         self.differ = self._highlight_line_udiff
                         self._parser = self._parse_udiff
                 def _copy_iterator(self):
                     """
                     make a fresh copy of generator, we should not iterate thru
                     an original as it's needed for repeating operations on
                     this instance of DiffProcessor
                     """
                     self.__udiff, iterator_copy = tee(self.__udiff)
                     return iterator_copy
                 def _escaper(self, string):
                     """
                     Escape the HTML special characters of a piece of diff and enforce
                     the overall diff size limit.

                     :param string: piece of the diff; its length is added to
                         ``cur_diff_size``
                     :returns: ``safe_unicode(string)`` with ``&``, ``<`` and ``>``
                         replaced by their HTML entities
                     :raises DiffLimitExceeded: when not in full diff mode and the
                         accumulated size is bigger than ``diff_limit``
                     """
                     self.cur_diff_size += len(string)

                     # ``diff_limit=None`` means no limit. Comparing an int against None
                     # is always true on Python 2 and a TypeError on Python 3, so the
                     # None case has to be excluded explicitly.
                     limit = self.diff_limit
                     if (not self.show_full_diff and limit is not None
                             and self.cur_diff_size > limit):
                         raise DiffLimitExceeded('Diff Limit Exceeded')

                     # ``&`` goes first, otherwise the entities produced for ``<`` and
                     # ``>`` would get escaped a second time
                     return safe_unicode(string)\
                         .replace('&', '&amp;')\
                         .replace('<', '&lt;')\
                         .replace('>', '&gt;')
                 def _line_counter(self, l):
                     """
                     Bump ``adds``/``removes`` for an added or removed diff line.

                     Lines starting with ``+++`` or ``---`` are not counted.

                     :param l: a single line of the diff
                     :returns: ``safe_unicode(l)``
                     """
                     if l.startswith('+'):
                         if not l.startswith('+++'):
                             self.adds += 1
                     elif l.startswith('-'):
                         if not l.startswith('---'):
                             self.removes += 1
                     return safe_unicode(l)
                 def _highlight_line_difflib(self, line, next_):
                     """
                     Mark the inline differences of two paired lines, in place.

                     Both lines are split with ``_token_re`` and compared with
                     ``difflib.SequenceMatcher``; fragments that differ get wrapped into
                     ``<del>`` on the old line and ``<ins>`` on the new one.
                     """
                     if line['action'] == Action.DELETE:
                         removed, added = line, next_
                     else:
                         removed, added = next_, line

                     old_tokens = self._token_re.split(removed['line'])
                     new_tokens = self._token_re.split(added['line'])
                     matcher = difflib.SequenceMatcher(None, old_tokens, new_tokens)

                     old_parts = []
                     new_parts = []
                     for tag, i1, i2, j1, j2 in matcher.get_opcodes():
                         old_text = ''.join(old_tokens[i1:i2])
                         new_text = ''.join(new_tokens[j1:j2])
                         changed = tag != 'equal'
                         # empty fragments stay empty, no empty tags are produced
                         if changed and old_text:
                             old_text = '<del>%s</del>' % old_text
                         if changed and new_text:
                             new_text = '<ins>%s</ins>' % new_text
                         old_parts.append(old_text)
                         new_parts.append(new_text)

                     removed['line'] = "".join(old_parts)
                     added['line'] = "".join(new_parts)
                 def _highlight_line_udiff(self, line, next_):
                     """
                     Highlight inline changes in both lines.
                     """
                     start = 0
                     limit = min(len(line['line']), len(next_['line']))
                     while start < limit and line['line'][start] == next_['line'][start]:
                         start += 1
                     end = -1
                     limit -= start
                     while -end <= limit and line['line'][end] == next_['line'][end]:
                         end -= 1
                     end += 1
                     if start or end:
                         def do(l):
                             last = end + len(l['line'])
                             if l['action'] == Action.ADD:
                                 tag = 'ins'
                             else:
                                 tag = 'del'
                             l['line'] = '%s<%s>%s</%s>%s' % (
                                 l['line'][:start],
                                 tag,
                                 l['line'][start:last],
                                 tag,
                                 l['line'][last:]
                             )
                         do(line)
                         do(next_)
                 def _clean_line(self, line, command):
                     if command in ['+', '-', ' ']:
                         # only modify the line if it's actually a diff thing
                         line = line[1:]
                     return line
                 def _parse_gitdiff(self, inline_diff=True):
                     """
                     Parse every chunk of the git diff into one dict per file.

                     Each dict has the keys ``filename``, ``old_revision``,
                     ``new_revision``, ``chunks``, ``raw_diff``, ``operation``, ``stats``,
                     ``exceeds_limit`` and ``is_limited_diff``.

                     :param inline_diff: when true, paired changed lines are additionally
                         passed to ``self.differ`` for inline highlighting
                     :returns: the file dicts sorted by operation (added, modified,
                         deleted); wrapped into a ``LimitedDiffContainer`` once a limit
                         has been exceeded for any file
                     """
                     _files = []
                     diff_container = lambda arg: arg

                     # ``file_limit=None`` means no per-file limit. Comparing an int
                     # against None is always true on Python 2 and a TypeError on
                     # Python 3, so the None case is excluded explicitly below.
                     file_limit = self.file_limit

                     for chunk in self._diff.chunks():
                         head = chunk.header

                         # lazily escaped lines; ``_escaper`` may raise DiffLimitExceeded
                         # while ``_parse_lines`` consumes them
                         diff = imap(self._escaper, chunk.diff.splitlines(1))
                         raw_diff = chunk.raw
                         limited_diff = False
                         exceeds_limit = False

                         op = None
                         stats = {
                             'added': 0,
                             'deleted': 0,
                             'binary': False,
                             'ops': {},
                         }

                         # ``binary`` is set optimistically in every branch below and
                         # reset once the chunk turns out to carry a textual diff
                         if head['deleted_file_mode']:
                             op = OPS.DEL
                             stats['binary'] = True
                             stats['ops'][DEL_FILENODE] = 'deleted file'

                         elif head['new_file_mode']:
                             op = OPS.ADD
                             stats['binary'] = True
                             stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
                         else:  # modify operation, can be copy, rename or chmod

                             # CHMOD
                             if head['new_mode'] and head['old_mode']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][CHMOD_FILENODE] = (
                                     'modified file chmod %s => %s' % (
                                         head['old_mode'], head['new_mode']))
                             # RENAME
                             if head['rename_from'] != head['rename_to']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][RENAMED_FILENODE] = (
                                     'file renamed from %s to %s' % (
                                         head['rename_from'], head['rename_to']))
                             # COPY
                             if head.get('copy_from') and head.get('copy_to'):
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][COPIED_FILENODE] = (
                                     'file copied from %s to %s' % (
                                         head['copy_from'], head['copy_to']))

                             # If our new parsed headers didn't match anything fallback to
                             # old style detection
                             if op is None:
                                 if not head['a_file'] and head['b_file']:
                                     op = OPS.ADD
                                     stats['binary'] = True
                                     stats['ops'][NEW_FILENODE] = 'new file'

                                 elif head['a_file'] and not head['b_file']:
                                     op = OPS.DEL
                                     stats['binary'] = True
                                     stats['ops'][DEL_FILENODE] = 'deleted file'

                             # it's not ADD not DELETE
                             if op is None:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][MOD_FILENODE] = 'modified file'

                         # a real non-binary diff
                         if head['a_file'] or head['b_file']:
                             try:
                                 raw_diff, chunks, _stats = self._parse_lines(diff)
                                 stats['binary'] = False
                                 stats['added'] = _stats[0]
                                 stats['deleted'] = _stats[1]
                                 # explicit mark that it's a modified file
                                 if op == OPS.MOD:
                                     stats['ops'][MOD_FILENODE] = 'modified file'
                                 exceeds_limit = (
                                     file_limit is not None and
                                     len(raw_diff) > file_limit)

                                 # changed from _escaper function so we validate size of
                                 # each file instead of the whole diff
                                 # diff will hide big files but still show small ones
                                 # from my tests, big files are fairly safe to be parsed
                                 # but the browser is the bottleneck
                                 if not self.show_full_diff and exceeds_limit:
                                     raise DiffLimitExceeded('File Limit Exceeded')

                             except DiffLimitExceeded:
                                 diff_container = lambda _diff: \
                                     LimitedDiffContainer(
                                         self.diff_limit, self.cur_diff_size, _diff)

                                 # ``raw_diff`` is still ``chunk.raw`` here when the
                                 # limit was hit while parsing the lines
                                 exceeds_limit = (
                                     file_limit is not None and
                                     len(raw_diff) > file_limit)
                                 limited_diff = True
                                 chunks = []

                         else:  # GIT format binary patch, or possibly empty diff
                             if head['bin_patch']:
                                 # we have operation already extracted, but we mark simply
                                 # it's a diff we wont show for binary files
                                 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                             chunks = []

                         if chunks and not self.show_full_diff and op == OPS.DEL:
                             # when not in full diff mode the contents of deleted files
                             # are hidden
                             # TODO: anderson: if the view is not too big, there is no way
                             # to see the content of the file
                             chunks = []

                         # leading pseudo-chunk with one context row per recorded
                         # operation; plain modifications get no such row
                         chunks.insert(0, [{
                                               'old_lineno': '',
                                               'new_lineno': '',
                                               'action': Action.CONTEXT,
                                               'line': msg,
                                           } for _op, msg in stats['ops'].iteritems()
                                           if _op not in [MOD_FILENODE]])

                         _files.append({
                             'filename': safe_unicode(head['b_path']),
                             'old_revision': head['a_blob_id'],
                             'new_revision': head['b_blob_id'],
                             'chunks': chunks,
                             'raw_diff': safe_unicode(raw_diff),
                             'operation': op,
                             'stats': stats,
                             'exceeds_limit': exceeds_limit,
                             'is_limited_diff': limited_diff,
                         })

                     # added files first, then modified, deleted last
                     sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                                            OPS.DEL: 2}.get(info['operation'])

                     if not inline_diff:
                         return diff_container(sorted(_files, key=sorter))

                     # highlight inline changes
                     unchanged = (Action.UNMODIFIED, Action.CONTEXT)
                     for diff_data in _files:
                         for chunk in diff_data['chunks']:
                             lineiter = iter(chunk)
                             try:
                                 while True:
                                     line = next(lineiter)
                                     if line['action'] not in unchanged:
                                         nextline = next(lineiter)
                                         # only a del/add pair is highlighted
                                         if nextline['action'] in unchanged or \
                                            nextline['action'] == line['action']:
                                             continue
                                         self.differ(line, nextline)
                             except StopIteration:
                                 pass

                     return diff_container(sorted(_files, key=sorter))
                 def _parse_udiff(self, inline_diff=True):
                     """
                     Parser for the ``udiff`` format; not implemented.

                     :raises NotImplementedError: always
                     """
                     raise NotImplementedError()
                 def _parse_lines(self, diff):
                     """
                     Parse the diff an return data for the template.
                     """
                     lineiter = iter(diff)
                     stats = [0, 0]
                     chunks = []
                     raw_diff = []
                     try:
                         line = lineiter.next()
                         while line:
                             raw_diff.append(line)
                             lines = []
                             chunks.append(lines)
                             match = self._chunk_re.match(line)
                             if not match:
                                 break
                             gr = match.groups()
                             (old_line, old_end,
                              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                             old_line -= 1
                             new_line -= 1
                             context = len(gr) == 5
                             old_end += old_line
                             new_end += new_line
                             if context:
                                 # skip context only if it's first line
                                 if int(gr[0]) > 1:
                                     lines.append({
                                         'old_lineno': '...',
                                         'new_lineno': '...',
                                         'action':     Action.CONTEXT,
                                         'line':       line,
                                     })
                             line = lineiter.next()
                             while old_line < old_end or new_line < new_end:
                                 command = ' '
                                 if line:
                                     command = line[0]
                                 affects_old = affects_new = False
                                 # ignore those if we don't expect them
                                 if command in '#@':
                                     continue
                                 elif command == '+':
                                     affects_new = True
                                     action = Action.ADD
                                     stats[0] += 1
                                 elif command == '-':
                                     affects_old = True
                                     action = Action.DELETE
                                     stats[1] += 1
                                 else:
                                     affects_old = affects_new = True
                                     action = Action.UNMODIFIED
                                 if not self._newline_marker.match(line):
                                     old_line += affects_old
                                     new_line += affects_new
                                     lines.append({
                                         'old_lineno':   affects_old and old_line or '',
                                         'new_lineno':   affects_new and new_line or '',
                                         'action':       action,
                                         'line':         self._clean_line(line, command)
                                     })
                                     raw_diff.append(line)
                                 line = lineiter.next()
                                 if self._newline_marker.match(line):
                                     # we need to append to lines, since this is not
                                     # counted in the line specs of diff
                                     lines.append({
                                         'old_lineno':   '...',
                                         'new_lineno':   '...',
                                         'action':       Action.CONTEXT,
                                         'line':         self._clean_line(line, command)
                                     })
                     except StopIteration:
                         pass
                     return ''.join(raw_diff), chunks, stats
                 def _safe_id(self, idstring):
                     """Make a string safe for including in an id attribute.
                     The HTML spec says that id attributes 'must begin with
                     a letter ([A-Za-z]) and may be followed by any number
                     of letters, digits ([0-9]), hyphens ("-"), underscores
                     ("_"), colons (":"), and periods (".")'. These regexps
                     are slightly over-zealous, in that they remove colons
                     and periods unnecessarily.
                     Whitespace is transformed into underscores, and then
                     anything which is not a hyphen or a character that
                     matches \w (alphanumerics and underscore) is removed.
                     """
                     # Transform all whitespace to underscore
                     idstring = re.sub(r'\s', "_", '%s' % idstring)
                     # Remove everything that is not a hyphen or a member of \w
                     idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                     return idstring
                 def prepare(self, inline_diff=True):
                     """
                     Prepare the passed udiff for HTML rendering.
                     :return: A list of dicts with diff information.
                     """
                     parsed = self._parser(inline_diff=inline_diff)
                     self.parsed = True
                     self.parsed_diff = parsed
                     return parsed
                 def as_raw(self, diff_lines=None):
                     """
                     Returns raw diff as a byte string
                     """
                     return self._diff.raw
                 def as_html(self, table_class='code-difftable', line_class='line',
                             old_lineno_class='lineno old', new_lineno_class='lineno new',
                             code_class='code', enable_comments=False, parsed_lines=None):
                     """
                     Return given diff as html table with customized css classes
                     """
                     def _link_to_if(condition, label, url):
                         """
                         Generates a link if condition is meet or just the label if not.
                         """
                         if condition:
                             return '''<a href="%(url)s" class="tooltip"
                             title="%(title)s">%(label)s</a>''' % {
                                 'title': _('Click to select line'),
                                 'url': url,
                                 'label': label
                             }
                         else:
                             return label
                     if not self.parsed:
                         self.prepare()
                     diff_lines = self.parsed_diff
                     if parsed_lines:
                         diff_lines = parsed_lines
                     _html_empty = True
                     _html = []
                     _html.append('''<table class="%(table_class)s">\n''' % {
                         'table_class': table_class
                     })
                     for diff in diff_lines:
                         for line in diff['chunks']:
                             _html_empty = False
                             for change in line:
                                 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                                     'lc': line_class,
                                     'action': change['action']
                                 })
                                 anchor_old_id = ''
                                 anchor_new_id = ''
                                 anchor_old = "%(filename)s_o%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['old_lineno']
                                 }
                                 anchor_new = "%(filename)s_n%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['new_lineno']
                                 }
                                 cond_old = (change['old_lineno'] != '...' and
                                             change['old_lineno'])
                                 cond_new = (change['new_lineno'] != '...' and
                                             change['new_lineno'])
                                 if cond_old:
                                     anchor_old_id = 'id="%s"' % anchor_old
                                 if cond_new:
                                     anchor_new_id = 'id="%s"' % anchor_new
                                 if change['action'] != Action.CONTEXT:
                                     anchor_link = True
                                 else:
                                     anchor_link = False
                                 ###########################################################
                                 # COMMENT ICON
                                 ###########################################################
                                 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
                                 if enable_comments and change['action'] != Action.CONTEXT:
                                     _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
                                 _html.append('''</span></td>\n''')
                                 ###########################################################
                                 # OLD LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                                     'a_id': anchor_old_id,
                                     'olc': old_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(anchor_link, change['old_lineno'],
                                                         '#%s' % anchor_old)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # NEW LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                                     'a_id': anchor_new_id,
                                     'nlc': new_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(anchor_link, change['new_lineno'],
                                                         '#%s' % anchor_new)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # CODE
                                 ###########################################################
                                 code_classes = [code_class]
                                 if (not enable_comments or
                                         change['action'] == Action.CONTEXT):
                                     code_classes.append('no-comment')
                                 _html.append('\t<td class="%s">' % ' '.join(code_classes))
                                 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                                     'code': change['line']
                                 })
                                 _html.append('''\t</td>''')
                                 _html.append('''\n</tr>\n''')
                     _html.append('''</table>''')
                     if _html_empty:
                         return None
                     return ''.join(_html)
                 def stat(self):
                     """
                     Report how many lines this diff adds and removes.

                     :return: the pair ``(self.adds, self.removes)``.
                     """
                     added, removed = self.adds, self.removes
                     return added, removed
                 def get_context_of_line(
                         self, path, diff_line=None, context_before=3, context_after=3):
                     """
                     Returns the context lines for the specified diff line.

                     The line is looked up inside the file diff of `path`. The result
                     is a slice of the chunk containing it, reaching `context_before`
                     entries before and `context_after` entries after the line.

                     :param path: filename whose file diff is searched.
                     :param diff_line: the line to look up. Exactly one of its two
                         items (old / new line number) has to be set, the other one
                         has to be `None`.
                     :type diff_line: :class:`DiffLineNumber`
                     :param context_before: number of chunk entries to include before
                         the line; the slice is clamped at the start of the chunk.
                     :param context_after: number of chunk entries to include after
                         the line.
                     :return: list of ``(action, line)`` tuples. Entries which are
                         not diff content are left out, and the last tuple always
                         ends with exactly one trailing newline.
                     :raises ValueError: if `diff_line` is missing or specifies both
                         line numbers.
                     :raises FileNotInDiffException: if `path` is not part of the diff.
                     :raises LineNotInDiffException: if the line is not part of the
                         file diff.
                     """
                     assert self.parsed, "DiffProcessor is not initialized."

                     # The default `None` is not a usable value. Reject it explicitly
                     # instead of failing with a TypeError in the membership test below.
                     if diff_line is None:
                         raise ValueError("A diff line has to be specified.")

                     if None not in diff_line:
                         raise ValueError(
                             "Cannot specify both line numbers: {}".format(diff_line))

                     file_diff = self._get_file_diff(path)
                     chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

                     # Clamp at zero, a negative start would wrap around to the end
                     # of the chunk.
                     first_line_to_include = max(idx - context_before, 0)
                     first_line_after_context = idx + context_after + 1
                     context_lines = chunk[first_line_to_include:first_line_after_context]

                     line_contents = [
                         _context_line(line) for line in context_lines
                         if _is_diff_content(line)]

                     # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
                     # Once they are fixed, we can drop this line here.
                     if line_contents:
                         line_contents[-1] = (
                             line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
                     return line_contents
                 def find_context(self, path, context, offset=0):
                     """
                     Finds the given `context` inside of the diff.

                     Use the parameter `offset` to specify which offset the target line has
                     inside of the given `context`. This way the correct diff line will be
                     returned.

                     Every chunk of the file diff of `path` is scanned line by line and
                     the ``(action, line)`` pair of each line is compared against the
                     items of `context` in order. The scan restarts at the beginning of
                     `context` after a mismatch and after a completed match.

                     :param path: filename whose file diff is searched.
                     :param context: sequence of ``(action, line)`` pairs to look for.
                     :param offset: Shall be used to specify the offset of the main line
                         within the given `context`.
                     :return: list with one :class:`DiffLineNumber` per match found,
                         describing the line at position `offset` of that match.
                     :raises ValueError: if `offset` is negative or not smaller than
                         the length of `context`.
                     :raises FileNotInDiffException: if `path` is not part of the diff.
                     """
                     if offset < 0 or offset >= len(context):
                         raise ValueError(
                             "Only positive values up to the length of the context "
                             "minus one are allowed.")

                     matches = []
                     file_diff = self._get_file_diff(path)

                     for chunk in file_diff['chunks']:
                         context_iter = iter(context)
                         for line_idx, line in enumerate(chunk):
                             try:
                                 if _context_line(line) == next(context_iter):
                                     continue
                             except StopIteration:
                                 # The context was exhausted by the previous lines,
                                 # so a full match ended right before this line.
                                 matches.append((line_idx, chunk))
                             context_iter = iter(context)

                         # A match which ends on the last line of the chunk is only
                         # noticed by probing the iterator once more. This has to
                         # happen per chunk, since the iterator is reset for the next
                         # one. The index one past the last line equals `len(chunk)`,
                         # which also stays well defined for an empty chunk.
                         try:
                             next(context_iter)
                         except StopIteration:
                             matches.append((len(chunk), chunk))

                     # Recorded indexes point one past the last matched line, step
                     # back to the line at `offset` within the match.
                     effective_offset = len(context) - offset
                     found_at_diff_lines = [
                         _line_to_diff_line_number(chunk[idx - effective_offset])
                         for idx, chunk in matches]
                     return found_at_diff_lines
                 def _get_file_diff(self, path):
                     for file_diff in self.parsed_diff:
                         if file_diff['filename'] == path:
                             break
                     else:
                         raise FileNotInDiffException("File {} not in diff".format(path))
                     return file_diff
                 def _find_chunk_line_index(self, file_diff, diff_line):
                     for chunk in file_diff['chunks']:
                         for idx, line in enumerate(chunk):
                             if line['old_lineno'] == diff_line.old:
                                 return chunk, idx
                             if line['new_lineno'] == diff_line.new:
                                 return chunk, idx
                     raise LineNotInDiffException(
                         "The line {} is not part of the diff.".format(diff_line))
             def _is_diff_content(line):
                 """
                 Tell whether `line` is an unmodified, added or deleted line.

                 :param line: mapping with an ``'action'`` key.
                 """
                 action = line['action']
                 return action in (Action.UNMODIFIED, Action.ADD, Action.DELETE)
             def _context_line(line):
                 return (line['action'], line['line'])
             DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
             def _line_to_diff_line_number(line):
                 new_line_no = line['new_lineno'] or None
                 old_line_no = line['old_lineno'] or None
                 return DiffLineNumber(old=old_line_no, new=new_line_no)
             class FileNotInDiffException(Exception):
                 """
                 Signals that a requested file is not part of the diff.

                 Raised when a file diff is looked up by path and none of the files
                 of the parsed diff has that filename.
                 """
             class LineNotInDiffException(Exception):
                 """
                 Signals that a requested line is not part of the diff.

                 Raised when a line is looked up inside of a file diff and none of
                 the chunks contains a line with the requested line number.
                 """
             class DiffLimitExceeded(Exception):
                 """
                 Signals that a limit on a diff has been exceeded.

                 NOTE(review): the raising code is not visible next to this class,
                 the meaning is taken from the name only -- confirm which limit
                 is meant.
                 """