rhodecode-enterprise-ce Commit - r678:1aa46c57

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

3

4

#

4

#

5

# This program is free software: you can redistribute it and/or modify

5

# This program is free software: you can redistribute it and/or modify

6

# it under the terms of the GNU Affero General Public License, version 3

6

# it under the terms of the GNU Affero General Public License, version 3

7

# (only), as published by the Free Software Foundation.

7

# (only), as published by the Free Software Foundation.

8

#

8

#

9

# This program is distributed in the hope that it will be useful,

9

# This program is distributed in the hope that it will be useful,

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12

# GNU General Public License for more details.

12

# GNU General Public License for more details.

13

#

13

#

14

# You should have received a copy of the GNU Affero General Public License

14

# You should have received a copy of the GNU Affero General Public License

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

16

#

16

#

17

# This program is dual-licensed. If you wish to learn more about the

17

# This program is dual-licensed. If you wish to learn more about the

18

# RhodeCode Enterprise Edition, including its added features, Support services,

18

# RhodeCode Enterprise Edition, including its added features, Support services,

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

20

21

22

"""

22

"""

23

Set of diffing helpers, previously part of vcs

23

Set of diffing helpers, previously part of vcs

24

"""

24

"""

25

26

import collections

26

import collections

27

import re

27

import re

28

import difflib

28

import difflib

29

import logging

29

import logging

30

31

from itertools import tee, imap

31

from itertools import tee, imap

32

33

from pylons.i18n.translation import _

33

from pylons.i18n.translation import _

34

35

from rhodecode.lib.vcs.exceptions import VCSError

35

from rhodecode.lib.vcs.exceptions import VCSError

36

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

36

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

37

from rhodecode.lib.vcs.backends.base import EmptyCommit

37

from rhodecode.lib.vcs.backends.base import EmptyCommit

38

from rhodecode.lib.helpers import escape

38

from rhodecode.lib.helpers import escape

39

from rhodecode.lib.utils2 import safe_unicode

39

from rhodecode.lib.utils2 import safe_unicode

40

41

log = logging.getLogger(__name__)

41

log = logging.getLogger(__name__)

42

43

44

class OPS(object):
    """
    Single-letter codes naming the operation a diff performs on a file.
    """

    # file is new in the diff
    ADD = 'A'
    # file exists on both sides (content, mode, rename or copy change)
    MOD = 'M'
    # file was removed by the diff
    DEL = 'D'

48

49

def wrap_to_table(str_):

50

def wrap_to_table(str_):

50

return '''<table class="code-difftable">

51

return '''<table class="code-difftable">

51

52

52

53

53

54

54

55

55

</tr>

56

</tr>

56

</table>''' % (_('Click to comment'), str_)

57

</table>''' % (_('Click to comment'), str_)

57

58

59

def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
                 show_full_diff=False, ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Render the diff between two file nodes as an HTML table.

    Binary files, diffs over the size limit, submodules and empty diffs
    are not rendered; a table holding a short message is returned instead.

    :param filenode_old: old side of the diff; ``None`` is replaced by an
        empty ``FileNode`` at the path of ``filenode_new``
    :param filenode_new: new side of the diff
    :param diff_limit: ``None`` always diffs, ``-1`` never diffs, any other
        value diffs only when both node sizes are below it
    :param file_limit: passed through to ``DiffProcessor``
    :param show_full_diff: passed through to ``DiffProcessor``
    :param ignore_whitespace: passed through to ``get_gitdiff``
    :param line_context: passed to ``get_gitdiff`` as ``context``
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats, data)``
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyCommit())

    # values reported by every branch that does not parse a real diff
    stats, size, data = None, 0, None

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('Binary file'))
    else:
        # node sizes are only looked at when a concrete limit was given
        if diff_limit is None:
            within_limit = True
        elif diff_limit == -1:
            within_limit = False
        else:
            within_limit = (filenode_old.size < diff_limit and
                            filenode_new.size < diff_limit)

        if within_limit:
            f_gitdiff = get_gitdiff(
                filenode_old, filenode_new,
                ignore_whitespace=ignore_whitespace,
                context=line_context)
            diff_processor = DiffProcessor(
                f_gitdiff, format='gitdiff', diff_limit=diff_limit,
                file_limit=file_limit, show_full_diff=show_full_diff)
            _parsed = diff_processor.prepare()

            diff = diff_processor.as_html(enable_comments=enable_comments)
            if _parsed:
                # only the first parsed file is reported
                data = _parsed[0]
                stats = data['stats']
            size = len(diff or '')
        else:
            diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                   'diff menu to display this diff'))

    if not diff:
        # nothing was rendered: either a submodule or no changes at all
        submodules = [node for node in [filenode_new, filenode_old]
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.commit.raw_id
    cs2 = filenode_new.commit.raw_id

    return size, cs1, cs2, diff, stats, data

108

110

109

111

110

def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Return the git style diff between ``filenode_old`` and ``filenode_new``.

    :param filenode_old: old side of the diff
    :param filenode_new: new side of the diff; its commit's repository
        produces the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value falls back to 3
    :returns: an empty string when either node is a submodule, otherwise
        the result of ``repo.get_diff``
    :raises VCSError: when either node is not a ``FileNode``
    """
    # make sure we pass in default context
    if not context:
        context = 3

    nodes = (filenode_old, filenode_new)

    # submodules are never diffed
    if any(isinstance(node, SubModuleNode) for node in nodes):
        return ''

    for node in nodes:
        if isinstance(node, FileNode):
            continue
        raise VCSError(
            "Given object should be FileNode object, not %s"
            % node.__class__)

    repo = filenode_new.commit.repository
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT
    new_commit = filenode_new.commit

    return repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)

137

139

138

# Keys of the per-file ``stats['ops']`` mapping: one numeric id per kind of
# change that can be recorded for a file (values 1 through 7, in this order).
(
    NEW_FILENODE,
    DEL_FILENODE,
    MOD_FILENODE,
    RENAMED_FILENODE,
    COPIED_FILENODE,
    CHMOD_FILENODE,
    BIN_FILENODE,
) = range(1, 8)

145

147

146

148

147

class LimitedDiffContainer(object):
    """
    Wrapper that carries a diff together with the limit it was checked
    against and the size reached so far.

    Indexing and iteration are forwarded to the wrapped ``diff``.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size
        self.diff = diff

    def __getitem__(self, key):
        return self.diff[key]

    def __iter__(self):
        for entry in self.diff:
            yield entry

160

162

161

163

162

class Action(object):
    """
    Values stored under the ``action`` key of each line of a parsed diff.
    """

    # line added by the diff
    ADD = 'add'
    # line removed by the diff
    DELETE = 'del'
    # line present on both sides
    UNMODIFIED = 'unmod'

    # informational row (hunk header or operation message), not file content
    CONTEXT = 'context'

172

174

173

175

174

class DiffProcessor(object):

176

class DiffProcessor(object):

175

"""

177

"""

176

Give it a unified or git diff and it returns a list of the files that were

178

Give it a unified or git diff and it returns a list of the files that were

177

mentioned in the diff together with a dict of meta information that

179

mentioned in the diff together with a dict of meta information that

178

can be used to render it in a HTML template.

180

can be used to render it in a HTML template.

179

181

180

.. note:: Unicode handling

182

.. note:: Unicode handling

181

183

182

The original diffs are a byte sequence and can contain filenames

184

The original diffs are a byte sequence and can contain filenames

183

in mixed encodings. This class generally returns `unicode` objects

185

in mixed encodings. This class generally returns `unicode` objects

184

since the result is intended for presentation to the user.

186

since the result is intended for presentation to the user.

185

187

186

"""

188

"""

187

_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

189

_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

188

_newline_marker = re.compile(r'^\\ No newline at end of file')

190

_newline_marker = re.compile(r'^\\ No newline at end of file')

189

191

190

# used for inline highlighter word split

192

# used for inline highlighter word split

191

_token_re = re.compile(r'()(>|<|&|\W+?)')

193

_token_re = re.compile(r'()(>|<|&|\W+?)')

192

194

193

def __init__(self, diff, format='gitdiff', diff_limit=None, ~~file_limit~~=~~None~~, ~~show_full_diff~~=~~True~~):

195

def __init__(self, diff, format='gitdiff', diff_limit=None,

196

file_limit=None, show_full_diff=True):

194

"""

197

"""

195

:param diff: A `Diff` object representing a diff from a vcs backend

198

:param diff: A `Diff` object representing a diff from a vcs backend

196

:param format: format of diff passed, `udiff` or `gitdiff`

199

:param format: format of diff passed, `udiff` or `gitdiff`

197

:param diff_limit: define the size of diff that is considered "big"

200

:param diff_limit: define the size of diff that is considered "big"

198

based on that parameter cut off will be triggered, set to None

201

based on that parameter cut off will be triggered, set to None

199

to show full diff

202

to show full diff

200

"""

203

"""

201

self._diff = diff

204

self._diff = diff

202

self._format = format

205

self._format = format

203

self.adds = 0

206

self.adds = 0

204

self.removes = 0

207

self.removes = 0

205

# calculate diff size

208

# calculate diff size

206

self.diff_limit = diff_limit

209

self.diff_limit = diff_limit

207

self.file_limit = file_limit

210

self.file_limit = file_limit

208

self.show_full_diff = show_full_diff

211

self.show_full_diff = show_full_diff

209

self.cur_diff_size = 0

212

self.cur_diff_size = 0

210

self.parsed = False

213

self.parsed = False

211

self.parsed_diff = []

214

self.parsed_diff = []

212

215

213

if format == 'gitdiff':

216

if format == 'gitdiff':

214

self.differ = self._highlight_line_difflib

217

self.differ = self._highlight_line_difflib

215

self._parser = self._parse_gitdiff

218

self._parser = self._parse_gitdiff

216

else:

219

else:

217

self.differ = self._highlight_line_udiff

220

self.differ = self._highlight_line_udiff

218

self._parser = self._parse_udiff

221

self._parser = self._parse_udiff

219

222

220

def _copy_iterator(self):
    """
    Make a fresh copy of the stored diff iterator.

    The original must not be consumed directly because it is needed for
    repeated operations on this instance of DiffProcessor, so the stored
    iterator is split with ``itertools.tee``: one half replaces the
    stored iterator and the other half is returned.

    :returns: an independent iterator over the same items
    """
    # NOTE(review): ``self.__udiff`` (name-mangled) is not assigned by the
    # ``__init__`` of this class, which stores the diff as ``self._diff``;
    # confirm this method is still reachable before relying on it.
    self.__udiff, iterator_copy = tee(self.__udiff)
    return iterator_copy

228

231

229

def _escaper(self, string):
    """
    Escape HTML special characters of a diff line and check the diff limit.

    The length of ``string`` is added to ``self.cur_diff_size`` before the
    limit is checked, so the running size covers every line seen so far.

    :param string: a single raw line of the diff
    :returns: the line as unicode with ``&``, ``<`` and ``>`` replaced by
        their HTML entities
    :raises DiffLimitExceeded: when ``show_full_diff`` is off, a
        ``diff_limit`` is set and the accumulated size exceeds it
    """
    self.cur_diff_size += len(string)

    limit = self.diff_limit
    # ``diff_limit=None`` is documented as "show full diff"; comparing an
    # int with None must not be left to the interpreter's ordering rules
    if (not self.show_full_diff and limit is not None
            and self.cur_diff_size > limit):
        raise DiffLimitExceeded('Diff Limit Exceeded')

    # ``&`` has to be replaced first, otherwise the ampersands of the
    # entities produced for ``<`` and ``>`` would be escaped a second time
    return safe_unicode(string)\
        .replace('&', '&amp;')\
        .replace('<', '&lt;')\
        .replace('>', '&gt;')

245

248

246

def _line_counter(self, l):
    """
    Update the add/remove totals of this diff from a single line.

    A line starting with ``+`` bumps ``self.adds`` and a line starting
    with ``-`` bumps ``self.removes``; lines starting with ``+++`` or
    ``---`` are not counted.

    :param l: a single line of the diff
    :returns: the line converted with ``safe_unicode``
    """
    is_header = l.startswith(('+++', '---'))
    if not is_header:
        if l.startswith('+'):
            self.adds += 1
        elif l.startswith('-'):
            self.removes += 1
    return safe_unicode(l)

257

260

258

def _highlight_line_difflib(self, line, next_):
    """
    Highlight inline changes in both lines.

    Both lines are split into tokens with ``self._token_re`` and compared
    with ``difflib.SequenceMatcher``. Token runs that differ are wrapped
    in ``<del>`` on the old line and ``<ins>`` on the new line. The
    ``line`` entries of both dicts are rewritten in place.

    :param line: parsed diff line (dict with ``action`` and ``line``)
    :param next_: the parsed diff line paired with ``line``
    """
    # whichever of the two is the deletion is treated as the old side
    if line['action'] == Action.DELETE:
        removed, added = line, next_
    else:
        removed, added = next_, line

    tokenize = self._token_re.split
    old_tokens = tokenize(removed['line'])
    new_tokens = tokenize(added['line'])
    matcher = difflib.SequenceMatcher(None, old_tokens, new_tokens)

    old_parts, new_parts = [], []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        old_piece = ''.join(old_tokens[i1:i2])
        new_piece = ''.join(new_tokens[j1:j2])
        changed = tag != 'equal'
        # empty pieces are never wrapped
        if changed and old_piece:
            old_piece = '<del>%s</del>' % old_piece
        if changed and new_piece:
            new_piece = '<ins>%s</ins>' % new_piece
        old_parts.append(old_piece)
        new_parts.append(new_piece)

    removed['line'] = "".join(old_parts)
    added['line'] = "".join(new_parts)

286

289

287

def _highlight_line_udiff(self, line, next_):

290

def _highlight_line_udiff(self, line, next_):

288

"""

291

"""

289

Highlight inline changes in both lines.

292

Highlight inline changes in both lines.

290

"""

293

"""

291

start = 0

294

start = 0

292

limit = min(len(line['line']), len(next_['line']))

295

limit = min(len(line['line']), len(next_['line']))

293

while start < limit and line['line'][start] == next_['line'][start]:

296

while start < limit and line['line'][start] == next_['line'][start]:

294

start += 1

297

start += 1

295

end = -1

298

end = -1

296

limit -= start

299

limit -= start

297

while -end <= limit and line['line'][end] == next_['line'][end]:

300

while -end <= limit and line['line'][end] == next_['line'][end]:

298

end -= 1

301

end -= 1

299

end += 1

302

end += 1

300

if start or end:

303

if start or end:

301

def do(l):

304

def do(l):

302

last = end + len(l['line'])

305

last = end + len(l['line'])

303

if l['action'] == Action.ADD:

306

if l['action'] == Action.ADD:

304

tag = 'ins'

307

tag = 'ins'

305

else:

308

else:

306

tag = 'del'

309

tag = 'del'

307

l['line'] = '%s<%s>%s</%s>%s' % (

310

l['line'] = '%s<%s>%s</%s>%s' % (

308

l['line'][:start],

311

l['line'][:start],

309

tag,

312

tag,

310

l['line'][start:last],

313

l['line'][start:last],

311

tag,

314

tag,

312

l['line'][last:]

315

l['line'][last:]

313

)

316

)

314

do(line)

317

do(line)

315

do(next_)

318

do(next_)

316

319

317

def _clean_line(self, line, command):

320

def _clean_line(self, line, command):

318

if command in ['+', '-', ' ']:

321

if command in ['+', '-', ' ']:

319

# only modify the line if it's actually a diff thing

322

# only modify the line if it's actually a diff thing

320

line = line[1:]

323

line = line[1:]

321

return line

324

return line

322

325

323

def _parse_gitdiff(self, inline_diff=True):
    """
    Parse the git style diff held in ``self._diff`` into per-file dicts.

    For every chunk yielded by ``self._diff.chunks()`` the header is
    inspected to determine the operation (add / modify / delete) and the
    ``stats`` of the file, then the chunk's lines are parsed with
    ``self._parse_lines``.

    :param inline_diff: when true, a changed line directly followed by a
        changed line of the other kind is passed together with it to
        ``self.differ`` for inline highlighting
    :returns: the file dicts sorted by operation (added, modified,
        deleted); wrapped in a ``LimitedDiffContainer`` when any file
        raised ``DiffLimitExceeded`` while being parsed
    """
    _files = []
    # identity wrapper, replaced below once a limit has been exceeded
    diff_container = lambda arg: arg

    for chunk in self._diff.chunks():
        head = chunk.header

        # lazy: lines are escaped (and counted against the diff limit)
        # only as ``_parse_lines`` consumes them
        diff = imap(self._escaper, chunk.diff.splitlines(1))
        raw_diff = chunk.raw
        limited_diff = False
        exceeds_limit = False

        op = None
        # ``binary`` is set to True by every operation branch below and
        # reset to False only once the lines were parsed successfully
        stats = {
            'added': 0,
            'deleted': 0,
            'binary': False,
            'ops': {},
        }

        if head['deleted_file_mode']:
            op = OPS.DEL
            stats['binary'] = True
            stats['ops'][DEL_FILENODE] = 'deleted file'

        elif head['new_file_mode']:
            op = OPS.ADD
            stats['binary'] = True
            stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
        else:  # modify operation, can be copy, rename or chmod

            # CHMOD
            if head['new_mode'] and head['old_mode']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][CHMOD_FILENODE] = (
                    'modified file chmod %s => %s' % (
                        head['old_mode'], head['new_mode']))
            # RENAME
            if head['rename_from'] != head['rename_to']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][RENAMED_FILENODE] = (
                    'file renamed from %s to %s' % (
                        head['rename_from'], head['rename_to']))
            # COPY
            if head.get('copy_from') and head.get('copy_to'):
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][COPIED_FILENODE] = (
                    'file copied from %s to %s' % (
                        head['copy_from'], head['copy_to']))

        # If our new parsed headers didn't match anything fallback to
        # old style detection
        if op is None:
            if not head['a_file'] and head['b_file']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file'

            elif head['a_file'] and not head['b_file']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

        # it's not ADD not DELETE
        if op is None:
            op = OPS.MOD
            stats['binary'] = True
            stats['ops'][MOD_FILENODE] = 'modified file'

        # a real non-binary diff
        if head['a_file'] or head['b_file']:
            try:
                raw_diff, chunks, _stats = self._parse_lines(diff)
                stats['binary'] = False
                stats['added'] = _stats[0]
                stats['deleted'] = _stats[1]
                # explicit mark that it's a modified file
                if op == OPS.MOD:
                    stats['ops'][MOD_FILENODE] = 'modified file'
                # NOTE(review): ``file_limit`` defaults to None in the
                # constructor - confirm callers always pass a number
                exceeds_limit = len(raw_diff) > self.file_limit

                # changed from _escaper function so we validate size of
                # each file instead of the whole diff
                # diff will hide big files but still show small ones
                # from my tests, big files are fairly safe to be parsed
                # but the browser is the bottleneck
                if not self.show_full_diff and exceeds_limit:
                    raise DiffLimitExceeded('File Limit Exceeded')

            except DiffLimitExceeded:
                # from now on the final result is returned wrapped
                diff_container = lambda _diff: \
                    LimitedDiffContainer(
                        self.diff_limit, self.cur_diff_size, _diff)

                # ``raw_diff`` is still ``chunk.raw`` here if the
                # exception came out of ``_parse_lines``
                exceeds_limit = len(raw_diff) > self.file_limit
                limited_diff = True
                chunks = []

        else:  # GIT format binary patch, or possibly empty diff
            if head['bin_patch']:
                # we have operation already extracted, but we mark simply
                # it's a diff we wont show for binary files
                stats['ops'][BIN_FILENODE] = 'binary diff hidden'
            chunks = []

        if chunks and not self.show_full_diff and op == OPS.DEL:
            # if not full diff mode show deleted file contents
            # TODO: anderson: if the view is not too big, there is no way
            # to see the content of the file
            chunks = []

        # leading pseudo chunk: one context row per recorded operation
        # message, except the plain 'modified file' one
        chunks.insert(0, [{
            'old_lineno': '',
            'new_lineno': '',
            'action': Action.CONTEXT,
            'line': msg,
        } for _op, msg in stats['ops'].iteritems()
            if _op not in [MOD_FILENODE]])

        _files.append({
            'filename': safe_unicode(head['b_path']),
            'old_revision': head['a_blob_id'],
            'new_revision': head['b_blob_id'],
            'chunks': chunks,
            'raw_diff': safe_unicode(raw_diff),
            'operation': op,
            'stats': stats,
            'exceeds_limit': exceeds_limit,
            'is_limited_diff': limited_diff,
        })

    # sort order of the result: added files, then modified, then deleted
    sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                           OPS.DEL: 2}.get(info['operation'])

    if not inline_diff:
        return diff_container(sorted(_files, key=sorter))

    # highlight inline changes
    for diff_data in _files:
        for chunk in diff_data['chunks']:
            lineiter = iter(chunk)
            try:
                while 1:
                    line = lineiter.next()
                    if line['action'] not in (
                            Action.UNMODIFIED, Action.CONTEXT):
                        # a changed line: look at the line after it
                        nextline = lineiter.next()
                        # only a del/add pair is highlighted; in every
                        # other case both lines are skipped
                        if nextline['action'] in ['unmod', 'context'] or \
                                nextline['action'] == line['action']:
                            continue
                        self.differ(line, nextline)
            except StopIteration:
                # end of the chunk reached
                pass

    return diff_container(sorted(_files, key=sorter))

481

484

482

def _parse_udiff(self, inline_diff=True):

485

def _parse_udiff(self, inline_diff=True):

483

raise NotImplementedError()

486

raise NotImplementedError()

484

487

485

def _parse_lines(self, diff):

488

def _parse_lines(self, diff):

486

"""

489

"""

487

Parse the diff an return data for the template.

490

Parse the diff an return data for the template.

488

"""

491

"""

489

492

490

lineiter = iter(diff)

493

lineiter = iter(diff)

491

stats = [0, 0]

494

stats = [0, 0]

492

chunks = []

495

chunks = []

493

raw_diff = []

496

raw_diff = []

494

497

495

try:

498

try:

496

line = lineiter.next()

499

line = lineiter.next()

497

500

498

while line:

501

while line:

499

raw_diff.append(line)

502

raw_diff.append(line)

500

lines = []

503

lines = []

501

chunks.append(lines)

504

chunks.append(lines)

502

505

503

match = self._chunk_re.match(line)

506

match = self._chunk_re.match(line)

504

507

505

if not match:

508

if not match:

506

break

509

break

507

510

508

gr = match.groups()

511

gr = match.groups()

509

(old_line, old_end,

512

(old_line, old_end,

510

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

513

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

511

old_line -= 1

514

old_line -= 1

512

new_line -= 1

515

new_line -= 1

513

516

514

context = len(gr) == 5

517

context = len(gr) == 5

515

old_end += old_line

518

old_end += old_line

516

new_end += new_line

519

new_end += new_line

517

520

518

if context:

521

if context:

519

# skip context only if it's first line

522

# skip context only if it's first line

520

if int(gr[0]) > 1:

523

if int(gr[0]) > 1:

521

lines.append({

524

lines.append({

522

'old_lineno': '...',

525

'old_lineno': '...',

523

'new_lineno': '...',

526

'new_lineno': '...',

524

'action': Action.CONTEXT,

527

'action': Action.CONTEXT,

525

'line': line,

528

'line': line,

526

})

529

})

527

530

528

line = lineiter.next()

531

line = lineiter.next()

529

532

530

while old_line < old_end or new_line < new_end:

533

while old_line < old_end or new_line < new_end:

531

command = ' '

534

command = ' '

532

if line:

535

if line:

533

command = line[0]

536

command = line[0]

534

537

535

affects_old = affects_new = False

538

affects_old = affects_new = False

536

539

537

# ignore those if we don't expect them

540

# ignore those if we don't expect them

538

if command in '#@':

541

if command in '#@':

539

continue

542

continue

540

elif command == '+':

543

elif command == '+':

541

affects_new = True

544

affects_new = True

542

action = Action.ADD

545

action = Action.ADD

543

stats[0] += 1

546

stats[0] += 1

544

elif command == '-':

547

elif command == '-':

545

affects_old = True

548

affects_old = True

546

action = Action.DELETE

549

action = Action.DELETE

547

stats[1] += 1

550

stats[1] += 1

548

else:

551

else:

549

affects_old = affects_new = True

552

affects_old = affects_new = True

550

action = Action.UNMODIFIED

553

action = Action.UNMODIFIED

551

554

552

if not self._newline_marker.match(line):

555

if not self._newline_marker.match(line):

553

old_line += affects_old

556

old_line += affects_old

554

new_line += affects_new

557

new_line += affects_new

555

lines.append({

558

lines.append({

556

'old_lineno': affects_old and old_line or '',

559

'old_lineno': affects_old and old_line or '',

557

'new_lineno': affects_new and new_line or '',

560

'new_lineno': affects_new and new_line or '',

558

'action': action,

561

'action': action,

559

'line': self._clean_line(line, command)

562

'line': self._clean_line(line, command)

560

})

563

})

561

raw_diff.append(line)

564

raw_diff.append(line)

562

565

563

line = lineiter.next()

566

line = lineiter.next()

564

567

565

if self._newline_marker.match(line):

568

if self._newline_marker.match(line):

566

# we need to append to lines, since this is not

569

# we need to append to lines, since this is not

567

# counted in the line specs of diff

570

# counted in the line specs of diff

568

lines.append({

571

lines.append({

569

'old_lineno': '...',

572

'old_lineno': '...',

570

'new_lineno': '...',

573

'new_lineno': '...',

571

'action': Action.CONTEXT,

574

'action': Action.CONTEXT,

572

'line': self._clean_line(line, command)

575

'line': self._clean_line(line, command)

573

})

576

})

574

577

575

except StopIteration:

578

except StopIteration:

576

pass

579

pass

577

return ''.join(raw_diff), chunks, stats

580

return ''.join(raw_diff), chunks, stats

578

581

579

def _safe_id(self, idstring):

582

def _safe_id(self, idstring):

580

"""Make a string safe for including in an id attribute.

583

"""Make a string safe for including in an id attribute.

581

584

582

The HTML spec says that id attributes 'must begin with

585

The HTML spec says that id attributes 'must begin with

583

a letter ([A-Za-z]) and may be followed by any number

586

a letter ([A-Za-z]) and may be followed by any number

584

of letters, digits ([0-9]), hyphens ("-"), underscores

587

of letters, digits ([0-9]), hyphens ("-"), underscores

585

("_"), colons (":"), and periods (".")'. These regexps

588

("_"), colons (":"), and periods (".")'. These regexps

586

are slightly over-zealous, in that they remove colons

589

are slightly over-zealous, in that they remove colons

587

and periods unnecessarily.

590

and periods unnecessarily.

588

591

589

Whitespace is transformed into underscores, and then

592

Whitespace is transformed into underscores, and then

590

anything which is not a hyphen or a character that

593

anything which is not a hyphen or a character that

591

matches \w (alphanumerics and underscore) is removed.

594

matches \w (alphanumerics and underscore) is removed.

592

595

593

"""

596

"""

594

# Transform all whitespace to underscore

597

# Transform all whitespace to underscore

595

idstring = re.sub(r'\s', "_", '%s' % idstring)

598

idstring = re.sub(r'\s', "_", '%s' % idstring)

596

# Remove everything that is not a hyphen or a member of \w

599

# Remove everything that is not a hyphen or a member of \w

597

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

600

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

598

return idstring

601

return idstring

599

602

600

def prepare(self, inline_diff=True):
    """
    Run the configured parser and remember its result on the instance.

    Stores the result as `self.parsed_diff` and sets `self.parsed`.

    :param inline_diff: forwarded to `self._parser`.
    :return: whatever `self._parser` returned; per the original
        docstring, a list of dicts with diff information.
    """
    result = self._parser(inline_diff=inline_diff)
    self.parsed_diff = result
    self.parsed = True
    return result

610

613

611

def as_raw(self, diff_lines=None):
    """
    Return the raw diff, i.e. `self._diff.raw` (a byte string according
    to the original docstring).

    :param diff_lines: accepted but not used.
    """
    raw = self._diff.raw
    return raw

616

619

617

def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Render the diff as an HTML table; all css class names can be
    overridden through the keyword arguments.

    Calls `prepare()` first when the diff has not been parsed yet.
    `parsed_lines`, when given and non-empty, is rendered instead of
    `self.parsed_diff`.

    :return: the HTML as one string, or `None` if there was no chunk
        to render.
    """
    def _link_to_if(condition, label, url):
        """
        Return `label` wrapped in a link to `url` when `condition` holds,
        otherwise the bare `label`.
        """
        if not condition:
            return label
        return '''<a href="%(url)s" class="tooltip"
title="%(title)s">%(label)s</a>''' % {
            'title': _('Click to select line'),
            'url': url,
            'label': label
        }

    if not self.parsed:
        self.prepare()

    files = self.parsed_diff
    if parsed_lines:
        files = parsed_lines

    out = ['''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    }]
    emit = out.append
    rendered_any = False

    for diff in files:
        for chunk in diff['chunks']:
            rendered_any = True
            for change in chunk:
                action = change['action']
                is_context = action == Action.CONTEXT

                emit('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': action
                })

                # Anchors are "<safe filename>_o<old no>" and
                # "<safe filename>_n<new no>".
                file_id = self._safe_id(diff['filename'])
                old_no = change['old_lineno']
                new_no = change['new_lineno']
                anchor_old = "%(filename)s_o%(oldline_no)s" % {
                    'filename': file_id,
                    'oldline_no': old_no
                }
                anchor_new = "%(filename)s_n%(oldline_no)s" % {
                    'filename': file_id,
                    'oldline_no': new_no
                }

                # Only real line numbers get an id attribute; '...' and
                # empty values do not.
                anchor_old_id = ''
                if old_no != '...' and old_no:
                    anchor_old_id = 'id="%s"' % anchor_old
                anchor_new_id = ''
                if new_no != '...' and new_no:
                    anchor_new_id = 'id="%s"' % anchor_new

                # Context rows get neither links nor a comment icon.
                anchor_link = not is_context

                # --- comment icon ---
                emit('''\t<td class="add-comment-line"><span class="add-comment-content">''')
                if enable_comments and not is_context:
                    emit('''<a href="#"><span class="icon-comment-add"></span></a>''')
                emit('''</span></td>\n''')

                # --- old line number ---
                emit('''\t<td %(a_id)s class="%(olc)s">''' % {
                    'a_id': anchor_old_id,
                    'olc': old_lineno_class
                })
                emit('''%(link)s''' % {
                    'link': _link_to_if(anchor_link, old_no,
                                        '#%s' % anchor_old)
                })
                emit('''</td>\n''')

                # --- new line number ---
                emit('''\t<td %(a_id)s class="%(nlc)s">''' % {
                    'a_id': anchor_new_id,
                    'nlc': new_lineno_class
                })
                emit('''%(link)s''' % {
                    'link': _link_to_if(anchor_link, new_no,
                                        '#%s' % anchor_new)
                })
                emit('''</td>\n''')

                # --- code ---
                code_classes = [code_class]
                if is_context or not enable_comments:
                    code_classes.append('no-comment')
                emit('\t<td class="%s">' % ' '.join(code_classes))
                emit('''\n\t\t<pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })

                emit('''\t</td>''')
                emit('''\n</tr>\n''')

    emit('''</table>''')
    if not rendered_any:
        return None
    return ''.join(out)

737

740

738

def stat(self):
    """
    Return the pair `(self.adds, self.removes)`: the added and removed
    line counts of this instance.
    """
    added, removed = self.adds, self.removes
    return (added, removed)

743

746

744

def get_context_of_line(
        self, path, diff_line=None, context_before=3, context_after=3):
    """
    Return the lines surrounding `diff_line` in the diff of `path`.

    Takes up to `context_before` entries before and `context_after`
    entries after the matching line from the same chunk, keeps only
    real diff content and returns it as `(action, line)` tuples.

    :type diff_line: :class:`DiffLineNumber`
    :raises ValueError: if neither entry of `diff_line` is `None`.
    """
    assert self.parsed, "DiffProcessor is not initialized."

    if None not in diff_line:
        raise ValueError(
            "Cannot specify both line numbers: {}".format(diff_line))

    file_diff = self._get_file_diff(path)
    chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

    start = max(idx - context_before, 0)
    stop = idx + context_after + 1

    line_contents = []
    for entry in chunk[start:stop]:
        if _is_diff_content(entry):
            line_contents.append(_context_line(entry))

    # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
    # Once they are fixed, we can drop this block here.
    if line_contents:
        last_action, last_text = line_contents[-1]
        line_contents[-1] = (last_action, last_text.rstrip('\n') + '\n')
    return line_contents

773

776

774

def find_context(self, path, context, offset=0):
    """
    Finds the given `context` inside of the diff.

    Use the parameter `offset` to specify which offset the target line has
    inside of the given `context`. This way the correct diff line will be
    returned.

    :param path: filename whose diff is searched, resolved through
        `self._get_file_diff`.
    :param context: sequence of entries compared against
        `_context_line(line)` of every diff line.
    :param offset: Shall be used to specify the offset of the main line
        within the given `context`.
    :return: list with one `_line_to_diff_line_number` result per match.
    :raises ValueError: if `offset` is negative or not smaller than
        `len(context)`.
    """
    if offset < 0 or offset >= len(context):
        raise ValueError(
            "Only positive values up to the length of the context "
            "minus one are allowed.")

    matches = []
    file_diff = self._get_file_diff(path)

    for chunk in file_diff['chunks']:
        if not chunk:
            # An empty chunk cannot contain the context. Skipping it also
            # avoids using the loop variable of a loop that never ran.
            continue

        context_iter = iter(context)
        for line_idx, line in enumerate(chunk):
            try:
                # builtin next() works on Python 2.6+ and Python 3
                if _context_line(line) == next(context_iter):
                    continue
            except StopIteration:
                # The context was exhausted: record the index right
                # after the match and start over.
                matches.append((line_idx, chunk))
                context_iter = iter(context)

        # Trigger StopIteration once more to detect a match that ends
        # exactly at the end of the chunk.
        try:
            next(context_iter)
        except StopIteration:
            matches.append((len(chunk), chunk))

    # Recorded indexes point just behind a match; step back to the
    # requested line inside the context.
    effective_offset = len(context) - offset
    found_at_diff_lines = [
        _line_to_diff_line_number(chunk[idx - effective_offset])
        for idx, chunk in matches]

    return found_at_diff_lines

817

820

818

def _get_file_diff(self, path):

821

def _get_file_diff(self, path):

819

for file_diff in self.parsed_diff:

822

for file_diff in self.parsed_diff:

820

if file_diff['filename'] == path:

823

if file_diff['filename'] == path:

821

break

824

break

822

else:

825

else:

823

raise FileNotInDiffException("File {} not in diff".format(path))

826

raise FileNotInDiffException("File {} not in diff".format(path))

824

return file_diff

827

return file_diff

825

828

826

def _find_chunk_line_index(self, file_diff, diff_line):

829

def _find_chunk_line_index(self, file_diff, diff_line):

827

for chunk in file_diff['chunks']:

830

for chunk in file_diff['chunks']:

828

for idx, line in enumerate(chunk):

831

for idx, line in enumerate(chunk):

829

if line['old_lineno'] == diff_line.old:

832

if line['old_lineno'] == diff_line.old:

830

return chunk, idx

833

return chunk, idx

831

if line['new_lineno'] == diff_line.new:

834

if line['new_lineno'] == diff_line.new:

832

return chunk, idx

835

return chunk, idx

833

raise LineNotInDiffException(

836

raise LineNotInDiffException(

834

"The line {} is not part of the diff.".format(diff_line))

837

"The line {} is not part of the diff.".format(diff_line))

835

838

836

839

837

def _is_diff_content(line):
    """
    Tell whether a parsed line is unmodified, added or deleted content,
    as opposed to any other action.
    """
    content_actions = (Action.UNMODIFIED, Action.ADD, Action.DELETE)
    return line['action'] in content_actions

840

843

841

844

842

def _context_line(line):

845

def _context_line(line):

843

return (line['action'], line['line'])

846

return (line['action'], line['line'])

844

847

845

848

846

# Pair of line numbers (old side, new side) identifying a line of a diff.
DiffLineNumber = collections.namedtuple('DiffLineNumber', ('old', 'new'))

847

850

848

851

849

def _line_to_diff_line_number(line):
    """
    Build a `DiffLineNumber` from a parsed line dict.

    Falsy line numbers (for example the empty string) become `None`.
    """
    return DiffLineNumber(
        old=line['old_lineno'] or None,
        new=line['new_lineno'] or None)

853

856

854

857

855

class FileNotInDiffException(Exception):
    """
    Signals that a file is not part of the diff.

    Raised when the context of a line is requested for a file that the
    given diff does not contain.
    """

862

865

863

866

864

class LineNotInDiffException(Exception):
    """
    Signals that a line is not part of the diff.

    Raised when the context is requested for a line of a file and that
    line does not occur in the given diff.
    """

871

874

872

875

873

class DiffLimitExceeded(Exception):
    """
    Plain marker exception without additional behaviour.

    NOTE(review): presumably raised when a diff grows beyond a configured
    size limit -- the raising code is not visible here, confirm.
    """

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             # Copyright (C) 2011-2016  RhodeCode GmbH
             #
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU Affero General Public License, version 3
             # (only), as published by the Free Software Foundation.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU Affero General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             #
             # This program is dual-licensed. If you wish to learn more about the
             # RhodeCode Enterprise Edition, including its added features, Support services,
             # and proprietary license terms, please see https://rhodecode.com/licenses/
             """
             Set of diffing helpers, previously part of vcs
             """
             import collections
             import re
             import difflib
             import logging
             from itertools import tee, imap
             from pylons.i18n.translation import _
             from rhodecode.lib.vcs.exceptions import VCSError
             from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
             from rhodecode.lib.vcs.backends.base import EmptyCommit
             from rhodecode.lib.helpers import escape
             from rhodecode.lib.utils2 import safe_unicode
             log = logging.getLogger(__name__)
             class OPS(object):
                 """
                 Single-letter codes for the kind of change applied to a file.

                 ``DiffProcessor._parse_gitdiff`` stores one of these in the
                 ``operation`` key of every parsed file entry.
                 """
                 ADD, MOD, DEL = 'A', 'M', 'D'
             def wrap_to_table(str_):
                 """
                 Wrap ``str_`` into a one-row ``code-difftable`` HTML table.

                 The value is interpolated as-is into the ``<pre>`` cell; no escaping
                 is done here. The first cell gets the translated
                 'Click to comment' tooltip.

                 :param str_: text (or markup) placed inside the ``<pre>`` element
                 :return: the HTML table as a string
                 """
                 tooltip = _('Click to comment')
                 markup = '''<table class="code-difftable">
                             <tr class="line no-comment">
                             <td class="add-comment-line tooltip" title="%s"><span class="add-comment-content"></span></td>
                             <td class="lineno new"></td>
                             <td class="code no-comment"><pre>%s</pre></td>
                             </tr>
                           </table>'''
                 return markup % (tooltip, str_)
             def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
                              show_full_diff=False, ignore_whitespace=True, line_context=3,
                              enable_comments=False):
                 """
                 Return the diff between two file nodes rendered as an HTML table.

                 Checks the cut-off limit for the files and presents a proper message
                 instead of the diff for binary files, for files at or over
                 ``diff_limit`` and when no changes were found.

                 :param filenode_old: old file node; ``None`` is replaced by an empty
                     ``FileNode`` at the path of ``filenode_new``
                 :param filenode_new: new file node
                 :param diff_limit: ``-1`` always yields the cut-off message, ``None``
                     disables the size check, otherwise both nodes must be smaller
                     than this value for the diff to be rendered
                 :param file_limit: passed through to ``DiffProcessor``
                 :param show_full_diff: passed through to ``DiffProcessor``
                 :param ignore_whitespace: passed through to ``get_gitdiff``
                 :param line_context: passed to ``get_gitdiff`` as ``context``
                 :param enable_comments: passed through to ``DiffProcessor.as_html``
                 :return: tuple ``(size, cs1, cs2, diff, stats, data)`` where ``cs1``
                     and ``cs2`` are the raw ids of the old and new commit
                 """
                 if filenode_old is None:
                     filenode_old = FileNode(filenode_new.path, '', EmptyCommit())
                 if filenode_old.is_binary or filenode_new.is_binary:
                     diff = wrap_to_table(_('Binary file'))
                     stats = None
                     size = 0
                     data = None
                 elif diff_limit != -1 and (
                         diff_limit is None or
                         (filenode_old.size < diff_limit and
                          filenode_new.size < diff_limit)):
                     f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                             ignore_whitespace=ignore_whitespace,
                                             context=line_context)
                     diff_processor = DiffProcessor(
                         f_gitdiff, format='gitdiff', diff_limit=diff_limit,
                         file_limit=file_limit, show_full_diff=show_full_diff)
                     _parsed = diff_processor.prepare()
                     diff = diff_processor.as_html(enable_comments=enable_comments)
                     stats = _parsed[0]['stats'] if _parsed else None
                     size = len(diff or '')
                     data = _parsed[0] if _parsed else None
                 else:
                     diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                            'diff menu to display this diff'))
                     stats = None
                     size = 0
                     data = None
                 if not diff:
                     # Build a real list: the emptiness test and the ``[0]`` lookup
                     # below must not depend on ``filter`` returning a list, which it
                     # only does on Python 2.
                     submodules = [node for node in (filenode_new, filenode_old)
                                   if isinstance(node, SubModuleNode)]
                     if submodules:
                         diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
                     else:
                         diff = wrap_to_table(_('No changes detected'))
                 cs1 = filenode_old.commit.raw_id
                 cs2 = filenode_new.commit.raw_id
                 return size, cs1, cs2, diff, stats, data
             def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
                 """
                 Returns git style diff between given ``filenode_old`` and ``filenode_new``.

                 :param filenode_old: old ``FileNode``
                 :param filenode_new: new ``FileNode``; its commit's repository is the
                     one asked for the diff
                 :param ignore_whitespace: ignore whitespaces in diff
                 :param context: number of context lines; a falsy value falls back to 3
                 :return: ``''`` when either node is a ``SubModuleNode``, otherwise
                     whatever ``repo.get_diff`` returns
                 :raises VCSError: when either node is not a ``FileNode``
                 """
                 # make sure we pass in default context
                 context = context or 3
                 # A list comprehension instead of ``filter``: on Python 3 ``filter``
                 # returns an iterator that is always truthy, which would make this
                 # function return '' for every input.
                 submodules = [node for node in (filenode_new, filenode_old)
                               if isinstance(node, SubModuleNode)]
                 if submodules:
                     return ''
                 for filenode in (filenode_old, filenode_new):
                     if not isinstance(filenode, FileNode):
                         raise VCSError(
                             "Given object should be FileNode object, not %s"
                             % filenode.__class__)
                 repo = filenode_new.commit.repository
                 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
                 new_commit = filenode_new.commit
                 vcs_gitdiff = repo.get_diff(
                     old_commit, new_commit, filenode_new.path,
                     ignore_whitespace, context, path1=filenode_old.path)
                 return vcs_gitdiff
             # Keys of the per-file ``stats['ops']`` dict filled in by
             # ``DiffProcessor._parse_gitdiff``; each maps to a human readable message.
             NEW_FILENODE, DEL_FILENODE, MOD_FILENODE = 1, 2, 3
             RENAMED_FILENODE, COPIED_FILENODE = 4, 5
             CHMOD_FILENODE, BIN_FILENODE = 6, 7
             class LimitedDiffContainer(object):
                 """
                 Wrapper around a parsed diff that also carries the diff limit and the
                 diff size reached when it was created.

                 Indexing and iteration are forwarded to the wrapped ``diff``.
                 """
                 def __init__(self, diff_limit, cur_diff_size, diff):
                     self.diff_limit = diff_limit
                     self.cur_diff_size = cur_diff_size
                     self.diff = diff
                 def __getitem__(self, key):
                     # delegate lookups to the wrapped diff
                     return self.diff[key]
                 def __iter__(self):
                     # lazily yield the entries of the wrapped diff
                     for entry in self.diff:
                         yield entry
             class Action(object):
                 """
                 Values stored under the ``action`` key of each line of a parsed diff.
                 """
                 ADD, DELETE = 'add', 'del'
                 UNMODIFIED, CONTEXT = 'unmod', 'context'
             class DiffProcessor(object):
                 """
                 Give it a unified or git diff and it returns a list of the files that were
                 mentioned in the diff together with a dict of meta information that
                 can be used to render it in a HTML template.
                 .. note:: Unicode handling
                    The original diffs are a byte sequence and can contain filenames
                    in mixed encodings. This class generally returns `unicode` objects
                    since the result is intended for presentation to the user.
                 """
                 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
                 _newline_marker = re.compile(r'^\\ No newline at end of file')
                 # used for inline highlighter word split
                 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
-                def __init__(self, diff, format='gitdiff', diff_limit=None, file_limit=None, show_full_diff=True):
+                def __init__(self, diff, format='gitdiff', diff_limit=None,
+                             file_limit=None, show_full_diff=True):
                     """
                     :param diff: A `Diff` object representing a diff from a vcs backend
                     :param format: format of diff passed, `udiff` or `gitdiff`
                     :param diff_limit: define the size of diff that is considered "big"
                         based on that parameter cut off will be triggered, set to None
                         to show full diff
                     """
                     self._diff = diff
                     self._format = format
                     self.adds = 0
                     self.removes = 0
                     # calculate diff size
                     self.diff_limit = diff_limit
                     self.file_limit = file_limit
                     self.show_full_diff = show_full_diff
                     self.cur_diff_size = 0
                     self.parsed = False
                     self.parsed_diff = []
                     if format == 'gitdiff':
                         self.differ = self._highlight_line_difflib
                         self._parser = self._parse_gitdiff
                     else:
                         self.differ = self._highlight_line_udiff
                         self._parser = self._parse_udiff
                 def _copy_iterator(self):
                     """
                     make a fresh copy of generator, we should not iterate thru
                     an original as it's needed for repeating operations on
                     this instance of DiffProcessor
                     """
                     self.__udiff, iterator_copy = tee(self.__udiff)
                     return iterator_copy
                 def _escaper(self, string):
                     """
                     Add ``string`` to the running diff size and return it HTML-escaped.

                     :param string: piece of diff text
                     :return: unicode text with ``&``, ``<`` and ``>`` replaced by
                         their HTML entities
                     :raises DiffLimitExceeded: when not showing the full diff and the
                         accumulated size is greater than ``diff_limit``
                     """
                     self.cur_diff_size += len(string)
                     if not self.show_full_diff and self.cur_diff_size > self.diff_limit:
                         raise DiffLimitExceeded('Diff Limit Exceeded')
                     escaped = safe_unicode(string)
                     # '&' has to go first so the entities added afterwards survive
                     for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
                         escaped = escaped.replace(char, entity)
                     return escaped
                 def _line_counter(self, l):
                     """
                     Bump the add/remove totals of this diff for a single line.

                     Lines starting with ``+++`` or ``---`` are not counted.

                     :param l: one line of the diff
                     :return: ``l`` converted with ``safe_unicode``
                     """
                     if l.startswith('+'):
                         if not l.startswith('+++'):
                             self.adds += 1
                     elif l.startswith('-'):
                         if not l.startswith('---'):
                             self.removes += 1
                     return safe_unicode(l)
                 def _highlight_line_difflib(self, line, next_):
                     """
                     Highlight inline changes in both lines.

                     Both lines are split into tokens with ``_token_re`` and compared
                     with ``difflib.SequenceMatcher``. Fragments that are not equal are
                     wrapped in ``<del>`` on the old line and ``<ins>`` on the new one,
                     and the ``line`` value of both dicts is rewritten in place.

                     :param line: parsed diff line (dict with ``action`` and ``line``)
                     :param next_: the parsed diff line it is paired with
                     """
                     # whichever of the two is the deletion is the "old" side
                     old, new = ((line, next_) if line['action'] == Action.DELETE
                                 else (next_, line))
                     old_tokens = self._token_re.split(old['line'])
                     new_tokens = self._token_re.split(new['line'])
                     matcher = difflib.SequenceMatcher(None, old_tokens, new_tokens)
                     old_parts = []
                     new_parts = []
                     for tag, i1, i2, j1, j2 in matcher.get_opcodes():
                         removed = ''.join(old_tokens[i1:i2])
                         added = ''.join(new_tokens[j1:j2])
                         changed = tag != 'equal'
                         if changed and removed:
                             removed = '<del>%s</del>' % removed
                         if changed and added:
                             added = '<ins>%s</ins>' % added
                         old_parts.append(removed)
                         new_parts.append(added)
                     old['line'] = "".join(old_parts)
                     new['line'] = "".join(new_parts)
                 def _highlight_line_udiff(self, line, next_):
                     """
                     Highlight inline changes in both lines.

                     The common prefix and common suffix of the two ``line`` values are
                     measured, and the part in between is wrapped in ``<ins>`` (for an
                     added line) or ``<del>`` (for any other action) on both dicts, in
                     place. Nothing is changed when the lines share neither.

                     :param line: parsed diff line (dict with ``action`` and ``line``)
                     :param next_: the parsed diff line it is paired with
                     """
                     first, second = line['line'], next_['line']
                     shortest = min(len(first), len(second))
                     # length of the common prefix
                     prefix = 0
                     while prefix < shortest and first[prefix] == second[prefix]:
                         prefix += 1
                     # negative offset of the common suffix, never overlapping the prefix
                     suffix = -1
                     shortest -= prefix
                     while -suffix <= shortest and first[suffix] == second[suffix]:
                         suffix -= 1
                     suffix += 1
                     if not (prefix or suffix):
                         return
                     for entry in (line, next_):
                         text = entry['line']
                         stop = suffix + len(text)
                         tag = 'ins' if entry['action'] == Action.ADD else 'del'
                         entry['line'] = '%s<%s>%s</%s>%s' % (
                             text[:prefix], tag, text[prefix:stop], tag, text[stop:])
                 def _clean_line(self, line, command):
                     if command in ['+', '-', ' ']:
                         # only modify the line if it's actually a diff thing
                         line = line[1:]
                     return line
                 def _parse_gitdiff(self, inline_diff=True):
                     """
                     Parse every chunk of ``self._diff`` into a per-file dict.

                     For each chunk the operation (``OPS.ADD``/``MOD``/``DEL``) is
                     derived from the parsed header, the diff lines are parsed with
                     ``_parse_lines`` and a leading chunk holding the operation
                     messages is put in front of the parsed chunks.

                     :param inline_diff: when true, changed lines are paired with the
                         line following them and passed to ``self.differ`` for inline
                         highlighting
                     :return: the file dicts sorted by operation (added, modified,
                         deleted); wrapped in a ``LimitedDiffContainer`` if a
                         ``DiffLimitExceeded`` was caught for any file
                     """
                     _files = []
                     # identity wrapper; replaced by a LimitedDiffContainer factory as
                     # soon as a DiffLimitExceeded is caught below
                     diff_container = lambda arg: arg
                     for chunk in self._diff.chunks():
                         head = chunk.header
                         # lazily run each line (line endings kept) through _escaper,
                         # which also adds to the running diff size
                         diff = imap(self._escaper, chunk.diff.splitlines(1))
                         raw_diff = chunk.raw
                         limited_diff = False
                         exceeds_limit = False
                         op = None
                         stats = {
                             'added': 0,
                             'deleted': 0,
                             'binary': False,
                             'ops': {},
                         }
                         # every branch below flags the file as binary; the flag is
                         # cleared again once _parse_lines succeeds further down
                         if head['deleted_file_mode']:
                             op = OPS.DEL
                             stats['binary'] = True
                             stats['ops'][DEL_FILENODE] = 'deleted file'
                         elif head['new_file_mode']:
                             op = OPS.ADD
                             stats['binary'] = True
                             stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
                         else:  # modify operation, can be copy, rename or chmod
                             # CHMOD
                             if head['new_mode'] and head['old_mode']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][CHMOD_FILENODE] = (
                                     'modified file chmod %s => %s' % (
                                         head['old_mode'], head['new_mode']))
                             # RENAME
                             if head['rename_from'] != head['rename_to']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][RENAMED_FILENODE] = (
                                     'file renamed from %s to %s' % (
                                         head['rename_from'], head['rename_to']))
                             # COPY
                             if head.get('copy_from') and head.get('copy_to'):
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][COPIED_FILENODE] = (
                                     'file copied from %s to %s' % (
                                         head['copy_from'], head['copy_to']))
                             # If our new parsed headers didn't match anything fallback to
                             # old style detection
                             if op is None:
                                 if not head['a_file'] and head['b_file']:
                                     op = OPS.ADD
                                     stats['binary'] = True
                                     stats['ops'][NEW_FILENODE] = 'new file'
                                 elif head['a_file'] and not head['b_file']:
                                     op = OPS.DEL
                                     stats['binary'] = True
                                     stats['ops'][DEL_FILENODE] = 'deleted file'
                             # it's not ADD not DELETE
                             if op is None:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][MOD_FILENODE] = 'modified file'
                         # a real non-binary diff
                         if head['a_file'] or head['b_file']:
                             try:
                                 raw_diff, chunks, _stats = self._parse_lines(diff)
                                 stats['binary'] = False
                                 stats['added'] = _stats[0]
                                 stats['deleted'] = _stats[1]
                                 # explicit mark that it's a modified file
                                 if op == OPS.MOD:
                                     stats['ops'][MOD_FILENODE] = 'modified file'
                                 exceeds_limit = len(raw_diff) > self.file_limit
                                 # changed from _escaper function so we validate size of
                                 # each file instead of the whole diff
                                 # diff will hide big files but still show small ones
                                 # from my tests, big files are fairly safe to be parsed
                                 # but the browser is the bottleneck
                                 if not self.show_full_diff and exceeds_limit:
                                     raise DiffLimitExceeded('File Limit Exceeded')
                             except DiffLimitExceeded:
                                 # from here on the final result is wrapped, and this
                                 # file is reported without any parsed chunks
                                 diff_container = lambda _diff: \
                                     LimitedDiffContainer(
                                         self.diff_limit, self.cur_diff_size, _diff)
                                 exceeds_limit = len(raw_diff) > self.file_limit
                                 limited_diff = True
                                 chunks = []
                         else:  # GIT format binary patch, or possibly empty diff
                             if head['bin_patch']:
                                 # we have operation already extracted, but we mark simply
                                 # it's a diff we wont show for binary files
                                 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                             chunks = []
                         if chunks and not self.show_full_diff and op == OPS.DEL:
                             # if not full diff mode show deleted file contents
                             # TODO: anderson: if the view is not too big, there is no way
                             # to see the content of the file
                             chunks = []
                         # first chunk: one context line per recorded operation message,
                         # except the plain 'modified file' one
                         chunks.insert(0, [{
                                               'old_lineno': '',
                                               'new_lineno': '',
                                               'action': Action.CONTEXT,
                                               'line': msg,
                                           } for _op, msg in stats['ops'].iteritems()
                                           if _op not in [MOD_FILENODE]])
                         _files.append({
                             'filename': safe_unicode(head['b_path']),
                             'old_revision': head['a_blob_id'],
                             'new_revision': head['b_blob_id'],
                             'chunks': chunks,
                             'raw_diff': safe_unicode(raw_diff),
                             'operation': op,
                             'stats': stats,
                             'exceeds_limit': exceeds_limit,
                             'is_limited_diff': limited_diff,
                         })
                     # sort order of the result: added, then modified, then deleted
                     sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                                            OPS.DEL: 2}.get(info['operation'])
                     if not inline_diff:
                         return diff_container(sorted(_files, key=sorter))
                     # highlight inline changes
                     for diff_data in _files:
                         for chunk in diff_data['chunks']:
                             lineiter = iter(chunk)
                             try:
                                 while 1:
                                     line = lineiter.next()
                                     if line['action'] not in (
                                             Action.UNMODIFIED, Action.CONTEXT):
                                         # pair the changed line with its follower; the
                                         # follower is consumed even when the pair is
                                         # skipped
                                         nextline = lineiter.next()
                                         if nextline['action'] in ['unmod', 'context'] or \
                                            nextline['action'] == line['action']:
                                             continue
                                         self.differ(line, nextline)
                             except StopIteration:
                                 # end of the chunk reached
                                 pass
                     return diff_container(sorted(_files, key=sorter))
                 def _parse_udiff(self, inline_diff=True):
                     raise NotImplementedError()
                 def _parse_lines(self, diff):
                     """
                     Parse the diff and return data for the template.

                     :param diff: iterable of the lines of one file's diff, expected to
                         start with a ``@@ ... @@`` hunk header
                     :return: tuple ``(raw_diff, chunks, stats)``: the consumed lines
                         joined into one string, a list of hunks (each a list of dicts
                         with ``old_lineno``, ``new_lineno``, ``action`` and ``line``
                         keys) and the ``[added, deleted]`` line counters
                     """
                     lineiter = iter(diff)
                     stats = [0, 0]
                     chunks = []
                     raw_diff = []
                     # Running out of lines anywhere below ends the parse through
                     # StopIteration. The builtin next() is used instead of the
                     # Python 2 only ``.next()`` method.
                     try:
                         line = next(lineiter)
                         while line:
                             raw_diff.append(line)
                             lines = []
                             chunks.append(lines)
                             match = self._chunk_re.match(line)
                             if not match:
                                 break
                             gr = match.groups()
                             # a missing length in the hunk header counts as 1
                             (old_line, old_end,
                              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                             old_line -= 1
                             new_line -= 1
                             context = len(gr) == 5
                             old_end += old_line
                             new_end += new_line
                             if context:
                                 # skip context only if it's first line
                                 if int(gr[0]) > 1:
                                     lines.append({
                                         'old_lineno': '...',
                                         'new_lineno': '...',
                                         'action':     Action.CONTEXT,
                                         'line':       line,
                                     })
                             line = next(lineiter)
                             while old_line < old_end or new_line < new_end:
                                 command = ' '
                                 if line:
                                     command = line[0]
                                 affects_old = affects_new = False
                                 # ignore those if we don't expect them
                                 if command in '#@':
                                     # move on to the next line first: looping again on
                                     # the same line would never terminate
                                     line = next(lineiter)
                                     continue
                                 elif command == '+':
                                     affects_new = True
                                     action = Action.ADD
                                     stats[0] += 1
                                 elif command == '-':
                                     affects_old = True
                                     action = Action.DELETE
                                     stats[1] += 1
                                 else:
                                     affects_old = affects_new = True
                                     action = Action.UNMODIFIED
                                 if not self._newline_marker.match(line):
                                     old_line += affects_old
                                     new_line += affects_new
                                     lines.append({
                                         'old_lineno':   affects_old and old_line or '',
                                         'new_lineno':   affects_new and new_line or '',
                                         'action':       action,
                                         'line':         self._clean_line(line, command)
                                     })
                                     raw_diff.append(line)
                                 line = next(lineiter)
                                 if self._newline_marker.match(line):
                                     # we need to append to lines, since this is not
                                     # counted in the line specs of diff
                                     lines.append({
                                         'old_lineno':   '...',
                                         'new_lineno':   '...',
                                         'action':       Action.CONTEXT,
                                         'line':         self._clean_line(line, command)
                                     })
                     except StopIteration:
                         pass
                     return ''.join(raw_diff), chunks, stats
                 def _safe_id(self, idstring):
                     """Make a string safe for including in an id attribute.
                     The HTML spec says that id attributes 'must begin with
                     a letter ([A-Za-z]) and may be followed by any number
                     of letters, digits ([0-9]), hyphens ("-"), underscores
                     ("_"), colons (":"), and periods (".")'. These regexps
                     are slightly over-zealous, in that they remove colons
                     and periods unnecessarily.
                     Whitespace is transformed into underscores, and then
                     anything which is not a hyphen or a character that
                     matches \w (alphanumerics and underscore) is removed.
                     """
                     # Transform all whitespace to underscore
                     idstring = re.sub(r'\s', "_", '%s' % idstring)
                     # Remove everything that is not a hyphen or a member of \w
                     idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                     return idstring
                 def prepare(self, inline_diff=True):
                     """
                     Prepare the passed udiff for HTML rendering.
                     :return: A list of dicts with diff information.
                     """
                     parsed = self._parser(inline_diff=inline_diff)
                     self.parsed = True
                     self.parsed_diff = parsed
                     return parsed
                 def as_raw(self, diff_lines=None):
                     """
                     Returns raw diff as a byte string
                     """
                     return self._diff.raw
                 def as_html(self, table_class='code-difftable', line_class='line',
                             old_lineno_class='lineno old', new_lineno_class='lineno new',
                             code_class='code', enable_comments=False, parsed_lines=None):
                     """
                     Return given diff as html table with customized css classes
                     """
                     def _link_to_if(condition, label, url):
                         """
                         Generates a link if condition is meet or just the label if not.
                         """
                         if condition:
                             return '''<a href="%(url)s" class="tooltip"
                             title="%(title)s">%(label)s</a>''' % {
                                 'title': _('Click to select line'),
                                 'url': url,
                                 'label': label
                             }
                         else:
                             return label
                     if not self.parsed:
                         self.prepare()
                     diff_lines = self.parsed_diff
                     if parsed_lines:
                         diff_lines = parsed_lines
                     _html_empty = True
                     _html = []
                     _html.append('''<table class="%(table_class)s">\n''' % {
                         'table_class': table_class
                     })
                     for diff in diff_lines:
                         for line in diff['chunks']:
                             _html_empty = False
                             for change in line:
                                 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                                     'lc': line_class,
                                     'action': change['action']
                                 })
                                 anchor_old_id = ''
                                 anchor_new_id = ''
                                 anchor_old = "%(filename)s_o%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['old_lineno']
                                 }
                                 anchor_new = "%(filename)s_n%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['new_lineno']
                                 }
                                 cond_old = (change['old_lineno'] != '...' and
                                             change['old_lineno'])
                                 cond_new = (change['new_lineno'] != '...' and
                                             change['new_lineno'])
                                 if cond_old:
                                     anchor_old_id = 'id="%s"' % anchor_old
                                 if cond_new:
                                     anchor_new_id = 'id="%s"' % anchor_new
                                 if change['action'] != Action.CONTEXT:
                                     anchor_link = True
                                 else:
                                     anchor_link = False
                                 ###########################################################
                                 # COMMENT ICON
                                 ###########################################################
                                 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
                                 if enable_comments and change['action'] != Action.CONTEXT:
                                     _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
                                 _html.append('''</span></td>\n''')
                                 ###########################################################
                                 # OLD LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                                     'a_id': anchor_old_id,
                                     'olc': old_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(anchor_link, change['old_lineno'],
                                                         '#%s' % anchor_old)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # NEW LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                                     'a_id': anchor_new_id,
                                     'nlc': new_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(anchor_link, change['new_lineno'],
                                                         '#%s' % anchor_new)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # CODE
                                 ###########################################################
                                 code_classes = [code_class]
                                 if (not enable_comments or
                                         change['action'] == Action.CONTEXT):
                                     code_classes.append('no-comment')
                                 _html.append('\t<td class="%s">' % ' '.join(code_classes))
                                 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                                     'code': change['line']
                                 })
                                 _html.append('''\t</td>''')
                                 _html.append('''\n</tr>\n''')
                     _html.append('''</table>''')
                     if _html_empty:
                         return None
                     return ''.join(_html)
                 def stat(self):
                     """
                     Report how many lines this diff adds and removes.

                     :return: Tuple ``(adds, removes)`` taken from the instance
                         attributes of the same names.
                     """
                     added, removed = self.adds, self.removes
                     return added, removed
                 def get_context_of_line(
                         self, path, diff_line=None, context_before=3, context_after=3):
                     """
                     Returns the context lines for the specified diff line.

                     The context is cut out of the chunk which contains `diff_line`. It
                     covers the line itself, up to `context_before` entries ahead of it
                     and up to `context_after` entries behind it. Entries which are not
                     diff content (see `_is_diff_content`) are left out of the result.

                     :param path: Filename of the file inside of the diff.
                     :param diff_line: The line to look up. One of its two line numbers
                         has to be `None`.
                     :type diff_line: :class:`DiffLineNumber`
                     :param context_before: Number of entries to include ahead of the
                         line.
                     :param context_after: Number of entries to include behind the line.
                     :return: List of tuples as built by `_context_line`. The text of the
                         last tuple always ends with exactly one run-terminating
                         newline.
                     :raises AssertionError: If the diff has not been parsed yet.
                     :raises ValueError: If `diff_line` is missing or if both of its
                         line numbers are set.
                     :raises FileNotInDiffException: If `path` is not part of the diff.
                     :raises LineNotInDiffException: If the line is not part of the diff.
                     """
                     # Raised explicitly instead of via an `assert` statement, so that
                     # the check is still performed when Python runs with `-O`.
                     if not self.parsed:
                         raise AssertionError("DiffProcessor is not initialized.")

                     # The default of `None` cannot be searched for; fail with a clear
                     # message instead of a `TypeError` from the membership test below.
                     if diff_line is None:
                         raise ValueError("A diff line has to be specified.")
                     if None not in diff_line:
                         raise ValueError(
                             "Cannot specify both line numbers: {}".format(diff_line))

                     file_diff = self._get_file_diff(path)
                     chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

                     # Clamp at zero, a negative start would slice from the chunk's end.
                     first_line_to_include = max(idx - context_before, 0)
                     first_line_after_context = idx + context_after + 1
                     context_lines = chunk[first_line_to_include:first_line_after_context]

                     line_contents = [
                         _context_line(line) for line in context_lines
                         if _is_diff_content(line)]

                     # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
                     # Once they are fixed, we can drop this line here.
                     if line_contents:
                         action, text = line_contents[-1]
                         line_contents[-1] = (action, text.rstrip('\n') + '\n')
                     return line_contents
                 def find_context(self, path, context, offset=0):
                     """
                     Finds the given `context` inside of the diff.

                     Use the parameter `offset` to specify which offset the target line has
                     inside of the given `context`. This way the correct diff line will be
                     returned.

                     Each chunk of the file is searched on its own, so a match never spans
                     two chunks. Every position at which the complete `context` occurs is
                     reported, including occurrences which overlap or directly follow
                     each other.

                     :param path: Filename of the file inside of the diff.
                     :param context: Sequence of items to look for. Each item is compared
                         with the result of `_context_line` for a line of the diff.
                     :param offset: Shall be used to specify the offset of the main line
                         within the given `context`.
                     :return: List with one result of `_line_to_diff_line_number` per
                         occurrence, in the order of the chunks and their lines.
                     :raises ValueError: If `offset` does not point into `context`.
                     :raises FileNotInDiffException: If `path` is not part of the diff.
                     """
                     context_size = len(context)
                     if offset < 0 or offset >= context_size:
                         raise ValueError(
                             "Only positive values up to the length of the context "
                             "minus one are allowed.")

                     expected = list(context)
                     file_diff = self._get_file_diff(path)

                     found_at_diff_lines = []
                     for chunk in file_diff['chunks']:
                         chunk_lines = [_context_line(line) for line in chunk]
                         # Compare a complete window at every start position. This way
                         # a partial match cannot hide a full match which starts inside
                         # of it, and a match at the very end of any chunk is found as
                         # well. A chunk shorter than the context gives an empty range.
                         last_start = len(chunk_lines) - context_size
                         for start in range(last_start + 1):
                             if chunk_lines[start:start + context_size] != expected:
                                 continue
                             found_at_diff_lines.append(
                                 _line_to_diff_line_number(chunk[start + offset]))
                     return found_at_diff_lines
                 def _get_file_diff(self, path):
                     for file_diff in self.parsed_diff:
                         if file_diff['filename'] == path:
                             break
                     else:
                         raise FileNotInDiffException("File {} not in diff".format(path))
                     return file_diff
                 def _find_chunk_line_index(self, file_diff, diff_line):
                     for chunk in file_diff['chunks']:
                         for idx, line in enumerate(chunk):
                             if line['old_lineno'] == diff_line.old:
                                 return chunk, idx
                             if line['new_lineno'] == diff_line.new:
                                 return chunk, idx
                     raise LineNotInDiffException(
                         "The line {} is not part of the diff.".format(diff_line))
             def _is_diff_content(line):
                 """
                 Tell if a parsed line is unmodified, added or deleted content.

                 :param line: Parsed line, its `action` entry is inspected.
                 :return: `True` if the action is one of `Action.UNMODIFIED`,
                     `Action.ADD` or `Action.DELETE`, `False` otherwise.
                 """
                 action = line['action']
                 return action in (Action.UNMODIFIED, Action.ADD, Action.DELETE)
             def _context_line(line):
                 return (line['action'], line['line'])
             DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
             def _line_to_diff_line_number(line):
                 new_line_no = line['new_lineno'] or None
                 old_line_no = line['old_lineno'] or None
                 return DiffLineNumber(old=old_line_no, new=new_line_no)
             class FileNotInDiffException(Exception):
                 """
                 Signals that a requested file is not part of the diff.

                 Raised when something is looked up for a path, e.g. the context of
                 one of its lines, and the given diff has no entry for this path.
                 """
             class LineNotInDiffException(Exception):
                 """
                 Signals that a requested line is not part of the diff.

                 Raised when the context of a line in a file is requested and the
                 given diff does not contain this line.
                 """
             class DiffLimitExceeded(Exception):
                 """
                 Signals that a limit for a diff has been exceeded.

                 NOTE(review): presumably raised once a diff grows past a configured
                 size limit - confirm the exact condition at the raise site.
                 """