diffs: added load time for diffs
marcink
r3838:a11aca8c default
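
The only change visible in this excerpt is the new "import time" on line 29 of rhodecode/lib/diffs.py; the timing code the commit message refers to is not part of the lines shown here. A minimal sketch of how such a measurement is typically wired around the parse step (the helper name and the exact place where timing happens are assumptions, not taken from this commit):

import time
import logging

log = logging.getLogger(__name__)

def prepare_with_timing(diff_processor, inline_diff=True):
    # Hypothetical helper: time DiffProcessor.prepare() and log the duration.
    # The diff below only adds the `time` import; where the measurement
    # actually lives is outside this excerpt.
    start = time.time()
    parsed = diff_processor.prepare(inline_diff=inline_diff)
    log.debug('diff prepared in %.3fs', time.time() - start)
    return parsed
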
@@ -1,1237 +1,1240 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2011-2019 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
21
22 """
23 Set of diffing helpers, previously part of vcs
24 """
25
26 import os
27 import re
28 import bz2
29 import time
30
31 import collections
32 import difflib
33 import logging
34 import cPickle as pickle
35 from itertools import tee, imap
36
37 from rhodecode.lib.vcs.exceptions import VCSError
38 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
39 from rhodecode.lib.utils2 import safe_unicode, safe_str
40
41 log = logging.getLogger(__name__)
41
42
42 # define max context, a file with more than this numbers of lines is unusable
43 # define max context, a file with more than this numbers of lines is unusable
43 # in browser anyway
44 # in browser anyway
44 MAX_CONTEXT = 20 * 1024
45 MAX_CONTEXT = 20 * 1024
45 DEFAULT_CONTEXT = 3
46 DEFAULT_CONTEXT = 3
46
47
47
48
48 def get_diff_context(request):
49 def get_diff_context(request):
49 return MAX_CONTEXT if request.GET.get('fullcontext', '') == '1' else DEFAULT_CONTEXT
50 return MAX_CONTEXT if request.GET.get('fullcontext', '') == '1' else DEFAULT_CONTEXT
50
51
51
52
52 def get_diff_whitespace_flag(request):
53 def get_diff_whitespace_flag(request):
53 return request.GET.get('ignorews', '') == '1'
54 return request.GET.get('ignorews', '') == '1'
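
Taken together, these two helpers turn the ?fullcontext=1 and ?ignorews=1 query parameters into the arguments a diff view needs. A small sketch of how a view might combine them (the helper name is illustrative; request is any object with a GET mapping, e.g. a Pyramid request):

def diff_options_from_request(request):
    # Collect the whitespace flag and context size derived from the
    # ?ignorews=1 and ?fullcontext=1 query parameters.
    hide_whitespace_changes = get_diff_whitespace_flag(request)
    diff_context = get_diff_context(request)
    return hide_whitespace_changes, diff_context
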
54
55
55
56
56 class OPS(object):
57 class OPS(object):
57 ADD = 'A'
58 ADD = 'A'
58 MOD = 'M'
59 MOD = 'M'
59 DEL = 'D'
60 DEL = 'D'
60
61
61
62
62 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
63 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
63 """
64 """
64 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
65 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
65
66
66 :param ignore_whitespace: ignore whitespaces in diff
67 :param ignore_whitespace: ignore whitespaces in diff
67 """
68 """
68 # make sure we pass in default context
69 # make sure we pass in default context
69 context = context or 3
70 context = context or 3
70 # protect against IntOverflow when passing HUGE context
71 # protect against IntOverflow when passing HUGE context
71 if context > MAX_CONTEXT:
72 if context > MAX_CONTEXT:
72 context = MAX_CONTEXT
73 context = MAX_CONTEXT
73
74
74 submodules = filter(lambda o: isinstance(o, SubModuleNode),
75 submodules = filter(lambda o: isinstance(o, SubModuleNode),
75 [filenode_new, filenode_old])
76 [filenode_new, filenode_old])
76 if submodules:
77 if submodules:
77 return ''
78 return ''
78
79
79 for filenode in (filenode_old, filenode_new):
80 for filenode in (filenode_old, filenode_new):
80 if not isinstance(filenode, FileNode):
81 if not isinstance(filenode, FileNode):
81 raise VCSError(
82 raise VCSError(
82 "Given object should be FileNode object, not %s"
83 "Given object should be FileNode object, not %s"
83 % filenode.__class__)
84 % filenode.__class__)
84
85
85 repo = filenode_new.commit.repository
86 repo = filenode_new.commit.repository
86 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
87 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
87 new_commit = filenode_new.commit
88 new_commit = filenode_new.commit
88
89
89 vcs_gitdiff = repo.get_diff(
90 vcs_gitdiff = repo.get_diff(
90 old_commit, new_commit, filenode_new.path,
91 old_commit, new_commit, filenode_new.path,
91 ignore_whitespace, context, path1=filenode_old.path)
92 ignore_whitespace, context, path1=filenode_old.path)
92 return vcs_gitdiff
93 return vcs_gitdiff
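
A usage sketch for get_gitdiff, assuming two commits of the same repository whose commit objects expose get_node() (as the vcs backends used by this module do); the path and variable names are placeholders:

# Sketch: git-style diff of one file between two commits.
old_node = old_commit.get_node('setup.py')
new_node = new_commit.get_node('setup.py')
diff_text = get_gitdiff(old_node, new_node, ignore_whitespace=False, context=3)
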
93
94
94 NEW_FILENODE = 1
95 NEW_FILENODE = 1
95 DEL_FILENODE = 2
96 DEL_FILENODE = 2
96 MOD_FILENODE = 3
97 MOD_FILENODE = 3
97 RENAMED_FILENODE = 4
98 RENAMED_FILENODE = 4
98 COPIED_FILENODE = 5
99 COPIED_FILENODE = 5
99 CHMOD_FILENODE = 6
100 CHMOD_FILENODE = 6
100 BIN_FILENODE = 7
101 BIN_FILENODE = 7
101
102
102
103
103 class LimitedDiffContainer(object):
104 class LimitedDiffContainer(object):
104
105
105 def __init__(self, diff_limit, cur_diff_size, diff):
106 def __init__(self, diff_limit, cur_diff_size, diff):
106 self.diff = diff
107 self.diff = diff
107 self.diff_limit = diff_limit
108 self.diff_limit = diff_limit
108 self.cur_diff_size = cur_diff_size
109 self.cur_diff_size = cur_diff_size
109
110
110 def __getitem__(self, key):
111 def __getitem__(self, key):
111 return self.diff.__getitem__(key)
112 return self.diff.__getitem__(key)
112
113
113 def __iter__(self):
114 def __iter__(self):
114 for l in self.diff:
115 for l in self.diff:
115 yield l
116 yield l
116
117
117
118
118 class Action(object):
119 class Action(object):
119 """
120 """
120 Contains constants for the action value of the lines in a parsed diff.
121 Contains constants for the action value of the lines in a parsed diff.
121 """
122 """
122
123
123 ADD = 'add'
124 ADD = 'add'
124 DELETE = 'del'
125 DELETE = 'del'
125 UNMODIFIED = 'unmod'
126 UNMODIFIED = 'unmod'
126
127
127 CONTEXT = 'context'
128 CONTEXT = 'context'
128 OLD_NO_NL = 'old-no-nl'
129 OLD_NO_NL = 'old-no-nl'
129 NEW_NO_NL = 'new-no-nl'
130 NEW_NO_NL = 'new-no-nl'
130
131
131
132
132 class DiffProcessor(object):
133 class DiffProcessor(object):
133 """
134 """
134 Give it a unified or git diff and it returns a list of the files that were
135 Give it a unified or git diff and it returns a list of the files that were
135 mentioned in the diff together with a dict of meta information that
136 mentioned in the diff together with a dict of meta information that
136 can be used to render it in a HTML template.
137 can be used to render it in a HTML template.
137
138
138 .. note:: Unicode handling
139 .. note:: Unicode handling
139
140
140 The original diffs are a byte sequence and can contain filenames
141 The original diffs are a byte sequence and can contain filenames
141 in mixed encodings. This class generally returns `unicode` objects
142 in mixed encodings. This class generally returns `unicode` objects
142 since the result is intended for presentation to the user.
143 since the result is intended for presentation to the user.
143
144
144 """
145 """
145 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
146 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
146 _newline_marker = re.compile(r'^\\ No newline at end of file')
147 _newline_marker = re.compile(r'^\\ No newline at end of file')
147
148
148 # used for inline highlighter word split
149 # used for inline highlighter word split
149 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
150 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
150
151
151 # collapse ranges of commits over given number
152 # collapse ranges of commits over given number
152 _collapse_commits_over = 5
153 _collapse_commits_over = 5
153
154
154 def __init__(self, diff, format='gitdiff', diff_limit=None,
155 def __init__(self, diff, format='gitdiff', diff_limit=None,
155 file_limit=None, show_full_diff=True):
156 file_limit=None, show_full_diff=True):
156 """
157 """
157 :param diff: A `Diff` object representing a diff from a vcs backend
158 :param diff: A `Diff` object representing a diff from a vcs backend
158 :param format: format of diff passed, `udiff` or `gitdiff`
159 :param format: format of diff passed, `udiff` or `gitdiff`
159 :param diff_limit: define the size of diff that is considered "big"
160 :param diff_limit: define the size of diff that is considered "big"
160 based on that parameter cut off will be triggered, set to None
161 based on that parameter cut off will be triggered, set to None
161 to show full diff
162 to show full diff
162 """
163 """
163 self._diff = diff
164 self._diff = diff
164 self._format = format
165 self._format = format
165 self.adds = 0
166 self.adds = 0
166 self.removes = 0
167 self.removes = 0
167 # calculate diff size
168 # calculate diff size
168 self.diff_limit = diff_limit
169 self.diff_limit = diff_limit
169 self.file_limit = file_limit
170 self.file_limit = file_limit
170 self.show_full_diff = show_full_diff
171 self.show_full_diff = show_full_diff
171 self.cur_diff_size = 0
172 self.cur_diff_size = 0
172 self.parsed = False
173 self.parsed = False
173 self.parsed_diff = []
174 self.parsed_diff = []
174
175
175 log.debug('Initialized DiffProcessor with %s mode', format)
176 log.debug('Initialized DiffProcessor with %s mode', format)
176 if format == 'gitdiff':
177 if format == 'gitdiff':
177 self.differ = self._highlight_line_difflib
178 self.differ = self._highlight_line_difflib
178 self._parser = self._parse_gitdiff
179 self._parser = self._parse_gitdiff
179 else:
180 else:
180 self.differ = self._highlight_line_udiff
181 self.differ = self._highlight_line_udiff
181 self._parser = self._new_parse_gitdiff
182 self._parser = self._new_parse_gitdiff
182
183
183 def _copy_iterator(self):
184 def _copy_iterator(self):
184 """
185 """
185 make a fresh copy of generator, we should not iterate thru
186 make a fresh copy of generator, we should not iterate thru
186 an original as it's needed for repeating operations on
187 an original as it's needed for repeating operations on
187 this instance of DiffProcessor
188 this instance of DiffProcessor
188 """
189 """
189 self.__udiff, iterator_copy = tee(self.__udiff)
190 self.__udiff, iterator_copy = tee(self.__udiff)
190 return iterator_copy
191 return iterator_copy
191
192
192 def _escaper(self, string):
193 def _escaper(self, string):
193 """
194 """
194 Escaper for diff escapes special chars and checks the diff limit
195 Escaper for diff escapes special chars and checks the diff limit
195
196
196 :param string:
197 :param string:
197 """
198 """
198 self.cur_diff_size += len(string)
199 self.cur_diff_size += len(string)
199
200
200 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
201 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
201 raise DiffLimitExceeded('Diff Limit Exceeded')
202 raise DiffLimitExceeded('Diff Limit Exceeded')
202
203
203 return string \
204 return string \
204 .replace('&', '&amp;')\
205 .replace('&', '&amp;')\
205 .replace('<', '&lt;')\
206 .replace('<', '&lt;')\
206 .replace('>', '&gt;')
207 .replace('>', '&gt;')
207
208
208 def _line_counter(self, l):
209 def _line_counter(self, l):
209 """
210 """
210 Checks each line and bumps total adds/removes for this diff
211 Checks each line and bumps total adds/removes for this diff
211
212
212 :param l:
213 :param l:
213 """
214 """
214 if l.startswith('+') and not l.startswith('+++'):
215 if l.startswith('+') and not l.startswith('+++'):
215 self.adds += 1
216 self.adds += 1
216 elif l.startswith('-') and not l.startswith('---'):
217 elif l.startswith('-') and not l.startswith('---'):
217 self.removes += 1
218 self.removes += 1
218 return safe_unicode(l)
219 return safe_unicode(l)
219
220
220 def _highlight_line_difflib(self, line, next_):
221 def _highlight_line_difflib(self, line, next_):
221 """
222 """
222 Highlight inline changes in both lines.
223 Highlight inline changes in both lines.
223 """
224 """
224
225
225 if line['action'] == Action.DELETE:
226 if line['action'] == Action.DELETE:
226 old, new = line, next_
227 old, new = line, next_
227 else:
228 else:
228 old, new = next_, line
229 old, new = next_, line
229
230
230 oldwords = self._token_re.split(old['line'])
231 oldwords = self._token_re.split(old['line'])
231 newwords = self._token_re.split(new['line'])
232 newwords = self._token_re.split(new['line'])
232 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
233 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
233
234
234 oldfragments, newfragments = [], []
235 oldfragments, newfragments = [], []
235 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
236 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
236 oldfrag = ''.join(oldwords[i1:i2])
237 oldfrag = ''.join(oldwords[i1:i2])
237 newfrag = ''.join(newwords[j1:j2])
238 newfrag = ''.join(newwords[j1:j2])
238 if tag != 'equal':
239 if tag != 'equal':
239 if oldfrag:
240 if oldfrag:
240 oldfrag = '<del>%s</del>' % oldfrag
241 oldfrag = '<del>%s</del>' % oldfrag
241 if newfrag:
242 if newfrag:
242 newfrag = '<ins>%s</ins>' % newfrag
243 newfrag = '<ins>%s</ins>' % newfrag
243 oldfragments.append(oldfrag)
244 oldfragments.append(oldfrag)
244 newfragments.append(newfrag)
245 newfragments.append(newfrag)
245
246
246 old['line'] = "".join(oldfragments)
247 old['line'] = "".join(oldfragments)
247 new['line'] = "".join(newfragments)
248 new['line'] = "".join(newfragments)
248
249
249 def _highlight_line_udiff(self, line, next_):
250 def _highlight_line_udiff(self, line, next_):
250 """
251 """
251 Highlight inline changes in both lines.
252 Highlight inline changes in both lines.
252 """
253 """
253 start = 0
254 start = 0
254 limit = min(len(line['line']), len(next_['line']))
255 limit = min(len(line['line']), len(next_['line']))
255 while start < limit and line['line'][start] == next_['line'][start]:
256 while start < limit and line['line'][start] == next_['line'][start]:
256 start += 1
257 start += 1
257 end = -1
258 end = -1
258 limit -= start
259 limit -= start
259 while -end <= limit and line['line'][end] == next_['line'][end]:
260 while -end <= limit and line['line'][end] == next_['line'][end]:
260 end -= 1
261 end -= 1
261 end += 1
262 end += 1
262 if start or end:
263 if start or end:
263 def do(l):
264 def do(l):
264 last = end + len(l['line'])
265 last = end + len(l['line'])
265 if l['action'] == Action.ADD:
266 if l['action'] == Action.ADD:
266 tag = 'ins'
267 tag = 'ins'
267 else:
268 else:
268 tag = 'del'
269 tag = 'del'
269 l['line'] = '%s<%s>%s</%s>%s' % (
270 l['line'] = '%s<%s>%s</%s>%s' % (
270 l['line'][:start],
271 l['line'][:start],
271 tag,
272 tag,
272 l['line'][start:last],
273 l['line'][start:last],
273 tag,
274 tag,
274 l['line'][last:]
275 l['line'][last:]
275 )
276 )
276 do(line)
277 do(line)
277 do(next_)
278 do(next_)
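
A worked example of what the udiff highlighter does: it finds the common prefix and suffix of the two line bodies and wraps only the differing middle in del/ins tags. Assuming a DiffProcessor instance named processor:

# Two parsed-line dicts shaped like the entries produced by _parse_lines().
old = {'action': Action.DELETE, 'line': 'foo = 1'}
new = {'action': Action.ADD, 'line': 'foo = 2'}
processor._highlight_line_udiff(old, new)
# The common prefix 'foo = ' is left untouched, only the changed tail is wrapped:
#   old['line'] == 'foo = <del>1</del>'
#   new['line'] == 'foo = <ins>2</ins>'
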
278
279
279 def _clean_line(self, line, command):
280 def _clean_line(self, line, command):
280 if command in ['+', '-', ' ']:
281 if command in ['+', '-', ' ']:
281 # only modify the line if it's actually a diff thing
282 # only modify the line if it's actually a diff thing
282 line = line[1:]
283 line = line[1:]
283 return line
284 return line
284
285
285 def _parse_gitdiff(self, inline_diff=True):
286 def _parse_gitdiff(self, inline_diff=True):
286 _files = []
287 _files = []
287 diff_container = lambda arg: arg
288 diff_container = lambda arg: arg
288
289
289 for chunk in self._diff.chunks():
290 for chunk in self._diff.chunks():
290 head = chunk.header
291 head = chunk.header
291
292
292 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
293 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
293 raw_diff = chunk.raw
294 raw_diff = chunk.raw
294 limited_diff = False
295 limited_diff = False
295 exceeds_limit = False
296 exceeds_limit = False
296
297
297 op = None
298 op = None
298 stats = {
299 stats = {
299 'added': 0,
300 'added': 0,
300 'deleted': 0,
301 'deleted': 0,
301 'binary': False,
302 'binary': False,
302 'ops': {},
303 'ops': {},
303 }
304 }
304
305
305 if head['deleted_file_mode']:
306 if head['deleted_file_mode']:
306 op = OPS.DEL
307 op = OPS.DEL
307 stats['binary'] = True
308 stats['binary'] = True
308 stats['ops'][DEL_FILENODE] = 'deleted file'
309 stats['ops'][DEL_FILENODE] = 'deleted file'
309
310
310 elif head['new_file_mode']:
311 elif head['new_file_mode']:
311 op = OPS.ADD
312 op = OPS.ADD
312 stats['binary'] = True
313 stats['binary'] = True
313 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
314 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
314 else: # modify operation, can be copy, rename or chmod
315 else: # modify operation, can be copy, rename or chmod
315
316
316 # CHMOD
317 # CHMOD
317 if head['new_mode'] and head['old_mode']:
318 if head['new_mode'] and head['old_mode']:
318 op = OPS.MOD
319 op = OPS.MOD
319 stats['binary'] = True
320 stats['binary'] = True
320 stats['ops'][CHMOD_FILENODE] = (
321 stats['ops'][CHMOD_FILENODE] = (
321 'modified file chmod %s => %s' % (
322 'modified file chmod %s => %s' % (
322 head['old_mode'], head['new_mode']))
323 head['old_mode'], head['new_mode']))
323 # RENAME
324 # RENAME
324 if head['rename_from'] != head['rename_to']:
325 if head['rename_from'] != head['rename_to']:
325 op = OPS.MOD
326 op = OPS.MOD
326 stats['binary'] = True
327 stats['binary'] = True
327 stats['ops'][RENAMED_FILENODE] = (
328 stats['ops'][RENAMED_FILENODE] = (
328 'file renamed from %s to %s' % (
329 'file renamed from %s to %s' % (
329 head['rename_from'], head['rename_to']))
330 head['rename_from'], head['rename_to']))
330 # COPY
331 # COPY
331 if head.get('copy_from') and head.get('copy_to'):
332 if head.get('copy_from') and head.get('copy_to'):
332 op = OPS.MOD
333 op = OPS.MOD
333 stats['binary'] = True
334 stats['binary'] = True
334 stats['ops'][COPIED_FILENODE] = (
335 stats['ops'][COPIED_FILENODE] = (
335 'file copied from %s to %s' % (
336 'file copied from %s to %s' % (
336 head['copy_from'], head['copy_to']))
337 head['copy_from'], head['copy_to']))
337
338
338 # If our new parsed headers didn't match anything fallback to
339 # If our new parsed headers didn't match anything fallback to
339 # old style detection
340 # old style detection
340 if op is None:
341 if op is None:
341 if not head['a_file'] and head['b_file']:
342 if not head['a_file'] and head['b_file']:
342 op = OPS.ADD
343 op = OPS.ADD
343 stats['binary'] = True
344 stats['binary'] = True
344 stats['ops'][NEW_FILENODE] = 'new file'
345 stats['ops'][NEW_FILENODE] = 'new file'
345
346
346 elif head['a_file'] and not head['b_file']:
347 elif head['a_file'] and not head['b_file']:
347 op = OPS.DEL
348 op = OPS.DEL
348 stats['binary'] = True
349 stats['binary'] = True
349 stats['ops'][DEL_FILENODE] = 'deleted file'
350 stats['ops'][DEL_FILENODE] = 'deleted file'
350
351
351 # it's not ADD not DELETE
352 # it's not ADD not DELETE
352 if op is None:
353 if op is None:
353 op = OPS.MOD
354 op = OPS.MOD
354 stats['binary'] = True
355 stats['binary'] = True
355 stats['ops'][MOD_FILENODE] = 'modified file'
356 stats['ops'][MOD_FILENODE] = 'modified file'
356
357
357 # a real non-binary diff
358 # a real non-binary diff
358 if head['a_file'] or head['b_file']:
359 if head['a_file'] or head['b_file']:
359 try:
360 try:
360 raw_diff, chunks, _stats = self._parse_lines(diff)
361 raw_diff, chunks, _stats = self._parse_lines(diff)
361 stats['binary'] = False
362 stats['binary'] = False
362 stats['added'] = _stats[0]
363 stats['added'] = _stats[0]
363 stats['deleted'] = _stats[1]
364 stats['deleted'] = _stats[1]
364 # explicit mark that it's a modified file
365 # explicit mark that it's a modified file
365 if op == OPS.MOD:
366 if op == OPS.MOD:
366 stats['ops'][MOD_FILENODE] = 'modified file'
367 stats['ops'][MOD_FILENODE] = 'modified file'
367 exceeds_limit = len(raw_diff) > self.file_limit
368 exceeds_limit = len(raw_diff) > self.file_limit
368
369
369 # changed from _escaper function so we validate size of
370 # changed from _escaper function so we validate size of
370 # each file instead of the whole diff
371 # each file instead of the whole diff
371 # diff will hide big files but still show small ones
372 # diff will hide big files but still show small ones
372 # from my tests, big files are fairly safe to be parsed
373 # from my tests, big files are fairly safe to be parsed
373 # but the browser is the bottleneck
374 # but the browser is the bottleneck
374 if not self.show_full_diff and exceeds_limit:
375 if not self.show_full_diff and exceeds_limit:
375 raise DiffLimitExceeded('File Limit Exceeded')
376 raise DiffLimitExceeded('File Limit Exceeded')
376
377
377 except DiffLimitExceeded:
378 except DiffLimitExceeded:
378 diff_container = lambda _diff: \
379 diff_container = lambda _diff: \
379 LimitedDiffContainer(
380 LimitedDiffContainer(
380 self.diff_limit, self.cur_diff_size, _diff)
381 self.diff_limit, self.cur_diff_size, _diff)
381
382
382 exceeds_limit = len(raw_diff) > self.file_limit
383 exceeds_limit = len(raw_diff) > self.file_limit
383 limited_diff = True
384 limited_diff = True
384 chunks = []
385 chunks = []
385
386
386 else: # GIT format binary patch, or possibly empty diff
387 else: # GIT format binary patch, or possibly empty diff
387 if head['bin_patch']:
388 if head['bin_patch']:
388 # we have operation already extracted, but we mark simply
389 # we have operation already extracted, but we mark simply
389 # it's a diff we wont show for binary files
390 # it's a diff we wont show for binary files
390 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
391 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
391 chunks = []
392 chunks = []
392
393
393 if chunks and not self.show_full_diff and op == OPS.DEL:
394 if chunks and not self.show_full_diff and op == OPS.DEL:
394 # if not full diff mode show deleted file contents
395 # if not full diff mode show deleted file contents
395 # TODO: anderson: if the view is not too big, there is no way
396 # TODO: anderson: if the view is not too big, there is no way
396 # to see the content of the file
397 # to see the content of the file
397 chunks = []
398 chunks = []
398
399
399 chunks.insert(0, [{
400 chunks.insert(0, [{
400 'old_lineno': '',
401 'old_lineno': '',
401 'new_lineno': '',
402 'new_lineno': '',
402 'action': Action.CONTEXT,
403 'action': Action.CONTEXT,
403 'line': msg,
404 'line': msg,
404 } for _op, msg in stats['ops'].iteritems()
405 } for _op, msg in stats['ops'].iteritems()
405 if _op not in [MOD_FILENODE]])
406 if _op not in [MOD_FILENODE]])
406
407
407 _files.append({
408 _files.append({
408 'filename': safe_unicode(head['b_path']),
409 'filename': safe_unicode(head['b_path']),
409 'old_revision': head['a_blob_id'],
410 'old_revision': head['a_blob_id'],
410 'new_revision': head['b_blob_id'],
411 'new_revision': head['b_blob_id'],
411 'chunks': chunks,
412 'chunks': chunks,
412 'raw_diff': safe_unicode(raw_diff),
413 'raw_diff': safe_unicode(raw_diff),
413 'operation': op,
414 'operation': op,
414 'stats': stats,
415 'stats': stats,
415 'exceeds_limit': exceeds_limit,
416 'exceeds_limit': exceeds_limit,
416 'is_limited_diff': limited_diff,
417 'is_limited_diff': limited_diff,
417 })
418 })
418
419
419 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
420 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
420 OPS.DEL: 2}.get(info['operation'])
421 OPS.DEL: 2}.get(info['operation'])
421
422
422 if not inline_diff:
423 if not inline_diff:
423 return diff_container(sorted(_files, key=sorter))
424 return diff_container(sorted(_files, key=sorter))
424
425
425 # highlight inline changes
426 # highlight inline changes
426 for diff_data in _files:
427 for diff_data in _files:
427 for chunk in diff_data['chunks']:
428 for chunk in diff_data['chunks']:
428 lineiter = iter(chunk)
429 lineiter = iter(chunk)
429 try:
430 try:
430 while 1:
431 while 1:
431 line = lineiter.next()
432 line = lineiter.next()
432 if line['action'] not in (
433 if line['action'] not in (
433 Action.UNMODIFIED, Action.CONTEXT):
434 Action.UNMODIFIED, Action.CONTEXT):
434 nextline = lineiter.next()
435 nextline = lineiter.next()
435 if nextline['action'] in ['unmod', 'context'] or \
436 if nextline['action'] in ['unmod', 'context'] or \
436 nextline['action'] == line['action']:
437 nextline['action'] == line['action']:
437 continue
438 continue
438 self.differ(line, nextline)
439 self.differ(line, nextline)
439 except StopIteration:
440 except StopIteration:
440 pass
441 pass
441
442
442 return diff_container(sorted(_files, key=sorter))
443 return diff_container(sorted(_files, key=sorter))
443
444
444 def _check_large_diff(self):
445 def _check_large_diff(self):
445 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
446 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
446 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
447 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
447 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
448 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
448
449
449 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
450 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
450 def _new_parse_gitdiff(self, inline_diff=True):
451 def _new_parse_gitdiff(self, inline_diff=True):
451 _files = []
452 _files = []
452
453
453 # this can be overriden later to a LimitedDiffContainer type
454 # this can be overriden later to a LimitedDiffContainer type
454 diff_container = lambda arg: arg
455 diff_container = lambda arg: arg
455
456
456 for chunk in self._diff.chunks():
457 for chunk in self._diff.chunks():
457 head = chunk.header
458 head = chunk.header
458 log.debug('parsing diff %r', head)
459 log.debug('parsing diff %r', head)
459
460
460 raw_diff = chunk.raw
461 raw_diff = chunk.raw
461 limited_diff = False
462 limited_diff = False
462 exceeds_limit = False
463 exceeds_limit = False
463
464
464 op = None
465 op = None
465 stats = {
466 stats = {
466 'added': 0,
467 'added': 0,
467 'deleted': 0,
468 'deleted': 0,
468 'binary': False,
469 'binary': False,
469 'old_mode': None,
470 'old_mode': None,
470 'new_mode': None,
471 'new_mode': None,
471 'ops': {},
472 'ops': {},
472 }
473 }
473 if head['old_mode']:
474 if head['old_mode']:
474 stats['old_mode'] = head['old_mode']
475 stats['old_mode'] = head['old_mode']
475 if head['new_mode']:
476 if head['new_mode']:
476 stats['new_mode'] = head['new_mode']
477 stats['new_mode'] = head['new_mode']
477 if head['b_mode']:
478 if head['b_mode']:
478 stats['new_mode'] = head['b_mode']
479 stats['new_mode'] = head['b_mode']
479
480
480 # delete file
481 # delete file
481 if head['deleted_file_mode']:
482 if head['deleted_file_mode']:
482 op = OPS.DEL
483 op = OPS.DEL
483 stats['binary'] = True
484 stats['binary'] = True
484 stats['ops'][DEL_FILENODE] = 'deleted file'
485 stats['ops'][DEL_FILENODE] = 'deleted file'
485
486
486 # new file
487 # new file
487 elif head['new_file_mode']:
488 elif head['new_file_mode']:
488 op = OPS.ADD
489 op = OPS.ADD
489 stats['binary'] = True
490 stats['binary'] = True
490 stats['old_mode'] = None
491 stats['old_mode'] = None
491 stats['new_mode'] = head['new_file_mode']
492 stats['new_mode'] = head['new_file_mode']
492 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
493 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
493
494
494 # modify operation, can be copy, rename or chmod
495 # modify operation, can be copy, rename or chmod
495 else:
496 else:
496 # CHMOD
497 # CHMOD
497 if head['new_mode'] and head['old_mode']:
498 if head['new_mode'] and head['old_mode']:
498 op = OPS.MOD
499 op = OPS.MOD
499 stats['binary'] = True
500 stats['binary'] = True
500 stats['ops'][CHMOD_FILENODE] = (
501 stats['ops'][CHMOD_FILENODE] = (
501 'modified file chmod %s => %s' % (
502 'modified file chmod %s => %s' % (
502 head['old_mode'], head['new_mode']))
503 head['old_mode'], head['new_mode']))
503
504
504 # RENAME
505 # RENAME
505 if head['rename_from'] != head['rename_to']:
506 if head['rename_from'] != head['rename_to']:
506 op = OPS.MOD
507 op = OPS.MOD
507 stats['binary'] = True
508 stats['binary'] = True
508 stats['renamed'] = (head['rename_from'], head['rename_to'])
509 stats['renamed'] = (head['rename_from'], head['rename_to'])
509 stats['ops'][RENAMED_FILENODE] = (
510 stats['ops'][RENAMED_FILENODE] = (
510 'file renamed from %s to %s' % (
511 'file renamed from %s to %s' % (
511 head['rename_from'], head['rename_to']))
512 head['rename_from'], head['rename_to']))
512 # COPY
513 # COPY
513 if head.get('copy_from') and head.get('copy_to'):
514 if head.get('copy_from') and head.get('copy_to'):
514 op = OPS.MOD
515 op = OPS.MOD
515 stats['binary'] = True
516 stats['binary'] = True
516 stats['copied'] = (head['copy_from'], head['copy_to'])
517 stats['copied'] = (head['copy_from'], head['copy_to'])
517 stats['ops'][COPIED_FILENODE] = (
518 stats['ops'][COPIED_FILENODE] = (
518 'file copied from %s to %s' % (
519 'file copied from %s to %s' % (
519 head['copy_from'], head['copy_to']))
520 head['copy_from'], head['copy_to']))
520
521
521 # If our new parsed headers didn't match anything fallback to
522 # If our new parsed headers didn't match anything fallback to
522 # old style detection
523 # old style detection
523 if op is None:
524 if op is None:
524 if not head['a_file'] and head['b_file']:
525 if not head['a_file'] and head['b_file']:
525 op = OPS.ADD
526 op = OPS.ADD
526 stats['binary'] = True
527 stats['binary'] = True
527 stats['new_file'] = True
528 stats['new_file'] = True
528 stats['ops'][NEW_FILENODE] = 'new file'
529 stats['ops'][NEW_FILENODE] = 'new file'
529
530
530 elif head['a_file'] and not head['b_file']:
531 elif head['a_file'] and not head['b_file']:
531 op = OPS.DEL
532 op = OPS.DEL
532 stats['binary'] = True
533 stats['binary'] = True
533 stats['ops'][DEL_FILENODE] = 'deleted file'
534 stats['ops'][DEL_FILENODE] = 'deleted file'
534
535
535 # it's not ADD not DELETE
536 # it's not ADD not DELETE
536 if op is None:
537 if op is None:
537 op = OPS.MOD
538 op = OPS.MOD
538 stats['binary'] = True
539 stats['binary'] = True
539 stats['ops'][MOD_FILENODE] = 'modified file'
540 stats['ops'][MOD_FILENODE] = 'modified file'
540
541
541 # a real non-binary diff
542 # a real non-binary diff
542 if head['a_file'] or head['b_file']:
543 if head['a_file'] or head['b_file']:
543 # simulate splitlines, so we keep the line end part
544 # simulate splitlines, so we keep the line end part
544 diff = self.diff_splitter(chunk.diff)
545 diff = self.diff_splitter(chunk.diff)
545
546
546 # append each file to the diff size
547 # append each file to the diff size
547 raw_chunk_size = len(raw_diff)
548 raw_chunk_size = len(raw_diff)
548
549
549 exceeds_limit = raw_chunk_size > self.file_limit
550 exceeds_limit = raw_chunk_size > self.file_limit
550 self.cur_diff_size += raw_chunk_size
551 self.cur_diff_size += raw_chunk_size
551
552
552 try:
553 try:
553 # Check each file instead of the whole diff.
554 # Check each file instead of the whole diff.
554 # Diff will hide big files but still show small ones.
555 # Diff will hide big files but still show small ones.
555 # From the tests big files are fairly safe to be parsed
556 # From the tests big files are fairly safe to be parsed
556 # but the browser is the bottleneck.
557 # but the browser is the bottleneck.
557 if not self.show_full_diff and exceeds_limit:
558 if not self.show_full_diff and exceeds_limit:
558 log.debug('File `%s` exceeds current file_limit of %s',
559 log.debug('File `%s` exceeds current file_limit of %s',
559 safe_unicode(head['b_path']), self.file_limit)
560 safe_unicode(head['b_path']), self.file_limit)
560 raise DiffLimitExceeded(
561 raise DiffLimitExceeded(
561 'File Limit %s Exceeded', self.file_limit)
562 'File Limit %s Exceeded', self.file_limit)
562
563
563 self._check_large_diff()
564 self._check_large_diff()
564
565
565 raw_diff, chunks, _stats = self._new_parse_lines(diff)
566 raw_diff, chunks, _stats = self._new_parse_lines(diff)
566 stats['binary'] = False
567 stats['binary'] = False
567 stats['added'] = _stats[0]
568 stats['added'] = _stats[0]
568 stats['deleted'] = _stats[1]
569 stats['deleted'] = _stats[1]
569 # explicit mark that it's a modified file
570 # explicit mark that it's a modified file
570 if op == OPS.MOD:
571 if op == OPS.MOD:
571 stats['ops'][MOD_FILENODE] = 'modified file'
572 stats['ops'][MOD_FILENODE] = 'modified file'
572
573
573 except DiffLimitExceeded:
574 except DiffLimitExceeded:
574 diff_container = lambda _diff: \
575 diff_container = lambda _diff: \
575 LimitedDiffContainer(
576 LimitedDiffContainer(
576 self.diff_limit, self.cur_diff_size, _diff)
577 self.diff_limit, self.cur_diff_size, _diff)
577
578
578 limited_diff = True
579 limited_diff = True
579 chunks = []
580 chunks = []
580
581
581 else: # GIT format binary patch, or possibly empty diff
582 else: # GIT format binary patch, or possibly empty diff
582 if head['bin_patch']:
583 if head['bin_patch']:
583 # we have operation already extracted, but we mark simply
584 # we have operation already extracted, but we mark simply
584 # it's a diff we wont show for binary files
585 # it's a diff we wont show for binary files
585 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
586 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
586 chunks = []
587 chunks = []
587
588
588 # Hide content of deleted node by setting empty chunks
589 # Hide content of deleted node by setting empty chunks
589 if chunks and not self.show_full_diff and op == OPS.DEL:
590 if chunks and not self.show_full_diff and op == OPS.DEL:
590 # if not full diff mode show deleted file contents
591 # if not full diff mode show deleted file contents
591 # TODO: anderson: if the view is not too big, there is no way
592 # TODO: anderson: if the view is not too big, there is no way
592 # to see the content of the file
593 # to see the content of the file
593 chunks = []
594 chunks = []
594
595
595 chunks.insert(
596 chunks.insert(
596 0, [{'old_lineno': '',
597 0, [{'old_lineno': '',
597 'new_lineno': '',
598 'new_lineno': '',
598 'action': Action.CONTEXT,
599 'action': Action.CONTEXT,
599 'line': msg,
600 'line': msg,
600 } for _op, msg in stats['ops'].iteritems()
601 } for _op, msg in stats['ops'].iteritems()
601 if _op not in [MOD_FILENODE]])
602 if _op not in [MOD_FILENODE]])
602
603
603 original_filename = safe_unicode(head['a_path'])
604 original_filename = safe_unicode(head['a_path'])
604 _files.append({
605 _files.append({
605 'original_filename': original_filename,
606 'original_filename': original_filename,
606 'filename': safe_unicode(head['b_path']),
607 'filename': safe_unicode(head['b_path']),
607 'old_revision': head['a_blob_id'],
608 'old_revision': head['a_blob_id'],
608 'new_revision': head['b_blob_id'],
609 'new_revision': head['b_blob_id'],
609 'chunks': chunks,
610 'chunks': chunks,
610 'raw_diff': safe_unicode(raw_diff),
611 'raw_diff': safe_unicode(raw_diff),
611 'operation': op,
612 'operation': op,
612 'stats': stats,
613 'stats': stats,
613 'exceeds_limit': exceeds_limit,
614 'exceeds_limit': exceeds_limit,
614 'is_limited_diff': limited_diff,
615 'is_limited_diff': limited_diff,
615 })
616 })
616
617
617 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
618 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
618 OPS.DEL: 2}.get(info['operation'])
619 OPS.DEL: 2}.get(info['operation'])
619
620
620 return diff_container(sorted(_files, key=sorter))
621 return diff_container(sorted(_files, key=sorter))
621
622
622 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
623 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
623 def _parse_lines(self, diff_iter):
624 def _parse_lines(self, diff_iter):
624 """
625 """
625 Parse the diff an return data for the template.
626 Parse the diff an return data for the template.
626 """
627 """
627
628
628 stats = [0, 0]
629 stats = [0, 0]
629 chunks = []
630 chunks = []
630 raw_diff = []
631 raw_diff = []
631
632
632 try:
633 try:
633 line = diff_iter.next()
634 line = diff_iter.next()
634
635
635 while line:
636 while line:
636 raw_diff.append(line)
637 raw_diff.append(line)
637 lines = []
638 lines = []
638 chunks.append(lines)
639 chunks.append(lines)
639
640
640 match = self._chunk_re.match(line)
641 match = self._chunk_re.match(line)
641
642
642 if not match:
643 if not match:
643 break
644 break
644
645
645 gr = match.groups()
646 gr = match.groups()
646 (old_line, old_end,
647 (old_line, old_end,
647 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
648 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
648 old_line -= 1
649 old_line -= 1
649 new_line -= 1
650 new_line -= 1
650
651
651 context = len(gr) == 5
652 context = len(gr) == 5
652 old_end += old_line
653 old_end += old_line
653 new_end += new_line
654 new_end += new_line
654
655
655 if context:
656 if context:
656 # skip context only if it's first line
657 # skip context only if it's first line
657 if int(gr[0]) > 1:
658 if int(gr[0]) > 1:
658 lines.append({
659 lines.append({
659 'old_lineno': '...',
660 'old_lineno': '...',
660 'new_lineno': '...',
661 'new_lineno': '...',
661 'action': Action.CONTEXT,
662 'action': Action.CONTEXT,
662 'line': line,
663 'line': line,
663 })
664 })
664
665
665 line = diff_iter.next()
666 line = diff_iter.next()
666
667
667 while old_line < old_end or new_line < new_end:
668 while old_line < old_end or new_line < new_end:
668 command = ' '
669 command = ' '
669 if line:
670 if line:
670 command = line[0]
671 command = line[0]
671
672
672 affects_old = affects_new = False
673 affects_old = affects_new = False
673
674
674 # ignore those if we don't expect them
675 # ignore those if we don't expect them
675 if command in '#@':
676 if command in '#@':
676 continue
677 continue
677 elif command == '+':
678 elif command == '+':
678 affects_new = True
679 affects_new = True
679 action = Action.ADD
680 action = Action.ADD
680 stats[0] += 1
681 stats[0] += 1
681 elif command == '-':
682 elif command == '-':
682 affects_old = True
683 affects_old = True
683 action = Action.DELETE
684 action = Action.DELETE
684 stats[1] += 1
685 stats[1] += 1
685 else:
686 else:
686 affects_old = affects_new = True
687 affects_old = affects_new = True
687 action = Action.UNMODIFIED
688 action = Action.UNMODIFIED
688
689
689 if not self._newline_marker.match(line):
690 if not self._newline_marker.match(line):
690 old_line += affects_old
691 old_line += affects_old
691 new_line += affects_new
692 new_line += affects_new
692 lines.append({
693 lines.append({
693 'old_lineno': affects_old and old_line or '',
694 'old_lineno': affects_old and old_line or '',
694 'new_lineno': affects_new and new_line or '',
695 'new_lineno': affects_new and new_line or '',
695 'action': action,
696 'action': action,
696 'line': self._clean_line(line, command)
697 'line': self._clean_line(line, command)
697 })
698 })
698 raw_diff.append(line)
699 raw_diff.append(line)
699
700
700 line = diff_iter.next()
701 line = diff_iter.next()
701
702
702 if self._newline_marker.match(line):
703 if self._newline_marker.match(line):
703 # we need to append to lines, since this is not
704 # we need to append to lines, since this is not
704 # counted in the line specs of diff
705 # counted in the line specs of diff
705 lines.append({
706 lines.append({
706 'old_lineno': '...',
707 'old_lineno': '...',
707 'new_lineno': '...',
708 'new_lineno': '...',
708 'action': Action.CONTEXT,
709 'action': Action.CONTEXT,
709 'line': self._clean_line(line, command)
710 'line': self._clean_line(line, command)
710 })
711 })
711
712
712 except StopIteration:
713 except StopIteration:
713 pass
714 pass
714 return ''.join(raw_diff), chunks, stats
715 return ''.join(raw_diff), chunks, stats
715
716
716 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
717 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
717 def _new_parse_lines(self, diff_iter):
718 def _new_parse_lines(self, diff_iter):
718 """
719 """
719 Parse the diff an return data for the template.
720 Parse the diff an return data for the template.
720 """
721 """
721
722
722 stats = [0, 0]
723 stats = [0, 0]
723 chunks = []
724 chunks = []
724 raw_diff = []
725 raw_diff = []
725
726
726 try:
727 try:
727 line = diff_iter.next()
728 line = diff_iter.next()
728
729
729 while line:
730 while line:
730 raw_diff.append(line)
731 raw_diff.append(line)
731 # match header e.g @@ -0,0 +1 @@\n'
732 # match header e.g @@ -0,0 +1 @@\n'
732 match = self._chunk_re.match(line)
733 match = self._chunk_re.match(line)
733
734
734 if not match:
735 if not match:
735 break
736 break
736
737
737 gr = match.groups()
738 gr = match.groups()
738 (old_line, old_end,
739 (old_line, old_end,
739 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
740 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
740
741
741 lines = []
742 lines = []
742 hunk = {
743 hunk = {
743 'section_header': gr[-1],
744 'section_header': gr[-1],
744 'source_start': old_line,
745 'source_start': old_line,
745 'source_length': old_end,
746 'source_length': old_end,
746 'target_start': new_line,
747 'target_start': new_line,
747 'target_length': new_end,
748 'target_length': new_end,
748 'lines': lines,
749 'lines': lines,
749 }
750 }
750 chunks.append(hunk)
751 chunks.append(hunk)
751
752
752 old_line -= 1
753 old_line -= 1
753 new_line -= 1
754 new_line -= 1
754
755
755 context = len(gr) == 5
756 context = len(gr) == 5
756 old_end += old_line
757 old_end += old_line
757 new_end += new_line
758 new_end += new_line
758
759
759 line = diff_iter.next()
760 line = diff_iter.next()
760
761
761 while old_line < old_end or new_line < new_end:
762 while old_line < old_end or new_line < new_end:
762 command = ' '
763 command = ' '
763 if line:
764 if line:
764 command = line[0]
765 command = line[0]
765
766
766 affects_old = affects_new = False
767 affects_old = affects_new = False
767
768
768 # ignore those if we don't expect them
769 # ignore those if we don't expect them
769 if command in '#@':
770 if command in '#@':
770 continue
771 continue
771 elif command == '+':
772 elif command == '+':
772 affects_new = True
773 affects_new = True
773 action = Action.ADD
774 action = Action.ADD
774 stats[0] += 1
775 stats[0] += 1
775 elif command == '-':
776 elif command == '-':
776 affects_old = True
777 affects_old = True
777 action = Action.DELETE
778 action = Action.DELETE
778 stats[1] += 1
779 stats[1] += 1
779 else:
780 else:
780 affects_old = affects_new = True
781 affects_old = affects_new = True
781 action = Action.UNMODIFIED
782 action = Action.UNMODIFIED
782
783
783 if not self._newline_marker.match(line):
784 if not self._newline_marker.match(line):
784 old_line += affects_old
785 old_line += affects_old
785 new_line += affects_new
786 new_line += affects_new
786 lines.append({
787 lines.append({
787 'old_lineno': affects_old and old_line or '',
788 'old_lineno': affects_old and old_line or '',
788 'new_lineno': affects_new and new_line or '',
789 'new_lineno': affects_new and new_line or '',
789 'action': action,
790 'action': action,
790 'line': self._clean_line(line, command)
791 'line': self._clean_line(line, command)
791 })
792 })
792 raw_diff.append(line)
793 raw_diff.append(line)
793
794
794 line = diff_iter.next()
795 line = diff_iter.next()
795
796
796 if self._newline_marker.match(line):
797 if self._newline_marker.match(line):
797 # we need to append to lines, since this is not
798 # we need to append to lines, since this is not
798 # counted in the line specs of diff
799 # counted in the line specs of diff
799 if affects_old:
800 if affects_old:
800 action = Action.OLD_NO_NL
801 action = Action.OLD_NO_NL
801 elif affects_new:
802 elif affects_new:
802 action = Action.NEW_NO_NL
803 action = Action.NEW_NO_NL
803 else:
804 else:
804 raise Exception('invalid context for no newline')
805 raise Exception('invalid context for no newline')
805
806
806 lines.append({
807 lines.append({
807 'old_lineno': None,
808 'old_lineno': None,
808 'new_lineno': None,
809 'new_lineno': None,
809 'action': action,
810 'action': action,
810 'line': self._clean_line(line, command)
811 'line': self._clean_line(line, command)
811 })
812 })
812
813
813 except StopIteration:
814 except StopIteration:
814 pass
815 pass
815
816
816 return ''.join(raw_diff), chunks, stats
817 return ''.join(raw_diff), chunks, stats
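
Each element of the chunks list returned here is a hunk dictionary. For a header such as '@@ -1,2 +1,2 @@ def foo():' the shape looks like this (line contents are illustrative):

hunk = {
    'section_header': ' def foo():',
    'source_start': 1,
    'source_length': 2,
    'target_start': 1,
    'target_length': 2,
    'lines': [
        {'old_lineno': 1, 'new_lineno': 1, 'action': Action.UNMODIFIED, 'line': 'def foo():'},
        {'old_lineno': 2, 'new_lineno': '', 'action': Action.DELETE, 'line': '    return 1'},
        {'old_lineno': '', 'new_lineno': 2, 'action': Action.ADD, 'line': '    return 2'},
    ],
}
# stats is the running [added, deleted] pair, here [1, 1].
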
817
818
818 def _safe_id(self, idstring):
819 def _safe_id(self, idstring):
819 """Make a string safe for including in an id attribute.
820 """Make a string safe for including in an id attribute.
820
821
821 The HTML spec says that id attributes 'must begin with
822 The HTML spec says that id attributes 'must begin with
822 a letter ([A-Za-z]) and may be followed by any number
823 a letter ([A-Za-z]) and may be followed by any number
823 of letters, digits ([0-9]), hyphens ("-"), underscores
824 of letters, digits ([0-9]), hyphens ("-"), underscores
824 ("_"), colons (":"), and periods (".")'. These regexps
825 ("_"), colons (":"), and periods (".")'. These regexps
825 are slightly over-zealous, in that they remove colons
826 are slightly over-zealous, in that they remove colons
826 and periods unnecessarily.
827 and periods unnecessarily.
827
828
828 Whitespace is transformed into underscores, and then
829 Whitespace is transformed into underscores, and then
829 anything which is not a hyphen or a character that
830 anything which is not a hyphen or a character that
830 matches \w (alphanumerics and underscore) is removed.
831 matches \w (alphanumerics and underscore) is removed.
831
832
832 """
833 """
833 # Transform all whitespace to underscore
834 # Transform all whitespace to underscore
834 idstring = re.sub(r'\s', "_", '%s' % idstring)
835 idstring = re.sub(r'\s', "_", '%s' % idstring)
835 # Remove everything that is not a hyphen or a member of \w
836 # Remove everything that is not a hyphen or a member of \w
836 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
837 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
837 return idstring
838 return idstring
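
A doctest-style illustration of the transformation (whitespace becomes an underscore, anything that is not a hyphen or a \w character is dropped, and the result is lowercased); processor stands for any DiffProcessor instance, since the method uses no instance state:

>>> processor._safe_id(u'My File.txt')
u'my_filetxt'
>>> processor._safe_id(u'setup.py')
u'setuppy'
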
838
839
839 @classmethod
840 @classmethod
840 def diff_splitter(cls, string):
841 def diff_splitter(cls, string):
841 """
842 """
842 Diff split that emulates .splitlines() but works only on \n
843 Diff split that emulates .splitlines() but works only on \n
843 """
844 """
844 if not string:
845 if not string:
845 return
846 return
846 elif string == '\n':
847 elif string == '\n':
847 yield u'\n'
848 yield u'\n'
848 else:
849 else:
849
850
850 has_newline = string.endswith('\n')
851 has_newline = string.endswith('\n')
851 elements = string.split('\n')
852 elements = string.split('\n')
852 if has_newline:
853 if has_newline:
853 # skip last element as it's empty string from newlines
854 # skip last element as it's empty string from newlines
854 elements = elements[:-1]
855 elements = elements[:-1]
855
856
856 len_elements = len(elements)
857 len_elements = len(elements)
857
858
858 for cnt, line in enumerate(elements, start=1):
859 for cnt, line in enumerate(elements, start=1):
859 last_line = cnt == len_elements
860 last_line = cnt == len_elements
860 if last_line and not has_newline:
861 if last_line and not has_newline:
861 yield safe_unicode(line)
862 yield safe_unicode(line)
862 else:
863 else:
863 yield safe_unicode(line) + '\n'
864 yield safe_unicode(line) + '\n'
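
A quick illustration of how diff_splitter differs from str.splitlines(): it only splits on '\n', keeps the newline on every yielded line, and leaves a final unterminated line as-is:

# classmethod, so no instance is needed
list(DiffProcessor.diff_splitter('a\r\nb\nc'))
# -> [u'a\r\n', u'b\n', u'c']   (splitlines() would also break on '\r')
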
864
865
865 def prepare(self, inline_diff=True):
866 def prepare(self, inline_diff=True):
866 """
867 """
867 Prepare the passed udiff for HTML rendering.
868 Prepare the passed udiff for HTML rendering.
868
869
869 :return: A list of dicts with diff information.
870 :return: A list of dicts with diff information.
870 """
871 """
871 parsed = self._parser(inline_diff=inline_diff)
872 parsed = self._parser(inline_diff=inline_diff)
872 self.parsed = True
873 self.parsed = True
873 self.parsed_diff = parsed
874 self.parsed_diff = parsed
874 return parsed
875 return parsed
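
A minimal end-to-end sketch, assuming vcs_diff is a diff object obtained from a repository backend (for example via repo.get_diff(...)); the limits and the 'newdiff' format value (anything other than 'gitdiff' selects the newer parser) are illustrative:

processor = DiffProcessor(
    vcs_diff, format='newdiff', diff_limit=1024 * 1024,
    file_limit=256 * 1024, show_full_diff=False)
parsed = processor.prepare()
for f in parsed:
    log.debug('%s: +%s -%s', f['filename'],
              f['stats']['added'], f['stats']['deleted'])
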
875
876
876 def as_raw(self, diff_lines=None):
877 def as_raw(self, diff_lines=None):
877 """
878 """
878 Returns raw diff as a byte string
879 Returns raw diff as a byte string
879 """
880 """
880 return self._diff.raw
881 return self._diff.raw
881
882
882 def as_html(self, table_class='code-difftable', line_class='line',
883 def as_html(self, table_class='code-difftable', line_class='line',
883 old_lineno_class='lineno old', new_lineno_class='lineno new',
884 old_lineno_class='lineno old', new_lineno_class='lineno new',
884 code_class='code', enable_comments=False, parsed_lines=None):
885 code_class='code', enable_comments=False, parsed_lines=None):
885 """
886 """
886 Return given diff as html table with customized css classes
887 Return given diff as html table with customized css classes
887 """
888 """
888 # TODO(marcink): not sure how to pass in translator
889 # TODO(marcink): not sure how to pass in translator
889 # here in an efficient way, leave the _ for proper gettext extraction
890 # here in an efficient way, leave the _ for proper gettext extraction
890 _ = lambda s: s
891 _ = lambda s: s
891
892
892 def _link_to_if(condition, label, url):
893 def _link_to_if(condition, label, url):
893 """
894 """
894 Generates a link if condition is meet or just the label if not.
895 Generates a link if condition is meet or just the label if not.
895 """
896 """
896
897
897 if condition:
898 if condition:
898 return '''<a href="%(url)s" class="tooltip"
899 return '''<a href="%(url)s" class="tooltip"
899 title="%(title)s">%(label)s</a>''' % {
900 title="%(title)s">%(label)s</a>''' % {
900 'title': _('Click to select line'),
901 'title': _('Click to select line'),
901 'url': url,
902 'url': url,
902 'label': label
903 'label': label
903 }
904 }
904 else:
905 else:
905 return label
906 return label
906 if not self.parsed:
907 if not self.parsed:
907 self.prepare()
908 self.prepare()
908
909
909 diff_lines = self.parsed_diff
910 diff_lines = self.parsed_diff
910 if parsed_lines:
911 if parsed_lines:
911 diff_lines = parsed_lines
912 diff_lines = parsed_lines
912
913
913 _html_empty = True
914 _html_empty = True
914 _html = []
915 _html = []
915 _html.append('''<table class="%(table_class)s">\n''' % {
916 _html.append('''<table class="%(table_class)s">\n''' % {
916 'table_class': table_class
917 'table_class': table_class
917 })
918 })
918
919
919 for diff in diff_lines:
920 for diff in diff_lines:
920 for line in diff['chunks']:
921 for line in diff['chunks']:
921 _html_empty = False
922 _html_empty = False
922 for change in line:
923 for change in line:
923 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
924 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
924 'lc': line_class,
925 'lc': line_class,
925 'action': change['action']
926 'action': change['action']
926 })
927 })
927 anchor_old_id = ''
928 anchor_old_id = ''
928 anchor_new_id = ''
929 anchor_new_id = ''
929 anchor_old = "%(filename)s_o%(oldline_no)s" % {
930 anchor_old = "%(filename)s_o%(oldline_no)s" % {
930 'filename': self._safe_id(diff['filename']),
931 'filename': self._safe_id(diff['filename']),
931 'oldline_no': change['old_lineno']
932 'oldline_no': change['old_lineno']
932 }
933 }
933 anchor_new = "%(filename)s_n%(oldline_no)s" % {
934 anchor_new = "%(filename)s_n%(oldline_no)s" % {
934 'filename': self._safe_id(diff['filename']),
935 'filename': self._safe_id(diff['filename']),
935 'oldline_no': change['new_lineno']
936 'oldline_no': change['new_lineno']
936 }
937 }
937 cond_old = (change['old_lineno'] != '...' and
938 cond_old = (change['old_lineno'] != '...' and
938 change['old_lineno'])
939 change['old_lineno'])
939 cond_new = (change['new_lineno'] != '...' and
940 cond_new = (change['new_lineno'] != '...' and
940 change['new_lineno'])
941 change['new_lineno'])
941 if cond_old:
942 if cond_old:
942 anchor_old_id = 'id="%s"' % anchor_old
943 anchor_old_id = 'id="%s"' % anchor_old
943 if cond_new:
944 if cond_new:
944 anchor_new_id = 'id="%s"' % anchor_new
945 anchor_new_id = 'id="%s"' % anchor_new
945
946
946 if change['action'] != Action.CONTEXT:
947 if change['action'] != Action.CONTEXT:
947 anchor_link = True
948 anchor_link = True
948 else:
949 else:
949 anchor_link = False
950 anchor_link = False
950
951
951 ###########################################################
952 ###########################################################
952 # COMMENT ICONS
953 # COMMENT ICONS
953 ###########################################################
954 ###########################################################
954 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
955 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
955
956
956 if enable_comments and change['action'] != Action.CONTEXT:
957 if enable_comments and change['action'] != Action.CONTEXT:
957 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
958 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
958
959
959 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
960 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
960
961
961 ###########################################################
962 ###########################################################
962 # OLD LINE NUMBER
963 # OLD LINE NUMBER
963 ###########################################################
964 ###########################################################
964 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
965 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
965 'a_id': anchor_old_id,
966 'a_id': anchor_old_id,
966 'olc': old_lineno_class
967 'olc': old_lineno_class
967 })
968 })
968
969
969 _html.append('''%(link)s''' % {
970 _html.append('''%(link)s''' % {
970 'link': _link_to_if(anchor_link, change['old_lineno'],
971 'link': _link_to_if(anchor_link, change['old_lineno'],
971 '#%s' % anchor_old)
972 '#%s' % anchor_old)
972 })
973 })
973 _html.append('''</td>\n''')
974 _html.append('''</td>\n''')
974 ###########################################################
975 ###########################################################
975 # NEW LINE NUMBER
976 # NEW LINE NUMBER
976 ###########################################################
977 ###########################################################
977
978
978 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
979 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
979 'a_id': anchor_new_id,
980 'a_id': anchor_new_id,
980 'nlc': new_lineno_class
981 'nlc': new_lineno_class
981 })
982 })
982
983
983 _html.append('''%(link)s''' % {
984 _html.append('''%(link)s''' % {
984 'link': _link_to_if(anchor_link, change['new_lineno'],
985 'link': _link_to_if(anchor_link, change['new_lineno'],
985 '#%s' % anchor_new)
986 '#%s' % anchor_new)
986 })
987 })
987 _html.append('''</td>\n''')
988 _html.append('''</td>\n''')
988 ###########################################################
989 ###########################################################
989 # CODE
990 # CODE
990 ###########################################################
991 ###########################################################
991 code_classes = [code_class]
992 code_classes = [code_class]
992 if (not enable_comments or
993 if (not enable_comments or
993 change['action'] == Action.CONTEXT):
994 change['action'] == Action.CONTEXT):
994 code_classes.append('no-comment')
995 code_classes.append('no-comment')
995 _html.append('\t<td class="%s">' % ' '.join(code_classes))
996 _html.append('\t<td class="%s">' % ' '.join(code_classes))
996 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
997 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
997 'code': change['line']
998 'code': change['line']
998 })
999 })
999
1000
1000 _html.append('''\t</td>''')
1001 _html.append('''\t</td>''')
1001 _html.append('''\n</tr>\n''')
1002 _html.append('''\n</tr>\n''')
1002 _html.append('''</table>''')
1003 _html.append('''</table>''')
1003 if _html_empty:
1004 if _html_empty:
1004 return None
1005 return None
1005 return ''.join(_html)
1006 return ''.join(_html)
1006
1007
1007 def stat(self):
1008 def stat(self):
1008 """
1009 """
1009 Returns a tuple of added and removed lines for this instance
1010 Returns a tuple of added and removed lines for this instance
1010 """
1011 """
1011 return self.adds, self.removes
1012 return self.adds, self.removes
1012
1013
1013 def get_context_of_line(
1014 def get_context_of_line(
1014 self, path, diff_line=None, context_before=3, context_after=3):
1015 self, path, diff_line=None, context_before=3, context_after=3):
1015 """
1016 """
1016 Returns the context lines for the specified diff line.
1017 Returns the context lines for the specified diff line.
1017
1018
1018 :type diff_line: :class:`DiffLineNumber`
1019 :type diff_line: :class:`DiffLineNumber`
1019 """
1020 """
1020 assert self.parsed, "DiffProcessor is not initialized."
1021 assert self.parsed, "DiffProcessor is not initialized."
1021
1022
1022 if None not in diff_line:
1023 if None not in diff_line:
1023 raise ValueError(
1024 raise ValueError(
1024 "Cannot specify both line numbers: {}".format(diff_line))
1025 "Cannot specify both line numbers: {}".format(diff_line))
1025
1026
1026 file_diff = self._get_file_diff(path)
1027 file_diff = self._get_file_diff(path)
1027 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1028 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1028
1029
1029 first_line_to_include = max(idx - context_before, 0)
1030 first_line_to_include = max(idx - context_before, 0)
1030 first_line_after_context = idx + context_after + 1
1031 first_line_after_context = idx + context_after + 1
1031 context_lines = chunk[first_line_to_include:first_line_after_context]
1032 context_lines = chunk[first_line_to_include:first_line_after_context]
1032
1033
1033 line_contents = [
1034 line_contents = [
1034 _context_line(line) for line in context_lines
1035 _context_line(line) for line in context_lines
1035 if _is_diff_content(line)]
1036 if _is_diff_content(line)]
1036 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1037 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1037 # Once they are fixed, we can drop this line here.
1038 # Once they are fixed, we can drop this line here.
1038 if line_contents:
1039 if line_contents:
1039 line_contents[-1] = (
1040 line_contents[-1] = (
1040 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1041 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1041 return line_contents
1042 return line_contents
1042
1043
1043 def find_context(self, path, context, offset=0):
1044 def find_context(self, path, context, offset=0):
1044 """
1045 """
1045 Finds the given `context` inside of the diff.
1046 Finds the given `context` inside of the diff.
1046
1047
1047 Use the parameter `offset` to specify which offset the target line has
1048 Use the parameter `offset` to specify which offset the target line has
1048 inside of the given `context`. This way the correct diff line will be
1049 inside of the given `context`. This way the correct diff line will be
1049 returned.
1050 returned.
1050
1051
1051 :param offset: Shall be used to specify the offset of the main line
1052 :param offset: Shall be used to specify the offset of the main line
1052 within the given `context`.
1053 within the given `context`.
1053 """
1054 """
1054 if offset < 0 or offset >= len(context):
1055 if offset < 0 or offset >= len(context):
1055 raise ValueError(
1056 raise ValueError(
1056 "Only positive values up to the length of the context "
1057 "Only positive values up to the length of the context "
1057 "minus one are allowed.")
1058 "minus one are allowed.")
1058
1059
1059 matches = []
1060 matches = []
1060 file_diff = self._get_file_diff(path)
1061 file_diff = self._get_file_diff(path)
1061
1062
1062 for chunk in file_diff['chunks']:
1063 for chunk in file_diff['chunks']:
1063 context_iter = iter(context)
1064 context_iter = iter(context)
1064 for line_idx, line in enumerate(chunk):
1065 for line_idx, line in enumerate(chunk):
1065 try:
1066 try:
1066 if _context_line(line) == context_iter.next():
1067 if _context_line(line) == context_iter.next():
1067 continue
1068 continue
1068 except StopIteration:
1069 except StopIteration:
1069 matches.append((line_idx, chunk))
1070 matches.append((line_idx, chunk))
1070 context_iter = iter(context)
1071 context_iter = iter(context)
1071
1072
1072 # Increment position and trigger StopIteration
1073 # Increment position and trigger StopIteration
1073 # if we had a match at the end
1074 # if we had a match at the end
1074 line_idx += 1
1075 line_idx += 1
1075 try:
1076 try:
1076 context_iter.next()
1077 context_iter.next()
1077 except StopIteration:
1078 except StopIteration:
1078 matches.append((line_idx, chunk))
1079 matches.append((line_idx, chunk))
1079
1080
1080 effective_offset = len(context) - offset
1081 effective_offset = len(context) - offset
1081 found_at_diff_lines = [
1082 found_at_diff_lines = [
1082 _line_to_diff_line_number(chunk[idx - effective_offset])
1083 _line_to_diff_line_number(chunk[idx - effective_offset])
1083 for idx, chunk in matches]
1084 for idx, chunk in matches]
1084
1085
1085 return found_at_diff_lines
1086 return found_at_diff_lines
1086
1087
1087 def _get_file_diff(self, path):
1088 def _get_file_diff(self, path):
1088 for file_diff in self.parsed_diff:
1089 for file_diff in self.parsed_diff:
1089 if file_diff['filename'] == path:
1090 if file_diff['filename'] == path:
1090 break
1091 break
1091 else:
1092 else:
1092 raise FileNotInDiffException("File {} not in diff".format(path))
1093 raise FileNotInDiffException("File {} not in diff".format(path))
1093 return file_diff
1094 return file_diff
1094
1095
1095 def _find_chunk_line_index(self, file_diff, diff_line):
1096 def _find_chunk_line_index(self, file_diff, diff_line):
1096 for chunk in file_diff['chunks']:
1097 for chunk in file_diff['chunks']:
1097 for idx, line in enumerate(chunk):
1098 for idx, line in enumerate(chunk):
1098 if line['old_lineno'] == diff_line.old:
1099 if line['old_lineno'] == diff_line.old:
1099 return chunk, idx
1100 return chunk, idx
1100 if line['new_lineno'] == diff_line.new:
1101 if line['new_lineno'] == diff_line.new:
1101 return chunk, idx
1102 return chunk, idx
1102 raise LineNotInDiffException(
1103 raise LineNotInDiffException(
1103 "The line {} is not part of the diff.".format(diff_line))
1104 "The line {} is not part of the diff.".format(diff_line))
1104
1105
1105
1106
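A minimal usage sketch for get_context_of_line() and find_context() above; the `processor` instance, file name and line numbers are hypothetical, assuming a DiffProcessor that has already parsed a diff touching 'setup.py':

# Hypothetical example: fetch three lines of context around new line 42
# of 'setup.py', then locate that same context again in the parsed diff.
diff_line = DiffLineNumber(old=None, new=42)
context = processor.get_context_of_line(
    'setup.py', diff_line=diff_line, context_before=3, context_after=3)
# `context` is a list of (action, line) tuples; find_context() returns the
# DiffLineNumber positions at which this snippet occurs in the diff.
matches = processor.find_context('setup.py', context, offset=3)
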
1106 def _is_diff_content(line):
1107 def _is_diff_content(line):
1107 return line['action'] in (
1108 return line['action'] in (
1108 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1109 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1109
1110
1110
1111
1111 def _context_line(line):
1112 def _context_line(line):
1112 return (line['action'], line['line'])
1113 return (line['action'], line['line'])
1113
1114
1114
1115
1115 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1116 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1116
1117
1117
1118
1118 def _line_to_diff_line_number(line):
1119 def _line_to_diff_line_number(line):
1119 new_line_no = line['new_lineno'] or None
1120 new_line_no = line['new_lineno'] or None
1120 old_line_no = line['old_lineno'] or None
1121 old_line_no = line['old_lineno'] or None
1121 return DiffLineNumber(old=old_line_no, new=new_line_no)
1122 return DiffLineNumber(old=old_line_no, new=new_line_no)
1122
1123
1123
1124
1124 class FileNotInDiffException(Exception):
1125 class FileNotInDiffException(Exception):
1125 """
1126 """
1126 Raised when the context for a missing file is requested.
1127 Raised when the context for a missing file is requested.
1127
1128
1128 If you request the context for a line in a file which is not part of the
1129 If you request the context for a line in a file which is not part of the
1129 given diff, then this exception is raised.
1130 given diff, then this exception is raised.
1130 """
1131 """
1131
1132
1132
1133
1133 class LineNotInDiffException(Exception):
1134 class LineNotInDiffException(Exception):
1134 """
1135 """
1135 Raised when the context for a missing line is requested.
1136 Raised when the context for a missing line is requested.
1136
1137
1137 If you request the context for a line in a file and this line is not
1138 If you request the context for a line in a file and this line is not
1138 part of the given diff, then this exception is raised.
1139 part of the given diff, then this exception is raised.
1139 """
1140 """
1140
1141
1141
1142
1142 class DiffLimitExceeded(Exception):
1143 class DiffLimitExceeded(Exception):
1143 pass
1144 pass
1144
1145
1145
1146
1146 # NOTE(marcink): if diffs.mako changes, this probably
1147 # NOTE(marcink): if diffs.mako changes, this probably
1147 # needs a bump to the next version
1148 # needs a bump to the next version
1148 CURRENT_DIFF_VERSION = 'v4'
1149 CURRENT_DIFF_VERSION = 'v4'
1149
1150
1150
1151
1151 def _cleanup_cache_file(cached_diff_file):
1152 def _cleanup_cache_file(cached_diff_file):
1152 # remove the cache file so we do not keep it in a damaged state
1153 # remove the cache file so we do not keep it in a damaged state
1153 try:
1154 try:
1154 os.remove(cached_diff_file)
1155 os.remove(cached_diff_file)
1155 except Exception:
1156 except Exception:
1156 log.exception('Failed to cleanup path %s', cached_diff_file)
1157 log.exception('Failed to cleanup path %s', cached_diff_file)
1157
1158
1158
1159
1159 def cache_diff(cached_diff_file, diff, commits):
1160 def cache_diff(cached_diff_file, diff, commits):
1160
1161
1161 struct = {
1162 struct = {
1162 'version': CURRENT_DIFF_VERSION,
1163 'version': CURRENT_DIFF_VERSION,
1163 'diff': diff,
1164 'diff': diff,
1164 'commits': commits
1165 'commits': commits
1165 }
1166 }
1166
1167
1167 try:
1168 try:
1168 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1169 with bz2.BZ2File(cached_diff_file, 'wb') as f:
1169 pickle.dump(struct, f)
1170 pickle.dump(struct, f)
1170 log.debug('Saved diff cache under %s', cached_diff_file)
1171 log.debug('Saved diff cache under %s', cached_diff_file)
1171 except Exception:
1172 except Exception:
1172 log.warn('Failed to save cache', exc_info=True)
1173 log.warn('Failed to save cache', exc_info=True)
1173 _cleanup_cache_file(cached_diff_file)
1174 _cleanup_cache_file(cached_diff_file)
1174
1175
1175
1176
1176 def load_cached_diff(cached_diff_file):
1177 def load_cached_diff(cached_diff_file):
1177
1178
1178 default_struct = {
1179 default_struct = {
1179 'version': CURRENT_DIFF_VERSION,
1180 'version': CURRENT_DIFF_VERSION,
1180 'diff': None,
1181 'diff': None,
1181 'commits': None
1182 'commits': None
1182 }
1183 }
1183
1184
1184 has_cache = os.path.isfile(cached_diff_file)
1185 has_cache = os.path.isfile(cached_diff_file)
1185 if not has_cache:
1186 if not has_cache:
1186 return default_struct
1187 return default_struct
1187
1188
1188 data = None
1189 data = None
1190 start = time.time()
1189 try:
1191 try:
1190 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1192 with bz2.BZ2File(cached_diff_file, 'rb') as f:
1191 data = pickle.load(f)
1193 data = pickle.load(f)
1192 log.debug('Loaded diff cache from %s', cached_diff_file)
1194 load_time = time.time() - start
1195 log.debug('Loaded diff cache from %s in %.3fs', cached_diff_file, load_time)
1193 except Exception:
1196 except Exception:
1194 log.warn('Failed to read diff cache file', exc_info=True)
1197 log.warn('Failed to read diff cache file', exc_info=True)
1195
1198
1196 if not data:
1199 if not data:
1197 data = default_struct
1200 data = default_struct
1198
1201
1199 if not isinstance(data, dict):
1202 if not isinstance(data, dict):
1200 # old version of data ?
1203 # old version of data ?
1201 data = default_struct
1204 data = default_struct
1202
1205
1203 # check version
1206 # check version
1204 if data.get('version') != CURRENT_DIFF_VERSION:
1207 if data.get('version') != CURRENT_DIFF_VERSION:
1205 # purge cache
1208 # purge cache
1206 _cleanup_cache_file(cached_diff_file)
1209 _cleanup_cache_file(cached_diff_file)
1207 return default_struct
1210 return default_struct
1208
1211
1209 return data
1212 return data
1210
1213
1211
1214
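A hedged round-trip sketch for cache_diff() and load_cached_diff(); the file path, `parsed_diff` and `commit_list` below are made up for illustration:

# Hypothetical round trip: persist a parsed diff as a bz2-compressed pickle,
# then read it back. load_cached_diff() logs how long the read took and falls
# back to the default struct when the file is missing, unreadable, or stored
# with a version other than CURRENT_DIFF_VERSION.
cached_diff_file = '/tmp/example_diff_cache.bz2'
cache_diff(cached_diff_file, diff=parsed_diff, commits=commit_list)

cached = load_cached_diff(cached_diff_file)
if cached['diff'] is not None:
    parsed_diff, commit_list = cached['diff'], cached['commits']
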
1212 def generate_diff_cache_key(*args):
1215 def generate_diff_cache_key(*args):
1213 """
1216 """
1214 Helper to generate a cache key using arguments
1217 Helper to generate a cache key using arguments
1215 """
1218 """
1216 def arg_mapper(input_param):
1219 def arg_mapper(input_param):
1217 input_param = safe_str(input_param)
1220 input_param = safe_str(input_param)
1218 # we cannot allow '/' in arguments since it would allow
1221 # we cannot allow '/' in arguments since it would allow
1219 # subdirectory usage
1222 # subdirectory usage
1220 input_param = input_param.replace('/', '_')
1223 input_param = input_param.replace('/', '_')
1221 return input_param or None # prevent empty string arguments
1224 return input_param or None # prevent empty string arguments
1222
1225
1223 return '_'.join([
1226 return '_'.join([
1224 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1227 '{}' for i in range(len(args))]).format(*map(arg_mapper, args))
1225
1228
1226
1229
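For illustration, a cache key built from hypothetical arguments; each part is run through arg_mapper() and the sanitized parts are joined with '_':

# Hypothetical inputs; any '/' inside a part is replaced before joining.
key = generate_diff_cache_key('pull-request', 42, 'abc123', 'def456')
# -> 'pull-request_42_abc123_def456'
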
1227 def diff_cache_exist(cache_storage, *args):
1230 def diff_cache_exist(cache_storage, *args):
1228 """
1231 """
1229 Based on all generated arguments check and return a cache path
1232 Based on all generated arguments check and return a cache path
1230 """
1233 """
1231 cache_key = generate_diff_cache_key(*args)
1234 cache_key = generate_diff_cache_key(*args)
1232 cache_file_path = os.path.join(cache_storage, cache_key)
1235 cache_file_path = os.path.join(cache_storage, cache_key)
1233 # prevent path traversal attacks via params that contain e.g. '../../'
1236 # prevent path traversal attacks via params that contain e.g. '../../'
1234 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1237 if not os.path.abspath(cache_file_path).startswith(cache_storage):
1235 raise ValueError('Final path must be within {}'.format(cache_storage))
1238 raise ValueError('Final path must be within {}'.format(cache_storage))
1236
1239
1237 return cache_file_path
1240 return cache_file_path
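
A usage sketch for diff_cache_exist(); the storage directory and key parts are hypothetical:

# Hypothetical lookup: build the on-disk cache path for a diff and guard
# against key parts that would resolve outside of cache_storage.
cache_storage = '/var/cache/rc_diff_cache'
try:
    cached_diff_file = diff_cache_exist(
        cache_storage, 'pull-request', 42, 'abc123', 'def456')
except ValueError:
    cached_diff_file = None  # a crafted argument escaped the storage dir

if cached_diff_file:
    cached = load_cached_diff(cached_diff_file)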