rhodecode-enterprise-ce Commit - r3854:7b87073e

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

3

4

#

4

#

5

# This program is free software: you can redistribute it and/or modify

5

# This program is free software: you can redistribute it and/or modify

6

# it under the terms of the GNU Affero General Public License, version 3

6

# it under the terms of the GNU Affero General Public License, version 3

7

# (only), as published by the Free Software Foundation.

7

# (only), as published by the Free Software Foundation.

8

#

8

#

9

# This program is distributed in the hope that it will be useful,

9

# This program is distributed in the hope that it will be useful,

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12

# GNU General Public License for more details.

12

# GNU General Public License for more details.

13

#

13

#

14

# You should have received a copy of the GNU Affero General Public License

14

# You should have received a copy of the GNU Affero General Public License

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

16

#

16

#

17

# This program is dual-licensed. If you wish to learn more about the

17

# This program is dual-licensed. If you wish to learn more about the

18

# RhodeCode Enterprise Edition, including its added features, Support services,

18

# RhodeCode Enterprise Edition, including its added features, Support services,

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

20

21

22

"""

22

"""

23

Set of diffing helpers, previously part of vcs

23

Set of diffing helpers, previously part of vcs

24

"""

24

"""

25

26

import os

26

import os

27

import re

27

import re

28

import bz2

28

import bz2

29

import gzip

29

import time

30

import time

30

31

import collections

32

import collections

32

import difflib

33

import difflib

33

import logging

34

import logging

34

import cPickle as pickle

35

import cPickle as pickle

35

from itertools import tee, imap

36

from itertools import tee, imap

36

37

from rhodecode.lib.vcs.exceptions import VCSError

38

from rhodecode.lib.vcs.exceptions import VCSError

38

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

39

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

39

from rhodecode.lib.utils2 import safe_unicode, safe_str

40

from rhodecode.lib.utils2 import safe_unicode, safe_str

40

41

log = logging.getLogger(__name__)

42

log = logging.getLogger(__name__)

42

43

# Maximum number of context lines: a file with more lines than this is
# unusable in a browser anyway.
MAX_CONTEXT = 20 * 1024

# Number of context lines used when nothing else is requested.
DEFAULT_CONTEXT = 3

47

48

49

def get_diff_context(request):
    """
    Pick the number of diff context lines for a request.

    :param request: object exposing a ``GET`` mapping of query parameters
    :return: ``MAX_CONTEXT`` when the ``fullcontext`` parameter equals
        ``'1'``, otherwise ``DEFAULT_CONTEXT``
    """
    wants_full_context = request.GET.get('fullcontext', '') == '1'
    if wants_full_context:
        return MAX_CONTEXT
    return DEFAULT_CONTEXT

51

52

53

def get_diff_whitespace_flag(request):
    """
    Tell whether the request asks for whitespace to be ignored in diffs.

    :param request: object exposing a ``GET`` mapping of query parameters
    :return: ``True`` only when the ``ignorews`` parameter equals ``'1'``
    """
    flag = request.GET.get('ignorews', '')
    return flag == '1'

55

56

57

class OPS(object):
    """
    Single-letter codes for the operation recorded for a file in a diff.
    """

    ADD = 'A'  # file was added
    MOD = 'M'  # file was modified
    DEL = 'D'  # file was deleted

61

62

63

def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: node describing the old side of the diff
    :param filenode_new: node describing the new side of the diff; its
        commit's repository is the one asked to produce the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value falls back to 3
        and anything above ``MAX_CONTEXT`` is capped to ``MAX_CONTEXT``
    :return: an empty string when either node is a ``SubModuleNode``,
        otherwise whatever ``repo.get_diff`` returns
    :raises VCSError: when either node is not a ``FileNode``
    """
    # make sure we pass in default context, and protect against IntOverflow
    # when passing HUGE context
    context = min(context or 3, MAX_CONTEXT)

    # submodules are not diffed at all
    has_submodule = any(
        isinstance(node, SubModuleNode)
        for node in (filenode_new, filenode_old))
    if has_submodule:
        return ''

    for filenode in (filenode_old, filenode_new):
        if isinstance(filenode, FileNode):
            continue
        raise VCSError(
            "Given object should be FileNode object, not %s"
            % filenode.__class__)

    new_commit = filenode_new.commit
    repo = new_commit.repository
    # an old node without a commit is diffed against the empty commit
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT

    return repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)

94

95

# Integer keys used in the per-file ``stats['ops']`` mapping to tell which
# kind of change a message describes.
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
COPIED_FILENODE = 5
CHMOD_FILENODE = 6
BIN_FILENODE = 7

102

103

104

class LimitedDiffContainer(object):
    """
    Wrapper around a parsed diff that also carries the limit and the diff
    size it was created with. Indexing and iteration are delegated to the
    wrapped diff.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        """
        :param diff_limit: limit value to remember
        :param cur_diff_size: diff size value to remember
        :param diff: the wrapped diff object
        """
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size
        self.diff = diff

    def __getitem__(self, key):
        return self.diff[key]

    def __iter__(self):
        for entry in self.diff:
            yield entry

117

118

119

class Action(object):
    """
    Contains constants for the action value of the lines in a parsed diff.
    """

    # line level changes
    ADD = 'add'
    DELETE = 'del'
    UNMODIFIED = 'unmod'

    # non-content lines
    CONTEXT = 'context'
    OLD_NO_NL = 'old-no-nl'
    NEW_NO_NL = 'new-no-nl'

131

132

133

class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.

    .. note:: Unicode handling

       The original diffs are a byte sequence and can contain filenames
       in mixed encodings. This class generally returns `unicode` objects
       since the result is intended for presentation to the user.

    """
    # hunk header ``@@ -old_start[,old_len] +new_start[,new_len] @@ rest``;
    # the two lengths are optional groups
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    # the literal ``\ No newline at end of file`` marker line
    _newline_marker = re.compile(r'^\\ No newline at end of file')

    # used for inline highlighter word split; the HTML entities are listed
    # explicitly so that an entity is kept as a single token instead of being
    # cut at its ``&`` and ``;`` characters
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

    # collapse ranges of commits over given number
    _collapse_commits_over = 5

154

155

def __init__(self, diff, format='gitdiff', diff_limit=None,
             file_limit=None, show_full_diff=True):
    """
    :param diff: A `Diff` object representing a diff from a vcs backend
    :param format: format of diff passed, `udiff` or `gitdiff`
    :param diff_limit: define the size of diff that is considered "big"
        based on that parameter cut off will be triggered, set to None
        to show full diff
    :param file_limit: size the raw diff of a single file is compared
        against when parsing
    :param show_full_diff: when true the limit checks never raise
    """
    self._diff = diff
    self._format = format

    # limits
    self.diff_limit = diff_limit
    self.file_limit = file_limit
    self.show_full_diff = show_full_diff

    # running counters
    self.adds = 0
    self.removes = 0
    self.cur_diff_size = 0

    # parse state
    self.parsed = False
    self.parsed_diff = []

    log.debug('Initialized DiffProcessor with %s mode', format)
    is_gitdiff = format == 'gitdiff'
    # every format other than 'gitdiff' uses the udiff highlighter together
    # with the new git diff parser
    self.differ = (
        self._highlight_line_difflib if is_gitdiff
        else self._highlight_line_udiff)
    self._parser = (
        self._parse_gitdiff if is_gitdiff
        else self._new_parse_gitdiff)

183

184

def _copy_iterator(self):

185

def _copy_iterator(self):

185

"""

186

"""

186

make a fresh copy of generator, we should not iterate thru

187

make a fresh copy of generator, we should not iterate thru

187

an original as it's needed for repeating operations on

188

an original as it's needed for repeating operations on

188

this instance of DiffProcessor

189

this instance of DiffProcessor

189

"""

190

"""

190

self.__udiff, iterator_copy = tee(self.__udiff)

191

self.__udiff, iterator_copy = tee(self.__udiff)

191

return iterator_copy

192

return iterator_copy

192

193

def _escaper(self, string):

194

def _escaper(self, string):

194

"""

195

"""

195

Escaper for diff escapes special chars and checks the diff limit

196

Escaper for diff escapes special chars and checks the diff limit

196

197

:param string:

198

:param string:

198

"""

199

"""

199

self.cur_diff_size += len(string)

200

self.cur_diff_size += len(string)

200

201

if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

202

if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

202

raise DiffLimitExceeded('Diff Limit Exceeded')

203

raise DiffLimitExceeded('Diff Limit Exceeded')

203

204

return string \

205

return string \

205

.replace('&', '&')\

206

.replace('&', '&')\

206

.replace('<', '<')\

207

.replace('<', '<')\

207

.replace('>', '>')

208

.replace('>', '>')

208

209

def _line_counter(self, l):
    """
    Checks each line and bumps total adds/removes for this diff

    Lines starting with ``+++`` or ``---`` are not counted.

    :param l: a single diff line
    :return: the line passed through ``safe_unicode``
    """
    if l.startswith('+'):
        if not l.startswith('+++'):
            self.adds += 1
    elif l.startswith('-') and not l.startswith('---'):
        self.removes += 1
    return safe_unicode(l)

220

221

def _highlight_line_difflib(self, line, next_):
    """
    Highlight inline changes in both lines.

    Both line dicts get their ``'line'`` value rewritten in place: tokens
    that differ are wrapped in ``<del>`` on the old side and ``<ins>`` on
    the new side.

    :param line: parsed line dict with ``'action'`` and ``'line'`` keys
    :param next_: the parsed line dict paired with ``line``
    """
    # whichever of the two is the deletion is the "old" side
    if line['action'] == Action.DELETE:
        old, new = line, next_
    else:
        old, new = next_, line

    old_tokens = self._token_re.split(old['line'])
    new_tokens = self._token_re.split(new['line'])
    matcher = difflib.SequenceMatcher(None, old_tokens, new_tokens)

    old_parts = []
    new_parts = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        old_piece = ''.join(old_tokens[i1:i2])
        new_piece = ''.join(new_tokens[j1:j2])
        changed = tag != 'equal'
        # empty pieces are never wrapped, so no empty tags are emitted
        if changed and old_piece:
            old_piece = '<del>%s</del>' % old_piece
        if changed and new_piece:
            new_piece = '<ins>%s</ins>' % new_piece
        old_parts.append(old_piece)
        new_parts.append(new_piece)

    old['line'] = "".join(old_parts)
    new['line'] = "".join(new_parts)

249

250

def _highlight_line_udiff(self, line, next_):
    """
    Highlight inline changes in both lines.

    The common prefix and common suffix of the two texts are left alone and
    the part in between is wrapped in ``<ins>`` (for an added line) or
    ``<del>`` (for any other action). Both dicts are modified in place.

    :param line: parsed line dict with ``'action'`` and ``'line'`` keys
    :param next_: the parsed line dict paired with ``line``
    """
    first = line['line']
    second = next_['line']
    shortest = min(len(first), len(second))

    # length of the common prefix
    start = 0
    while start < shortest and first[start] == second[start]:
        start += 1

    # common suffix, tracked as a negative index; it is only allowed to use
    # the characters the prefix did not already consume
    remaining = shortest - start
    end = -1
    while -end <= remaining and first[end] == second[end]:
        end -= 1
    end += 1

    if not (start or end):
        # nothing in common at either end, leave both lines untouched
        return

    def wrap_changed(entry):
        text = entry['line']
        last = end + len(text)
        tag = 'ins' if entry['action'] == Action.ADD else 'del'
        entry['line'] = '%s<%s>%s</%s>%s' % (
            text[:start],
            tag,
            text[start:last],
            tag,
            text[last:]
        )

    wrap_changed(line)
    wrap_changed(next_)

279

280

def _clean_line(self, line, command):

281

def _clean_line(self, line, command):

281

if command in ['+', '-', ' ']:

282

if command in ['+', '-', ' ']:

282

# only modify the line if it's actually a diff thing

283

# only modify the line if it's actually a diff thing

283

line = line[1:]

284

line = line[1:]

284

return line

285

return line

285

286

def _parse_gitdiff(self, inline_diff=True):
    """
    Parse every chunk of ``self._diff`` into a per-file dict.

    Each dict carries the keys ``filename``, ``old_revision``,
    ``new_revision``, ``chunks``, ``raw_diff``, ``operation``, ``stats``,
    ``exceeds_limit`` and ``is_limited_diff``.

    :param inline_diff: when true, ``self.differ`` is run over pairs of
        adjacent changed lines to highlight inline changes
    :return: the per-file dicts sorted as added, modified, deleted; the
        result is wrapped in a ``LimitedDiffContainer`` once any file hit
        a limit
    """
    parsed_files = []
    # identity by default, replaced by a LimitedDiffContainer factory as
    # soon as one file trips a limit
    diff_container = lambda arg: arg

    for chunk in self._diff.chunks():
        head = chunk.header

        diff = imap(self._escaper, self.diff_splitter(chunk.diff))
        raw_diff = chunk.raw
        limited_diff = False
        exceeds_limit = False

        op = None
        ops = {}
        stats = {
            'added': 0,
            'deleted': 0,
            'binary': False,
            'ops': ops,
        }

        def mark(kind, message):
            # until real lines are parsed every file is treated as binary
            stats['binary'] = True
            ops[kind] = message

        if head['deleted_file_mode']:
            op = OPS.DEL
            mark(DEL_FILENODE, 'deleted file')

        elif head['new_file_mode']:
            op = OPS.ADD
            mark(NEW_FILENODE, 'new file %s' % head['new_file_mode'])

        else:
            # modify operation, can be copy, rename or chmod; the three
            # checks are independent of each other

            # CHMOD
            if head['new_mode'] and head['old_mode']:
                op = OPS.MOD
                mark(CHMOD_FILENODE,
                     'modified file chmod %s => %s' % (
                         head['old_mode'], head['new_mode']))
            # RENAME
            if head['rename_from'] != head['rename_to']:
                op = OPS.MOD
                mark(RENAMED_FILENODE,
                     'file renamed from %s to %s' % (
                         head['rename_from'], head['rename_to']))
            # COPY
            if head.get('copy_from') and head.get('copy_to'):
                op = OPS.MOD
                mark(COPIED_FILENODE,
                     'file copied from %s to %s' % (
                         head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    mark(NEW_FILENODE, 'new file')
                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    mark(DEL_FILENODE, 'deleted file')

            # it's not ADD not DELETE
            if op is None:
                op = OPS.MOD
                mark(MOD_FILENODE, 'modified file')

        if head['a_file'] or head['b_file']:
            # a real non-binary diff
            try:
                raw_diff, chunks, line_stats = self._parse_lines(diff)
                stats['binary'] = False
                stats['added'] = line_stats[0]
                stats['deleted'] = line_stats[1]
                # explicit mark that it's a modified file
                if op == OPS.MOD:
                    ops[MOD_FILENODE] = 'modified file'
                exceeds_limit = len(raw_diff) > self.file_limit

                # changed from _escaper function so we validate size of
                # each file instead of the whole diff
                # diff will hide big files but still show small ones
                # from my tests, big files are fairly safe to be parsed
                # but the browser is the bottleneck
                if not self.show_full_diff and exceeds_limit:
                    raise DiffLimitExceeded('File Limit Exceeded')

            except DiffLimitExceeded:
                # the attributes are read when the container is built, not
                # here, hence the lambda
                diff_container = lambda _diff: \
                    LimitedDiffContainer(
                        self.diff_limit, self.cur_diff_size, _diff)

                exceeds_limit = len(raw_diff) > self.file_limit
                limited_diff = True
                chunks = []

        else:
            # GIT format binary patch, or possibly empty diff
            if head['bin_patch']:
                # we have operation already extracted, but we mark simply
                # it's a diff we wont show for binary files
                ops[BIN_FILENODE] = 'binary diff hidden'
            chunks = []

        if chunks and not self.show_full_diff and op == OPS.DEL:
            # if not full diff mode show deleted file contents
            # TODO: anderson: if the view is not too big, there is no way
            # to see the content of the file
            chunks = []

        # first chunk lists the operation messages, except plain "modified"
        chunks.insert(0, [{
            'old_lineno': '',
            'new_lineno': '',
            'action': Action.CONTEXT,
            'line': msg,
        } for _op, msg in ops.iteritems()
            if _op != MOD_FILENODE])

        parsed_files.append({
            'filename': safe_unicode(head['b_path']),
            'old_revision': head['a_blob_id'],
            'new_revision': head['b_blob_id'],
            'chunks': chunks,
            'raw_diff': safe_unicode(raw_diff),
            'operation': op,
            'stats': stats,
            'exceeds_limit': exceeds_limit,
            'is_limited_diff': limited_diff,
        })

    sort_rank = {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}
    sorter = lambda info: sort_rank.get(info['operation'])

    if inline_diff:
        # highlight inline changes
        unchanged = (Action.UNMODIFIED, Action.CONTEXT)
        for diff_data in parsed_files:
            for lines in diff_data['chunks']:
                lineiter = iter(lines)
                try:
                    while True:
                        line = next(lineiter)
                        if line['action'] in unchanged:
                            continue
                        nextline = next(lineiter)
                        # only a changed line directly followed by a
                        # changed line of the other kind is highlighted
                        if nextline['action'] in unchanged or \
                                nextline['action'] == line['action']:
                            continue
                        self.differ(line, nextline)
                except StopIteration:
                    pass

    return diff_container(sorted(parsed_files, key=sorter))

444

445

def _check_large_diff(self):

446

def _check_large_diff(self):

446

log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)

447

log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)

447

if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

448

if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

448

raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)

449

raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)

449

450

# FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff

451

# FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff

451

def _new_parse_gitdiff(self, inline_diff=True):

452

def _new_parse_gitdiff(self, inline_diff=True):

452

_files = []

453

_files = []

453

454

# this can be overriden later to a LimitedDiffContainer type

455

# this can be overriden later to a LimitedDiffContainer type

455

diff_container = lambda arg: arg

456

diff_container = lambda arg: arg

456

457

for chunk in self._diff.chunks():

458

for chunk in self._diff.chunks():

458

head = chunk.header

459

head = chunk.header

459

log.debug('parsing diff %r', head)

460

log.debug('parsing diff %r', head)

460

461

raw_diff = chunk.raw

462

raw_diff = chunk.raw

462

limited_diff = False

463

limited_diff = False

463

exceeds_limit = False

464

exceeds_limit = False

464

465

op = None

466

op = None

466

stats = {

467

stats = {

467

'added': 0,

468

'added': 0,

468

'deleted': 0,

469

'deleted': 0,

469

'binary': False,

470

'binary': False,

470

'old_mode': None,

471

'old_mode': None,

471

'new_mode': None,

472

'new_mode': None,

472

'ops': {},

473

'ops': {},

473

}

474

}

474

if head['old_mode']:

475

if head['old_mode']:

475

stats['old_mode'] = head['old_mode']

476

stats['old_mode'] = head['old_mode']

476

if head['new_mode']:

477

if head['new_mode']:

477

stats['new_mode'] = head['new_mode']

478

stats['new_mode'] = head['new_mode']

478

if head['b_mode']:

479

if head['b_mode']:

479

stats['new_mode'] = head['b_mode']

480

stats['new_mode'] = head['b_mode']

480

481

# delete file

482

# delete file

482

if head['deleted_file_mode']:

483

if head['deleted_file_mode']:

483

op = OPS.DEL

484

op = OPS.DEL

484

stats['binary'] = True

485

stats['binary'] = True

485

stats['ops'][DEL_FILENODE] = 'deleted file'

486

stats['ops'][DEL_FILENODE] = 'deleted file'

486

487

# new file

488

# new file

488

elif head['new_file_mode']:

489

elif head['new_file_mode']:

489

op = OPS.ADD

490

op = OPS.ADD

490

stats['binary'] = True

491

stats['binary'] = True

491

stats['old_mode'] = None

492

stats['old_mode'] = None

492

stats['new_mode'] = head['new_file_mode']

493

stats['new_mode'] = head['new_file_mode']

493

stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

494

stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

494

495

# modify operation, can be copy, rename or chmod

496

# modify operation, can be copy, rename or chmod

496

else:

497

else:

497

# CHMOD

498

# CHMOD

498

if head['new_mode'] and head['old_mode']:

499

if head['new_mode'] and head['old_mode']:

499

op = OPS.MOD

500

op = OPS.MOD

500

stats['binary'] = True

501

stats['binary'] = True

501

stats['ops'][CHMOD_FILENODE] = (

502

stats['ops'][CHMOD_FILENODE] = (

502

'modified file chmod %s => %s' % (

503

'modified file chmod %s => %s' % (

503

head['old_mode'], head['new_mode']))

504

head['old_mode'], head['new_mode']))

504

505

# RENAME

506

# RENAME

506

if head['rename_from'] != head['rename_to']:

507

if head['rename_from'] != head['rename_to']:

507

op = OPS.MOD

508

op = OPS.MOD

508

stats['binary'] = True

509

stats['binary'] = True

509

stats['renamed'] = (head['rename_from'], head['rename_to'])

510

stats['renamed'] = (head['rename_from'], head['rename_to'])

510

stats['ops'][RENAMED_FILENODE] = (

511

stats['ops'][RENAMED_FILENODE] = (

511

'file renamed from %s to %s' % (

512

'file renamed from %s to %s' % (

512

head['rename_from'], head['rename_to']))

513

head['rename_from'], head['rename_to']))

513

# COPY

514

# COPY

514

if head.get('copy_from') and head.get('copy_to'):

515

if head.get('copy_from') and head.get('copy_to'):

515

op = OPS.MOD

516

op = OPS.MOD

516

stats['binary'] = True

517

stats['binary'] = True

517

stats['copied'] = (head['copy_from'], head['copy_to'])

518

stats['copied'] = (head['copy_from'], head['copy_to'])

518

stats['ops'][COPIED_FILENODE] = (

519

stats['ops'][COPIED_FILENODE] = (

519

'file copied from %s to %s' % (

520

'file copied from %s to %s' % (

520

head['copy_from'], head['copy_to']))

521

head['copy_from'], head['copy_to']))

521

522

# If our new parsed headers didn't match anything fallback to

523

# If our new parsed headers didn't match anything fallback to

523

# old style detection

524

# old style detection

524

if op is None:

525

if op is None:

525

if not head['a_file'] and head['b_file']:

526

if not head['a_file'] and head['b_file']:

526

op = OPS.ADD

527

op = OPS.ADD

527

stats['binary'] = True

528

stats['binary'] = True

528

stats['new_file'] = True

529

stats['new_file'] = True

529

stats['ops'][NEW_FILENODE] = 'new file'

530

stats['ops'][NEW_FILENODE] = 'new file'

530

531

elif head['a_file'] and not head['b_file']:

532

elif head['a_file'] and not head['b_file']:

532

op = OPS.DEL

533

op = OPS.DEL

533

stats['binary'] = True

534

stats['binary'] = True

534

stats['ops'][DEL_FILENODE] = 'deleted file'

535

stats['ops'][DEL_FILENODE] = 'deleted file'

535

536

# it's not ADD not DELETE

537

# it's not ADD not DELETE

537

if op is None:

538

if op is None:

538

op = OPS.MOD

539

op = OPS.MOD

539

stats['binary'] = True

540

stats['binary'] = True

540

stats['ops'][MOD_FILENODE] = 'modified file'

541

stats['ops'][MOD_FILENODE] = 'modified file'

541

542

# a real non-binary diff

543

# a real non-binary diff

543

if head['a_file'] or head['b_file']:

544

if head['a_file'] or head['b_file']:

544

# simulate splitlines, so we keep the line end part

545

# simulate splitlines, so we keep the line end part

545

diff = self.diff_splitter(chunk.diff)

546

diff = self.diff_splitter(chunk.diff)

546

547

# append each file to the diff size

548

# append each file to the diff size

548

raw_chunk_size = len(raw_diff)

549

raw_chunk_size = len(raw_diff)

549

550

exceeds_limit = raw_chunk_size > self.file_limit

551

exceeds_limit = raw_chunk_size > self.file_limit

551

self.cur_diff_size += raw_chunk_size

552

self.cur_diff_size += raw_chunk_size

552

553

try:

554

try:

554

# Check each file instead of the whole diff.

555

# Check each file instead of the whole diff.

555

# Diff will hide big files but still show small ones.

556

# Diff will hide big files but still show small ones.

556

# From the tests big files are fairly safe to be parsed

557

# From the tests big files are fairly safe to be parsed

557

# but the browser is the bottleneck.

558

# but the browser is the bottleneck.

558

if not self.show_full_diff and exceeds_limit:

559

if not self.show_full_diff and exceeds_limit:

559

log.debug('File `%s` exceeds current file_limit of %s',

560

log.debug('File `%s` exceeds current file_limit of %s',

560

safe_unicode(head['b_path']), self.file_limit)

561

safe_unicode(head['b_path']), self.file_limit)

561

raise DiffLimitExceeded(

562

raise DiffLimitExceeded(

562

'File Limit %s Exceeded', self.file_limit)

563

'File Limit %s Exceeded', self.file_limit)

563

564

self._check_large_diff()

565

self._check_large_diff()

565

566

raw_diff, chunks, _stats = self._new_parse_lines(diff)

567

raw_diff, chunks, _stats = self._new_parse_lines(diff)

567

stats['binary'] = False

568

stats['binary'] = False

568

stats['added'] = _stats[0]

569

stats['added'] = _stats[0]

569

stats['deleted'] = _stats[1]

570

stats['deleted'] = _stats[1]

570

# explicit mark that it's a modified file

571

# explicit mark that it's a modified file

571

if op == OPS.MOD:

572

if op == OPS.MOD:

572

stats['ops'][MOD_FILENODE] = 'modified file'

573

stats['ops'][MOD_FILENODE] = 'modified file'

573

574

except DiffLimitExceeded:

575

except DiffLimitExceeded:

575

diff_container = lambda _diff: \

576

diff_container = lambda _diff: \

576

LimitedDiffContainer(

577

LimitedDiffContainer(

577

self.diff_limit, self.cur_diff_size, _diff)

578

self.diff_limit, self.cur_diff_size, _diff)

578

579

limited_diff = True

580

limited_diff = True

580

chunks = []

581

chunks = []

581

582

else: # GIT format binary patch, or possibly empty diff

583

else: # GIT format binary patch, or possibly empty diff

583

if head['bin_patch']:

584

if head['bin_patch']:

584

# we have operation already extracted, but we mark simply

585

# we have operation already extracted, but we mark simply

585

# it's a diff we wont show for binary files

586

# it's a diff we wont show for binary files

586

stats['ops'][BIN_FILENODE] = 'binary diff hidden'

587

stats['ops'][BIN_FILENODE] = 'binary diff hidden'

587

chunks = []

588

chunks = []

588

589

# Hide content of deleted node by setting empty chunks

590

# Hide content of deleted node by setting empty chunks

590

if chunks and not self.show_full_diff and op == OPS.DEL:

591

if chunks and not self.show_full_diff and op == OPS.DEL:

591

# if not full diff mode show deleted file contents

592

# if not full diff mode show deleted file contents

592

# TODO: anderson: if the view is not too big, there is no way

593

# TODO: anderson: if the view is not too big, there is no way

593

# to see the content of the file

594

# to see the content of the file

594

chunks = []

595

chunks = []

595

596

chunks.insert(

597

chunks.insert(

597

0, [{'old_lineno': '',

598

0, [{'old_lineno': '',

598

'new_lineno': '',

599

'new_lineno': '',

599

'action': Action.CONTEXT,

600

'action': Action.CONTEXT,

600

'line': msg,

601

'line': msg,

601

} for _op, msg in stats['ops'].iteritems()

602

} for _op, msg in stats['ops'].iteritems()

602

if _op not in [MOD_FILENODE]])

603

if _op not in [MOD_FILENODE]])

603

604

original_filename = safe_unicode(head['a_path'])

605

original_filename = safe_unicode(head['a_path'])

605

_files.append({

606

_files.append({

606

'original_filename': original_filename,

607

'original_filename': original_filename,

607

'filename': safe_unicode(head['b_path']),

608

'filename': safe_unicode(head['b_path']),

608

'old_revision': head['a_blob_id'],

609

'old_revision': head['a_blob_id'],

609

'new_revision': head['b_blob_id'],

610

'new_revision': head['b_blob_id'],

610

'chunks': chunks,

611

'chunks': chunks,

611

'raw_diff': safe_unicode(raw_diff),

612

'raw_diff': safe_unicode(raw_diff),

612

'operation': op,

613

'operation': op,

613

'stats': stats,

614

'stats': stats,

614

'exceeds_limit': exceeds_limit,

615

'exceeds_limit': exceeds_limit,

615

'is_limited_diff': limited_diff,

616

'is_limited_diff': limited_diff,

616

})

617

})

617

618

sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,

619

sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,

619

OPS.DEL: 2}.get(info['operation'])

620

OPS.DEL: 2}.get(info['operation'])

620

621

return diff_container(sorted(_files, key=sorter))

622

return diff_container(sorted(_files, key=sorter))

622

623

# FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines

624

# FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines

624

def _parse_lines(self, diff_iter):

625

def _parse_lines(self, diff_iter):

625

"""

626

"""

626

Parse the diff an return data for the template.

627

Parse the diff an return data for the template.

627

"""

628

"""

628

629

stats = [0, 0]

630

stats = [0, 0]

630

chunks = []

631

chunks = []

631

raw_diff = []

632

raw_diff = []

632

633

try:

634

try:

634

line = diff_iter.next()

635

line = diff_iter.next()

635

636

while line:

637

while line:

637

raw_diff.append(line)

638

raw_diff.append(line)

638

lines = []

639

lines = []

639

chunks.append(lines)

640

chunks.append(lines)

640

641

match = self._chunk_re.match(line)

642

match = self._chunk_re.match(line)

642

643

if not match:

644

if not match:

644

break

645

break

645

646

gr = match.groups()

647

gr = match.groups()

647

(old_line, old_end,

648

(old_line, old_end,

648

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

649

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

649

old_line -= 1

650

old_line -= 1

650

new_line -= 1

651

new_line -= 1

651

652

context = len(gr) == 5

653

context = len(gr) == 5

653

old_end += old_line

654

old_end += old_line

654

new_end += new_line

655

new_end += new_line

655

656

if context:

657

if context:

657

# skip context only if it's first line

658

# skip context only if it's first line

658

if int(gr[0]) > 1:

659

if int(gr[0]) > 1:

659

lines.append({

660

lines.append({

660

'old_lineno': '...',

661

'old_lineno': '...',

661

'new_lineno': '...',

662

'new_lineno': '...',

662

'action': Action.CONTEXT,

663

'action': Action.CONTEXT,

663

'line': line,

664

'line': line,

664

})

665

})

665

666

line = diff_iter.next()

667

line = diff_iter.next()

667

668

while old_line < old_end or new_line < new_end:

669

while old_line < old_end or new_line < new_end:

669

command = ' '

670

command = ' '

670

if line:

671

if line:

671

command = line[0]

672

command = line[0]

672

673

affects_old = affects_new = False

674

affects_old = affects_new = False

674

675

# ignore those if we don't expect them

676

# ignore those if we don't expect them

676

if command in '#@':

677

if command in '#@':

677

continue

678

continue

678

elif command == '+':

679

elif command == '+':

679

affects_new = True

680

affects_new = True

680

action = Action.ADD

681

action = Action.ADD

681

stats[0] += 1

682

stats[0] += 1

682

elif command == '-':

683

elif command == '-':

683

affects_old = True

684

affects_old = True

684

action = Action.DELETE

685

action = Action.DELETE

685

stats[1] += 1

686

stats[1] += 1

686

else:

687

else:

687

affects_old = affects_new = True

688

affects_old = affects_new = True

688

action = Action.UNMODIFIED

689

action = Action.UNMODIFIED

689

690

if not self._newline_marker.match(line):

691

if not self._newline_marker.match(line):

691

old_line += affects_old

692

old_line += affects_old

692

new_line += affects_new

693

new_line += affects_new

693

lines.append({

694

lines.append({

694

'old_lineno': affects_old and old_line or '',

695

'old_lineno': affects_old and old_line or '',

695

'new_lineno': affects_new and new_line or '',

696

'new_lineno': affects_new and new_line or '',

696

'action': action,

697

'action': action,

697

'line': self._clean_line(line, command)

698

'line': self._clean_line(line, command)

698

})

699

})

699

raw_diff.append(line)

700

raw_diff.append(line)

700

701

line = diff_iter.next()

702

line = diff_iter.next()

702

703

if self._newline_marker.match(line):

704

if self._newline_marker.match(line):

704

# we need to append to lines, since this is not

705

# we need to append to lines, since this is not

705

# counted in the line specs of diff

706

# counted in the line specs of diff

706

lines.append({

707

lines.append({

707

'old_lineno': '...',

708

'old_lineno': '...',

708

'new_lineno': '...',

709

'new_lineno': '...',

709

'action': Action.CONTEXT,

710

'action': Action.CONTEXT,

710

'line': self._clean_line(line, command)

711

'line': self._clean_line(line, command)

711

})

712

})

712

713

except StopIteration:

714

except StopIteration:

714

pass

715

pass

715

return ''.join(raw_diff), chunks, stats

716

return ''.join(raw_diff), chunks, stats

716

717

# FIXME: NEWDIFFS: dan: this replaces _parse_lines

718

# FIXME: NEWDIFFS: dan: this replaces _parse_lines

718

def _new_parse_lines(self, diff_iter):

719

def _new_parse_lines(self, diff_iter):

719

"""

720

"""

720

Parse the diff an return data for the template.

721

Parse the diff an return data for the template.

721

"""

722

"""

722

723

stats = [0, 0]

724

stats = [0, 0]

724

chunks = []

725

chunks = []

725

raw_diff = []

726

raw_diff = []

726

727

try:

728

try:

728

line = diff_iter.next()

729

line = diff_iter.next()

729

730

while line:

731

while line:

731

raw_diff.append(line)

732

raw_diff.append(line)

732

# match header e.g @@ -0,0 +1 @@\n'

733

# match header e.g @@ -0,0 +1 @@\n'

733

match = self._chunk_re.match(line)

734

match = self._chunk_re.match(line)

734

735

if not match:

736

if not match:

736

break

737

break

737

738

gr = match.groups()

739

gr = match.groups()

739

(old_line, old_end,

740

(old_line, old_end,

740

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

741

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

741

742

lines = []

743

lines = []

743

hunk = {

744

hunk = {

744

'section_header': gr[-1],

745

'section_header': gr[-1],

745

'source_start': old_line,

746

'source_start': old_line,

746

'source_length': old_end,

747

'source_length': old_end,

747

'target_start': new_line,

748

'target_start': new_line,

748

'target_length': new_end,

749

'target_length': new_end,

749

'lines': lines,

750

'lines': lines,

750

}

751

}

751

chunks.append(hunk)

752

chunks.append(hunk)

752

753

old_line -= 1

754

old_line -= 1

754

new_line -= 1

755

new_line -= 1

755

756

context = len(gr) == 5

757

context = len(gr) == 5

757

old_end += old_line

758

old_end += old_line

758

new_end += new_line

759

new_end += new_line

759

760

line = diff_iter.next()

761

line = diff_iter.next()

761

762

while old_line < old_end or new_line < new_end:

763

while old_line < old_end or new_line < new_end:

763

command = ' '

764

command = ' '

764

if line:

765

if line:

765

command = line[0]

766

command = line[0]

766

767

affects_old = affects_new = False

768

affects_old = affects_new = False

768

769

# ignore those if we don't expect them

770

# ignore those if we don't expect them

770

if command in '#@':

771

if command in '#@':

771

continue

772

continue

772

elif command == '+':

773

elif command == '+':

773

affects_new = True

774

affects_new = True

774

action = Action.ADD

775

action = Action.ADD

775

stats[0] += 1

776

stats[0] += 1

776

elif command == '-':

777

elif command == '-':

777

affects_old = True

778

affects_old = True

778

action = Action.DELETE

779

action = Action.DELETE

779

stats[1] += 1

780

stats[1] += 1

780

else:

781

else:

781

affects_old = affects_new = True

782

affects_old = affects_new = True

782

action = Action.UNMODIFIED

783

action = Action.UNMODIFIED

783

784

if not self._newline_marker.match(line):

785

if not self._newline_marker.match(line):

785

old_line += affects_old

786

old_line += affects_old

786

new_line += affects_new

787

new_line += affects_new

787

lines.append({

788

lines.append({

788

'old_lineno': affects_old and old_line or '',

789

'old_lineno': affects_old and old_line or '',

789

'new_lineno': affects_new and new_line or '',

790

'new_lineno': affects_new and new_line or '',

790

'action': action,

791

'action': action,

791

'line': self._clean_line(line, command)

792

'line': self._clean_line(line, command)

792

})

793

})

793

raw_diff.append(line)

794

raw_diff.append(line)

794

795

line = diff_iter.next()

796

line = diff_iter.next()

796

797

if self._newline_marker.match(line):

798

if self._newline_marker.match(line):

798

# we need to append to lines, since this is not

799

# we need to append to lines, since this is not

799

# counted in the line specs of diff

800

# counted in the line specs of diff

800

if affects_old:

801

if affects_old:

801

action = Action.OLD_NO_NL

802

action = Action.OLD_NO_NL

802

elif affects_new:

803

elif affects_new:

803

action = Action.NEW_NO_NL

804

action = Action.NEW_NO_NL

804

else:

805

else:

805

raise Exception('invalid context for no newline')

806

raise Exception('invalid context for no newline')

806

807

lines.append({

808

lines.append({

808

'old_lineno': None,

809

'old_lineno': None,

809

'new_lineno': None,

810

'new_lineno': None,

810

'action': action,

811

'action': action,

811

'line': self._clean_line(line, command)

812

'line': self._clean_line(line, command)

812

})

813

})

813

814

except StopIteration:

815

except StopIteration:

815

pass

816

pass

816

817

return ''.join(raw_diff), chunks, stats

818

return ''.join(raw_diff), chunks, stats

818

819

def _safe_id(self, idstring):

820

def _safe_id(self, idstring):

820

"""Make a string safe for including in an id attribute.

821

"""Make a string safe for including in an id attribute.

821

822

The HTML spec says that id attributes 'must begin with

823

The HTML spec says that id attributes 'must begin with

823

a letter ([A-Za-z]) and may be followed by any number

824

a letter ([A-Za-z]) and may be followed by any number

824

of letters, digits ([0-9]), hyphens ("-"), underscores

825

of letters, digits ([0-9]), hyphens ("-"), underscores

825

("_"), colons (":"), and periods (".")'. These regexps

826

("_"), colons (":"), and periods (".")'. These regexps

826

are slightly over-zealous, in that they remove colons

827

are slightly over-zealous, in that they remove colons

827

and periods unnecessarily.

828

and periods unnecessarily.

828

829

Whitespace is transformed into underscores, and then

830

Whitespace is transformed into underscores, and then

830

anything which is not a hyphen or a character that

831

anything which is not a hyphen or a character that

831

matches \w (alphanumerics and underscore) is removed.

832

matches \w (alphanumerics and underscore) is removed.

832

833

"""

834

"""

834

# Transform all whitespace to underscore

835

# Transform all whitespace to underscore

835

idstring = re.sub(r'\s', "_", '%s' % idstring)

836

idstring = re.sub(r'\s', "_", '%s' % idstring)

836

# Remove everything that is not a hyphen or a member of \w

837

# Remove everything that is not a hyphen or a member of \w

837

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

838

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

838

return idstring

839

return idstring

839

840

@classmethod
def diff_splitter(cls, string):
    """
    Generator emulating ``.splitlines()`` with kept line ends, but
    splitting on ``\\n`` only.

    Nothing is yielded for an empty `string`. Every yielded piece goes
    through ``safe_unicode`` and keeps its trailing newline; only a final
    piece that was not newline-terminated in `string` is yielded without
    one.
    """
    if not string:
        return

    if string == '\n':
        yield u'\n'
        return

    pieces = string.split('\n')
    # text after the last newline; empty when `string` ends with a newline
    tail = pieces.pop()

    for piece in pieces:
        yield safe_unicode(piece) + '\n'

    if tail:
        yield safe_unicode(tail)

865

866

def prepare(self, inline_diff=True):
    """
    Run the configured parser over the diff and remember the result.

    The result is stored in ``self.parsed_diff`` and ``self.parsed`` is
    set to ``True``.

    :param inline_diff: forwarded as keyword argument to ``self._parser``
    :return: whatever ``self._parser`` returned
    """
    result = self._parser(inline_diff=inline_diff)
    self.parsed, self.parsed_diff = True, result
    return result

876

877

def as_raw(self, diff_lines=None):
    """
    Return the ``raw`` attribute of the wrapped diff object.

    :param diff_lines: accepted, but not used by this method
    """
    wrapped_diff = self._diff
    return wrapped_diff.raw

882

883

def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Render the parsed diff as one html table.

    Each change becomes a table row made of a comment-icon cell, a
    comment-toggle cell, the old and the new line number cells and the
    code cell. ``prepare()`` is called first if the diff was not parsed
    yet.

    :param table_class: css class of the ``<table>`` element
    :param line_class: css class put on each ``<tr>`` next to the action
    :param old_lineno_class: css class of the old line number cell
    :param new_lineno_class: css class of the new line number cell
    :param code_class: css class of the code cell
    :param enable_comments: when true, non-context rows get the "add
        comment" icon and do not receive the ``no-comment`` class
    :param parsed_lines: parsed diff to render instead of
        ``self.parsed_diff`` (only used when truthy)
    :return: the html as one string, or ``None`` when no file contained
        any chunk
    """
    # TODO(marcink): not sure how to pass in translator
    # here in an efficient way, leave the _ for proper gettext extraction
    _ = lambda s: s

    def _link_to_if(condition, label, url):
        """
        Return `label` wrapped in a link to `url` when `condition` is
        met, the bare `label` otherwise.
        """
        if not condition:
            return label
        return (
            '<a href="%(url)s" class="tooltip"\n'
            'title="%(title)s">%(label)s</a>'
        ) % {
            'title': _('Click to select line'),
            'url': url,
            'label': label
        }

    if not self.parsed:
        self.prepare()

    diff_lines = self.parsed_diff
    if parsed_lines:
        diff_lines = parsed_lines

    _html_empty = True
    _html = ['<table class="%(table_class)s">\n' % {
        'table_class': table_class
    }]
    emit = _html.append

    for diff in diff_lines:
        for chunk in diff['chunks']:
            # seeing a chunk at all (even one without rows) counts as
            # content
            _html_empty = False
            for change in chunk:
                emit('<tr class="%(lc)s %(action)s">\n' % {
                    'lc': line_class,
                    'action': change['action']
                })

                file_id = self._safe_id(diff['filename'])
                old_no = change['old_lineno']
                new_no = change['new_lineno']
                anchor_old = "%(filename)s_o%(oldline_no)s" % {
                    'filename': file_id,
                    'oldline_no': old_no
                }
                anchor_new = "%(filename)s_n%(oldline_no)s" % {
                    'filename': file_id,
                    'oldline_no': new_no
                }

                # an id attribute is only emitted for real line numbers,
                # not for the '...' placeholder or empty values
                anchor_old_id = ''
                if old_no != '...' and old_no:
                    anchor_old_id = 'id="%s"' % anchor_old
                anchor_new_id = ''
                if new_no != '...' and new_no:
                    anchor_new_id = 'id="%s"' % anchor_new

                # context rows get plain line numbers, all others a link
                anchor_link = change['action'] != Action.CONTEXT

                ###########################################################
                # COMMENT ICONS
                ###########################################################
                emit('\t<td class="add-comment-line"><span class="add-comment-content">')

                if enable_comments and anchor_link:
                    emit('<a href="#"><span class="icon-comment-add"></span></a>')

                emit('</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n')

                ###########################################################
                # OLD LINE NUMBER
                ###########################################################
                emit('\t<td %(a_id)s class="%(olc)s">' % {
                    'a_id': anchor_old_id,
                    'olc': old_lineno_class
                })
                emit('%(link)s' % {
                    'link': _link_to_if(anchor_link, old_no,
                                        '#%s' % anchor_old)
                })
                emit('</td>\n')

                ###########################################################
                # NEW LINE NUMBER
                ###########################################################
                emit('\t<td %(a_id)s class="%(nlc)s">' % {
                    'a_id': anchor_new_id,
                    'nlc': new_lineno_class
                })
                emit('%(link)s' % {
                    'link': _link_to_if(anchor_link, new_no,
                                        '#%s' % anchor_new)
                })
                emit('</td>\n')

                ###########################################################
                # CODE
                ###########################################################
                code_classes = [code_class]
                if not enable_comments or not anchor_link:
                    code_classes.append('no-comment')
                emit('\t<td class="%s">' % ' '.join(code_classes))
                # NOTE(review): the line content is interpolated as-is;
                # presumably it was escaped while parsing - confirm
                emit('\n\t\t<pre>%(code)s</pre>\n' % {
                    'code': change['line']
                })

                emit('\t</td>')
                emit('\n</tr>\n')

    emit('</table>')
    if _html_empty:
        return None
    return ''.join(_html)

1007

1008

def stat(self):
    """
    Return the pair ``(self.adds, self.removes)``, i.e. the added and
    removed line counters kept on this instance.
    """
    added, removed = self.adds, self.removes
    return added, removed

1013

1014

def get_context_of_line(
        self, path, diff_line=None, context_before=3, context_after=3):
    """
    Return the lines surrounding `diff_line` in the diff of `path`.

    :param path: file whose diff is looked up via ``_get_file_diff``
    :type diff_line: :class:`DiffLineNumber`
    :param diff_line: line to look up; it has to contain at least one
        ``None`` entry
    :param context_before: number of chunk lines taken before the line
    :param context_after: number of chunk lines taken after the line
    :return: list of ``_context_line`` results for those lines of the
        window that pass ``_is_diff_content``; the text of the last
        entry is normalized to end with exactly one newline
    :raises ValueError: if `diff_line` contains no ``None``
    """
    assert self.parsed, "DiffProcessor is not initialized."

    if None not in diff_line:
        raise ValueError(
            "Cannot specify both line numbers: {}".format(diff_line))

    chunk, idx = self._find_chunk_line_index(
        self._get_file_diff(path), diff_line)

    # clamp at zero so the window never wraps around to the chunk's end
    window_start = max(idx - context_before, 0)
    window_stop = idx + context_after + 1

    line_contents = []
    for entry in chunk[window_start:window_stop]:
        if _is_diff_content(entry):
            line_contents.append(_context_line(entry))

    # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
    # Once they are fixed, we can drop this line here.
    if line_contents:
        last = line_contents[-1]
        line_contents[-1] = (last[0], last[1].rstrip('\n') + '\n')
    return line_contents

1043

1044

def find_context(self, path, context, offset=0):
    """
    Finds the given `context` inside of the diff.

    Use the parameter `offset` to specify which offset the target line has
    inside of the given `context`. This way the correct diff line will be
    returned.

    A match is a position inside a single chunk at which every entry of
    `context` occurs consecutively and in order. All such positions are
    reported, in chunk order.

    :param path: Name of the file inside of the diff to search in.
    :param context: Sequence of ``(action, line)`` tuples; each entry is
        compared against the result of `_context_line` for a diff line.
    :param offset: Shall be used to specify the offset of the main line
        within the given `context`.
    :return: List of :class:`DiffLineNumber`, one entry per match.
    :raises ValueError: If `offset` does not point into `context`.
    :raises FileNotInDiffException: If `path` is not part of the diff.
    """
    if offset < 0 or offset >= len(context):
        raise ValueError(
            "Only positive values up to the length of the context "
            "minus one are allowed.")

    wanted = list(context)
    window = len(wanted)
    file_diff = self._get_file_diff(path)

    found_at_diff_lines = []
    for chunk in file_diff['chunks']:
        chunk_lines = [_context_line(line) for line in chunk]
        # Compare a full window at every start position. This cannot drift
        # out of sync after a partial match and needs no special handling
        # for a match that ends exactly at the end of the chunk.
        for start in range(len(chunk_lines) - window + 1):
            if chunk_lines[start:start + window] == wanted:
                found_at_diff_lines.append(
                    _line_to_diff_line_number(chunk[start + offset]))

    return found_at_diff_lines

1087

1088

def _get_file_diff(self, path):
    """
    Returns the entry of ``self.parsed_diff`` whose ``'filename'`` is `path`.

    :param path: File name to look up; compared for equality.
    :raises FileNotInDiffException: If no entry has this file name.
    """
    for file_diff in self.parsed_diff:
        if file_diff['filename'] == path:
            # The first matching entry wins.
            return file_diff
    raise FileNotInDiffException("File {} not in diff".format(path))

1095

1096

def _find_chunk_line_index(self, file_diff, diff_line):
    """
    Locates the diff line addressed by `diff_line` inside of `file_diff`.

    :param file_diff: Mapping with a ``'chunks'`` entry; each chunk is a
        sequence of line mappings with ``'old_lineno'`` and
        ``'new_lineno'`` entries.
    :param diff_line: Object with the attributes ``old`` and ``new``
        (e.g. a :class:`DiffLineNumber`). A side which is ``None`` is not
        used for the lookup.
    :return: Tuple ``(chunk, idx)`` of the first line which matches.
    :raises LineNotInDiffException: If no line matches.
    """
    old_no, new_no = diff_line.old, diff_line.new
    for chunk in file_diff['chunks']:
        for idx, line in enumerate(chunk):
            # Only compare the side which was actually specified, so that
            # an unset side can never match a line without a line number.
            if old_no is not None and line['old_lineno'] == old_no:
                return chunk, idx
            if new_no is not None and line['new_lineno'] == new_no:
                return chunk, idx
    raise LineNotInDiffException(
        "The line {} is not part of the diff.".format(diff_line))

1105

1106

1107

def _is_diff_content(line):
    """
    Tells if the ``'action'`` of `line` is one of ``Action.UNMODIFIED``,
    ``Action.ADD`` or ``Action.DELETE``.

    :param line: Mapping describing one diff line.
    """
    content_actions = (Action.UNMODIFIED, Action.ADD, Action.DELETE)
    return line['action'] in content_actions

1110

1111

1112

def _context_line(line):
    """
    Reduces a diff line to the tuple ``(action, line)``.

    :param line: Mapping with the entries ``'action'`` and ``'line'``.
    """
    return (line['action'], line['line'])

1114

1115

1116

# Pair of line numbers addressing one line of a diff: `old` and `new`.
DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])

1117

1118

1119

def _line_to_diff_line_number(line):
    """
    Builds a :class:`DiffLineNumber` out of a diff line.

    Falsy ``'old_lineno'`` / ``'new_lineno'`` entries are turned into
    ``None``.

    :param line: Mapping with the entries ``'old_lineno'`` and
        ``'new_lineno'``.
    """
    new_line_no = line['new_lineno'] or None
    old_line_no = line['old_lineno'] or None
    return DiffLineNumber(old=old_line_no, new=new_line_no)

1123

1124

1125

class FileNotInDiffException(Exception):
    """
    Raised when the context for a missing file is requested.

    If you request the context for a line in a file which is not part of the
    given diff, then this exception is raised.
    """

1132

1133

1134

class LineNotInDiffException(Exception):
    """
    Raised when the context for a missing line is requested.

    If you request the context for a line in a file and this line is not
    part of the given diff, then this exception is raised.
    """

1141

1142

1143

class DiffLimitExceeded(Exception):
    """
    Presumably raised when a diff exceeds a size limit.

    NOTE(review): the places raising this exception are not part of this
    section of the file, please confirm.
    """

1145

1146

1147

# NOTE(marcink): if diffs.mako changes, this probably
# needs a bump to the next version
CURRENT_DIFF_VERSION = 'v4'

1150

1151

1152

def _cleanup_cache_file(cached_diff_file):
    """
    Removes the given cache file, so that it is not stored "damaged".

    Best effort: a failure to remove the file is logged, never raised.

    :param cached_diff_file: Path of the cache file to remove.
    """
    try:
        os.remove(cached_diff_file)
    except Exception:
        log.exception('Failed to cleanup path %s', cached_diff_file)

1158

1159

1160

1161

def _get_compression_mode(cached_diff_file):
    """
    Derives the compression mode from markers inside of the cache path.

    :param cached_diff_file: Path of the cache file.
    :return: ``'plain'`` if the path contains ``mode:plain``, ``'gzip'`` if
        it contains ``mode:gzip``, ``'bz2'`` otherwise. The plain marker
        takes precedence if both are present.
    """
    for mode in ('plain', 'gzip'):
        if 'mode:{}'.format(mode) in cached_diff_file:
            return mode
    return 'bz2'

1168

1169

1160

def cache_diff(cached_diff_file, diff, commits):
    """
    Pickles `diff` and `commits` into the cache file.

    The stored structure is a dict with the keys ``'version'`` (set to
    `CURRENT_DIFF_VERSION`), ``'diff'`` and ``'commits'``. The file is
    written plain, gzip or bz2 compressed, depending on
    `_get_compression_mode`.

    Saving is best effort: on any failure a warning is logged and the
    possibly half written file is removed again, nothing is raised.

    :param cached_diff_file: Path of the cache file to write.
    :param diff: Diff data to store.
    :param commits: Commit data to store.
    """
    compression_mode = _get_compression_mode(cached_diff_file)

    struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': diff,
        'commits': commits
    }

    start = time.time()
    try:
        if compression_mode == 'plain':
            opener = open
        elif compression_mode == 'gzip':
            opener = gzip.GzipFile
        else:
            opener = bz2.BZ2File

        with opener(cached_diff_file, 'wb') as f:
            pickle.dump(struct, f)
    except Exception:
        log.warning('Failed to save cache', exc_info=True)
        _cleanup_cache_file(cached_diff_file)
    else:
        # Only report success if the file was really written.
        log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)

1182

1195

1183

1196

1184

def load_cached_diff(cached_diff_file):
    """
    Loads the structure stored by `cache_diff` from the cache file.

    :param cached_diff_file: Path of the cache file to read.
    :return: The stored dict with the keys ``'version'``, ``'diff'`` and
        ``'commits'``. If the file is missing, unreadable, empty, not a
        dict or of a different version, a default dict is returned which
        has ``None`` as ``'diff'`` and ``'commits'``. A file of a different
        version is removed.
    """
    compression_mode = _get_compression_mode(cached_diff_file)

    default_struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': None,
        'commits': None
    }

    has_cache = os.path.isfile(cached_diff_file)
    if not has_cache:
        log.debug('Reading diff cache file failed %s', cached_diff_file)
        return default_struct

    data = None

    start = time.time()
    try:
        if compression_mode == 'plain':
            opener = open
        elif compression_mode == 'gzip':
            opener = gzip.GzipFile
        else:
            opener = bz2.BZ2File

        # NOTE(review): unpickling executes whatever the file contains, so
        # the cache storage must only be writable by trusted code.
        with opener(cached_diff_file, 'rb') as f:
            data = pickle.load(f)
    except Exception:
        log.warning('Failed to read diff cache file', exc_info=True)

    # Nothing usable was read (failure, empty payload or old data format).
    # Hand out the defaults without claiming that the cache got loaded.
    if not data or not isinstance(data, dict):
        return default_struct

    # check version
    if data.get('version') != CURRENT_DIFF_VERSION:
        # purge cache
        _cleanup_cache_file(cached_diff_file)
        return default_struct

    log.debug('Loaded diff cache from %s in %.4fs', cached_diff_file, time.time() - start)

    return data

1227

1243

1228

1244

1229

def generate_diff_cache_key(*args):
    """
    Helper to generate a cache key using arguments

    Every argument is passed through `safe_str`, has ``/`` replaced by
    ``_`` and is then joined with ``_``. An argument which ends up as an
    empty string is rendered as ``None``.

    :param args: Values which identify the cached diff.
    :return: The cache key as a string.
    """
    def arg_mapper(input_param):
        input_param = safe_str(input_param)
        # we cannot allow '/' in arguments since it would allow
        # subdirectory usage
        # Strings are immutable, the result has to be assigned back.
        input_param = input_param.replace('/', '_')
        return input_param or None  # prevent empty string arguments

    return '_'.join('{}'.format(arg_mapper(arg)) for arg in args)

1242

1258

1243

1259

1244

def diff_cache_exist(cache_storage, *args):
    """
    Based on all generated arguments check and return a cache path

    The path is only computed and validated here; this function does not
    test if the file is present.

    :param cache_storage: Directory which holds the cache files.
    :param args: Values identifying the cached diff. The marker
        ``mode:gzip`` is appended before the key is generated.
    :return: `cache_storage` joined with the generated cache key.
    :raises ValueError: If the resulting path is outside of `cache_storage`.
    """
    args = list(args) + ['mode:gzip']
    cache_key = generate_diff_cache_key(*args)
    cache_file_path = os.path.join(cache_storage, cache_key)

    # prevent path traversal attacks using some param that have e.g '../../'
    # Both sides are normalized and the storage gets a trailing separator,
    # so that a sibling like "<storage>_other" cannot pass as a prefix.
    storage_root = os.path.join(os.path.abspath(cache_storage), '')
    if not os.path.abspath(cache_file_path).startswith(storage_root):
        raise ValueError('Final path must be within {}'.format(cache_storage))

    return cache_file_path

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             # Copyright (C) 2011-2019 RhodeCode GmbH
             #
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU Affero General Public License, version 3
             # (only), as published by the Free Software Foundation.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU Affero General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             #
             # This program is dual-licensed. If you wish to learn more about the
             # RhodeCode Enterprise Edition, including its added features, Support services,
             # and proprietary license terms, please see https://rhodecode.com/licenses/
             """
             Set of diffing helpers, previously part of vcs
             """
             import os
             import re
             import bz2
+            import gzip
             import time
             import collections
             import difflib
             import logging
             import cPickle as pickle
             from itertools import tee, imap
             from rhodecode.lib.vcs.exceptions import VCSError
             from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
             from rhodecode.lib.utils2 import safe_unicode, safe_str
             log = logging.getLogger(__name__)
             # Upper bound for the number of context lines of a diff; a file with more
             # lines than this is unusable in a browser anyway.
             MAX_CONTEXT = 20 * 1024
             # Number of context lines used when the full context was not requested.
             DEFAULT_CONTEXT = 3
             def get_diff_context(request):
                 """
                 Pick the number of diff context lines based on the request.

                 :param request: object whose ``GET`` mapping holds the query parameters
                 :returns: ``MAX_CONTEXT`` when the ``fullcontext`` parameter equals
                     ``'1'``, ``DEFAULT_CONTEXT`` otherwise
                 """
                 full_context_flag = request.GET.get('fullcontext', '')
                 if full_context_flag == '1':
                     return MAX_CONTEXT
                 return DEFAULT_CONTEXT
             def get_diff_whitespace_flag(request):
                 """
                 Tell whether the request asks for whitespace changes to be ignored.

                 :param request: object whose ``GET`` mapping holds the query parameters
                 :returns: ``True`` when the ``ignorews`` parameter equals ``'1'``
                 """
                 ignore_ws_flag = request.GET.get('ignorews', '')
                 return ignore_ws_flag == '1'
             class OPS(object):
                 """
                 One-letter codes for the operation a diff performs on a file.
                 """
                 # added, modified and deleted file
                 ADD, MOD, DEL = 'A', 'M', 'D'
             def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
                 """
                 Returns git style diff between given ``filenode_old`` and ``filenode_new``.

                 :param filenode_old: node for the old side of the diff
                 :param filenode_new: node for the new side of the diff; the repository
                     of its commit is the one asked for the diff
                 :param ignore_whitespace: ignore whitespaces in diff
                 :param context: number of context lines; a falsy value falls back to 3
                     and anything above ``MAX_CONTEXT`` is clamped to ``MAX_CONTEXT``
                 :returns: whatever ``repo.get_diff`` returns, or an empty string when
                     one of the nodes is a ``SubModuleNode``
                 :raises VCSError: when one of the nodes is not a ``FileNode``
                 """
                 # make sure we pass in default context and protect against
                 # IntOverflow when passing HUGE context
                 context = min(context or 3, MAX_CONTEXT)
                 # ``any`` yields a real boolean; a ``filter`` result is only reliable in
                 # a truth test while ``filter`` returns a list
                 if any(isinstance(node, SubModuleNode)
                        for node in (filenode_new, filenode_old)):
                     return ''
                 for filenode in (filenode_old, filenode_new):
                     if not isinstance(filenode, FileNode):
                         raise VCSError(
                             "Given object should be FileNode object, not %s"
                             % filenode.__class__)
                 repo = filenode_new.commit.repository
                 # an old node without a commit is diffed against the empty commit
                 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
                 new_commit = filenode_new.commit
                 vcs_gitdiff = repo.get_diff(
                     old_commit, new_commit, filenode_new.path,
                     ignore_whitespace, context, path1=filenode_old.path)
                 return vcs_gitdiff
             # Keys of the per-file ``stats['ops']`` dict filled in by DiffProcessor;
             # each key maps to a human readable description of the operation.
             NEW_FILENODE = 1
             DEL_FILENODE = 2
             MOD_FILENODE = 3
             RENAMED_FILENODE = 4
             COPIED_FILENODE = 5
             CHMOD_FILENODE = 6
             BIN_FILENODE = 7
             class LimitedDiffContainer(object):
                 """
                 Wraps a parsed diff together with the diff limit and the diff size
                 that was reached; indexing and iteration go to the wrapped diff.
                 """
                 def __init__(self, diff_limit, cur_diff_size, diff):
                     self.diff_limit = diff_limit
                     self.cur_diff_size = cur_diff_size
                     self.diff = diff
                 def __getitem__(self, key):
                     # indexing is delegated to the wrapped diff
                     return self.diff[key]
                 def __iter__(self):
                     # iteration is delegated to the wrapped diff
                     for entry in self.diff:
                         yield entry
             class Action(object):
                 """
                 Names of the ``action`` values carried by the lines of a parsed diff.
                 """
                 # lines that are part of the change
                 ADD, DELETE = 'add', 'del'
                 # lines that are not changed
                 UNMODIFIED, CONTEXT = 'unmod', 'context'
                 # missing newline at end of file, for the old and the new side
                 OLD_NO_NL, NEW_NO_NL = 'old-no-nl', 'new-no-nl'
             class DiffProcessor(object):
                 """
                 Give it a unified or git diff and it returns a list of the files that were
                 mentioned in the diff together with a dict of meta information that
                 can be used to render it in a HTML template.
                 .. note:: Unicode handling
                    The original diffs are a byte sequence and can contain filenames
                    in mixed encodings. This class generally returns `unicode` objects
                    since the result is intended for presentation to the user.
                 """
                 # matches a hunk header such as `@@ -1,3 +1,4 @@ text`; the groups are
                 # old start, optional old length, new start, optional new length and
                 # the text following the closing `@@`
                 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
                 # matches the `\ No newline at end of file` marker line
                 _newline_marker = re.compile(r'^\\ No newline at end of file')
                 # used for inline highlighter word split; splits on the escaped HTML
                 # entities and on non-word characters
                 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
                 # collapse ranges of commits over given number
                 _collapse_commits_over = 5
                 def __init__(self, diff, format='gitdiff', diff_limit=None,
                              file_limit=None, show_full_diff=True):
                     """
                     Store the diff and the limits, reset the counters and select the
                     parser and inline highlighter that belong to ``format``.

                     :param diff: A `Diff` object representing a diff from a vcs backend
                     :param format: format of diff passed, `udiff` or `gitdiff`
                     :param diff_limit: define the size of diff that is considered "big"
                         based on that parameter cut off will be triggered, set to None
                         to show full diff
                     :param file_limit: size of a single file diff above which that file
                         is flagged as exceeding the limit
                     :param show_full_diff: when true the limit checks never cut off
                         the diff
                     """
                     # input and configuration
                     self._diff = diff
                     self._format = format
                     self.diff_limit = diff_limit
                     self.file_limit = file_limit
                     self.show_full_diff = show_full_diff
                     # counters and parse state
                     self.adds = 0
                     self.removes = 0
                     self.cur_diff_size = 0
                     self.parsed = False
                     self.parsed_diff = []
                     log.debug('Initialized DiffProcessor with %s mode', format)
                     if format != 'gitdiff':
                         self.differ = self._highlight_line_udiff
                         self._parser = self._new_parse_gitdiff
                     else:
                         self.differ = self._highlight_line_difflib
                         self._parser = self._parse_gitdiff
                 def _copy_iterator(self):
                     """
                     make a fresh copy of generator, we should not iterate thru
                     an original as it's needed for repeating operations on
                     this instance of DiffProcessor
                     """
                     self.__udiff, iterator_copy = tee(self.__udiff)
                     return iterator_copy
                 def _escaper(self, string):
                     """
                     Escaper for diff escapes special chars and checks the diff limit
                     :param string:
                     """
                     self.cur_diff_size += len(string)
                     if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
                         raise DiffLimitExceeded('Diff Limit Exceeded')
                     return string \
                         .replace('&', '&amp;')\
                         .replace('<', '&lt;')\
                         .replace('>', '&gt;')
                 def _line_counter(self, l):
                     """
                     Bump the add/remove totals of this diff for a single line.

                     Lines starting with ``+++`` or ``---`` are not counted.

                     :param l: one line of the diff
                     :returns: the line passed through ``safe_unicode``
                     """
                     is_addition = l.startswith('+') and not l.startswith('+++')
                     is_removal = l.startswith('-') and not l.startswith('---')
                     if is_addition:
                         self.adds += 1
                     elif is_removal:
                         self.removes += 1
                     return safe_unicode(l)
                 def _highlight_line_difflib(self, line, next_):
                     """
                     Mark inline changes in a pair of lines using difflib.

                     Both lines are split into tokens, the token sequences are compared
                     and every differing fragment is wrapped in ``<del>`` (old line) or
                     ``<ins>`` (new line). The ``'line'`` value of both dicts is
                     rewritten in place.
                     """
                     line_is_old = line['action'] == Action.DELETE
                     old, new = (line, next_) if line_is_old else (next_, line)
                     old_tokens = self._token_re.split(old['line'])
                     new_tokens = self._token_re.split(new['line'])
                     matcher = difflib.SequenceMatcher(None, old_tokens, new_tokens)
                     old_parts = []
                     new_parts = []
                     for tag, i1, i2, j1, j2 in matcher.get_opcodes():
                         old_piece = ''.join(old_tokens[i1:i2])
                         new_piece = ''.join(new_tokens[j1:j2])
                         differs = tag != 'equal'
                         # empty fragments are never wrapped
                         if differs and old_piece:
                             old_piece = '<del>%s</del>' % old_piece
                         if differs and new_piece:
                             new_piece = '<ins>%s</ins>' % new_piece
                         old_parts.append(old_piece)
                         new_parts.append(new_piece)
                     old['line'] = "".join(old_parts)
                     new['line'] = "".join(new_parts)
                 def _highlight_line_udiff(self, line, next_):
                     """
                     Highlight inline changes in both lines.
                     """
                     start = 0
                     limit = min(len(line['line']), len(next_['line']))
                     while start < limit and line['line'][start] == next_['line'][start]:
                         start += 1
                     end = -1
                     limit -= start
                     while -end <= limit and line['line'][end] == next_['line'][end]:
                         end -= 1
                     end += 1
                     if start or end:
                         def do(l):
                             last = end + len(l['line'])
                             if l['action'] == Action.ADD:
                                 tag = 'ins'
                             else:
                                 tag = 'del'
                             l['line'] = '%s<%s>%s</%s>%s' % (
                                 l['line'][:start],
                                 tag,
                                 l['line'][start:last],
                                 tag,
                                 l['line'][last:]
                             )
                         do(line)
                         do(next_)
                 def _clean_line(self, line, command):
                     if command in ['+', '-', ' ']:
                         # only modify the line if it's actually a diff thing
                         line = line[1:]
                     return line
                 def _parse_gitdiff(self, inline_diff=True):
                     """
                     Parse every chunk of the diff into a per-file dict.

                     Each dict carries the filename, old and new revision, parsed
                     chunks, raw diff, operation, stats and the limit flags. The files
                     are returned sorted by operation (added, modified, deleted).

                     :param inline_diff: when true, pairs of changed lines are passed to
                         ``self.differ`` for inline highlighting
                     :returns: the sorted list of file dicts, wrapped in a
                         ``LimitedDiffContainer`` once a limit was exceeded
                     """
                     _files = []
                     # identity by default, swapped for a LimitedDiffContainer factory
                     # as soon as a limit is exceeded
                     diff_container = lambda arg: arg
                     for chunk in self._diff.chunks():
                         head = chunk.header
                         # lazily escape each line; _escaper also tracks the diff size
                         diff = imap(self._escaper, self.diff_splitter(chunk.diff))
                         raw_diff = chunk.raw
                         limited_diff = False
                         exceeds_limit = False
                         op = None
                         stats = {
                             'added': 0,
                             'deleted': 0,
                             'binary': False,
                             'ops': {},
                         }
                         # every operation starts out flagged as binary; the flag is
                         # cleared below once the lines were parsed
                         if head['deleted_file_mode']:
                             op = OPS.DEL
                             stats['binary'] = True
                             stats['ops'][DEL_FILENODE] = 'deleted file'
                         elif head['new_file_mode']:
                             op = OPS.ADD
                             stats['binary'] = True
                             stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
                         else:  # modify operation, can be copy, rename or chmod
                             # CHMOD
                             if head['new_mode'] and head['old_mode']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][CHMOD_FILENODE] = (
                                     'modified file chmod %s => %s' % (
                                         head['old_mode'], head['new_mode']))
                             # RENAME
                             if head['rename_from'] != head['rename_to']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][RENAMED_FILENODE] = (
                                     'file renamed from %s to %s' % (
                                         head['rename_from'], head['rename_to']))
                             # COPY
                             if head.get('copy_from') and head.get('copy_to'):
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][COPIED_FILENODE] = (
                                     'file copied from %s to %s' % (
                                         head['copy_from'], head['copy_to']))
                             # If our new parsed headers didn't match anything fallback to
                             # old style detection
                             if op is None:
                                 if not head['a_file'] and head['b_file']:
                                     op = OPS.ADD
                                     stats['binary'] = True
                                     stats['ops'][NEW_FILENODE] = 'new file'
                                 elif head['a_file'] and not head['b_file']:
                                     op = OPS.DEL
                                     stats['binary'] = True
                                     stats['ops'][DEL_FILENODE] = 'deleted file'
                             # it's not ADD not DELETE
                             if op is None:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][MOD_FILENODE] = 'modified file'
                         # a real non-binary diff
                         if head['a_file'] or head['b_file']:
                             try:
                                 raw_diff, chunks, _stats = self._parse_lines(diff)
                                 stats['binary'] = False
                                 stats['added'] = _stats[0]
                                 stats['deleted'] = _stats[1]
                                 # explicit mark that it's a modified file
                                 if op == OPS.MOD:
                                     stats['ops'][MOD_FILENODE] = 'modified file'
                                 exceeds_limit = len(raw_diff) > self.file_limit
                                 # changed from _escaper function so we validate size of
                                 # each file instead of the whole diff
                                 # diff will hide big files but still show small ones
                                 # from my tests, big files are fairly safe to be parsed
                                 # but the browser is the bottleneck
                                 if not self.show_full_diff and exceeds_limit:
                                     raise DiffLimitExceeded('File Limit Exceeded')
                             except DiffLimitExceeded:
                                 # raised above or by _escaper while the lines were read;
                                 # the file is listed without chunks
                                 diff_container = lambda _diff: \
                                     LimitedDiffContainer(
                                         self.diff_limit, self.cur_diff_size, _diff)
                                 exceeds_limit = len(raw_diff) > self.file_limit
                                 limited_diff = True
                                 chunks = []
                         else:  # GIT format binary patch, or possibly empty diff
                             if head['bin_patch']:
                                 # we have operation already extracted, but we mark simply
                                 # it's a diff we wont show for binary files
                                 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                             chunks = []
                         if chunks and not self.show_full_diff and op == OPS.DEL:
                             # when not in full diff mode the content of a deleted file
                             # is dropped
                             # TODO: anderson: if the view is not too big, there is no way
                             # to see the content of the file
                             chunks = []
                         # first chunk: one context line per recorded operation, the
                         # plain 'modified file' entry is left out
                         chunks.insert(0, [{
                                               'old_lineno': '',
                                               'new_lineno': '',
                                               'action': Action.CONTEXT,
                                               'line': msg,
                                           } for _op, msg in stats['ops'].iteritems()
                                           if _op not in [MOD_FILENODE]])
                         _files.append({
                             'filename': safe_unicode(head['b_path']),
                             'old_revision': head['a_blob_id'],
                             'new_revision': head['b_blob_id'],
                             'chunks': chunks,
                             'raw_diff': safe_unicode(raw_diff),
                             'operation': op,
                             'stats': stats,
                             'exceeds_limit': exceeds_limit,
                             'is_limited_diff': limited_diff,
                         })
                     # order files: added first, then modified, then deleted
                     sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                                            OPS.DEL: 2}.get(info['operation'])
                     if not inline_diff:
                         return diff_container(sorted(_files, key=sorter))
                     # highlight inline changes
                     for diff_data in _files:
                         for chunk in diff_data['chunks']:
                             lineiter = iter(chunk)
                             try:
                                 while 1:
                                     line = lineiter.next()
                                     if line['action'] not in (
                                             Action.UNMODIFIED, Action.CONTEXT):
                                         # a changed line is only highlighted together
                                         # with a following line of the other kind
                                         nextline = lineiter.next()
                                         if nextline['action'] in ['unmod', 'context'] or \
                                            nextline['action'] == line['action']:
                                             continue
                                         self.differ(line, nextline)
                             except StopIteration:
                                 # end of this chunk
                                 pass
                     return diff_container(sorted(_files, key=sorter))
                 def _check_large_diff(self):
                     log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
                     if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
                         raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
                 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
                 def _new_parse_gitdiff(self, inline_diff=True):
                     """
                     Parse every chunk of the diff into a per-file dict.

                     Each dict carries the original and current filename, old and new
                     revision, parsed chunks, raw diff, operation, stats (including file
                     modes and rename/copy information) and the limit flags. The files
                     are returned sorted by operation (added, modified, deleted).

                     :param inline_diff: accepted for signature compatibility with
                         ``_parse_gitdiff``; not used by this parser
                     :returns: the sorted list of file dicts, wrapped in a
                         ``LimitedDiffContainer`` once a limit was exceeded
                     """
                     _files = []
                     # this can be overriden later to a LimitedDiffContainer type
                     diff_container = lambda arg: arg
                     for chunk in self._diff.chunks():
                         head = chunk.header
                         log.debug('parsing diff %r', head)
                         raw_diff = chunk.raw
                         limited_diff = False
                         exceeds_limit = False
                         op = None
                         stats = {
                             'added': 0,
                             'deleted': 0,
                             'binary': False,
                             'old_mode': None,
                             'new_mode': None,
                             'ops': {},
                         }
                         if head['old_mode']:
                             stats['old_mode'] = head['old_mode']
                         if head['new_mode']:
                             stats['new_mode'] = head['new_mode']
                         if head['b_mode']:
                             stats['new_mode'] = head['b_mode']
                         # every operation starts out flagged as binary; the flag is
                         # cleared below once the lines were parsed
                         # delete file
                         if head['deleted_file_mode']:
                             op = OPS.DEL
                             stats['binary'] = True
                             stats['ops'][DEL_FILENODE] = 'deleted file'
                         # new file
                         elif head['new_file_mode']:
                             op = OPS.ADD
                             stats['binary'] = True
                             stats['old_mode'] = None
                             stats['new_mode'] = head['new_file_mode']
                             stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
                         # modify operation, can be copy, rename or chmod
                         else:
                             # CHMOD
                             if head['new_mode'] and head['old_mode']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][CHMOD_FILENODE] = (
                                     'modified file chmod %s => %s' % (
                                         head['old_mode'], head['new_mode']))
                             # RENAME
                             if head['rename_from'] != head['rename_to']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['renamed'] = (head['rename_from'], head['rename_to'])
                                 stats['ops'][RENAMED_FILENODE] = (
                                     'file renamed from %s to %s' % (
                                         head['rename_from'], head['rename_to']))
                             # COPY
                             if head.get('copy_from') and head.get('copy_to'):
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['copied'] = (head['copy_from'], head['copy_to'])
                                 stats['ops'][COPIED_FILENODE] = (
                                     'file copied from %s to %s' % (
                                         head['copy_from'], head['copy_to']))
                             # If our new parsed headers didn't match anything fallback to
                             # old style detection
                             if op is None:
                                 if not head['a_file'] and head['b_file']:
                                     op = OPS.ADD
                                     stats['binary'] = True
                                     stats['new_file'] = True
                                     stats['ops'][NEW_FILENODE] = 'new file'
                                 elif head['a_file'] and not head['b_file']:
                                     op = OPS.DEL
                                     stats['binary'] = True
                                     stats['ops'][DEL_FILENODE] = 'deleted file'
                             # it's not ADD not DELETE
                             if op is None:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][MOD_FILENODE] = 'modified file'
                         # a real non-binary diff
                         if head['a_file'] or head['b_file']:
                             # simulate splitlines, so we keep the line end part
                             diff = self.diff_splitter(chunk.diff)
                             # append each file to the diff size
                             raw_chunk_size = len(raw_diff)
                             exceeds_limit = raw_chunk_size > self.file_limit
                             self.cur_diff_size += raw_chunk_size
                             try:
                                 # Check each file instead of the whole diff.
                                 # Diff will hide big files but still show small ones.
                                 # From the tests big files are fairly safe to be parsed
                                 # but the browser is the bottleneck.
                                 if not self.show_full_diff and exceeds_limit:
                                     log.debug('File `%s` exceeds current file_limit of %s',
                                               safe_unicode(head['b_path']), self.file_limit)
                                     # interpolate the limit, a second exception argument
                                     # would never be formatted into the message
                                     raise DiffLimitExceeded(
                                         'File Limit %s Exceeded' % self.file_limit)
                                 self._check_large_diff()
                                 raw_diff, chunks, _stats = self._new_parse_lines(diff)
                                 stats['binary'] = False
                                 stats['added'] = _stats[0]
                                 stats['deleted'] = _stats[1]
                                 # explicit mark that it's a modified file
                                 if op == OPS.MOD:
                                     stats['ops'][MOD_FILENODE] = 'modified file'
                             except DiffLimitExceeded:
                                 # the file is listed without chunks and the result gets
                                 # wrapped in a LimitedDiffContainer
                                 diff_container = lambda _diff: \
                                     LimitedDiffContainer(
                                         self.diff_limit, self.cur_diff_size, _diff)
                                 limited_diff = True
                                 chunks = []
                         else:  # GIT format binary patch, or possibly empty diff
                             if head['bin_patch']:
                                 # we have operation already extracted, but we mark simply
                                 # it's a diff we wont show for binary files
                                 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                             chunks = []
                         # Hide content of deleted node by setting empty chunks
                         if chunks and not self.show_full_diff and op == OPS.DEL:
                             # TODO: anderson: if the view is not too big, there is no way
                             # to see the content of the file
                             chunks = []
                         # first chunk: one context line per recorded operation, the
                         # plain 'modified file' entry is left out
                         chunks.insert(
                             0, [{'old_lineno': '',
                                  'new_lineno': '',
                                  'action': Action.CONTEXT,
                                  'line': msg,
                                  } for _op, msg in stats['ops'].iteritems()
                                 if _op not in [MOD_FILENODE]])
                         original_filename = safe_unicode(head['a_path'])
                         _files.append({
                             'original_filename': original_filename,
                             'filename': safe_unicode(head['b_path']),
                             'old_revision': head['a_blob_id'],
                             'new_revision': head['b_blob_id'],
                             'chunks': chunks,
                             'raw_diff': safe_unicode(raw_diff),
                             'operation': op,
                             'stats': stats,
                             'exceeds_limit': exceeds_limit,
                             'is_limited_diff': limited_diff,
                         })
                     # order files: added first, then modified, then deleted
                     sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                                            OPS.DEL: 2}.get(info['operation'])
                     return diff_container(sorted(_files, key=sorter))
                 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
                 def _parse_lines(self, diff_iter):
                     """
                     Parse the hunks of one file diff and return data for the template.

                     :param diff_iter: iterator over diff lines. The first line is
                         expected to be a hunk header; parsing stops at the first
                         line in header position that does not match
                         ``self._chunk_re``.
                     :return: tuple ``(raw_diff, chunks, stats)``. ``raw_diff`` is the
                         joined text of the recorded lines, ``chunks`` holds one list
                         of line dicts (``old_lineno``, ``new_lineno``, ``action``,
                         ``line``) per hunk, and ``stats`` is
                         ``[added_lines, deleted_lines]``.
                     """
                     stats = [0, 0]
                     chunks = []
                     raw_diff = []
                     try:
                         line = next(diff_iter)
                         while line:
                             raw_diff.append(line)
                             lines = []
                             chunks.append(lines)
                             match = self._chunk_re.match(line)
                             if not match:
                                 break
                             gr = match.groups()
                             # a missing value in the header defaults to 1
                             (old_line, old_end,
                              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                             # counters are incremented before use, so start one lower
                             old_line -= 1
                             new_line -= 1
                             context = len(gr) == 5
                             # turn the hunk lengths into absolute end line numbers
                             old_end += old_line
                             new_end += new_line
                             if context:
                                 # skip context only if it's first line
                                 if int(gr[0]) > 1:
                                     lines.append({
                                         'old_lineno': '...',
                                         'new_lineno': '...',
                                         'action': Action.CONTEXT,
                                         'line': line,
                                     })
                             line = next(diff_iter)
                             while old_line < old_end or new_line < new_end:
                                 command = ' '
                                 if line:
                                     command = line[0]
                                 affects_old = affects_new = False
                                 # ignore those if we don't expect them
                                 if command in '#@':
                                     # move on to the next line; re-testing the same
                                     # line would never terminate
                                     line = next(diff_iter)
                                     continue
                                 elif command == '+':
                                     affects_new = True
                                     action = Action.ADD
                                     stats[0] += 1
                                 elif command == '-':
                                     affects_old = True
                                     action = Action.DELETE
                                     stats[1] += 1
                                 else:
                                     affects_old = affects_new = True
                                     action = Action.UNMODIFIED
                                 if not self._newline_marker.match(line):
                                     old_line += affects_old
                                     new_line += affects_new
                                     lines.append({
                                         'old_lineno': affects_old and old_line or '',
                                         'new_lineno': affects_new and new_line or '',
                                         'action': action,
                                         'line': self._clean_line(line, command)
                                     })
                                     raw_diff.append(line)
                                 line = next(diff_iter)
                                 if self._newline_marker.match(line):
                                     # we need to append to lines, since this is not
                                     # counted in the line specs of diff
                                     lines.append({
                                         'old_lineno': '...',
                                         'new_lineno': '...',
                                         'action': Action.CONTEXT,
                                         'line': self._clean_line(line, command)
                                     })
                     except StopIteration:
                         # running out of input is the regular way to finish
                         pass
                     return ''.join(raw_diff), chunks, stats
                 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
                 def _new_parse_lines(self, diff_iter):
                     """
                     Parse the hunks of one file diff and return data for the template.

                     :param diff_iter: iterator over diff lines. The first line is
                         expected to be a hunk header; parsing stops at the first
                         line in header position that does not match
                         ``self._chunk_re``.
                     :return: tuple ``(raw_diff, chunks, stats)``. ``raw_diff`` is the
                         joined text of the consumed lines, ``chunks`` holds one dict
                         per hunk (``section_header``, ``source_start``,
                         ``source_length``, ``target_start``, ``target_length``,
                         ``lines``), and ``stats`` is
                         ``[added_lines, deleted_lines]``.
                     :raises Exception: when a "no newline" marker follows a line that
                         affected neither side.
                     """
                     stats = [0, 0]
                     chunks = []
                     raw_diff = []
                     try:
                         line = next(diff_iter)
                         while line:
                             raw_diff.append(line)
                             # match header e.g @@ -0,0 +1 @@\n'
                             match = self._chunk_re.match(line)
                             if not match:
                                 break
                             gr = match.groups()
                             # a missing value in the header defaults to 1
                             (old_line, old_end,
                              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                             lines = []
                             hunk = {
                                 'section_header': gr[-1],
                                 'source_start': old_line,
                                 'source_length': old_end,
                                 'target_start': new_line,
                                 'target_length': new_end,
                                 'lines': lines,
                             }
                             chunks.append(hunk)
                             # counters are incremented before use, so start one lower
                             old_line -= 1
                             new_line -= 1
                             # turn the hunk lengths into absolute end line numbers
                             old_end += old_line
                             new_end += new_line
                             line = next(diff_iter)
                             while old_line < old_end or new_line < new_end:
                                 command = ' '
                                 if line:
                                     command = line[0]
                                 affects_old = affects_new = False
                                 # ignore those if we don't expect them
                                 if command in '#@':
                                     # move on to the next line; re-testing the same
                                     # line would never terminate
                                     line = next(diff_iter)
                                     continue
                                 elif command == '+':
                                     affects_new = True
                                     action = Action.ADD
                                     stats[0] += 1
                                 elif command == '-':
                                     affects_old = True
                                     action = Action.DELETE
                                     stats[1] += 1
                                 else:
                                     affects_old = affects_new = True
                                     action = Action.UNMODIFIED
                                 if not self._newline_marker.match(line):
                                     old_line += affects_old
                                     new_line += affects_new
                                     lines.append({
                                         'old_lineno': affects_old and old_line or '',
                                         'new_lineno': affects_new and new_line or '',
                                         'action': action,
                                         'line': self._clean_line(line, command)
                                     })
                                 raw_diff.append(line)
                                 line = next(diff_iter)
                                 if self._newline_marker.match(line):
                                     # we need to append to lines, since this is not
                                     # counted in the line specs of diff
                                     if affects_old:
                                         action = Action.OLD_NO_NL
                                     elif affects_new:
                                         action = Action.NEW_NO_NL
                                     else:
                                         raise Exception('invalid context for no newline')
                                     lines.append({
                                         'old_lineno': None,
                                         'new_lineno': None,
                                         'action': action,
                                         'line': self._clean_line(line, command)
                                     })
                     except StopIteration:
                         # running out of input is the regular way to finish
                         pass
                     return ''.join(raw_diff), chunks, stats
                 def _safe_id(self, idstring):
                     """Make a string safe for including in an id attribute.
                     The HTML spec says that id attributes 'must begin with
                     a letter ([A-Za-z]) and may be followed by any number
                     of letters, digits ([0-9]), hyphens ("-"), underscores
                     ("_"), colons (":"), and periods (".")'. These regexps
                     are slightly over-zealous, in that they remove colons
                     and periods unnecessarily.
                     Whitespace is transformed into underscores, and then
                     anything which is not a hyphen or a character that
                     matches \w (alphanumerics and underscore) is removed.
                     """
                     # Transform all whitespace to underscore
                     idstring = re.sub(r'\s', "_", '%s' % idstring)
                     # Remove everything that is not a hyphen or a member of \w
                     idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                     return idstring
                 @classmethod
                 def diff_splitter(cls, string):
                     """
                     Lazily split ``string`` into lines, breaking on line feeds only.

                     Each yielded line keeps its trailing line feed. A final line that
                     has no line feed is yielded without one. Nothing is yielded for an
                     empty (falsy) input. Lines are passed through ``safe_unicode``.
                     """
                     if not string:
                         return
                     if string == '\n':
                         yield u'\n'
                         return
                     ends_with_newline = string.endswith('\n')
                     parts = string.split('\n')
                     if ends_with_newline:
                         # the split leaves an empty string after the final newline
                         parts.pop()
                     last_index = len(parts) - 1
                     for index, part in enumerate(parts):
                         if index == last_index and not ends_with_newline:
                             yield safe_unicode(part)
                         else:
                             yield safe_unicode(part) + '\n'
                 def prepare(self, inline_diff=True):
                     """
                     Run the configured parser and remember its result.

                     Stores the parser output in ``self.parsed_diff`` and sets
                     ``self.parsed`` to ``True``.

                     :param inline_diff: forwarded to ``self._parser`` as a keyword.
                     :return: whatever ``self._parser`` returned.
                     """
                     result = self._parser(inline_diff=inline_diff)
                     self.parsed_diff = result
                     self.parsed = True
                     return result
                 def as_raw(self, diff_lines=None):
                     """
                     Return the ``raw`` attribute of the wrapped diff object.

                     :param diff_lines: accepted but not used.
                     """
                     wrapped_diff = self._diff
                     return wrapped_diff.raw
                 def as_html(self, table_class='code-difftable', line_class='line',
                             old_lineno_class='lineno old', new_lineno_class='lineno new',
                             code_class='code', enable_comments=False, parsed_lines=None):
                     """
                     Return given diff as html table with customized css classes

                     Calls ``self.prepare()`` first when the diff is not parsed yet.

                     :param table_class: CSS class of the ``<table>`` element.
                     :param line_class: CSS class put on every ``<tr>``, next to the
                         action of the line.
                     :param old_lineno_class: CSS class of the old line number cell.
                     :param new_lineno_class: CSS class of the new line number cell.
                     :param code_class: CSS class of the code cell.
                     :param enable_comments: when true, non-context lines get an
                         "add comment" link and their code cell is not marked
                         ``no-comment``.
                     :param parsed_lines: when truthy, rendered instead of
                         ``self.parsed_diff``.
                     :return: the HTML as one string, or ``None`` when no chunk was
                         encountered.
                     """
                     # TODO(marcink): not sure how to pass in translator
                     # here in an efficient way, leave the _ for proper gettext extraction
                     _ = lambda s: s
                     def _link_to_if(condition, label, url):
                         """
                         Generates a link if condition is meet or just the label if not.
                         """
                         if condition:
                             return '''<a href="%(url)s" class="tooltip"
                             title="%(title)s">%(label)s</a>''' % {
                                 'title': _('Click to select line'),
                                 'url': url,
                                 'label': label
                             }
                         else:
                             return label
                     if not self.parsed:
                         self.prepare()
                     diff_lines = self.parsed_diff
                     if parsed_lines:
                         diff_lines = parsed_lines
                     # stays True until at least one chunk has been seen
                     _html_empty = True
                     _html = []
                     _html.append('''<table class="%(table_class)s">\n''' % {
                         'table_class': table_class
                     })
                     for diff in diff_lines:
                         # each chunk is iterated as a sequence of line dicts
                         for line in diff['chunks']:
                             _html_empty = False
                             for change in line:
                                 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                                     'lc': line_class,
                                     'action': change['action']
                                 })
                                 anchor_old_id = ''
                                 anchor_new_id = ''
                                 # anchors are built from the sanitised filename plus
                                 # an "_o"/"_n" prefixed line number
                                 anchor_old = "%(filename)s_o%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['old_lineno']
                                 }
                                 anchor_new = "%(filename)s_n%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['new_lineno']
                                 }
                                 # an id attribute is only emitted for a real line
                                 # number, not for '...' or an empty value
                                 cond_old = (change['old_lineno'] != '...' and
                                             change['old_lineno'])
                                 cond_new = (change['new_lineno'] != '...' and
                                             change['new_lineno'])
                                 if cond_old:
                                     anchor_old_id = 'id="%s"' % anchor_old
                                 if cond_new:
                                     anchor_new_id = 'id="%s"' % anchor_new
                                 # context rows show plain labels instead of links
                                 if change['action'] != Action.CONTEXT:
                                     anchor_link = True
                                 else:
                                     anchor_link = False
                                 ###########################################################
                                 # COMMENT ICONS
                                 ###########################################################
                                 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
                                 if enable_comments and change['action'] != Action.CONTEXT:
                                     _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
                                 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
                                 ###########################################################
                                 # OLD LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                                     'a_id': anchor_old_id,
                                     'olc': old_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(anchor_link, change['old_lineno'],
                                                         '#%s' % anchor_old)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # NEW LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                                     'a_id': anchor_new_id,
                                     'nlc': new_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(anchor_link, change['new_lineno'],
                                                         '#%s' % anchor_new)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # CODE
                                 ###########################################################
                                 code_classes = [code_class]
                                 if (not enable_comments or
                                         change['action'] == Action.CONTEXT):
                                     code_classes.append('no-comment')
                                 _html.append('\t<td class="%s">' % ' '.join(code_classes))
                                 # NOTE(review): the line text is inserted as-is here;
                                 # presumably it is escaped upstream - confirm
                                 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                                     'code': change['line']
                                 })
                                 _html.append('''\t</td>''')
                                 _html.append('''\n</tr>\n''')
                     _html.append('''</table>''')
                     if _html_empty:
                         return None
                     return ''.join(_html)
                 def stat(self):
                     """
                     Return the ``(adds, removes)`` pair stored on this instance.
                     """
                     added, removed = self.adds, self.removes
                     return added, removed
                 def get_context_of_line(
                         self, path, diff_line=None, context_before=3, context_after=3):
                     """
                     Return the diff lines surrounding ``diff_line`` in file ``path``.

                     :param path: filename to look up in the parsed diff.
                     :type diff_line: :class:`DiffLineNumber`
                     :param diff_line: line to locate; one of its two numbers has to
                         be ``None``.
                     :param context_before: number of chunk entries taken before the
                         located line.
                     :param context_after: number of chunk entries taken after the
                         located line.
                     :return: list of ``(action, line)`` tuples; entries that are not
                         diff content are left out.
                     :raises ValueError: if ``diff_line`` contains no ``None``.
                     """
                     assert self.parsed, "DiffProcessor is not initialized."
                     if None not in diff_line:
                         raise ValueError(
                             "Cannot specify both line numbers: {}".format(diff_line))
                     chunk, idx = self._find_chunk_line_index(
                         self._get_file_diff(path), diff_line)
                     window_start = max(idx - context_before, 0)
                     window_stop = idx + context_after + 1
                     line_contents = []
                     for entry in chunk[window_start:window_stop]:
                         if _is_diff_content(entry):
                             line_contents.append(_context_line(entry))
                     # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
                     # Once they are fixed, we can drop this line here.
                     if line_contents:
                         last_action, last_text = line_contents[-1]
                         line_contents[-1] = (
                             last_action, last_text.rstrip('\n') + '\n')
                     return line_contents
                 def find_context(self, path, context, offset=0):
                     """
                     Finds the given `context` inside of the diff.

                     Use the parameter `offset` to specify which offset the target line has
                     inside of the given `context`. This way the correct diff line will be
                     returned.

                     :param path: filename to look up in the parsed diff.
                     :param context: sequence of ``(action, line)`` tuples to search for.
                     :param offset: Shall be used to specify the offset of the main line
                         within the given `context`.
                     :return: list with one diff line number per match found.
                     :raises ValueError: if `offset` is negative or not smaller than
                         the length of `context`.
                     """
                     if offset < 0 or offset >= len(context):
                         raise ValueError(
                             "Only positive values up to the length of the context "
                             "minus one are allowed.")
                     matches = []
                     file_diff = self._get_file_diff(path)
                     for chunk in file_diff['chunks']:
                         context_iter = iter(context)
                         for line_idx, line in enumerate(chunk):
                             try:
                                 if _context_line(line) == next(context_iter):
                                     continue
                             except StopIteration:
                                 # the whole context matched on the preceding lines
                                 matches.append((line_idx, chunk))
                             # start over after a mismatch or a completed match
                             context_iter = iter(context)
                         # A match that ends on the last line of a chunk is only
                         # visible once the chunk is exhausted, so every chunk is
                         # checked here, with the index one past its last line.
                         try:
                             next(context_iter)
                         except StopIteration:
                             matches.append((len(chunk), chunk))
                     # recorded indexes point one past the end of each match
                     effective_offset = len(context) - offset
                     found_at_diff_lines = [
                         _line_to_diff_line_number(chunk[idx - effective_offset])
                         for idx, chunk in matches]
                     return found_at_diff_lines
                 def _get_file_diff(self, path):
                     for file_diff in self.parsed_diff:
                         if file_diff['filename'] == path:
                             break
                     else:
                         raise FileNotInDiffException("File {} not in diff".format(path))
                     return file_diff
                 def _find_chunk_line_index(self, file_diff, diff_line):
                     for chunk in file_diff['chunks']:
                         for idx, line in enumerate(chunk):
                             if line['old_lineno'] == diff_line.old:
                                 return chunk, idx
                             if line['new_lineno'] == diff_line.new:
                                 return chunk, idx
                     raise LineNotInDiffException(
                         "The line {} is not part of the diff.".format(diff_line))
             def _is_diff_content(line):
                 """Tell whether a parsed line is unmodified, added or deleted."""
                 content_actions = (Action.UNMODIFIED, Action.ADD, Action.DELETE)
                 return line['action'] in content_actions
             def _context_line(line):
                 return (line['action'], line['line'])
             # Pair of line numbers: ``old`` and ``new``.
             DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
             def _line_to_diff_line_number(line):
                 """
                 Build a ``DiffLineNumber`` from a parsed line dict.

                 Falsy line numbers (e.g. an empty string) are turned into ``None``.
                 """
                 return DiffLineNumber(
                     new=line['new_lineno'] or None,
                     old=line['old_lineno'] or None)
             class FileNotInDiffException(Exception):
                 """
                 Signals that the requested file is not part of the diff.

                 Raised when context is requested for a line of a file that the
                 given diff does not contain.
                 """
             class LineNotInDiffException(Exception):
                 """
                 Signals that the requested line is not part of the diff.

                 Raised when context is requested for a line of a file and the
                 given diff does not contain that line.
                 """
             class DiffLimitExceeded(Exception):
                 """Signals that a diff limit was exceeded."""
             # NOTE(marcink): if diffs.mako changes, this version probably needs to be
             # bumped. load_cached_diff purges cache files whose stored version
             # differs from this value.
             CURRENT_DIFF_VERSION = 'v4'
             def _cleanup_cache_file(cached_diff_file):
                 # cleanup file to not store it "damaged"
                 try:
                     os.remove(cached_diff_file)
                 except Exception:
                     log.exception('Failed to cleanup path %s', cached_diff_file)
+            def _get_compression_mode(cached_diff_file):
+                mode = 'bz2'
+                if 'mode:plain' in cached_diff_file:
+                    mode = 'plain'
+                elif 'mode:gzip' in cached_diff_file:
+                    mode = 'gzip'
+                return mode
             def cache_diff(cached_diff_file, diff, commits):
                 """
                 Pickle a diff and its commits into a cache file.

                 The file is written plain, gzip-compressed or bz2-compressed,
                 depending on the mode derived from the file path. Best effort: if
                 saving fails, the problem is logged and the file is removed.

                 :param cached_diff_file: path of the cache file to write.
                 :param diff: stored under the ``diff`` key.
                 :param commits: stored under the ``commits`` key.
                 """
                 compression_mode = _get_compression_mode(cached_diff_file)
                 struct = {
                     'version': CURRENT_DIFF_VERSION,
                     'diff': diff,
                     'commits': commits
                 }
                 start = time.time()
                 try:
                     if compression_mode == 'plain':
                         with open(cached_diff_file, 'wb') as f:
                             pickle.dump(struct, f)
                     elif compression_mode == 'gzip':
                         with gzip.GzipFile(cached_diff_file, 'wb') as f:
                             pickle.dump(struct, f)
                     else:
                         with bz2.BZ2File(cached_diff_file, 'wb') as f:
                             pickle.dump(struct, f)
                 except Exception:
                     log.warning('Failed to save cache', exc_info=True)
                     _cleanup_cache_file(cached_diff_file)
                     # nothing was saved, so do not report a successful save below
                     return
                 log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)
             def load_cached_diff(cached_diff_file):
                 """
                 Load a cached diff structure from a cache file.

                 The file is read plain, gzip-compressed or bz2-compressed, depending
                 on the mode derived from the file path.

                 :param cached_diff_file: path of the cache file to read.
                 :return: the stored dict, or a default dict (current version,
                     ``diff`` and ``commits`` set to ``None``) when the file is
                     missing, cannot be read, is empty, is not a dict, or carries
                     another version. In the last case the file is removed.
                 """
                 compression_mode = _get_compression_mode(cached_diff_file)
                 default_struct = {
                     'version': CURRENT_DIFF_VERSION,
                     'diff': None,
                     'commits': None
                 }
                 has_cache = os.path.isfile(cached_diff_file)
                 if not has_cache:
                     log.debug('Reading diff cache file failed %s', cached_diff_file)
                     return default_struct
                 data = None
                 start = time.time()
                 try:
                     # NOTE(review): unpickling executes whatever the file encodes;
                     # the cache storage has to be writable by trusted code only
                     if compression_mode == 'plain':
                         with open(cached_diff_file, 'rb') as f:
                             data = pickle.load(f)
                     elif compression_mode == 'gzip':
                         with gzip.GzipFile(cached_diff_file, 'rb') as f:
                             data = pickle.load(f)
                     else:
                         with bz2.BZ2File(cached_diff_file, 'rb') as f:
                             data = pickle.load(f)
                 except Exception:
                     log.warning('Failed to read diff cache file', exc_info=True)
                 if not data:
                     data = default_struct
                 if not isinstance(data, dict):
                     # old version of data ?
                     data = default_struct
                 # check version
                 if data.get('version') != CURRENT_DIFF_VERSION:
                     # purge cache
                     _cleanup_cache_file(cached_diff_file)
                     return default_struct
                 log.debug('Loaded diff cache from %s in %.4fs', cached_diff_file, time.time() - start)
                 return data
             def generate_diff_cache_key(*args):
                 """
                 Helper to generate a cache key using arguments

                 Every argument is converted with ``safe_str``, has ``/`` replaced
                 by ``_`` and is then joined with ``_``. An argument that converts
                 to an empty string is rendered as ``None``.

                 :param args: values that make up the key.
                 :return: the cache key string.
                 """
                 def arg_mapper(input_param):
                     input_param = safe_str(input_param)
                     # we cannot allow '/' in arguments since it would allow
                     # subdirectory usage; str.replace returns a new string, so the
                     # result has to be assigned back
                     input_param = input_param.replace('/', '_')
                     return input_param or None  # prevent empty string arguments
                 return '_'.join('{}'.format(arg_mapper(arg)) for arg in args)
             def diff_cache_exist(cache_storage, *args):
                 """
                 Based on all generated arguments check and return a cache path

                 ``mode:gzip`` is appended to the arguments before the key is built.

                 :param cache_storage: directory that holds the cache files.
                 :param args: values the cache key is generated from.
                 :return: path of the cache file inside ``cache_storage``.
                 :raises ValueError: if the resulting path is outside of
                     ``cache_storage``.
                 """
                 args = list(args) + ['mode:gzip']
                 cache_key = generate_diff_cache_key(*args)
                 cache_file_path = os.path.join(cache_storage, cache_key)
                 # prevent path traversal attacks using some param that have e.g '../../'
                 # Compare against the storage directory followed by a separator; a
                 # bare prefix check would also accept a sibling directory whose
                 # name merely starts with the storage name.
                 storage_root = os.path.join(os.path.abspath(cache_storage), '')
                 if not os.path.abspath(cache_file_path).startswith(storage_root):
                     raise ValueError('Final path must be within {}'.format(cache_storage))
                 return cache_file_path