u/pc/rhodecode-enterprise-ce-fork-pc Commit - r2690:01439ec4

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

3

4

#

4

#

5

# This program is free software: you can redistribute it and/or modify

5

# This program is free software: you can redistribute it and/or modify

6

# it under the terms of the GNU Affero General Public License, version 3

6

# it under the terms of the GNU Affero General Public License, version 3

7

# (only), as published by the Free Software Foundation.

7

# (only), as published by the Free Software Foundation.

8

#

8

#

9

# This program is distributed in the hope that it will be useful,

9

# This program is distributed in the hope that it will be useful,

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12

# GNU General Public License for more details.

12

# GNU General Public License for more details.

13

#

13

#

14

# You should have received a copy of the GNU Affero General Public License

14

# You should have received a copy of the GNU Affero General Public License

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

16

#

16

#

17

# This program is dual-licensed. If you wish to learn more about the

17

# This program is dual-licensed. If you wish to learn more about the

18

# RhodeCode Enterprise Edition, including its added features, Support services,

18

# RhodeCode Enterprise Edition, including its added features, Support services,

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

20

21

22

"""

22

"""

23

Set of diffing helpers, previously part of vcs

23

Set of diffing helpers, previously part of vcs

24

"""

24

"""

25

26

import os

26

import os

27

import re

27

import re

28

import bz2

29

28

import collections

30

import collections

29

import difflib

31

import difflib

30

import logging

32

import logging

31

import cPickle as pickle

33

import cPickle as pickle

32

33

from itertools import tee, imap

34

from itertools import tee, imap

34

35

from rhodecode.lib.vcs.exceptions import VCSError

36

from rhodecode.lib.vcs.exceptions import VCSError

36

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

37

from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

37

from rhodecode.lib.utils2 import safe_unicode, safe_str

38

from rhodecode.lib.utils2 import safe_unicode, safe_str

38

39

log = logging.getLogger(__name__)

# Upper bound for the ``context`` argument of ``get_gitdiff``: a file with
# more than this number of lines is unusable in a browser anyway.
# NOTE(review): ``1014`` looks like a typo for ``1024``; the value is kept
# unchanged here -- confirm before altering it.
MAX_CONTEXT = 1024 * 1014

44

45

46

class OPS(object):
    """
    Single-letter operation codes stored under the ``'operation'`` key of
    each parsed file entry.
    """

    ADD = 'A'
    MOD = 'M'
    DEL = 'D'

50

51

52

def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: ``FileNode`` for the old side of the diff
    :param filenode_new: ``FileNode`` for the new side of the diff; the
        repository of its commit is the one asked to produce the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value falls back to 3
        and anything above ``MAX_CONTEXT`` is clamped to ``MAX_CONTEXT``
    :returns: whatever ``repo.get_diff`` returns, or ``''`` when either
        node is a ``SubModuleNode``
    :raises VCSError: when either node is not a ``FileNode``
    """
    # make sure we pass in default context
    context = context or 3
    # protect against IntOverflow when passing HUGE context
    if context > MAX_CONTEXT:
        context = MAX_CONTEXT

    # ``any`` instead of testing the result of ``filter``: a ``filter``
    # object is always truthy on Python 3, a list only when non-empty.
    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError(
                "Given object should be FileNode object, not %s"
                % filenode.__class__)

    repo = filenode_new.commit.repository
    # the old node may have no commit; diff against the empty commit then
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT
    new_commit = filenode_new.commit

    vcs_gitdiff = repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
    return vcs_gitdiff

83

84

# Keys of the per-file ``stats['ops']`` mapping filled in by the diff parser;
# each key maps to a human readable description of that operation.
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
COPIED_FILENODE = 5
CHMOD_FILENODE = 6
BIN_FILENODE = 7

91

92

93

class LimitedDiffContainer(object):
    """
    Wrapper around a parsed diff that also carries the limit information.

    Indexing and iteration are delegated to the wrapped ``diff`` object.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        """
        :param diff_limit: the diff size limit that was in effect
        :param cur_diff_size: the diff size counted so far
        :param diff: the wrapped diff data
        """
        self.diff = diff
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size

    def __getitem__(self, key):
        return self.diff[key]

    def __iter__(self):
        for item in self.diff:
            yield item

106

107

108

class Action(object):
    """
    Contains constants for the action value of the lines in a parsed diff.
    """

    ADD = 'add'
    DELETE = 'del'
    UNMODIFIED = 'unmod'

    CONTEXT = 'context'
    OLD_NO_NL = 'old-no-nl'
    NEW_NO_NL = 'new-no-nl'

120

121

122

class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.

    .. note:: Unicode handling

       The original diffs are a byte sequence and can contain filenames
       in mixed encodings. This class generally returns `unicode` objects
       since the result is intended for presentation to the user.

    """
    # hunk header: ``@@ -old_start[,old_len] +new_start[,new_len] @@ rest``
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    _newline_marker = re.compile(r'^\\ No newline at end of file')

    # used for inline highlighter word split; the entity alternatives keep
    # an HTML-escaped character together as one token instead of splitting
    # it at ``&`` and ``;``
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

    # collapse ranges of commits over given number
    _collapse_commits_over = 5

143

144

def __init__(self, diff, format='gitdiff', diff_limit=None,
             file_limit=None, show_full_diff=True):
    """
    :param diff: A `Diff` object representing a diff from a vcs backend
    :param format: format of diff passed, `udiff` or `gitdiff`
    :param diff_limit: define the size of diff that is considered "big"
        based on that parameter cut off will be triggered, set to None
        to show full diff
    :param file_limit: size the raw diff of a single file is compared
        against when deciding whether that file exceeds the limit
    :param show_full_diff: when true the limits above are not enforced
    """
    self._diff = diff
    self._format = format
    self.adds = 0
    self.removes = 0
    # calculate diff size
    self.diff_limit = diff_limit
    self.file_limit = file_limit
    self.show_full_diff = show_full_diff
    self.cur_diff_size = 0
    self.parsed = False
    self.parsed_diff = []

    log.debug('Initialized DiffProcessor with %s mode', format)
    # pick the inline highlighter and the parser matching the format
    if format == 'gitdiff':
        self.differ = self._highlight_line_difflib
        self._parser = self._parse_gitdiff
    else:
        self.differ = self._highlight_line_udiff
        self._parser = self._new_parse_gitdiff

172

173

def _copy_iterator(self):

174

def _copy_iterator(self):

174

"""

175

"""

175

make a fresh copy of generator, we should not iterate thru

176

make a fresh copy of generator, we should not iterate thru

176

an original as it's needed for repeating operations on

177

an original as it's needed for repeating operations on

177

this instance of DiffProcessor

178

this instance of DiffProcessor

178

"""

179

"""

179

self.__udiff, iterator_copy = tee(self.__udiff)

180

self.__udiff, iterator_copy = tee(self.__udiff)

180

return iterator_copy

181

return iterator_copy

181

182

def _escaper(self, string):

183

def _escaper(self, string):

183

"""

184

"""

184

Escaper for diff escapes special chars and checks the diff limit

185

Escaper for diff escapes special chars and checks the diff limit

185

186

:param string:

187

:param string:

187

"""

188

"""

188

self.cur_diff_size += len(string)

189

self.cur_diff_size += len(string)

189

190

if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

191

if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

191

raise DiffLimitExceeded('Diff Limit Exceeded')

192

raise DiffLimitExceeded('Diff Limit Exceeded')

192

193

return string \

194

return string \

194

.replace('&', '&')\

195

.replace('&', '&')\

195

.replace('<', '<')\

196

.replace('<', '<')\

196

.replace('>', '>')

197

.replace('>', '>')

197

198

def _line_counter(self, l):
    """
    Checks each line and bumps total adds/removes for this diff

    Lines starting with ``+++`` or ``---`` are not counted.

    :param l: a single diff line
    :returns: ``l`` passed through ``safe_unicode``
    """
    if l.startswith('+') and not l.startswith('+++'):
        self.adds += 1
    elif l.startswith('-') and not l.startswith('---'):
        self.removes += 1
    return safe_unicode(l)

209

210

def _highlight_line_difflib(self, line, next_):
    """
    Highlight inline changes in both lines.

    Both ``'line'`` values are split into tokens with ``_token_re`` and
    compared with ``difflib.SequenceMatcher``; every non-equal fragment is
    wrapped in ``<del>`` on the old line and ``<ins>`` on the new line.
    Both dicts are modified in place.

    :param line: parsed line dict with ``'action'`` and ``'line'`` keys
    :param next_: the parsed line dict paired with ``line``
    """
    # whichever of the two is the deletion is the "old" side
    if line['action'] == Action.DELETE:
        old, new = line, next_
    else:
        old, new = next_, line

    oldwords = self._token_re.split(old['line'])
    newwords = self._token_re.split(new['line'])
    sequence = difflib.SequenceMatcher(None, oldwords, newwords)

    oldfragments, newfragments = [], []
    for tag, i1, i2, j1, j2 in sequence.get_opcodes():
        oldfrag = ''.join(oldwords[i1:i2])
        newfrag = ''.join(newwords[j1:j2])
        if tag != 'equal':
            # empty fragments are left unwrapped
            if oldfrag:
                oldfrag = '<del>%s</del>' % oldfrag
            if newfrag:
                newfrag = '<ins>%s</ins>' % newfrag
        oldfragments.append(oldfrag)
        newfragments.append(newfrag)

    old['line'] = "".join(oldfragments)
    new['line'] = "".join(newfragments)

238

239

def _highlight_line_udiff(self, line, next_):

240

def _highlight_line_udiff(self, line, next_):

240

"""

241

"""

241

Highlight inline changes in both lines.

242

Highlight inline changes in both lines.

242

"""

243

"""

243

start = 0

244

start = 0

244

limit = min(len(line['line']), len(next_['line']))

245

limit = min(len(line['line']), len(next_['line']))

245

while start < limit and line['line'][start] == next_['line'][start]:

246

while start < limit and line['line'][start] == next_['line'][start]:

246

start += 1

247

start += 1

247

end = -1

248

end = -1

248

limit -= start

249

limit -= start

249

while -end <= limit and line['line'][end] == next_['line'][end]:

250

while -end <= limit and line['line'][end] == next_['line'][end]:

250

end -= 1

251

end -= 1

251

end += 1

252

end += 1

252

if start or end:

253

if start or end:

253

def do(l):

254

def do(l):

254

last = end + len(l['line'])

255

last = end + len(l['line'])

255

if l['action'] == Action.ADD:

256

if l['action'] == Action.ADD:

256

tag = 'ins'

257

tag = 'ins'

257

else:

258

else:

258

tag = 'del'

259

tag = 'del'

259

l['line'] = '%s<%s>%s</%s>%s' % (

260

l['line'] = '%s<%s>%s</%s>%s' % (

260

l['line'][:start],

261

l['line'][:start],

261

tag,

262

tag,

262

l['line'][start:last],

263

l['line'][start:last],

263

tag,

264

tag,

264

l['line'][last:]

265

l['line'][last:]

265

)

266

)

266

do(line)

267

do(line)

267

do(next_)

268

do(next_)

268

269

def _clean_line(self, line, command):

270

def _clean_line(self, line, command):

270

if command in ['+', '-', ' ']:

271

if command in ['+', '-', ' ']:

271

# only modify the line if it's actually a diff thing

272

# only modify the line if it's actually a diff thing

272

line = line[1:]

273

line = line[1:]

273

return line

274

return line

274

275

def _parse_gitdiff(self, inline_diff=True):
    """
    Parse every chunk of ``self._diff`` into a per-file dict.

    Each entry holds ``filename``, ``old_revision``, ``new_revision``,
    ``chunks``, ``raw_diff``, ``operation``, ``stats``, ``exceeds_limit``
    and ``is_limited_diff``. The first element of ``chunks`` is always a
    list of context lines describing the detected operations.

    :param inline_diff: when true, adjacent changed lines of different
        actions are passed pairwise to ``self.differ`` for inline
        highlighting
    :returns: the file entries sorted added, modified, deleted; wrapped in
        a ``LimitedDiffContainer`` if any file hit ``DiffLimitExceeded``
    """
    _files = []
    diff_container = lambda arg: arg

    for chunk in self._diff.chunks():
        head = chunk.header

        # lazy on purpose: ``_escaper`` may raise DiffLimitExceeded, which
        # has to surface inside the ``try`` below while lines are consumed
        diff = (self._escaper(part)
                for part in self.diff_splitter(chunk.diff))
        raw_diff = chunk.raw
        limited_diff = False
        exceeds_limit = False

        op = None
        stats = {
            'added': 0,
            'deleted': 0,
            'binary': False,
            'ops': {},
        }

        if head['deleted_file_mode']:
            op = OPS.DEL
            stats['binary'] = True
            stats['ops'][DEL_FILENODE] = 'deleted file'

        elif head['new_file_mode']:
            op = OPS.ADD
            stats['binary'] = True
            stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
        else:  # modify operation, can be copy, rename or chmod

            # CHMOD
            if head['new_mode'] and head['old_mode']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][CHMOD_FILENODE] = (
                    'modified file chmod %s => %s' % (
                        head['old_mode'], head['new_mode']))
            # RENAME
            if head['rename_from'] != head['rename_to']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][RENAMED_FILENODE] = (
                    'file renamed from %s to %s' % (
                        head['rename_from'], head['rename_to']))
            # COPY
            if head.get('copy_from') and head.get('copy_to'):
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][COPIED_FILENODE] = (
                    'file copied from %s to %s' % (
                        head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

            # it's not ADD not DELETE
            if op is None:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][MOD_FILENODE] = 'modified file'

        # a real non-binary diff
        if head['a_file'] or head['b_file']:
            try:
                raw_diff, chunks, _stats = self._parse_lines(diff)
                stats['binary'] = False
                stats['added'] = _stats[0]
                stats['deleted'] = _stats[1]
                # explicit mark that it's a modified file
                if op == OPS.MOD:
                    stats['ops'][MOD_FILENODE] = 'modified file'
                exceeds_limit = len(raw_diff) > self.file_limit

                # changed from _escaper function so we validate size of
                # each file instead of the whole diff
                # diff will hide big files but still show small ones
                # from my tests, big files are fairly safe to be parsed
                # but the browser is the bottleneck
                if not self.show_full_diff and exceeds_limit:
                    raise DiffLimitExceeded('File Limit Exceeded')

            except DiffLimitExceeded:
                diff_container = lambda _diff: \
                    LimitedDiffContainer(
                        self.diff_limit, self.cur_diff_size, _diff)

                exceeds_limit = len(raw_diff) > self.file_limit
                limited_diff = True
                chunks = []

        else:  # GIT format binary patch, or possibly empty diff
            if head['bin_patch']:
                # we have operation already extracted, but we mark simply
                # it's a diff we wont show for binary files
                stats['ops'][BIN_FILENODE] = 'binary diff hidden'
            chunks = []

        if chunks and not self.show_full_diff and op == OPS.DEL:
            # if not full diff mode show deleted file contents
            # TODO: anderson: if the view is not too big, there is no way
            # to see the content of the file
            chunks = []

        # prepend one pseudo-chunk listing every detected operation except
        # the plain "modified file" marker
        chunks.insert(0, [{
            'old_lineno': '',
            'new_lineno': '',
            'action': Action.CONTEXT,
            'line': msg,
        } for _op, msg in stats['ops'].items()
            if _op not in [MOD_FILENODE]])

        _files.append({
            'filename': safe_unicode(head['b_path']),
            'old_revision': head['a_blob_id'],
            'new_revision': head['b_blob_id'],
            'chunks': chunks,
            'raw_diff': safe_unicode(raw_diff),
            'operation': op,
            'stats': stats,
            'exceeds_limit': exceeds_limit,
            'is_limited_diff': limited_diff,
        })

    sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                           OPS.DEL: 2}.get(info['operation'])

    if not inline_diff:
        return diff_container(sorted(_files, key=sorter))

    # highlight inline changes
    unchanged = (Action.UNMODIFIED, Action.CONTEXT)
    for diff_data in _files:
        for chunk in diff_data['chunks']:
            lineiter = iter(chunk)
            try:
                while True:
                    line = next(lineiter)
                    if line['action'] in unchanged:
                        continue
                    # a changed line: pair it with the line that follows
                    nextline = next(lineiter)
                    if nextline['action'] in unchanged or \
                            nextline['action'] == line['action']:
                        continue
                    self.differ(line, nextline)
            except StopIteration:
                pass

    return diff_container(sorted(_files, key=sorter))

433

434

def _check_large_diff(self):

435

def _check_large_diff(self):

435

log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)

436

log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)

436

if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

437

if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

437

raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)

438

raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)

438

439

# FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff

440

# FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff

440

def _new_parse_gitdiff(self, inline_diff=True):

441

def _new_parse_gitdiff(self, inline_diff=True):

441

_files = []

442

_files = []

442

443

# this can be overriden later to a LimitedDiffContainer type

444

# this can be overriden later to a LimitedDiffContainer type

444

diff_container = lambda arg: arg

445

diff_container = lambda arg: arg

445

446

for chunk in self._diff.chunks():

447

for chunk in self._diff.chunks():

447

head = chunk.header

448

head = chunk.header

448

log.debug('parsing diff %r' % head)

449

log.debug('parsing diff %r' % head)

449

450

raw_diff = chunk.raw

451

raw_diff = chunk.raw

451

limited_diff = False

452

limited_diff = False

452

exceeds_limit = False

453

exceeds_limit = False

453

454

op = None

455

op = None

455

stats = {

456

stats = {

456

'added': 0,

457

'added': 0,

457

'deleted': 0,

458

'deleted': 0,

458

'binary': False,

459

'binary': False,

459

'old_mode': None,

460

'old_mode': None,

460

'new_mode': None,

461

'new_mode': None,

461

'ops': {},

462

'ops': {},

462

}

463

}

463

if head['old_mode']:

464

if head['old_mode']:

464

stats['old_mode'] = head['old_mode']

465

stats['old_mode'] = head['old_mode']

465

if head['new_mode']:

466

if head['new_mode']:

466

stats['new_mode'] = head['new_mode']

467

stats['new_mode'] = head['new_mode']

467

if head['b_mode']:

468

if head['b_mode']:

468

stats['new_mode'] = head['b_mode']

469

stats['new_mode'] = head['b_mode']

469

470

# delete file

471

# delete file

471

if head['deleted_file_mode']:

472

if head['deleted_file_mode']:

472

op = OPS.DEL

473

op = OPS.DEL

473

stats['binary'] = True

474

stats['binary'] = True

474

stats['ops'][DEL_FILENODE] = 'deleted file'

475

stats['ops'][DEL_FILENODE] = 'deleted file'

475

476

# new file

477

# new file

477

elif head['new_file_mode']:

478

elif head['new_file_mode']:

478

op = OPS.ADD

479

op = OPS.ADD

479

stats['binary'] = True

480

stats['binary'] = True

480

stats['old_mode'] = None

481

stats['old_mode'] = None

481

stats['new_mode'] = head['new_file_mode']

482

stats['new_mode'] = head['new_file_mode']

482

stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

483

stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

483

484

# modify operation, can be copy, rename or chmod

485

# modify operation, can be copy, rename or chmod

485

else:

486

else:

486

# CHMOD

487

# CHMOD

487

if head['new_mode'] and head['old_mode']:

488

if head['new_mode'] and head['old_mode']:

488

op = OPS.MOD

489

op = OPS.MOD

489

stats['binary'] = True

490

stats['binary'] = True

490

stats['ops'][CHMOD_FILENODE] = (

491

stats['ops'][CHMOD_FILENODE] = (

491

'modified file chmod %s => %s' % (

492

'modified file chmod %s => %s' % (

492

head['old_mode'], head['new_mode']))

493

head['old_mode'], head['new_mode']))

493

494

# RENAME

495

# RENAME

495

if head['rename_from'] != head['rename_to']:

496

if head['rename_from'] != head['rename_to']:

496

op = OPS.MOD

497

op = OPS.MOD

497

stats['binary'] = True

498

stats['binary'] = True

498

stats['renamed'] = (head['rename_from'], head['rename_to'])

499

stats['renamed'] = (head['rename_from'], head['rename_to'])

499

stats['ops'][RENAMED_FILENODE] = (

500

stats['ops'][RENAMED_FILENODE] = (

500

'file renamed from %s to %s' % (

501

'file renamed from %s to %s' % (

501

head['rename_from'], head['rename_to']))

502

head['rename_from'], head['rename_to']))

502

# COPY

503

# COPY

503

if head.get('copy_from') and head.get('copy_to'):

504

if head.get('copy_from') and head.get('copy_to'):

504

op = OPS.MOD

505

op = OPS.MOD

505

stats['binary'] = True

506

stats['binary'] = True

506

stats['copied'] = (head['copy_from'], head['copy_to'])

507

stats['copied'] = (head['copy_from'], head['copy_to'])

507

stats['ops'][COPIED_FILENODE] = (

508

stats['ops'][COPIED_FILENODE] = (

508

'file copied from %s to %s' % (

509

'file copied from %s to %s' % (

509

head['copy_from'], head['copy_to']))

510

head['copy_from'], head['copy_to']))

510

511

# If our new parsed headers didn't match anything fallback to

512

# If our new parsed headers didn't match anything fallback to

512

# old style detection

513

# old style detection

513

if op is None:

514

if op is None:

514

if not head['a_file'] and head['b_file']:

515

if not head['a_file'] and head['b_file']:

515

op = OPS.ADD

516

op = OPS.ADD

516

stats['binary'] = True

517

stats['binary'] = True

517

stats['new_file'] = True

518

stats['new_file'] = True

518

stats['ops'][NEW_FILENODE] = 'new file'

519

stats['ops'][NEW_FILENODE] = 'new file'

519

520

elif head['a_file'] and not head['b_file']:

521

elif head['a_file'] and not head['b_file']:

521

op = OPS.DEL

522

op = OPS.DEL

522

stats['binary'] = True

523

stats['binary'] = True

523

stats['ops'][DEL_FILENODE] = 'deleted file'

524

stats['ops'][DEL_FILENODE] = 'deleted file'

524

525

# it's not ADD not DELETE

526

# it's not ADD not DELETE

526

if op is None:

527

if op is None:

527

op = OPS.MOD

528

op = OPS.MOD

528

stats['binary'] = True

529

stats['binary'] = True

529

stats['ops'][MOD_FILENODE] = 'modified file'

530

stats['ops'][MOD_FILENODE] = 'modified file'

530

531

# a real non-binary diff

532

# a real non-binary diff

532

if head['a_file'] or head['b_file']:

533

if head['a_file'] or head['b_file']:

533

# simulate splitlines, so we keep the line end part

534

# simulate splitlines, so we keep the line end part

534

diff = self.diff_splitter(chunk.diff)

535

diff = self.diff_splitter(chunk.diff)

535

536

# append each file to the diff size

537

# append each file to the diff size

537

raw_chunk_size = len(raw_diff)

538

raw_chunk_size = len(raw_diff)

538

539

exceeds_limit = raw_chunk_size > self.file_limit

540

exceeds_limit = raw_chunk_size > self.file_limit

540

self.cur_diff_size += raw_chunk_size

541

self.cur_diff_size += raw_chunk_size

541

542

try:

543

try:

543

# Check each file instead of the whole diff.

544

# Check each file instead of the whole diff.

544

# Diff will hide big files but still show small ones.

545

# Diff will hide big files but still show small ones.

545

# From the tests big files are fairly safe to be parsed

546

# From the tests big files are fairly safe to be parsed

546

# but the browser is the bottleneck.

547

# but the browser is the bottleneck.

547

if not self.show_full_diff and exceeds_limit:

548

if not self.show_full_diff and exceeds_limit:

548

log.debug('File `%s` exceeds current file_limit of %s',

549

log.debug('File `%s` exceeds current file_limit of %s',

549

safe_unicode(head['b_path']), self.file_limit)

550

safe_unicode(head['b_path']), self.file_limit)

550

raise DiffLimitExceeded(

551

raise DiffLimitExceeded(

551

'File Limit %s Exceeded', self.file_limit)

552

'File Limit %s Exceeded', self.file_limit)

552

553

self._check_large_diff()

554

self._check_large_diff()

554

555

raw_diff, chunks, _stats = self._new_parse_lines(diff)

556

raw_diff, chunks, _stats = self._new_parse_lines(diff)

556

stats['binary'] = False

557

stats['binary'] = False

557

stats['added'] = _stats[0]

558

stats['added'] = _stats[0]

558

stats['deleted'] = _stats[1]

559

stats['deleted'] = _stats[1]

559

# explicit mark that it's a modified file

560

# explicit mark that it's a modified file

560

if op == OPS.MOD:

561

if op == OPS.MOD:

561

stats['ops'][MOD_FILENODE] = 'modified file'

562

stats['ops'][MOD_FILENODE] = 'modified file'

562

563

except DiffLimitExceeded:

564

except DiffLimitExceeded:

564

diff_container = lambda _diff: \

565

diff_container = lambda _diff: \

565

LimitedDiffContainer(

566

LimitedDiffContainer(

566

self.diff_limit, self.cur_diff_size, _diff)

567

self.diff_limit, self.cur_diff_size, _diff)

567

568

limited_diff = True

569

limited_diff = True

569

chunks = []

570

chunks = []

570

571

else: # GIT format binary patch, or possibly empty diff

572

else: # GIT format binary patch, or possibly empty diff

572

if head['bin_patch']:

573

if head['bin_patch']:

573

# we have operation already extracted, but we mark simply

574

# we have operation already extracted, but we mark simply

574

# it's a diff we wont show for binary files

575

# it's a diff we wont show for binary files

575

stats['ops'][BIN_FILENODE] = 'binary diff hidden'

576

stats['ops'][BIN_FILENODE] = 'binary diff hidden'

576

chunks = []

577

chunks = []

577

578

# Hide content of deleted node by setting empty chunks

579

# Hide content of deleted node by setting empty chunks

579

if chunks and not self.show_full_diff and op == OPS.DEL:

580

if chunks and not self.show_full_diff and op == OPS.DEL:

580

# if not full diff mode show deleted file contents

581

# if not full diff mode show deleted file contents

581

# TODO: anderson: if the view is not too big, there is no way

582

# TODO: anderson: if the view is not too big, there is no way

582

# to see the content of the file

583

# to see the content of the file

583

chunks = []

584

chunks = []

584

585

chunks.insert(

586

chunks.insert(

586

0, [{'old_lineno': '',

587

0, [{'old_lineno': '',

587

'new_lineno': '',

588

'new_lineno': '',

588

'action': Action.CONTEXT,

589

'action': Action.CONTEXT,

589

'line': msg,

590

'line': msg,

590

} for _op, msg in stats['ops'].iteritems()

591

} for _op, msg in stats['ops'].iteritems()

591

if _op not in [MOD_FILENODE]])

592

if _op not in [MOD_FILENODE]])

592

593

original_filename = safe_unicode(head['a_path'])

594

original_filename = safe_unicode(head['a_path'])

594

_files.append({

595

_files.append({

595

'original_filename': original_filename,

596

'original_filename': original_filename,

596

'filename': safe_unicode(head['b_path']),

597

'filename': safe_unicode(head['b_path']),

597

'old_revision': head['a_blob_id'],

598

'old_revision': head['a_blob_id'],

598

'new_revision': head['b_blob_id'],

599

'new_revision': head['b_blob_id'],

599

'chunks': chunks,

600

'chunks': chunks,

600

'raw_diff': safe_unicode(raw_diff),

601

'raw_diff': safe_unicode(raw_diff),

601

'operation': op,

602

'operation': op,

602

'stats': stats,

603

'stats': stats,

603

'exceeds_limit': exceeds_limit,

604

'exceeds_limit': exceeds_limit,

604

'is_limited_diff': limited_diff,

605

'is_limited_diff': limited_diff,

605

})

606

})

606

607

sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,

608

sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,

608

OPS.DEL: 2}.get(info['operation'])

609

OPS.DEL: 2}.get(info['operation'])

609

610

return diff_container(sorted(_files, key=sorter))

611

return diff_container(sorted(_files, key=sorter))

611

612

# FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines

613

# FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines

613

def _parse_lines(self, diff_iter):

614

def _parse_lines(self, diff_iter):

614

"""

615

"""

615

Parse the diff an return data for the template.

616

Parse the diff an return data for the template.

616

"""

617

"""

617

618

stats = [0, 0]

619

stats = [0, 0]

619

chunks = []

620

chunks = []

620

raw_diff = []

621

raw_diff = []

621

622

try:

623

try:

623

line = diff_iter.next()

624

line = diff_iter.next()

624

625

while line:

626

while line:

626

raw_diff.append(line)

627

raw_diff.append(line)

627

lines = []

628

lines = []

628

chunks.append(lines)

629

chunks.append(lines)

629

630

match = self._chunk_re.match(line)

631

match = self._chunk_re.match(line)

631

632

if not match:

633

if not match:

633

break

634

break

634

635

gr = match.groups()

636

gr = match.groups()

636

(old_line, old_end,

637

(old_line, old_end,

637

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

638

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

638

old_line -= 1

639

old_line -= 1

639

new_line -= 1

640

new_line -= 1

640

641

context = len(gr) == 5

642

context = len(gr) == 5

642

old_end += old_line

643

old_end += old_line

643

new_end += new_line

644

new_end += new_line

644

645

if context:

646

if context:

646

# skip context only if it's first line

647

# skip context only if it's first line

647

if int(gr[0]) > 1:

648

if int(gr[0]) > 1:

648

lines.append({

649

lines.append({

649

'old_lineno': '...',

650

'old_lineno': '...',

650

'new_lineno': '...',

651

'new_lineno': '...',

651

'action': Action.CONTEXT,

652

'action': Action.CONTEXT,

652

'line': line,

653

'line': line,

653

})

654

})

654

655

line = diff_iter.next()

656

line = diff_iter.next()

656

657

while old_line < old_end or new_line < new_end:

658

while old_line < old_end or new_line < new_end:

658

command = ' '

659

command = ' '

659

if line:

660

if line:

660

command = line[0]

661

command = line[0]

661

662

affects_old = affects_new = False

663

affects_old = affects_new = False

663

664

# ignore those if we don't expect them

665

# ignore those if we don't expect them

665

if command in '#@':

666

if command in '#@':

666

continue

667

continue

667

elif command == '+':

668

elif command == '+':

668

affects_new = True

669

affects_new = True

669

action = Action.ADD

670

action = Action.ADD

670

stats[0] += 1

671

stats[0] += 1

671

elif command == '-':

672

elif command == '-':

672

affects_old = True

673

affects_old = True

673

action = Action.DELETE

674

action = Action.DELETE

674

stats[1] += 1

675

stats[1] += 1

675

else:

676

else:

676

affects_old = affects_new = True

677

affects_old = affects_new = True

677

action = Action.UNMODIFIED

678

action = Action.UNMODIFIED

678

679

if not self._newline_marker.match(line):

680

if not self._newline_marker.match(line):

680

old_line += affects_old

681

old_line += affects_old

681

new_line += affects_new

682

new_line += affects_new

682

lines.append({

683

lines.append({

683

'old_lineno': affects_old and old_line or '',

684

'old_lineno': affects_old and old_line or '',

684

'new_lineno': affects_new and new_line or '',

685

'new_lineno': affects_new and new_line or '',

685

'action': action,

686

'action': action,

686

'line': self._clean_line(line, command)

687

'line': self._clean_line(line, command)

687

})

688

})

688

raw_diff.append(line)

689

raw_diff.append(line)

689

690

line = diff_iter.next()

691

line = diff_iter.next()

691

692

if self._newline_marker.match(line):

693

if self._newline_marker.match(line):

693

# we need to append to lines, since this is not

694

# we need to append to lines, since this is not

694

# counted in the line specs of diff

695

# counted in the line specs of diff

695

lines.append({

696

lines.append({

696

'old_lineno': '...',

697

'old_lineno': '...',

697

'new_lineno': '...',

698

'new_lineno': '...',

698

'action': Action.CONTEXT,

699

'action': Action.CONTEXT,

699

'line': self._clean_line(line, command)

700

'line': self._clean_line(line, command)

700

})

701

})

701

702

except StopIteration:

703

except StopIteration:

703

pass

704

pass

704

return ''.join(raw_diff), chunks, stats

705

return ''.join(raw_diff), chunks, stats

705

706

# FIXME: NEWDIFFS: dan: this replaces _parse_lines

707

# FIXME: NEWDIFFS: dan: this replaces _parse_lines

707

def _new_parse_lines(self, diff_iter):

708

def _new_parse_lines(self, diff_iter):

708

"""

709

"""

709

Parse the diff an return data for the template.

710

Parse the diff an return data for the template.

710

"""

711

"""

711

712

stats = [0, 0]

713

stats = [0, 0]

713

chunks = []

714

chunks = []

714

raw_diff = []

715

raw_diff = []

715

716

try:

717

try:

717

line = diff_iter.next()

718

line = diff_iter.next()

718

719

while line:

720

while line:

720

raw_diff.append(line)

721

raw_diff.append(line)

721

# match header e.g @@ -0,0 +1 @@\n'

722

# match header e.g @@ -0,0 +1 @@\n'

722

match = self._chunk_re.match(line)

723

match = self._chunk_re.match(line)

723

724

if not match:

725

if not match:

725

break

726

break

726

727

gr = match.groups()

728

gr = match.groups()

728

(old_line, old_end,

729

(old_line, old_end,

729

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

730

new_line, new_end) = [int(x or 1) for x in gr[:-1]]

730

731

lines = []

732

lines = []

732

hunk = {

733

hunk = {

733

'section_header': gr[-1],

734

'section_header': gr[-1],

734

'source_start': old_line,

735

'source_start': old_line,

735

'source_length': old_end,

736

'source_length': old_end,

736

'target_start': new_line,

737

'target_start': new_line,

737

'target_length': new_end,

738

'target_length': new_end,

738

'lines': lines,

739

'lines': lines,

739

}

740

}

740

chunks.append(hunk)

741

chunks.append(hunk)

741

742

old_line -= 1

743

old_line -= 1

743

new_line -= 1

744

new_line -= 1

744

745

context = len(gr) == 5

746

context = len(gr) == 5

746

old_end += old_line

747

old_end += old_line

747

new_end += new_line

748

new_end += new_line

748

749

line = diff_iter.next()

750

line = diff_iter.next()

750

751

while old_line < old_end or new_line < new_end:

752

while old_line < old_end or new_line < new_end:

752

command = ' '

753

command = ' '

753

if line:

754

if line:

754

command = line[0]

755

command = line[0]

755

756

affects_old = affects_new = False

757

affects_old = affects_new = False

757

758

# ignore those if we don't expect them

759

# ignore those if we don't expect them

759

if command in '#@':

760

if command in '#@':

760

continue

761

continue

761

elif command == '+':

762

elif command == '+':

762

affects_new = True

763

affects_new = True

763

action = Action.ADD

764

action = Action.ADD

764

stats[0] += 1

765

stats[0] += 1

765

elif command == '-':

766

elif command == '-':

766

affects_old = True

767

affects_old = True

767

action = Action.DELETE

768

action = Action.DELETE

768

stats[1] += 1

769

stats[1] += 1

769

else:

770

else:

770

affects_old = affects_new = True

771

affects_old = affects_new = True

771

action = Action.UNMODIFIED

772

action = Action.UNMODIFIED

772

773

if not self._newline_marker.match(line):

774

if not self._newline_marker.match(line):

774

old_line += affects_old

775

old_line += affects_old

775

new_line += affects_new

776

new_line += affects_new

776

lines.append({

777

lines.append({

777

'old_lineno': affects_old and old_line or '',

778

'old_lineno': affects_old and old_line or '',

778

'new_lineno': affects_new and new_line or '',

779

'new_lineno': affects_new and new_line or '',

779

'action': action,

780

'action': action,

780

'line': self._clean_line(line, command)

781

'line': self._clean_line(line, command)

781

})

782

})

782

raw_diff.append(line)

783

raw_diff.append(line)

783

784

line = diff_iter.next()

785

line = diff_iter.next()

785

786

if self._newline_marker.match(line):

787

if self._newline_marker.match(line):

787

# we need to append to lines, since this is not

788

# we need to append to lines, since this is not

788

# counted in the line specs of diff

789

# counted in the line specs of diff

789

if affects_old:

790

if affects_old:

790

action = Action.OLD_NO_NL

791

action = Action.OLD_NO_NL

791

elif affects_new:

792

elif affects_new:

792

action = Action.NEW_NO_NL

793

action = Action.NEW_NO_NL

793

else:

794

else:

794

raise Exception('invalid context for no newline')

795

raise Exception('invalid context for no newline')

795

796

lines.append({

797

lines.append({

797

'old_lineno': None,

798

'old_lineno': None,

798

'new_lineno': None,

799

'new_lineno': None,

799

'action': action,

800

'action': action,

800

'line': self._clean_line(line, command)

801

'line': self._clean_line(line, command)

801

})

802

})

802

803

except StopIteration:

804

except StopIteration:

804

pass

805

pass

805

806

return ''.join(raw_diff), chunks, stats

807

return ''.join(raw_diff), chunks, stats

807

808

def _safe_id(self, idstring):

809

def _safe_id(self, idstring):

809

"""Make a string safe for including in an id attribute.

810

"""Make a string safe for including in an id attribute.

810

811

The HTML spec says that id attributes 'must begin with

812

The HTML spec says that id attributes 'must begin with

812

a letter ([A-Za-z]) and may be followed by any number

813

a letter ([A-Za-z]) and may be followed by any number

813

of letters, digits ([0-9]), hyphens ("-"), underscores

814

of letters, digits ([0-9]), hyphens ("-"), underscores

814

("_"), colons (":"), and periods (".")'. These regexps

815

("_"), colons (":"), and periods (".")'. These regexps

815

are slightly over-zealous, in that they remove colons

816

are slightly over-zealous, in that they remove colons

816

and periods unnecessarily.

817

and periods unnecessarily.

817

818

Whitespace is transformed into underscores, and then

819

Whitespace is transformed into underscores, and then

819

anything which is not a hyphen or a character that

820

anything which is not a hyphen or a character that

820

matches \w (alphanumerics and underscore) is removed.

821

matches \w (alphanumerics and underscore) is removed.

821

822

"""

823

"""

823

# Transform all whitespace to underscore

824

# Transform all whitespace to underscore

824

idstring = re.sub(r'\s', "_", '%s' % idstring)

825

idstring = re.sub(r'\s', "_", '%s' % idstring)

825

# Remove everything that is not a hyphen or a member of \w

826

# Remove everything that is not a hyphen or a member of \w

826

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

827

idstring = re.sub(r'(?!-)\W', "", idstring).lower()

827

return idstring

828

return idstring

828

829

@classmethod
def diff_splitter(cls, string):
    """
    Lazily split ``string`` into lines, breaking on the newline character only.

    Works like ``.splitlines()`` with the line ends kept, except that no
    other line separator is recognised. Nothing is yielded for an empty
    value; each yielded line is passed through ``safe_unicode``.
    """
    if not string:
        return

    if string == '\n':
        yield u'\n'
        return

    ends_with_newline = string.endswith('\n')
    pieces = string.split('\n')
    if ends_with_newline:
        # the split leaves an empty string behind the final newline
        pieces.pop()

    final_index = len(pieces) - 1
    for index, piece in enumerate(pieces):
        text = safe_unicode(piece)
        if index == final_index and not ends_with_newline:
            # the input stopped without a newline, so do not invent one
            yield text
        else:
            yield text + '\n'

855

def prepare(self, inline_diff=True):
    """
    Run the parser over the wrapped udiff so it can be rendered as HTML.

    The parser result is remembered on ``self.parsed_diff`` and
    ``self.parsed`` is switched to ``True``.

    :param inline_diff: handed through to ``self._parser`` unchanged.
    :return: the parser result, described by the original author as a
        list of dicts with diff information.
    """
    diff_info = self._parser(inline_diff=inline_diff)
    self.parsed, self.parsed_diff = True, diff_info
    return diff_info

866

def as_raw(self, diff_lines=None):
    """
    Hand back the unprocessed diff, i.e. ``self._diff.raw``.

    :param diff_lines: accepted but not looked at.
    :return: the raw diff, described by the original author as a byte
        string.
    """
    raw = self._diff.raw
    return raw

872

def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Render the diff as an HTML table, using the given css classes.

    ``prepare()`` is called first when the diff has not been parsed yet.

    :param table_class: css class of the ``<table>`` element.
    :param line_class: css class put on every ``<tr>``, next to the
        action of the line.
    :param old_lineno_class: css class of the old line number cell.
    :param new_lineno_class: css class of the new line number cell.
    :param code_class: css class of the code cell.
    :param enable_comments: when true, lines other than context lines get
        an "add comment" icon and their code cell is not marked
        ``no-comment``.
    :param parsed_lines: when truthy, rendered instead of
        ``self.parsed_diff``.
    :return: the HTML as one string, or ``None`` when none of the files
        had any chunk.
    """
    # TODO(marcink): not sure how to pass in translator
    # here in an efficient way, leave the _ for proper gettext extraction
    _ = lambda s: s

    def _link_to_if(condition, label, url):
        """
        Wrap the label into a link when the condition holds, else return it bare.
        """
        if not condition:
            return label
        return '''<a href="%(url)s" class="tooltip"
                        title="%(title)s">%(label)s</a>''' % {
            'title': _('Click to select line'),
            'url': url,
            'label': label
        }

    if not self.parsed:
        self.prepare()

    own_lines = self.parsed_diff
    diff_lines = parsed_lines if parsed_lines else own_lines

    saw_chunk = False
    parts = ['''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    }]
    emit = parts.append

    for diff in diff_lines:
        for chunk in diff['chunks']:
            saw_chunk = True
            for change in chunk:
                action = change['action']
                emit('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': action
                })

                file_anchor = self._safe_id(diff['filename'])
                old_no = change['old_lineno']
                new_no = change['new_lineno']
                anchor_old = "%(filename)s_o%(oldline_no)s" % {
                    'filename': file_anchor,
                    'oldline_no': old_no
                }
                anchor_new = "%(filename)s_n%(oldline_no)s" % {
                    'filename': file_anchor,
                    'oldline_no': new_no
                }

                # only real line numbers get an id, '...' and empty ones don't
                anchor_old_id = (
                    'id="%s"' % anchor_old
                    if (old_no != '...' and old_no) else '')
                anchor_new_id = (
                    'id="%s"' % anchor_new
                    if (new_no != '...' and new_no) else '')

                # context rows are neither linked nor commentable
                anchor_link = action != Action.CONTEXT

                # -- comment icons ---------------------------------------
                emit('''\t<td class="add-comment-line"><span class="add-comment-content">''')
                if enable_comments and anchor_link:
                    emit('''<a href="#"><span class="icon-comment-add"></span></a>''')
                emit('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')

                # -- old line number -------------------------------------
                emit('''\t<td %(a_id)s class="%(olc)s">''' % {
                    'a_id': anchor_old_id,
                    'olc': old_lineno_class
                })
                emit('''%(link)s''' % {
                    'link': _link_to_if(
                        anchor_link, old_no, '#%s' % anchor_old)
                })
                emit('''</td>\n''')

                # -- new line number -------------------------------------
                emit('''\t<td %(a_id)s class="%(nlc)s">''' % {
                    'a_id': anchor_new_id,
                    'nlc': new_lineno_class
                })
                emit('''%(link)s''' % {
                    'link': _link_to_if(
                        anchor_link, new_no, '#%s' % anchor_new)
                })
                emit('''</td>\n''')

                # -- code ------------------------------------------------
                code_classes = [code_class]
                if not enable_comments or not anchor_link:
                    code_classes.append('no-comment')
                emit('\t<td class="%s">' % ' '.join(code_classes))
                emit('''\n\t\t<pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })

                emit('''\t</td>''')
                emit('''\n</tr>\n''')

    emit('''</table>''')
    if not saw_chunk:
        return None
    return ''.join(parts)

997

def stat(self):
    """
    Report how many lines this instance counts as added and as removed.

    :return: tuple ``(self.adds, self.removes)``.
    """
    added, removed = self.adds, self.removes
    return added, removed

1003

def get_context_of_line(
        self, path, diff_line=None, context_before=3, context_after=3):
    """
    Returns the context lines for the specified diff line.

    The diff has to be parsed already (see ``prepare``).

    :param path: filename of the file inside of the diff.
    :param diff_line: the line to look up; exactly one of its two line
        numbers has to be set, the other one has to be ``None``.
    :type diff_line: :class:`DiffLineNumber`
    :param context_before: number of chunk lines to take in front of the
        line.
    :param context_after: number of chunk lines to take after the line.
    :return: list of ``(action, line)`` tuples, limited to unmodified,
        added and deleted lines; the last entry always ends with exactly
        one newline.
    :raises ValueError: if ``diff_line`` is missing or carries both line
        numbers.
    """
    assert self.parsed, "DiffProcessor is not initialized."

    if diff_line is None:
        # Fail with a clear message, the membership test below would
        # otherwise blow up with an unrelated looking TypeError.
        raise ValueError("A diff line has to be specified.")

    if None not in diff_line:
        raise ValueError(
            "Cannot specify both line numbers: {}".format(diff_line))

    file_diff = self._get_file_diff(path)
    chunk, idx = self._find_chunk_line_index(file_diff, diff_line)

    # the window is clamped at the start of the chunk, slicing takes care
    # of the end
    first_line_to_include = max(idx - context_before, 0)
    first_line_after_context = idx + context_after + 1
    context_lines = chunk[first_line_to_include:first_line_after_context]

    line_contents = [
        _context_line(line) for line in context_lines
        if _is_diff_content(line)]
    # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
    # Once they are fixed, we can drop this line here.
    if line_contents:
        line_contents[-1] = (
            line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
    return line_contents

1033

def find_context(self, path, context, offset=0):
    """
    Finds the given `context` inside of the diff.

    Use the parameter `offset` to specify which offset the target line has
    inside of the given `context`. This way the correct diff line will be
    returned.

    :param path: filename of the file inside of the diff.
    :param context: sequence of ``(action, line)`` tuples to look for.
    :param offset: Shall be used to specify the offset of the main line
        within the given `context`.
    :return: list with one diff line number per place where the context
        was found.
    :raises ValueError: if `offset` is negative or not smaller than the
        length of `context`.
    """
    if offset < 0 or offset >= len(context):
        raise ValueError(
            "Only positive values up to the length of the context "
            "minus one are allowed.")

    matches = []
    file_diff = self._get_file_diff(path)

    for chunk in file_diff['chunks']:
        context_iter = iter(context)
        for line_idx, line in enumerate(chunk):
            try:
                if _context_line(line) == next(context_iter):
                    continue
            except StopIteration:
                # the whole context was consumed by the preceding lines
                matches.append((line_idx, chunk))
            # mismatch or completed match: start over with the context
            context_iter = iter(context)

        # A match which ends on the last line of the chunk shows up as an
        # exhausted iterator only now. Use the chunk length as position,
        # it is also well defined for an empty chunk.
        try:
            next(context_iter)
        except StopIteration:
            matches.append((len(chunk), chunk))

    # matches carry the index right behind the matched context
    effective_offset = len(context) - offset
    found_at_diff_lines = [
        _line_to_diff_line_number(chunk[idx - effective_offset])
        for idx, chunk in matches]

    return found_at_diff_lines

1077

def _get_file_diff(self, path):

1078

def _get_file_diff(self, path):

1078

for file_diff in self.parsed_diff:

1079

for file_diff in self.parsed_diff:

1079

if file_diff['filename'] == path:

1080

if file_diff['filename'] == path:

1080

break

1081

break

1081

else:

1082

else:

1082

raise FileNotInDiffException("File {} not in diff".format(path))

1083

raise FileNotInDiffException("File {} not in diff".format(path))

1083

return file_diff

1084

return file_diff

1084

1085

def _find_chunk_line_index(self, file_diff, diff_line):

1086

def _find_chunk_line_index(self, file_diff, diff_line):

1086

for chunk in file_diff['chunks']:

1087

for chunk in file_diff['chunks']:

1087

for idx, line in enumerate(chunk):

1088

for idx, line in enumerate(chunk):

1088

if line['old_lineno'] == diff_line.old:

1089

if line['old_lineno'] == diff_line.old:

1089

return chunk, idx

1090

return chunk, idx

1090

if line['new_lineno'] == diff_line.new:

1091

if line['new_lineno'] == diff_line.new:

1091

return chunk, idx

1092

return chunk, idx

1092

raise LineNotInDiffException(

1093

raise LineNotInDiffException(

1093

"The line {} is not part of the diff.".format(diff_line))

1094

"The line {} is not part of the diff.".format(diff_line))

1094

1095

1096

def _is_diff_content(line):

1097

def _is_diff_content(line):

1097

return line['action'] in (

1098

return line['action'] in (

1098

Action.UNMODIFIED, Action.ADD, Action.DELETE)

1099

Action.UNMODIFIED, Action.ADD, Action.DELETE)

1099

1100

1101

def _context_line(line):

1102

def _context_line(line):

1102

return (line['action'], line['line'])

1103

return (line['action'], line['line'])

1103

1104

1105

DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])

1106

DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])

1106

1107

1108

def _line_to_diff_line_number(line):

1109

def _line_to_diff_line_number(line):

1109

new_line_no = line['new_lineno'] or None

1110

new_line_no = line['new_lineno'] or None

1110

old_line_no = line['old_lineno'] or None

1111

old_line_no = line['old_lineno'] or None

1111

return DiffLineNumber(old=old_line_no, new=new_line_no)

1112

return DiffLineNumber(old=old_line_no, new=new_line_no)

1112

1113

1114

class FileNotInDiffException(Exception):

1115

class FileNotInDiffException(Exception):

1115

"""

1116

"""

1116

Raised when the context for a missing file is requested.

1117

Raised when the context for a missing file is requested.

1117

1118

If you request the context for a line in a file which is not part of the

1119

If you request the context for a line in a file which is not part of the

1119

given diff, then this exception is raised.

1120

given diff, then this exception is raised.

1120

"""

1121

"""

1121

1122

1123

class LineNotInDiffException(Exception):

1124

class LineNotInDiffException(Exception):

1124

"""

1125

"""

1125

Raised when the context for a missing line is requested.

1126

Raised when the context for a missing line is requested.

1126

1127

If you request the context for a line in a file and this line is not

1128

If you request the context for a line in a file and this line is not

1128

part of the given diff, then this exception is raised.

1129

part of the given diff, then this exception is raised.

1129

"""

1130

"""

1130

1131

1132

class DiffLimitExceeded(Exception):

1133

class DiffLimitExceeded(Exception):

1133

pass

1134

pass

1134

1135

1136

def cache_diff(cached_diff_file, diff, commits):
    """
    Pickle ``diff`` and ``commits`` into a bz2-compressed cache file.

    Storing is best effort: any ``Exception`` raised while writing is logged
    instead of propagated, and a partially written file is removed so that a
    damaged cache is never left behind.

    :param cached_diff_file: path of the cache file to (over)write
    :param diff: object stored under the ``diff`` key
    :param commits: object stored under the ``commits`` key
    """
    struct = {
        'version': 'v1',
        'diff': diff,
        'commits': commits
    }

    try:
        with bz2.BZ2File(cached_diff_file, 'wb') as f:
            pickle.dump(struct, f)
        log.debug('Saved diff cache under %s', cached_diff_file)
    except Exception:
        log.warning('Failed to save cache', exc_info=True)
        # cleanup file to not store it "damaged"; when the file could not
        # even be created there is nothing to remove, so skip the removal
        # instead of logging a second, misleading traceback
        try:
            if os.path.exists(cached_diff_file):
                os.remove(cached_diff_file)
        except Exception:
            log.exception('Failed to cleanup path %s', cached_diff_file)

1155

1156

1157

def load_cached_diff(cached_diff_file):
    """
    Load a structure previously written by ``cache_diff``.

    :param cached_diff_file: path of the bz2-compressed pickle file
    :returns: the unpickled dict, or a default dict with ``diff`` and
        ``commits`` set to ``None`` when the file is missing, unreadable,
        empty, or does not contain a dict
    """
    default_struct = {
        'version': 'v1',
        'diff': None,
        'commits': None
    }

    if not os.path.isfile(cached_diff_file):
        return default_struct

    data = None
    try:
        # NOTE(security): pickle executes arbitrary code on load; this is only
        # safe as long as the cache directory is writable by trusted
        # processes only.
        with bz2.BZ2File(cached_diff_file, 'rb') as f:
            data = pickle.load(f)
        log.debug('Loaded diff cache from %s', cached_diff_file)
    except Exception:
        log.warning('Failed to read diff cache file', exc_info=True)

    # empty payload, failed read, or an old (non-dict) version of the data
    if not data or not isinstance(data, dict):
        return default_struct

    return data

1185

1186

1187

def generate_diff_cache_key(*args):
    """
    Helper to generate a cache key using arguments

    Every argument is converted with ``safe_str``, has each ``/`` replaced
    by ``_`` and the results are joined with ``_``. An argument that ends up
    as an empty string is rendered as ``None``.
    """
    def arg_mapper(input_param):
        input_param = safe_str(input_param)
        # we cannot allow '/' in arguments since it would allow
        # subdirectory usage. str.replace() returns a new string, so the
        # result must be re-bound; previously it was silently discarded.
        input_param = input_param.replace('/', '_')
        return input_param or None  # prevent empty string arguments

    return '_'.join('{}'.format(arg_mapper(arg)) for arg in args)

1200

1201

1202

def diff_cache_exist(cache_storage, *args):
    """
    Based on all generated arguments check and return a cache path

    :param cache_storage: directory holding the cache files
    :param args: values combined by ``generate_diff_cache_key``
    :returns: ``cache_storage`` joined with the generated cache key
    :raises ValueError: if the resulting path does not resolve to a location
        inside ``cache_storage``
    """
    cache_key = generate_diff_cache_key(*args)
    cache_file_path = os.path.join(cache_storage, cache_key)
    # prevent path traversal attacks using some param that have e.g '../../'
    # Both sides are normalised and the storage root gets a trailing
    # separator, so a sibling directory that merely shares the prefix
    # (``/cache_evil`` vs ``/cache``) is rejected and a relative
    # ``cache_storage`` is compared correctly.
    storage_root = os.path.join(os.path.abspath(cache_storage), '')
    if not os.path.abspath(cache_file_path).startswith(storage_root):
        raise ValueError('Final path must be within {}'.format(cache_storage))

    return cache_file_path

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             # Copyright (C) 2011-2018 RhodeCode GmbH
             #
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU Affero General Public License, version 3
             # (only), as published by the Free Software Foundation.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU Affero General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             #
             # This program is dual-licensed. If you wish to learn more about the
             # RhodeCode Enterprise Edition, including its added features, Support services,
             # and proprietary license terms, please see https://rhodecode.com/licenses/
             """
             Set of diffing helpers, previously part of vcs
             """
             import os
             import re
+            import bz2
             import collections
             import difflib
             import logging
             import cPickle as pickle
             from itertools import tee, imap
             from rhodecode.lib.vcs.exceptions import VCSError
             from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
             from rhodecode.lib.utils2 import safe_unicode, safe_str
             log = logging.getLogger(__name__)
             # Upper bound for the number of context lines; get_gitdiff clamps any
             # larger value to it. A file with more than this number of lines is
             # unusable in a browser anyway.
             # NOTE(review): 1014 looks like a typo for 1024 -- confirm before changing.
             MAX_CONTEXT = 1024 * 1014
             class OPS(object):
                 """Single-letter codes for the operation a diff applies to a file."""
                 ADD, MOD, DEL = 'A', 'M', 'D'
             def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
                 """
                 Returns git style diff between given ``filenode_old`` and ``filenode_new``.

                 :param filenode_old: ``FileNode`` for the old side of the diff
                 :param filenode_new: ``FileNode`` for the new side of the diff
                 :param ignore_whitespace: ignore whitespaces in diff
                 :param context: number of context lines; a falsy value falls back
                     to 3 and values above ``MAX_CONTEXT`` are clamped to it
                 :returns: an empty string when either node is a ``SubModuleNode``,
                     otherwise the result of ``repo.get_diff``
                 :raises VCSError: when either node is not a ``FileNode``
                 """
                 # make sure we pass in default context
                 context = context or 3
                 # protect against IntOverflow when passing HUGE context
                 if context > MAX_CONTEXT:
                     context = MAX_CONTEXT
                 filenodes = (filenode_old, filenode_new)
                 # any() instead of the truthiness of filter(): a filter object is
                 # always truthy on Python 3, which would make every call return ''
                 if any(isinstance(node, SubModuleNode) for node in filenodes):
                     return ''
                 for filenode in filenodes:
                     if not isinstance(filenode, FileNode):
                         raise VCSError(
                             "Given object should be FileNode object, not %s"
                             % filenode.__class__)
                 repo = filenode_new.commit.repository
                 # a node without a commit is diffed against the empty commit
                 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
                 new_commit = filenode_new.commit
                 vcs_gitdiff = repo.get_diff(
                     old_commit, new_commit, filenode_new.path,
                     ignore_whitespace, context, path1=filenode_old.path)
                 return vcs_gitdiff
             # Keys of the per-file ``stats['ops']`` mapping filled in by the diff
             # parsers; each key maps to a human readable description of the
             # operation (e.g. 'deleted file', 'binary diff hidden').
             NEW_FILENODE = 1
             DEL_FILENODE = 2
             MOD_FILENODE = 3
             RENAMED_FILENODE = 4
             COPIED_FILENODE = 5
             CHMOD_FILENODE = 6
             BIN_FILENODE = 7
             class LimitedDiffContainer(object):
                 """
                 Wrapper around a parsed diff that also records the diff limit and
                 the diff size reached; indexing and iteration are forwarded to the
                 wrapped diff.
                 """

                 def __init__(self, diff_limit, cur_diff_size, diff):
                     self.diff_limit = diff_limit
                     self.cur_diff_size = cur_diff_size
                     self.diff = diff

                 def __getitem__(self, key):
                     wrapped = self.diff
                     return wrapped.__getitem__(key)

                 def __iter__(self):
                     for entry in self.diff:
                         yield entry
             class Action(object):
                 """
                 Values stored under the ``action`` key of each line of a parsed diff.
                 """
                 # actual diff content lines
                 UNMODIFIED = 'unmod'
                 ADD = 'add'
                 DELETE = 'del'
                 # informational lines, e.g. the per-file operation messages
                 CONTEXT = 'context'
                 # presumably the "no newline at end of file" markers -- TODO confirm
                 OLD_NO_NL = 'old-no-nl'
                 NEW_NO_NL = 'new-no-nl'
             class DiffProcessor(object):
                 """
                 Give it a unified or git diff and it returns a list of the files that were
                 mentioned in the diff together with a dict of meta information that
                 can be used to render it in a HTML template.
                 .. note:: Unicode handling
                    The original diffs are a byte sequence and can contain filenames
                    in mixed encodings. This class generally returns `unicode` objects
                    since the result is intended for presentation to the user.
                 """
                 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
                 _newline_marker = re.compile(r'^\\ No newline at end of file')
                 # used for inline highlighter word split
                 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
                 # collapse ranges of commits over given number
                 _collapse_commits_over = 5
                 def __init__(self, diff, format='gitdiff', diff_limit=None,
                              file_limit=None, show_full_diff=True):
                     """
                     Set up counters, limits and the parser/highlighter pair.

                     :param diff: A `Diff` object representing a diff from a vcs backend
                     :param format: format of diff passed, `udiff` or `gitdiff`
                     :param diff_limit: size above which the whole diff is considered
                         "big" and gets cut off; only enforced when
                         ``show_full_diff`` is false
                     :param file_limit: size a single file's raw diff is compared
                         against to mark it as exceeding the limit
                     :param show_full_diff: when true the limits above never trigger
                         a cut off
                     """
                     self._diff, self._format = diff, format

                     # running totals of added / removed lines
                     self.adds = self.removes = 0

                     # limits and the size accumulated so far
                     self.diff_limit, self.file_limit = diff_limit, file_limit
                     self.show_full_diff = show_full_diff
                     self.cur_diff_size = 0

                     self.parsed = False
                     self.parsed_diff = []

                     log.debug('Initialized DiffProcessor with %s mode', format)
                     if format == 'gitdiff':
                         self.differ, self._parser = (
                             self._highlight_line_difflib, self._parse_gitdiff)
                     else:
                         self.differ, self._parser = (
                             self._highlight_line_udiff, self._new_parse_gitdiff)
                 def _copy_iterator(self):
                     """
                     make a fresh copy of generator, we should not iterate thru
                     an original as it's needed for repeating operations on
                     this instance of DiffProcessor
                     """
                     self.__udiff, iterator_copy = tee(self.__udiff)
                     return iterator_copy
                 def _escaper(self, string):
                     """
                     Escaper for diff escapes special chars and checks the diff limit
                     :param string:
                     """
                     self.cur_diff_size += len(string)
                     if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
                         raise DiffLimitExceeded('Diff Limit Exceeded')
                     return string \
                         .replace('&', '&amp;')\
                         .replace('<', '&lt;')\
                         .replace('>', '&gt;')
                 def _line_counter(self, l):
                     """
                     Bump ``adds`` / ``removes`` for one raw diff line.

                     Lines starting with ``+++`` or ``---`` are not counted.

                     :param l: raw diff line
                     :returns: the line passed through ``safe_unicode``
                     """
                     is_added = l.startswith('+') and not l.startswith('+++')
                     is_removed = l.startswith('-') and not l.startswith('---')
                     if is_added:
                         self.adds += 1
                     elif is_removed:
                         self.removes += 1
                     return safe_unicode(l)
                 def _highlight_line_difflib(self, line, next_):
                     """
                     Mark the word-level differences of a changed line pair in place.

                     Both lines are split with ``_token_re`` and compared with
                     ``difflib.SequenceMatcher``; non-equal fragments are wrapped in
                     ``<del>`` (old line) or ``<ins>`` (new line).
                     """
                     # whichever of the two is the deletion is the "old" side
                     if line['action'] == Action.DELETE:
                         old_line, new_line = line, next_
                     else:
                         old_line, new_line = next_, line

                     old_tokens = self._token_re.split(old_line['line'])
                     new_tokens = self._token_re.split(new_line['line'])
                     matcher = difflib.SequenceMatcher(None, old_tokens, new_tokens)

                     old_parts = []
                     new_parts = []
                     for opcode in matcher.get_opcodes():
                         tag, old_from, old_to, new_from, new_to = opcode
                         old_piece = ''.join(old_tokens[old_from:old_to])
                         new_piece = ''.join(new_tokens[new_from:new_to])
                         changed = tag != 'equal'
                         if changed and old_piece:
                             old_piece = '<del>%s</del>' % old_piece
                         if changed and new_piece:
                             new_piece = '<ins>%s</ins>' % new_piece
                         old_parts.append(old_piece)
                         new_parts.append(new_piece)

                     old_line['line'] = "".join(old_parts)
                     new_line['line'] = "".join(new_parts)
                 def _highlight_line_udiff(self, line, next_):
                     """
                     Highlight inline changes in both lines.

                     Finds the common prefix and common suffix of the two line
                     texts and wraps whatever lies between them in ``<ins>`` (for
                     ``Action.ADD`` lines) or ``<del>`` (any other action). Both
                     dicts are modified in place; nothing is returned.
                     """
                     # length of the common prefix
                     start = 0
                     limit = min(len(line['line']), len(next_['line']))
                     while start < limit and line['line'][start] == next_['line'][start]:
                         start += 1
                     # common suffix, tracked as a negative index from the end;
                     # limit is reduced so the suffix cannot overlap the prefix
                     end = -1
                     limit -= start
                     while -end <= limit and line['line'][end] == next_['line'][end]:
                         end -= 1
                     # step back onto the first character of the suffix
                     # (0 means there is no common suffix)
                     end += 1
                     if start or end:
                         def do(l):
                             # absolute index where the common suffix begins
                             last = end + len(l['line'])
                             if l['action'] == Action.ADD:
                                 tag = 'ins'
                             else:
                                 tag = 'del'
                             l['line'] = '%s<%s>%s</%s>%s' % (
                                 l['line'][:start],
                                 tag,
                                 l['line'][start:last],
                                 tag,
                                 l['line'][last:]
                             )
                         do(line)
                         do(next_)
                 def _clean_line(self, line, command):
                     if command in ['+', '-', ' ']:
                         # only modify the line if it's actually a diff thing
                         line = line[1:]
                     return line
                 def _parse_gitdiff(self, inline_diff=True):
                     """
                     Parse every chunk of ``self._diff`` into a per-file dict.

                     For each chunk the operation (add / modify / delete) and the
                     ``stats`` are derived from the chunk header, the diff text is
                     escaped and parsed into chunks of lines, and one dict per file
                     is collected.

                     :param inline_diff: when true, adjacent changed lines of
                         different actions are passed to ``self.differ`` for inline
                         highlighting
                     :returns: the file dicts sorted by operation (added, modified,
                         deleted); wrapped in a ``LimitedDiffContainer`` if a
                         ``DiffLimitExceeded`` was caught for any file
                     """
                     _files = []
                     # identity by default; replaced further down when a limit is hit
                     diff_container = lambda arg: arg
                     for chunk in self._diff.chunks():
                         head = chunk.header
                         # lazily escape each line; _escaper also tracks the diff size
                         diff = imap(self._escaper, self.diff_splitter(chunk.diff))
                         raw_diff = chunk.raw
                         limited_diff = False
                         exceeds_limit = False
                         op = None
                         stats = {
                             'added': 0,
                             'deleted': 0,
                             'binary': False,
                             'ops': {},
                         }
                         # every branch starts out flagged as binary; the flag is
                         # cleared below once the lines were parsed successfully
                         if head['deleted_file_mode']:
                             op = OPS.DEL
                             stats['binary'] = True
                             stats['ops'][DEL_FILENODE] = 'deleted file'
                         elif head['new_file_mode']:
                             op = OPS.ADD
                             stats['binary'] = True
                             stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
                         else:  # modify operation, can be copy, rename or chmod
                             # CHMOD
                             if head['new_mode'] and head['old_mode']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][CHMOD_FILENODE] = (
                                     'modified file chmod %s => %s' % (
                                         head['old_mode'], head['new_mode']))
                             # RENAME
                             if head['rename_from'] != head['rename_to']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][RENAMED_FILENODE] = (
                                     'file renamed from %s to %s' % (
                                         head['rename_from'], head['rename_to']))
                             # COPY
                             if head.get('copy_from') and head.get('copy_to'):
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][COPIED_FILENODE] = (
                                     'file copied from %s to %s' % (
                                         head['copy_from'], head['copy_to']))
                             # If our new parsed headers didn't match anything fallback to
                             # old style detection
                             if op is None:
                                 if not head['a_file'] and head['b_file']:
                                     op = OPS.ADD
                                     stats['binary'] = True
                                     stats['ops'][NEW_FILENODE] = 'new file'
                                 elif head['a_file'] and not head['b_file']:
                                     op = OPS.DEL
                                     stats['binary'] = True
                                     stats['ops'][DEL_FILENODE] = 'deleted file'
                             # it's not ADD not DELETE
                             if op is None:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][MOD_FILENODE] = 'modified file'
                         # a real non-binary diff
                         if head['a_file'] or head['b_file']:
                             try:
                                 # consuming `diff` runs _escaper, which may raise
                                 # DiffLimitExceeded part-way through
                                 raw_diff, chunks, _stats = self._parse_lines(diff)
                                 stats['binary'] = False
                                 stats['added'] = _stats[0]
                                 stats['deleted'] = _stats[1]
                                 # explicit mark that it's a modified file
                                 if op == OPS.MOD:
                                     stats['ops'][MOD_FILENODE] = 'modified file'
                                 exceeds_limit = len(raw_diff) > self.file_limit
                                 # changed from _escaper function so we validate size of
                                 # each file instead of the whole diff
                                 # diff will hide big files but still show small ones
                                 # from my tests, big files are fairly safe to be parsed
                                 # but the browser is the bottleneck
                                 if not self.show_full_diff and exceeds_limit:
                                     raise DiffLimitExceeded('File Limit Exceeded')
                             except DiffLimitExceeded:
                                 # from now on the final result is wrapped so callers
                                 # can tell the diff was cut off
                                 diff_container = lambda _diff: \
                                     LimitedDiffContainer(
                                         self.diff_limit, self.cur_diff_size, _diff)
                                 exceeds_limit = len(raw_diff) > self.file_limit
                                 limited_diff = True
                                 chunks = []
                         else:  # GIT format binary patch, or possibly empty diff
                             if head['bin_patch']:
                                 # we have operation already extracted, but we mark simply
                                 # it's a diff we wont show for binary files
                                 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                             chunks = []
                         if chunks and not self.show_full_diff and op == OPS.DEL:
                             # if not full diff mode show deleted file contents
                             # TODO: anderson: if the view is not too big, there is no way
                             # to see the content of the file
                             chunks = []
                         # first chunk: one context line per recorded operation
                         # message, except the plain 'modified file' one
                         chunks.insert(0, [{
                                               'old_lineno': '',
                                               'new_lineno': '',
                                               'action': Action.CONTEXT,
                                               'line': msg,
                                           } for _op, msg in stats['ops'].iteritems()
                                           if _op not in [MOD_FILENODE]])
                         _files.append({
                             'filename': safe_unicode(head['b_path']),
                             'old_revision': head['a_blob_id'],
                             'new_revision': head['b_blob_id'],
                             'chunks': chunks,
                             'raw_diff': safe_unicode(raw_diff),
                             'operation': op,
                             'stats': stats,
                             'exceeds_limit': exceeds_limit,
                             'is_limited_diff': limited_diff,
                         })
                     # order files: added first, then modified, then deleted
                     sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                                            OPS.DEL: 2}.get(info['operation'])
                     if not inline_diff:
                         return diff_container(sorted(_files, key=sorter))
                     # highlight inline changes
                     for diff_data in _files:
                         for chunk in diff_data['chunks']:
                             lineiter = iter(chunk)
                             try:
                                 # runs until the iterator is exhausted; the
                                 # StopIteration from .next() ends the loop
                                 while 1:
                                     line = lineiter.next()
                                     if line['action'] not in (
                                             Action.UNMODIFIED, Action.CONTEXT):
                                         nextline = lineiter.next()
                                         # only a changed line followed by a changed
                                         # line of a different action is highlighted
                                         if nextline['action'] in ['unmod', 'context'] or \
                                            nextline['action'] == line['action']:
                                             continue
                                         self.differ(line, nextline)
                             except StopIteration:
                                 pass
                     return diff_container(sorted(_files, key=sorter))
                 def _check_large_diff(self):
                     log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
                     if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
                         raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
                 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
                 def _new_parse_gitdiff(self, inline_diff=True):
                     _files = []
                     # this can be overriden later to a LimitedDiffContainer type
                     diff_container = lambda arg: arg
                     for chunk in self._diff.chunks():
                         head = chunk.header
                         log.debug('parsing diff %r' % head)
                         raw_diff = chunk.raw
                         limited_diff = False
                         exceeds_limit = False
                         op = None
                         stats = {
                             'added': 0,
                             'deleted': 0,
                             'binary': False,
                             'old_mode': None,
                             'new_mode': None,
                             'ops': {},
                         }
                         if head['old_mode']:
                             stats['old_mode'] = head['old_mode']
                         if head['new_mode']:
                             stats['new_mode'] = head['new_mode']
                         if head['b_mode']:
                             stats['new_mode'] = head['b_mode']
                         # delete file
                         if head['deleted_file_mode']:
                             op = OPS.DEL
                             stats['binary'] = True
                             stats['ops'][DEL_FILENODE] = 'deleted file'
                         # new file
                         elif head['new_file_mode']:
                             op = OPS.ADD
                             stats['binary'] = True
                             stats['old_mode'] = None
                             stats['new_mode'] = head['new_file_mode']
                             stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
                         # modify operation, can be copy, rename or chmod
                         else:
                             # CHMOD
                             if head['new_mode'] and head['old_mode']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][CHMOD_FILENODE] = (
                                     'modified file chmod %s => %s' % (
                                         head['old_mode'], head['new_mode']))
                             # RENAME
                             if head['rename_from'] != head['rename_to']:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['renamed'] = (head['rename_from'], head['rename_to'])
                                 stats['ops'][RENAMED_FILENODE] = (
                                     'file renamed from %s to %s' % (
                                         head['rename_from'], head['rename_to']))
                             # COPY
                             if head.get('copy_from') and head.get('copy_to'):
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['copied'] = (head['copy_from'], head['copy_to'])
                                 stats['ops'][COPIED_FILENODE] = (
                                     'file copied from %s to %s' % (
                                         head['copy_from'], head['copy_to']))
                             # If our new parsed headers didn't match anything fallback to
                             # old style detection
                             if op is None:
                                 if not head['a_file'] and head['b_file']:
                                     op = OPS.ADD
                                     stats['binary'] = True
                                     stats['new_file'] = True
                                     stats['ops'][NEW_FILENODE] = 'new file'
                                 elif head['a_file'] and not head['b_file']:
                                     op = OPS.DEL
                                     stats['binary'] = True
                                     stats['ops'][DEL_FILENODE] = 'deleted file'
                             # it's not ADD not DELETE
                             if op is None:
                                 op = OPS.MOD
                                 stats['binary'] = True
                                 stats['ops'][MOD_FILENODE] = 'modified file'
                         # a real non-binary diff
                         if head['a_file'] or head['b_file']:
                             # simulate splitlines, so we keep the line end part
                             diff = self.diff_splitter(chunk.diff)
                             # append each file to the diff size
                             raw_chunk_size = len(raw_diff)
                             exceeds_limit = raw_chunk_size > self.file_limit
                             self.cur_diff_size += raw_chunk_size
                             try:
                                 # Check each file instead of the whole diff.
                                 # Diff will hide big files but still show small ones.
                                 # From the tests big files are fairly safe to be parsed
                                 # but the browser is the bottleneck.
                                 if not self.show_full_diff and exceeds_limit:
                                     log.debug('File `%s` exceeds current file_limit of %s',
                                               safe_unicode(head['b_path']), self.file_limit)
                                     raise DiffLimitExceeded(
                                         'File Limit %s Exceeded', self.file_limit)
                                 self._check_large_diff()
                                 raw_diff, chunks, _stats = self._new_parse_lines(diff)
                                 stats['binary'] = False
                                 stats['added'] = _stats[0]
                                 stats['deleted'] = _stats[1]
                                 # explicit mark that it's a modified file
                                 if op == OPS.MOD:
                                     stats['ops'][MOD_FILENODE] = 'modified file'
                             except DiffLimitExceeded:
                                 diff_container = lambda _diff: \
                                     LimitedDiffContainer(
                                         self.diff_limit, self.cur_diff_size, _diff)
                                 limited_diff = True
                                 chunks = []
                         else:  # GIT format binary patch, or possibly empty diff
                             if head['bin_patch']:
                                 # we have operation already extracted, but we mark simply
                                 # it's a diff we wont show for binary files
                                 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                             chunks = []
                         # Hide content of deleted node by setting empty chunks
                         if chunks and not self.show_full_diff and op == OPS.DEL:
                             # if not full diff mode show deleted file contents
                             # TODO: anderson: if the view is not too big, there is no way
                             # to see the content of the file
                             chunks = []
                         chunks.insert(
 , [{'old_lineno': '',
                                  'new_lineno': '',
                                  'action': Action.CONTEXT,
                                  'line': msg,
                                  } for _op, msg in stats['ops'].iteritems()
                                 if _op not in [MOD_FILENODE]])
                         original_filename = safe_unicode(head['a_path'])
                         _files.append({
                             'original_filename': original_filename,
                             'filename': safe_unicode(head['b_path']),
                             'old_revision': head['a_blob_id'],
                             'new_revision': head['b_blob_id'],
                             'chunks': chunks,
                             'raw_diff': safe_unicode(raw_diff),
                             'operation': op,
                             'stats': stats,
                             'exceeds_limit': exceeds_limit,
                             'is_limited_diff': limited_diff,
                         })
                     sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                                            OPS.DEL: 2}.get(info['operation'])
                     return diff_container(sorted(_files, key=sorter))
                 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
                 def _parse_lines(self, diff_iter):
                     """
                     Parse the diff and return data for the template.

                     Old-style line parser: every hunk becomes a plain list of line
                     dicts with the keys ``old_lineno``, ``new_lineno``, ``action``
                     and ``line``.

                     :param diff_iter: iterator over the lines of one file's diff. It is
                         advanced with ``.next()``; running out of lines
                         (``StopIteration``) simply ends the parsing.
                     :return: tuple ``(raw_diff, chunks, stats)`` where ``raw_diff`` is
                         the string joined from the collected lines, ``chunks`` is the
                         list of per-hunk line lists and ``stats`` is
                         ``[added_lines, deleted_lines]``.
                     """
                     # stats[0] counts '+' lines, stats[1] counts '-' lines
                     stats = [0, 0]
                     chunks = []
                     raw_diff = []
                     try:
                         line = diff_iter.next()
                         while line:
                             raw_diff.append(line)
                             # the (still empty) line list is registered before the header
                             # is validated, so a non-matching line leaves an empty chunk
                             lines = []
                             chunks.append(lines)
                             match = self._chunk_re.match(line)
                             if not match:
                                 break
                             gr = match.groups()
                             # all groups but the last are numbers; a missing one counts as 1
                             (old_line, old_end,
                              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                             # counters hold the number of the last emitted line, so they
                             # start one below the first line of the hunk
                             old_line -= 1
                             new_line -= 1
                             context = len(gr) == 5
                             # turn the hunk lengths into absolute end line numbers
                             old_end += old_line
                             new_end += new_line
                             if context:
                                 # skip context only if it's first line
                                 if int(gr[0]) > 1:
                                     lines.append({
                                         'old_lineno': '...',
                                         'new_lineno': '...',
                                         'action':     Action.CONTEXT,
                                         'line':       line,
                                     })
                             line = diff_iter.next()
                             # consume body lines until both sides of the hunk are complete
                             while old_line < old_end or new_line < new_end:
                                 # an empty line is treated like an unchanged line
                                 command = ' '
                                 if line:
                                     command = line[0]
                                 affects_old = affects_new = False
                                 # ignore those if we don't expect them
                                 # NOTE(review): `continue` re-enters the loop without
                                 # fetching a new line or moving the counters, so hitting
                                 # this branch would never terminate -- confirm it is
                                 # unreachable for well-formed hunks.
                                 if command in '#@':
                                     continue
                                 elif command == '+':
                                     affects_new = True
                                     action = Action.ADD
                                     stats[0] += 1
                                 elif command == '-':
                                     affects_old = True
                                     action = Action.DELETE
                                     stats[1] += 1
                                 else:
                                     affects_old = affects_new = True
                                     action = Action.UNMODIFIED
                                 # lines matching `_newline_marker` neither advance the
                                 # counters nor get recorded here
                                 if not self._newline_marker.match(line):
                                     old_line += affects_old
                                     new_line += affects_new
                                     lines.append({
                                         'old_lineno':   affects_old and old_line or '',
                                         'new_lineno':   affects_new and new_line or '',
                                         'action':       action,
                                         'line':         self._clean_line(line, command)
                                     })
                                     raw_diff.append(line)
                                 line = diff_iter.next()
                                 if self._newline_marker.match(line):
                                     # we need to append to lines, since this is not
                                     # counted in the line specs of diff
                                     lines.append({
                                         'old_lineno':   '...',
                                         'new_lineno':   '...',
                                         'action':       Action.CONTEXT,
                                         'line':         self._clean_line(line, command)
                                     })
                     except StopIteration:
                         # input exhausted: return whatever has been parsed so far
                         pass
                     return ''.join(raw_diff), chunks, stats
                 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
                 def _new_parse_lines(self, diff_iter):
                     """
                     Parse the diff and return data for the template.

                     New-style line parser: every hunk becomes a dict with the keys
                     ``section_header``, ``source_start``, ``source_length``,
                     ``target_start``, ``target_length`` and ``lines`` (a list of line
                     dicts with ``old_lineno``, ``new_lineno``, ``action``, ``line``).

                     :param diff_iter: iterator over the lines of one file's diff. It is
                         advanced with ``.next()``; running out of lines
                         (``StopIteration``) simply ends the parsing.
                     :return: tuple ``(raw_diff, chunks, stats)`` where ``raw_diff`` is
                         the string joined from every consumed line, ``chunks`` is the
                         list of hunk dicts and ``stats`` is
                         ``[added_lines, deleted_lines]``.
                     :raises Exception: if a line matching `_newline_marker` follows a
                         line that was attributed to neither the old nor the new side.
                     """
                     # stats[0] counts '+' lines, stats[1] counts '-' lines
                     stats = [0, 0]
                     chunks = []
                     raw_diff = []
                     try:
                         line = diff_iter.next()
                         while line:
                             raw_diff.append(line)
                             # match header e.g @@ -0,0 +1 @@\n'
                             match = self._chunk_re.match(line)
                             if not match:
                                 break
                             gr = match.groups()
                             # all groups but the last are numbers; a missing one counts as 1
                             (old_line, old_end,
                              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                             lines = []
                             # the hunk keeps the start/length values exactly as parsed,
                             # i.e. before the counters below are adjusted
                             hunk = {
                                 'section_header': gr[-1],
                                 'source_start': old_line,
                                 'source_length': old_end,
                                 'target_start': new_line,
                                 'target_length': new_end,
                                 'lines': lines,
                             }
                             chunks.append(hunk)
                             # counters hold the number of the last emitted line, so they
                             # start one below the first line of the hunk
                             old_line -= 1
                             new_line -= 1
                             # NOTE(review): `context` is not read again in this method
                             context = len(gr) == 5
                             # turn the hunk lengths into absolute end line numbers
                             old_end += old_line
                             new_end += new_line
                             line = diff_iter.next()
                             # consume body lines until both sides of the hunk are complete
                             while old_line < old_end or new_line < new_end:
                                 # an empty line is treated like an unchanged line
                                 command = ' '
                                 if line:
                                     command = line[0]
                                 affects_old = affects_new = False
                                 # ignore those if we don't expect them
                                 # NOTE(review): `continue` re-enters the loop without
                                 # fetching a new line or moving the counters, so hitting
                                 # this branch would never terminate -- confirm it is
                                 # unreachable for well-formed hunks.
                                 if command in '#@':
                                     continue
                                 elif command == '+':
                                     affects_new = True
                                     action = Action.ADD
                                     stats[0] += 1
                                 elif command == '-':
                                     affects_old = True
                                     action = Action.DELETE
                                     stats[1] += 1
                                 else:
                                     affects_old = affects_new = True
                                     action = Action.UNMODIFIED
                                 # lines matching `_newline_marker` neither advance the
                                 # counters nor get recorded as regular lines
                                 if not self._newline_marker.match(line):
                                     old_line += affects_old
                                     new_line += affects_new
                                     lines.append({
                                         'old_lineno':   affects_old and old_line or '',
                                         'new_lineno':   affects_new and new_line or '',
                                         'action':       action,
                                         'line':         self._clean_line(line, command)
                                     })
                                 # unlike the line dicts, the raw diff keeps every line
                                 raw_diff.append(line)
                                 line = diff_iter.next()
                                 if self._newline_marker.match(line):
                                     # we need to append to lines, since this is not
                                     # counted in the line specs of diff
                                     # the marker is attributed to the side of the line
                                     # processed just before it (old side wins if both)
                                     if affects_old:
                                         action = Action.OLD_NO_NL
                                     elif affects_new:
                                         action = Action.NEW_NO_NL
                                     else:
                                         raise Exception('invalid context for no newline')
                                     lines.append({
                                         'old_lineno':   None,
                                         'new_lineno':   None,
                                         'action':       action,
                                         'line':         self._clean_line(line, command)
                                     })
                     except StopIteration:
                         # input exhausted: return whatever has been parsed so far
                         pass
                     return ''.join(raw_diff), chunks, stats
                 def _safe_id(self, idstring):
                     """Make a string safe for including in an id attribute.
                     The HTML spec says that id attributes 'must begin with
                     a letter ([A-Za-z]) and may be followed by any number
                     of letters, digits ([0-9]), hyphens ("-"), underscores
                     ("_"), colons (":"), and periods (".")'. These regexps
                     are slightly over-zealous, in that they remove colons
                     and periods unnecessarily.
                     Whitespace is transformed into underscores, and then
                     anything which is not a hyphen or a character that
                     matches \w (alphanumerics and underscore) is removed.
                     """
                     # Transform all whitespace to underscore
                     idstring = re.sub(r'\s', "_", '%s' % idstring)
                     # Remove everything that is not a hyphen or a member of \w
                     idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                     return idstring
                 @classmethod
                 def diff_splitter(cls, string):
                     """
                     Lazily split `string` into lines, breaking on line feeds only.

                     Works like ``splitlines(True)`` restricted to the line-feed
                     character: every yielded line keeps its trailing line feed, and
                     only the final line may lack one (when the input itself does not
                     end with a line feed). Lines are passed through `safe_unicode`.

                     :param string: diff text to split; a falsy value yields nothing.
                     """
                     if not string:
                         return
                     if string == '\n':
                         yield u'\n'
                         return
                     ends_with_newline = string.endswith('\n')
                     pieces = string.split('\n')
                     if ends_with_newline:
                         # the split leaves an empty trailing piece after the final
                         # line feed; it is not a line of its own
                         pieces.pop()
                     final_index = len(pieces) - 1
                     for index, piece in enumerate(pieces):
                         is_unterminated = index == final_index and not ends_with_newline
                         if is_unterminated:
                             yield safe_unicode(piece)
                         else:
                             yield safe_unicode(piece) + '\n'
                 def prepare(self, inline_diff=True):
                     """
                     Run the configured parser over the diff and remember the outcome.

                     :param inline_diff: forwarded unchanged to `self._parser`.
                     :return: the value returned by `self._parser`. It is also stored
                         in `self.parsed_diff`, and `self.parsed` is set to True.
                     """
                     result = self._parser(inline_diff=inline_diff)
                     self.parsed, self.parsed_diff = True, result
                     return result
                 def as_raw(self, diff_lines=None):
                     """
                     Return the raw diff held by the wrapped diff object.

                     :param diff_lines: accepted but not used by this method.
                     :return: the ``raw`` attribute of `self._diff`.
                     """
                     wrapped_diff = self._diff
                     return wrapped_diff.raw
                 def as_html(self, table_class='code-difftable', line_class='line',
                             old_lineno_class='lineno old', new_lineno_class='lineno new',
                             code_class='code', enable_comments=False, parsed_lines=None):
                     """
                     Return given diff as html table with customized css classes

                     One ``<tr>`` is emitted per line dict, holding five cells: the
                     add-comment cell, the comment-toggle cell, the old line number,
                     the new line number and the code itself.

                     :param table_class: css class of the ``<table>`` element.
                     :param line_class: css class put on every ``<tr>`` next to the
                         line's action.
                     :param old_lineno_class: css class of the old line number cell.
                     :param new_lineno_class: css class of the new line number cell.
                     :param code_class: css class of the code cell.
                     :param enable_comments: when true, non-context lines get an
                         add-comment link and their code cell is not marked
                         ``no-comment``.
                     :param parsed_lines: optional already parsed diff that is rendered
                         instead of `self.parsed_diff`.
                     :return: the html as one string, or None when no chunk was found.
                     """
                     # TODO(marcink): not sure how to pass in translator
                     # here in an efficient way, leave the _ for proper gettext extraction
                     _ = lambda s: s
                     def _link_to_if(condition, label, url):
                         """
                         Generates a link if condition is meet or just the label if not.
                         """
                         if condition:
                             return '''<a href="%(url)s" class="tooltip"
                             title="%(title)s">%(label)s</a>''' % {
                                 'title': _('Click to select line'),
                                 'url': url,
                                 'label': label
                             }
                         else:
                             return label
                     # parse lazily on first use
                     if not self.parsed:
                         self.prepare()
                     diff_lines = self.parsed_diff
                     # an explicitly passed (truthy) parsed diff wins over the stored one
                     if parsed_lines:
                         diff_lines = parsed_lines
                     # stays True until at least one chunk has been visited
                     _html_empty = True
                     _html = []
                     _html.append('''<table class="%(table_class)s">\n''' % {
                         'table_class': table_class
                     })
                     for diff in diff_lines:
                         # NOTE(review): despite its name, `line` is a whole chunk here; it
                         # is iterated as a sequence of line dicts, which is the shape
                         # `_parse_lines` produces -- confirm hunk dicts never reach this.
                         for line in diff['chunks']:
                             _html_empty = False
                             for change in line:
                                 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                                     'lc': line_class,
                                     'action': change['action']
                                 })
                                 anchor_old_id = ''
                                 anchor_new_id = ''
                                 # anchors look like <safe filename>_o<old no> / _n<new no>
                                 anchor_old = "%(filename)s_o%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['old_lineno']
                                 }
                                 anchor_new = "%(filename)s_n%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['new_lineno']
                                 }
                                 # only real line numbers (neither empty nor the '...'
                                 # placeholder) get an id attribute
                                 cond_old = (change['old_lineno'] != '...' and
                                             change['old_lineno'])
                                 cond_new = (change['new_lineno'] != '...' and
                                             change['new_lineno'])
                                 if cond_old:
                                     anchor_old_id = 'id="%s"' % anchor_old
                                 if cond_new:
                                     anchor_new_id = 'id="%s"' % anchor_new
                                 # line numbers become links for everything but context rows
                                 if change['action'] != Action.CONTEXT:
                                     anchor_link = True
                                 else:
                                     anchor_link = False
                                 ###########################################################
                                 # COMMENT ICONS
                                 ###########################################################
                                 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
                                 if enable_comments and change['action'] != Action.CONTEXT:
                                     _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
                                 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
                                 ###########################################################
                                 # OLD LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                                     'a_id': anchor_old_id,
                                     'olc': old_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(anchor_link, change['old_lineno'],
                                                         '#%s' % anchor_old)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # NEW LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                                     'a_id': anchor_new_id,
                                     'nlc': new_lineno_class
                                 })
                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(anchor_link, change['new_lineno'],
                                                         '#%s' % anchor_new)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # CODE
                                 ###########################################################
                                 code_classes = [code_class]
                                 if (not enable_comments or
                                         change['action'] == Action.CONTEXT):
                                     code_classes.append('no-comment')
                                 _html.append('\t<td class="%s">' % ' '.join(code_classes))
                                 # NOTE(review): the line text is interpolated as-is; this
                                 # presumably relies on it being html-escaped while
                                 # parsing -- verify.
                                 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                                     'code': change['line']
                                 })
                                 _html.append('''\t</td>''')
                                 _html.append('''\n</tr>\n''')
                     _html.append('''</table>''')
                     if _html_empty:
                         return None
                     return ''.join(_html)
                 def stat(self):
                     """
                     Report the line statistics of this instance.

                     :return: tuple ``(self.adds, self.removes)``.
                     """
                     added, removed = self.adds, self.removes
                     return added, removed
                 def get_context_of_line(
                         self, path, diff_line=None, context_before=3, context_after=3):
                     """
                     Collect the diff lines surrounding `diff_line` in the file `path`.

                     :param path: filename as stored in the parsed diff.
                     :param diff_line: line to look up; exactly one of its two numbers
                         has to be None.
                     :type diff_line: :class:`DiffLineNumber`
                     :param context_before: number of lines to include before the line.
                     :param context_after: number of lines to include after the line.
                     :return: list of ``(action, line)`` tuples for the content lines
                         inside the window; the last one always ends with exactly one
                         line feed.
                     :raises ValueError: if both line numbers of `diff_line` are set.
                     """
                     assert self.parsed, "DiffProcessor is not initialized."
                     if None not in diff_line:
                         raise ValueError(
                             "Cannot specify both line numbers: {}".format(diff_line))
                     chunk, idx = self._find_chunk_line_index(
                         self._get_file_diff(path), diff_line)
                     # clamp the window at the start of the chunk; slicing clamps the end
                     window_start = max(idx - context_before, 0)
                     window_stop = idx + context_after + 1
                     line_contents = []
                     for entry in chunk[window_start:window_stop]:
                         if _is_diff_content(entry):
                             line_contents.append(_context_line(entry))
                     # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
                     # Once they are fixed, we can drop this normalisation.
                     if line_contents:
                         last_entry = line_contents[-1]
                         line_contents[-1] = (
                             last_entry[0], last_entry[1].rstrip('\n') + '\n')
                     return line_contents
                 def find_context(self, path, context, offset=0):
                     """
                     Finds the given `context` inside of the diff.

                     Use the parameter `offset` to specify which offset the target line has
                     inside of the given `context`. This way the correct diff line will be
                     returned.

                     Every chunk of the file is scanned on its own, so a match never
                     spans two chunks. A match ending on the last line of any chunk
                     is reported, not only one ending the final chunk.

                     :param path: filename as stored in the parsed diff.
                     :param context: non-empty sequence of ``(action, line)`` items,
                         compared against the result of `_context_line` for each line.
                     :param offset: Shall be used to specify the offset of the main line
                         within the given `context`.
                     :return: list with one diff line number per match, built with
                         `_line_to_diff_line_number`; empty if nothing matched.
                     :raises ValueError: if `offset` is negative or not smaller than
                         the length of `context`.
                     """
                     if offset < 0 or offset >= len(context):
                         raise ValueError(
                             "Only positive values up to the length of the context "
                             "minus one are allowed.")
                     # collected as (index of the line following the match, chunk)
                     matches = []
                     file_diff = self._get_file_diff(path)
                     for chunk in file_diff['chunks']:
                         context_iter = iter(context)
                         # stays at -1 for an empty chunk, so the end-of-chunk check
                         # below never reads an unbound or stale index
                         line_idx = -1
                         for line_idx, line in enumerate(chunk):
                             try:
                                 if _context_line(line) == next(context_iter):
                                     continue
                             except StopIteration:
                                 # the context was exhausted by the previous lines
                                 matches.append((line_idx, chunk))
                             # mismatch or completed match: restart the comparison
                             context_iter = iter(context)
                         # Increment position and trigger StopIteration
                         # if we had a match at the end of this chunk
                         try:
                             next(context_iter)
                         except StopIteration:
                             matches.append((line_idx + 1, chunk))
                     # distance from the line following a match back to the target line
                     effective_offset = len(context) - offset
                     found_at_diff_lines = [
                         _line_to_diff_line_number(chunk[idx - effective_offset])
                         for idx, chunk in matches]
                     return found_at_diff_lines
                 def _get_file_diff(self, path):
                     for file_diff in self.parsed_diff:
                         if file_diff['filename'] == path:
                             break
                     else:
                         raise FileNotInDiffException("File {} not in diff".format(path))
                     return file_diff
                 def _find_chunk_line_index(self, file_diff, diff_line):
                     for chunk in file_diff['chunks']:
                         for idx, line in enumerate(chunk):
                             if line['old_lineno'] == diff_line.old:
                                 return chunk, idx
                             if line['new_lineno'] == diff_line.new:
                                 return chunk, idx
                     raise LineNotInDiffException(
                         "The line {} is not part of the diff.".format(diff_line))
             def _is_diff_content(line):
                 """
                 Tell whether `line` is an unmodified, added or deleted line.

                 :param line: line dict providing the key ``action``.
                 """
                 action = line['action']
                 return action in (Action.UNMODIFIED, Action.ADD, Action.DELETE)
             def _context_line(line):
                 return (line['action'], line['line'])
             # Address of a single diff line as an (old, new) pair of line numbers.
             # `_line_to_diff_line_number` fills a side with None when the line dict
             # has no number for it.
             DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
             def _line_to_diff_line_number(line):
                 """
                 Convert a line dict into a :class:`DiffLineNumber`.

                 Falsy line numbers (such as the empty string) are turned into None.
                 """
                 return DiffLineNumber(
                     new=line['new_lineno'] or None,
                     old=line['old_lineno'] or None)
             class FileNotInDiffException(Exception):
                 """
                 Raised when the context for a missing file is requested.

                 If you request the context for a line in a file which is not part of the
                 given diff, then this exception is raised. The file lookup raises it
                 when no parsed entry has the requested filename.
                 """
             class LineNotInDiffException(Exception):
                 """
                 Raised when the context for a missing line is requested.

                 If you request the context for a line in a file and this line is not
                 part of the given diff, then this exception is raised. The line lookup
                 raises it when no line of any chunk carries the requested number.
                 """
             class DiffLimitExceeded(Exception):
                 """
                 Raised while parsing when a file's diff is larger than the configured
                 file limit and the full diff was not requested.

                 The parser catches it, leaves the file without chunks and marks the
                 result as a limited diff.
                 """
                 pass
             def cache_diff(cached_diff_file, diff, commits):
                 """
                 Store a diff together with its commits as a bz2-compressed pickle.

                 Saving is best effort: a failure is logged and never raised, and a
                 partially written file is removed so no damaged cache is left behind.

                 :param cached_diff_file: path of the cache file to (over)write.
                 :param diff: object stored under the ``diff`` key.
                 :param commits: object stored under the ``commits`` key.
                 """
                 struct = {
                     'version': 'v1',
                     'diff': diff,
                     'commits': commits
                 }
                 try:
                     with bz2.BZ2File(cached_diff_file, 'wb') as f:
                         pickle.dump(struct, f)
                     log.debug('Saved diff cache under %s', cached_diff_file)
                 except Exception:
                     log.warning('Failed to save cache', exc_info=True)
                     # cleanup file to not store it "damaged"
                     try:
                         # the file may never have been created (e.g. missing directory);
                         # only remove what actually exists to avoid a second error log
                         if os.path.isfile(cached_diff_file):
                             os.remove(cached_diff_file)
                     except Exception:
                         log.exception('Failed to cleanup path %s', cached_diff_file)
             def load_cached_diff(cached_diff_file):
                 """
                 Load a diff cache written as a bz2-compressed pickle.

                 Loading is best effort: a missing, unreadable, empty or non-dict
                 cache yields a default structure whose ``diff`` and ``commits``
                 are None; failures are logged and never raised.

                 :param cached_diff_file: path of the cache file to read.
                 :return: dict with the keys ``version``, ``diff`` and ``commits``.
                 """
                 default_struct = {
                     'version': 'v1',
                     'diff': None,
                     'commits': None
                 }
                 if not os.path.isfile(cached_diff_file):
                     return default_struct
                 data = None
                 try:
                     # NOTE: unpickling executes code embedded in the file, so the cache
                     # location must only ever hold files written by this application
                     with bz2.BZ2File(cached_diff_file, 'rb') as f:
                         data = pickle.load(f)
                     log.debug('Loaded diff cache from %s', cached_diff_file)
                 except Exception:
                     log.warning('Failed to read diff cache file', exc_info=True)
                 # nothing loaded, or a payload that is not a dict (old version of data ?)
                 if not data or not isinstance(data, dict):
                     data = default_struct
                 return data
             def generate_diff_cache_key(*args):
                 """
                 Helper to generate a cache key using arguments

                 Every argument is converted with `safe_str` and has its ``/``
                 characters replaced by ``_``; an empty result is rendered as
                 ``None``. The parts are joined with ``_``.

                 :param args: values that identify the cached diff.
                 :return: the cache key; an empty string when called without arguments.
                 """
                 def arg_mapper(input_param):
                     input_param = safe_str(input_param)
                     # we cannot allow '/' in arguments since it would allow
                     # subdirectory usage; strings are immutable, so the result of
                     # replace() has to be bound again
                     input_param = input_param.replace('/', '_')
                     return input_param or None  # prevent empty string arguments
                 return '_'.join('{}'.format(arg_mapper(arg)) for arg in args)
             def diff_cache_exist(cache_storage, *args):
                 """
                 Based on all generated arguments check and return a cache path

                 :param cache_storage: directory that holds the diff cache files.
                 :param args: values forwarded to `generate_diff_cache_key`.
                 :return: `cache_storage` joined with the generated cache key.
                 :raises ValueError: if the resulting path would be located outside
                     of `cache_storage`.
                 """
                 cache_key = generate_diff_cache_key(*args)
                 cache_file_path = os.path.join(cache_storage, cache_key)
                 # prevent path traversal attacks using some param that have e.g '../../'
                 # Both sides are normalised and the prefix is compared including the
                 # trailing separator, so a sibling such as "<storage>-other" is rejected.
                 storage_root = os.path.abspath(cache_storage)
                 resolved_path = os.path.abspath(cache_file_path)
                 inside_storage = (
                     resolved_path == storage_root or
                     resolved_path.startswith(os.path.join(storage_root, '')))
                 if not inside_storage:
                     raise ValueError('Final path must be within {}'.format(cache_storage))
                 return cache_file_path