##// END OF EJS Templates
diffs: improve logging
marcink -
r4324:6440858f default
parent child Browse files
Show More
@@ -1,1271 +1,1272 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2020 RhodeCode GmbH
3 # Copyright (C) 2011-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Set of diffing helpers, previously part of vcs
23 Set of diffing helpers, previously part of vcs
24 """
24 """
25
25
26 import os
26 import os
27 import re
27 import re
28 import bz2
28 import bz2
29 import gzip
29 import gzip
30 import time
30 import time
31
31
32 import collections
32 import collections
33 import difflib
33 import difflib
34 import logging
34 import logging
35 import cPickle as pickle
35 import cPickle as pickle
36 from itertools import tee, imap
36 from itertools import tee, imap
37
37
38 from rhodecode.lib.vcs.exceptions import VCSError
38 from rhodecode.lib.vcs.exceptions import VCSError
39 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
39 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
41
41
42 log = logging.getLogger(__name__)
42 log = logging.getLogger(__name__)
43
43
44 # define max context, a file with more than this numbers of lines is unusable
44 # define max context, a file with more than this numbers of lines is unusable
45 # in browser anyway
45 # in browser anyway
46 MAX_CONTEXT = 20 * 1024
46 MAX_CONTEXT = 20 * 1024
47 DEFAULT_CONTEXT = 3
47 DEFAULT_CONTEXT = 3
48
48
49
49
50 def get_diff_context(request):
50 def get_diff_context(request):
51 return MAX_CONTEXT if request.GET.get('fullcontext', '') == '1' else DEFAULT_CONTEXT
51 return MAX_CONTEXT if request.GET.get('fullcontext', '') == '1' else DEFAULT_CONTEXT
52
52
53
53
54 def get_diff_whitespace_flag(request):
54 def get_diff_whitespace_flag(request):
55 return request.GET.get('ignorews', '') == '1'
55 return request.GET.get('ignorews', '') == '1'
56
56
57
57
58 class OPS(object):
58 class OPS(object):
59 ADD = 'A'
59 ADD = 'A'
60 MOD = 'M'
60 MOD = 'M'
61 DEL = 'D'
61 DEL = 'D'
62
62
63
63
64 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
64 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
65 """
65 """
66 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
66 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
67
67
68 :param ignore_whitespace: ignore whitespaces in diff
68 :param ignore_whitespace: ignore whitespaces in diff
69 """
69 """
70 # make sure we pass in default context
70 # make sure we pass in default context
71 context = context or 3
71 context = context or 3
72 # protect against IntOverflow when passing HUGE context
72 # protect against IntOverflow when passing HUGE context
73 if context > MAX_CONTEXT:
73 if context > MAX_CONTEXT:
74 context = MAX_CONTEXT
74 context = MAX_CONTEXT
75
75
76 submodules = filter(lambda o: isinstance(o, SubModuleNode),
76 submodules = filter(lambda o: isinstance(o, SubModuleNode),
77 [filenode_new, filenode_old])
77 [filenode_new, filenode_old])
78 if submodules:
78 if submodules:
79 return ''
79 return ''
80
80
81 for filenode in (filenode_old, filenode_new):
81 for filenode in (filenode_old, filenode_new):
82 if not isinstance(filenode, FileNode):
82 if not isinstance(filenode, FileNode):
83 raise VCSError(
83 raise VCSError(
84 "Given object should be FileNode object, not %s"
84 "Given object should be FileNode object, not %s"
85 % filenode.__class__)
85 % filenode.__class__)
86
86
87 repo = filenode_new.commit.repository
87 repo = filenode_new.commit.repository
88 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
88 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
89 new_commit = filenode_new.commit
89 new_commit = filenode_new.commit
90
90
91 vcs_gitdiff = repo.get_diff(
91 vcs_gitdiff = repo.get_diff(
92 old_commit, new_commit, filenode_new.path,
92 old_commit, new_commit, filenode_new.path,
93 ignore_whitespace, context, path1=filenode_old.path)
93 ignore_whitespace, context, path1=filenode_old.path)
94 return vcs_gitdiff
94 return vcs_gitdiff
95
95
96 NEW_FILENODE = 1
96 NEW_FILENODE = 1
97 DEL_FILENODE = 2
97 DEL_FILENODE = 2
98 MOD_FILENODE = 3
98 MOD_FILENODE = 3
99 RENAMED_FILENODE = 4
99 RENAMED_FILENODE = 4
100 COPIED_FILENODE = 5
100 COPIED_FILENODE = 5
101 CHMOD_FILENODE = 6
101 CHMOD_FILENODE = 6
102 BIN_FILENODE = 7
102 BIN_FILENODE = 7
103
103
104
104
105 class LimitedDiffContainer(object):
105 class LimitedDiffContainer(object):
106
106
107 def __init__(self, diff_limit, cur_diff_size, diff):
107 def __init__(self, diff_limit, cur_diff_size, diff):
108 self.diff = diff
108 self.diff = diff
109 self.diff_limit = diff_limit
109 self.diff_limit = diff_limit
110 self.cur_diff_size = cur_diff_size
110 self.cur_diff_size = cur_diff_size
111
111
112 def __getitem__(self, key):
112 def __getitem__(self, key):
113 return self.diff.__getitem__(key)
113 return self.diff.__getitem__(key)
114
114
115 def __iter__(self):
115 def __iter__(self):
116 for l in self.diff:
116 for l in self.diff:
117 yield l
117 yield l
118
118
119
119
120 class Action(object):
120 class Action(object):
121 """
121 """
122 Contains constants for the action value of the lines in a parsed diff.
122 Contains constants for the action value of the lines in a parsed diff.
123 """
123 """
124
124
125 ADD = 'add'
125 ADD = 'add'
126 DELETE = 'del'
126 DELETE = 'del'
127 UNMODIFIED = 'unmod'
127 UNMODIFIED = 'unmod'
128
128
129 CONTEXT = 'context'
129 CONTEXT = 'context'
130 OLD_NO_NL = 'old-no-nl'
130 OLD_NO_NL = 'old-no-nl'
131 NEW_NO_NL = 'new-no-nl'
131 NEW_NO_NL = 'new-no-nl'
132
132
133
133
134 class DiffProcessor(object):
134 class DiffProcessor(object):
135 """
135 """
136 Give it a unified or git diff and it returns a list of the files that were
136 Give it a unified or git diff and it returns a list of the files that were
137 mentioned in the diff together with a dict of meta information that
137 mentioned in the diff together with a dict of meta information that
138 can be used to render it in a HTML template.
138 can be used to render it in a HTML template.
139
139
140 .. note:: Unicode handling
140 .. note:: Unicode handling
141
141
142 The original diffs are a byte sequence and can contain filenames
142 The original diffs are a byte sequence and can contain filenames
143 in mixed encodings. This class generally returns `unicode` objects
143 in mixed encodings. This class generally returns `unicode` objects
144 since the result is intended for presentation to the user.
144 since the result is intended for presentation to the user.
145
145
146 """
146 """
147 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
147 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
148 _newline_marker = re.compile(r'^\\ No newline at end of file')
148 _newline_marker = re.compile(r'^\\ No newline at end of file')
149
149
150 # used for inline highlighter word split
150 # used for inline highlighter word split
151 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
151 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
152
152
153 # collapse ranges of commits over given number
153 # collapse ranges of commits over given number
154 _collapse_commits_over = 5
154 _collapse_commits_over = 5
155
155
156 def __init__(self, diff, format='gitdiff', diff_limit=None,
156 def __init__(self, diff, format='gitdiff', diff_limit=None,
157 file_limit=None, show_full_diff=True):
157 file_limit=None, show_full_diff=True):
158 """
158 """
159 :param diff: A `Diff` object representing a diff from a vcs backend
159 :param diff: A `Diff` object representing a diff from a vcs backend
160 :param format: format of diff passed, `udiff` or `gitdiff`
160 :param format: format of diff passed, `udiff` or `gitdiff`
161 :param diff_limit: define the size of diff that is considered "big"
161 :param diff_limit: define the size of diff that is considered "big"
162 based on that parameter cut off will be triggered, set to None
162 based on that parameter cut off will be triggered, set to None
163 to show full diff
163 to show full diff
164 """
164 """
165 self._diff = diff
165 self._diff = diff
166 self._format = format
166 self._format = format
167 self.adds = 0
167 self.adds = 0
168 self.removes = 0
168 self.removes = 0
169 # calculate diff size
169 # calculate diff size
170 self.diff_limit = diff_limit
170 self.diff_limit = diff_limit
171 self.file_limit = file_limit
171 self.file_limit = file_limit
172 self.show_full_diff = show_full_diff
172 self.show_full_diff = show_full_diff
173 self.cur_diff_size = 0
173 self.cur_diff_size = 0
174 self.parsed = False
174 self.parsed = False
175 self.parsed_diff = []
175 self.parsed_diff = []
176
176
177 log.debug('Initialized DiffProcessor with %s mode', format)
177 log.debug('Initialized DiffProcessor with %s mode', format)
178 if format == 'gitdiff':
178 if format == 'gitdiff':
179 self.differ = self._highlight_line_difflib
179 self.differ = self._highlight_line_difflib
180 self._parser = self._parse_gitdiff
180 self._parser = self._parse_gitdiff
181 else:
181 else:
182 self.differ = self._highlight_line_udiff
182 self.differ = self._highlight_line_udiff
183 self._parser = self._new_parse_gitdiff
183 self._parser = self._new_parse_gitdiff
184
184
185 def _copy_iterator(self):
185 def _copy_iterator(self):
186 """
186 """
187 make a fresh copy of generator, we should not iterate thru
187 make a fresh copy of generator, we should not iterate thru
188 an original as it's needed for repeating operations on
188 an original as it's needed for repeating operations on
189 this instance of DiffProcessor
189 this instance of DiffProcessor
190 """
190 """
191 self.__udiff, iterator_copy = tee(self.__udiff)
191 self.__udiff, iterator_copy = tee(self.__udiff)
192 return iterator_copy
192 return iterator_copy
193
193
194 def _escaper(self, string):
194 def _escaper(self, string):
195 """
195 """
196 Escaper for diff escapes special chars and checks the diff limit
196 Escaper for diff escapes special chars and checks the diff limit
197
197
198 :param string:
198 :param string:
199 """
199 """
200 self.cur_diff_size += len(string)
200 self.cur_diff_size += len(string)
201
201
202 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
202 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
203 raise DiffLimitExceeded('Diff Limit Exceeded')
203 raise DiffLimitExceeded('Diff Limit Exceeded')
204
204
205 return string \
205 return string \
206 .replace('&', '&amp;')\
206 .replace('&', '&amp;')\
207 .replace('<', '&lt;')\
207 .replace('<', '&lt;')\
208 .replace('>', '&gt;')
208 .replace('>', '&gt;')
209
209
210 def _line_counter(self, l):
210 def _line_counter(self, l):
211 """
211 """
212 Checks each line and bumps total adds/removes for this diff
212 Checks each line and bumps total adds/removes for this diff
213
213
214 :param l:
214 :param l:
215 """
215 """
216 if l.startswith('+') and not l.startswith('+++'):
216 if l.startswith('+') and not l.startswith('+++'):
217 self.adds += 1
217 self.adds += 1
218 elif l.startswith('-') and not l.startswith('---'):
218 elif l.startswith('-') and not l.startswith('---'):
219 self.removes += 1
219 self.removes += 1
220 return safe_unicode(l)
220 return safe_unicode(l)
221
221
222 def _highlight_line_difflib(self, line, next_):
222 def _highlight_line_difflib(self, line, next_):
223 """
223 """
224 Highlight inline changes in both lines.
224 Highlight inline changes in both lines.
225 """
225 """
226
226
227 if line['action'] == Action.DELETE:
227 if line['action'] == Action.DELETE:
228 old, new = line, next_
228 old, new = line, next_
229 else:
229 else:
230 old, new = next_, line
230 old, new = next_, line
231
231
232 oldwords = self._token_re.split(old['line'])
232 oldwords = self._token_re.split(old['line'])
233 newwords = self._token_re.split(new['line'])
233 newwords = self._token_re.split(new['line'])
234 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
234 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
235
235
236 oldfragments, newfragments = [], []
236 oldfragments, newfragments = [], []
237 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
237 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
238 oldfrag = ''.join(oldwords[i1:i2])
238 oldfrag = ''.join(oldwords[i1:i2])
239 newfrag = ''.join(newwords[j1:j2])
239 newfrag = ''.join(newwords[j1:j2])
240 if tag != 'equal':
240 if tag != 'equal':
241 if oldfrag:
241 if oldfrag:
242 oldfrag = '<del>%s</del>' % oldfrag
242 oldfrag = '<del>%s</del>' % oldfrag
243 if newfrag:
243 if newfrag:
244 newfrag = '<ins>%s</ins>' % newfrag
244 newfrag = '<ins>%s</ins>' % newfrag
245 oldfragments.append(oldfrag)
245 oldfragments.append(oldfrag)
246 newfragments.append(newfrag)
246 newfragments.append(newfrag)
247
247
248 old['line'] = "".join(oldfragments)
248 old['line'] = "".join(oldfragments)
249 new['line'] = "".join(newfragments)
249 new['line'] = "".join(newfragments)
250
250
251 def _highlight_line_udiff(self, line, next_):
251 def _highlight_line_udiff(self, line, next_):
252 """
252 """
253 Highlight inline changes in both lines.
253 Highlight inline changes in both lines.
254 """
254 """
255 start = 0
255 start = 0
256 limit = min(len(line['line']), len(next_['line']))
256 limit = min(len(line['line']), len(next_['line']))
257 while start < limit and line['line'][start] == next_['line'][start]:
257 while start < limit and line['line'][start] == next_['line'][start]:
258 start += 1
258 start += 1
259 end = -1
259 end = -1
260 limit -= start
260 limit -= start
261 while -end <= limit and line['line'][end] == next_['line'][end]:
261 while -end <= limit and line['line'][end] == next_['line'][end]:
262 end -= 1
262 end -= 1
263 end += 1
263 end += 1
264 if start or end:
264 if start or end:
265 def do(l):
265 def do(l):
266 last = end + len(l['line'])
266 last = end + len(l['line'])
267 if l['action'] == Action.ADD:
267 if l['action'] == Action.ADD:
268 tag = 'ins'
268 tag = 'ins'
269 else:
269 else:
270 tag = 'del'
270 tag = 'del'
271 l['line'] = '%s<%s>%s</%s>%s' % (
271 l['line'] = '%s<%s>%s</%s>%s' % (
272 l['line'][:start],
272 l['line'][:start],
273 tag,
273 tag,
274 l['line'][start:last],
274 l['line'][start:last],
275 tag,
275 tag,
276 l['line'][last:]
276 l['line'][last:]
277 )
277 )
278 do(line)
278 do(line)
279 do(next_)
279 do(next_)
280
280
281 def _clean_line(self, line, command):
281 def _clean_line(self, line, command):
282 if command in ['+', '-', ' ']:
282 if command in ['+', '-', ' ']:
283 # only modify the line if it's actually a diff thing
283 # only modify the line if it's actually a diff thing
284 line = line[1:]
284 line = line[1:]
285 return line
285 return line
286
286
287 def _parse_gitdiff(self, inline_diff=True):
287 def _parse_gitdiff(self, inline_diff=True):
288 _files = []
288 _files = []
289 diff_container = lambda arg: arg
289 diff_container = lambda arg: arg
290
290
291 for chunk in self._diff.chunks():
291 for chunk in self._diff.chunks():
292 head = chunk.header
292 head = chunk.header
293
293
294 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
294 diff = imap(self._escaper, self.diff_splitter(chunk.diff))
295 raw_diff = chunk.raw
295 raw_diff = chunk.raw
296 limited_diff = False
296 limited_diff = False
297 exceeds_limit = False
297 exceeds_limit = False
298
298
299 op = None
299 op = None
300 stats = {
300 stats = {
301 'added': 0,
301 'added': 0,
302 'deleted': 0,
302 'deleted': 0,
303 'binary': False,
303 'binary': False,
304 'ops': {},
304 'ops': {},
305 }
305 }
306
306
307 if head['deleted_file_mode']:
307 if head['deleted_file_mode']:
308 op = OPS.DEL
308 op = OPS.DEL
309 stats['binary'] = True
309 stats['binary'] = True
310 stats['ops'][DEL_FILENODE] = 'deleted file'
310 stats['ops'][DEL_FILENODE] = 'deleted file'
311
311
312 elif head['new_file_mode']:
312 elif head['new_file_mode']:
313 op = OPS.ADD
313 op = OPS.ADD
314 stats['binary'] = True
314 stats['binary'] = True
315 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
315 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
316 else: # modify operation, can be copy, rename or chmod
316 else: # modify operation, can be copy, rename or chmod
317
317
318 # CHMOD
318 # CHMOD
319 if head['new_mode'] and head['old_mode']:
319 if head['new_mode'] and head['old_mode']:
320 op = OPS.MOD
320 op = OPS.MOD
321 stats['binary'] = True
321 stats['binary'] = True
322 stats['ops'][CHMOD_FILENODE] = (
322 stats['ops'][CHMOD_FILENODE] = (
323 'modified file chmod %s => %s' % (
323 'modified file chmod %s => %s' % (
324 head['old_mode'], head['new_mode']))
324 head['old_mode'], head['new_mode']))
325 # RENAME
325 # RENAME
326 if head['rename_from'] != head['rename_to']:
326 if head['rename_from'] != head['rename_to']:
327 op = OPS.MOD
327 op = OPS.MOD
328 stats['binary'] = True
328 stats['binary'] = True
329 stats['ops'][RENAMED_FILENODE] = (
329 stats['ops'][RENAMED_FILENODE] = (
330 'file renamed from %s to %s' % (
330 'file renamed from %s to %s' % (
331 head['rename_from'], head['rename_to']))
331 head['rename_from'], head['rename_to']))
332 # COPY
332 # COPY
333 if head.get('copy_from') and head.get('copy_to'):
333 if head.get('copy_from') and head.get('copy_to'):
334 op = OPS.MOD
334 op = OPS.MOD
335 stats['binary'] = True
335 stats['binary'] = True
336 stats['ops'][COPIED_FILENODE] = (
336 stats['ops'][COPIED_FILENODE] = (
337 'file copied from %s to %s' % (
337 'file copied from %s to %s' % (
338 head['copy_from'], head['copy_to']))
338 head['copy_from'], head['copy_to']))
339
339
340 # If our new parsed headers didn't match anything fallback to
340 # If our new parsed headers didn't match anything fallback to
341 # old style detection
341 # old style detection
342 if op is None:
342 if op is None:
343 if not head['a_file'] and head['b_file']:
343 if not head['a_file'] and head['b_file']:
344 op = OPS.ADD
344 op = OPS.ADD
345 stats['binary'] = True
345 stats['binary'] = True
346 stats['ops'][NEW_FILENODE] = 'new file'
346 stats['ops'][NEW_FILENODE] = 'new file'
347
347
348 elif head['a_file'] and not head['b_file']:
348 elif head['a_file'] and not head['b_file']:
349 op = OPS.DEL
349 op = OPS.DEL
350 stats['binary'] = True
350 stats['binary'] = True
351 stats['ops'][DEL_FILENODE] = 'deleted file'
351 stats['ops'][DEL_FILENODE] = 'deleted file'
352
352
353 # it's not ADD not DELETE
353 # it's not ADD not DELETE
354 if op is None:
354 if op is None:
355 op = OPS.MOD
355 op = OPS.MOD
356 stats['binary'] = True
356 stats['binary'] = True
357 stats['ops'][MOD_FILENODE] = 'modified file'
357 stats['ops'][MOD_FILENODE] = 'modified file'
358
358
359 # a real non-binary diff
359 # a real non-binary diff
360 if head['a_file'] or head['b_file']:
360 if head['a_file'] or head['b_file']:
361 try:
361 try:
362 raw_diff, chunks, _stats = self._parse_lines(diff)
362 raw_diff, chunks, _stats = self._parse_lines(diff)
363 stats['binary'] = False
363 stats['binary'] = False
364 stats['added'] = _stats[0]
364 stats['added'] = _stats[0]
365 stats['deleted'] = _stats[1]
365 stats['deleted'] = _stats[1]
366 # explicit mark that it's a modified file
366 # explicit mark that it's a modified file
367 if op == OPS.MOD:
367 if op == OPS.MOD:
368 stats['ops'][MOD_FILENODE] = 'modified file'
368 stats['ops'][MOD_FILENODE] = 'modified file'
369 exceeds_limit = len(raw_diff) > self.file_limit
369 exceeds_limit = len(raw_diff) > self.file_limit
370
370
371 # changed from _escaper function so we validate size of
371 # changed from _escaper function so we validate size of
372 # each file instead of the whole diff
372 # each file instead of the whole diff
373 # diff will hide big files but still show small ones
373 # diff will hide big files but still show small ones
374 # from my tests, big files are fairly safe to be parsed
374 # from my tests, big files are fairly safe to be parsed
375 # but the browser is the bottleneck
375 # but the browser is the bottleneck
376 if not self.show_full_diff and exceeds_limit:
376 if not self.show_full_diff and exceeds_limit:
377 raise DiffLimitExceeded('File Limit Exceeded')
377 raise DiffLimitExceeded('File Limit Exceeded')
378
378
379 except DiffLimitExceeded:
379 except DiffLimitExceeded:
380 diff_container = lambda _diff: \
380 diff_container = lambda _diff: \
381 LimitedDiffContainer(
381 LimitedDiffContainer(
382 self.diff_limit, self.cur_diff_size, _diff)
382 self.diff_limit, self.cur_diff_size, _diff)
383
383
384 exceeds_limit = len(raw_diff) > self.file_limit
384 exceeds_limit = len(raw_diff) > self.file_limit
385 limited_diff = True
385 limited_diff = True
386 chunks = []
386 chunks = []
387
387
388 else: # GIT format binary patch, or possibly empty diff
388 else: # GIT format binary patch, or possibly empty diff
389 if head['bin_patch']:
389 if head['bin_patch']:
390 # we have operation already extracted, but we mark simply
390 # we have operation already extracted, but we mark simply
391 # it's a diff we wont show for binary files
391 # it's a diff we wont show for binary files
392 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
392 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
393 chunks = []
393 chunks = []
394
394
395 if chunks and not self.show_full_diff and op == OPS.DEL:
395 if chunks and not self.show_full_diff and op == OPS.DEL:
396 # if not full diff mode show deleted file contents
396 # if not full diff mode show deleted file contents
397 # TODO: anderson: if the view is not too big, there is no way
397 # TODO: anderson: if the view is not too big, there is no way
398 # to see the content of the file
398 # to see the content of the file
399 chunks = []
399 chunks = []
400
400
401 chunks.insert(0, [{
401 chunks.insert(0, [{
402 'old_lineno': '',
402 'old_lineno': '',
403 'new_lineno': '',
403 'new_lineno': '',
404 'action': Action.CONTEXT,
404 'action': Action.CONTEXT,
405 'line': msg,
405 'line': msg,
406 } for _op, msg in stats['ops'].iteritems()
406 } for _op, msg in stats['ops'].iteritems()
407 if _op not in [MOD_FILENODE]])
407 if _op not in [MOD_FILENODE]])
408
408
409 _files.append({
409 _files.append({
410 'filename': safe_unicode(head['b_path']),
410 'filename': safe_unicode(head['b_path']),
411 'old_revision': head['a_blob_id'],
411 'old_revision': head['a_blob_id'],
412 'new_revision': head['b_blob_id'],
412 'new_revision': head['b_blob_id'],
413 'chunks': chunks,
413 'chunks': chunks,
414 'raw_diff': safe_unicode(raw_diff),
414 'raw_diff': safe_unicode(raw_diff),
415 'operation': op,
415 'operation': op,
416 'stats': stats,
416 'stats': stats,
417 'exceeds_limit': exceeds_limit,
417 'exceeds_limit': exceeds_limit,
418 'is_limited_diff': limited_diff,
418 'is_limited_diff': limited_diff,
419 })
419 })
420
420
421 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
421 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
422 OPS.DEL: 2}.get(info['operation'])
422 OPS.DEL: 2}.get(info['operation'])
423
423
424 if not inline_diff:
424 if not inline_diff:
425 return diff_container(sorted(_files, key=sorter))
425 return diff_container(sorted(_files, key=sorter))
426
426
427 # highlight inline changes
427 # highlight inline changes
428 for diff_data in _files:
428 for diff_data in _files:
429 for chunk in diff_data['chunks']:
429 for chunk in diff_data['chunks']:
430 lineiter = iter(chunk)
430 lineiter = iter(chunk)
431 try:
431 try:
432 while 1:
432 while 1:
433 line = lineiter.next()
433 line = lineiter.next()
434 if line['action'] not in (
434 if line['action'] not in (
435 Action.UNMODIFIED, Action.CONTEXT):
435 Action.UNMODIFIED, Action.CONTEXT):
436 nextline = lineiter.next()
436 nextline = lineiter.next()
437 if nextline['action'] in ['unmod', 'context'] or \
437 if nextline['action'] in ['unmod', 'context'] or \
438 nextline['action'] == line['action']:
438 nextline['action'] == line['action']:
439 continue
439 continue
440 self.differ(line, nextline)
440 self.differ(line, nextline)
441 except StopIteration:
441 except StopIteration:
442 pass
442 pass
443
443
444 return diff_container(sorted(_files, key=sorter))
444 return diff_container(sorted(_files, key=sorter))
445
445
446 def _check_large_diff(self):
446 def _check_large_diff(self):
447 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
447 if self.diff_limit:
448 log.debug('Checking if diff exceeds current diff_limit of %s', self.diff_limit)
448 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
449 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
449 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
450 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
450
451
451 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
452 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
452 def _new_parse_gitdiff(self, inline_diff=True):
453 def _new_parse_gitdiff(self, inline_diff=True):
453 _files = []
454 _files = []
454
455
455 # this can be overriden later to a LimitedDiffContainer type
456 # this can be overriden later to a LimitedDiffContainer type
456 diff_container = lambda arg: arg
457 diff_container = lambda arg: arg
457
458
458 for chunk in self._diff.chunks():
459 for chunk in self._diff.chunks():
459 head = chunk.header
460 head = chunk.header
460 log.debug('parsing diff %r', head)
461 log.debug('parsing diff %r', head)
461
462
462 raw_diff = chunk.raw
463 raw_diff = chunk.raw
463 limited_diff = False
464 limited_diff = False
464 exceeds_limit = False
465 exceeds_limit = False
465
466
466 op = None
467 op = None
467 stats = {
468 stats = {
468 'added': 0,
469 'added': 0,
469 'deleted': 0,
470 'deleted': 0,
470 'binary': False,
471 'binary': False,
471 'old_mode': None,
472 'old_mode': None,
472 'new_mode': None,
473 'new_mode': None,
473 'ops': {},
474 'ops': {},
474 }
475 }
475 if head['old_mode']:
476 if head['old_mode']:
476 stats['old_mode'] = head['old_mode']
477 stats['old_mode'] = head['old_mode']
477 if head['new_mode']:
478 if head['new_mode']:
478 stats['new_mode'] = head['new_mode']
479 stats['new_mode'] = head['new_mode']
479 if head['b_mode']:
480 if head['b_mode']:
480 stats['new_mode'] = head['b_mode']
481 stats['new_mode'] = head['b_mode']
481
482
482 # delete file
483 # delete file
483 if head['deleted_file_mode']:
484 if head['deleted_file_mode']:
484 op = OPS.DEL
485 op = OPS.DEL
485 stats['binary'] = True
486 stats['binary'] = True
486 stats['ops'][DEL_FILENODE] = 'deleted file'
487 stats['ops'][DEL_FILENODE] = 'deleted file'
487
488
488 # new file
489 # new file
489 elif head['new_file_mode']:
490 elif head['new_file_mode']:
490 op = OPS.ADD
491 op = OPS.ADD
491 stats['binary'] = True
492 stats['binary'] = True
492 stats['old_mode'] = None
493 stats['old_mode'] = None
493 stats['new_mode'] = head['new_file_mode']
494 stats['new_mode'] = head['new_file_mode']
494 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
495 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
495
496
496 # modify operation, can be copy, rename or chmod
497 # modify operation, can be copy, rename or chmod
497 else:
498 else:
498 # CHMOD
499 # CHMOD
499 if head['new_mode'] and head['old_mode']:
500 if head['new_mode'] and head['old_mode']:
500 op = OPS.MOD
501 op = OPS.MOD
501 stats['binary'] = True
502 stats['binary'] = True
502 stats['ops'][CHMOD_FILENODE] = (
503 stats['ops'][CHMOD_FILENODE] = (
503 'modified file chmod %s => %s' % (
504 'modified file chmod %s => %s' % (
504 head['old_mode'], head['new_mode']))
505 head['old_mode'], head['new_mode']))
505
506
506 # RENAME
507 # RENAME
507 if head['rename_from'] != head['rename_to']:
508 if head['rename_from'] != head['rename_to']:
508 op = OPS.MOD
509 op = OPS.MOD
509 stats['binary'] = True
510 stats['binary'] = True
510 stats['renamed'] = (head['rename_from'], head['rename_to'])
511 stats['renamed'] = (head['rename_from'], head['rename_to'])
511 stats['ops'][RENAMED_FILENODE] = (
512 stats['ops'][RENAMED_FILENODE] = (
512 'file renamed from %s to %s' % (
513 'file renamed from %s to %s' % (
513 head['rename_from'], head['rename_to']))
514 head['rename_from'], head['rename_to']))
514 # COPY
515 # COPY
515 if head.get('copy_from') and head.get('copy_to'):
516 if head.get('copy_from') and head.get('copy_to'):
516 op = OPS.MOD
517 op = OPS.MOD
517 stats['binary'] = True
518 stats['binary'] = True
518 stats['copied'] = (head['copy_from'], head['copy_to'])
519 stats['copied'] = (head['copy_from'], head['copy_to'])
519 stats['ops'][COPIED_FILENODE] = (
520 stats['ops'][COPIED_FILENODE] = (
520 'file copied from %s to %s' % (
521 'file copied from %s to %s' % (
521 head['copy_from'], head['copy_to']))
522 head['copy_from'], head['copy_to']))
522
523
523 # If our new parsed headers didn't match anything fallback to
524 # If our new parsed headers didn't match anything fallback to
524 # old style detection
525 # old style detection
525 if op is None:
526 if op is None:
526 if not head['a_file'] and head['b_file']:
527 if not head['a_file'] and head['b_file']:
527 op = OPS.ADD
528 op = OPS.ADD
528 stats['binary'] = True
529 stats['binary'] = True
529 stats['new_file'] = True
530 stats['new_file'] = True
530 stats['ops'][NEW_FILENODE] = 'new file'
531 stats['ops'][NEW_FILENODE] = 'new file'
531
532
532 elif head['a_file'] and not head['b_file']:
533 elif head['a_file'] and not head['b_file']:
533 op = OPS.DEL
534 op = OPS.DEL
534 stats['binary'] = True
535 stats['binary'] = True
535 stats['ops'][DEL_FILENODE] = 'deleted file'
536 stats['ops'][DEL_FILENODE] = 'deleted file'
536
537
537 # it's not ADD not DELETE
538 # it's not ADD not DELETE
538 if op is None:
539 if op is None:
539 op = OPS.MOD
540 op = OPS.MOD
540 stats['binary'] = True
541 stats['binary'] = True
541 stats['ops'][MOD_FILENODE] = 'modified file'
542 stats['ops'][MOD_FILENODE] = 'modified file'
542
543
543 # a real non-binary diff
544 # a real non-binary diff
544 if head['a_file'] or head['b_file']:
545 if head['a_file'] or head['b_file']:
545 # simulate splitlines, so we keep the line end part
546 # simulate splitlines, so we keep the line end part
546 diff = self.diff_splitter(chunk.diff)
547 diff = self.diff_splitter(chunk.diff)
547
548
548 # append each file to the diff size
549 # append each file to the diff size
549 raw_chunk_size = len(raw_diff)
550 raw_chunk_size = len(raw_diff)
550
551
551 exceeds_limit = raw_chunk_size > self.file_limit
552 exceeds_limit = raw_chunk_size > self.file_limit
552 self.cur_diff_size += raw_chunk_size
553 self.cur_diff_size += raw_chunk_size
553
554
554 try:
555 try:
555 # Check each file instead of the whole diff.
556 # Check each file instead of the whole diff.
556 # Diff will hide big files but still show small ones.
557 # Diff will hide big files but still show small ones.
557 # From the tests big files are fairly safe to be parsed
558 # From the tests big files are fairly safe to be parsed
558 # but the browser is the bottleneck.
559 # but the browser is the bottleneck.
559 if not self.show_full_diff and exceeds_limit:
560 if not self.show_full_diff and exceeds_limit:
560 log.debug('File `%s` exceeds current file_limit of %s',
561 log.debug('File `%s` exceeds current file_limit of %s',
561 safe_unicode(head['b_path']), self.file_limit)
562 safe_unicode(head['b_path']), self.file_limit)
562 raise DiffLimitExceeded(
563 raise DiffLimitExceeded(
563 'File Limit %s Exceeded', self.file_limit)
564 'File Limit %s Exceeded', self.file_limit)
564
565
565 self._check_large_diff()
566 self._check_large_diff()
566
567
567 raw_diff, chunks, _stats = self._new_parse_lines(diff)
568 raw_diff, chunks, _stats = self._new_parse_lines(diff)
568 stats['binary'] = False
569 stats['binary'] = False
569 stats['added'] = _stats[0]
570 stats['added'] = _stats[0]
570 stats['deleted'] = _stats[1]
571 stats['deleted'] = _stats[1]
571 # explicit mark that it's a modified file
572 # explicit mark that it's a modified file
572 if op == OPS.MOD:
573 if op == OPS.MOD:
573 stats['ops'][MOD_FILENODE] = 'modified file'
574 stats['ops'][MOD_FILENODE] = 'modified file'
574
575
575 except DiffLimitExceeded:
576 except DiffLimitExceeded:
576 diff_container = lambda _diff: \
577 diff_container = lambda _diff: \
577 LimitedDiffContainer(
578 LimitedDiffContainer(
578 self.diff_limit, self.cur_diff_size, _diff)
579 self.diff_limit, self.cur_diff_size, _diff)
579
580
580 limited_diff = True
581 limited_diff = True
581 chunks = []
582 chunks = []
582
583
583 else: # GIT format binary patch, or possibly empty diff
584 else: # GIT format binary patch, or possibly empty diff
584 if head['bin_patch']:
585 if head['bin_patch']:
585 # we have operation already extracted, but we mark simply
586 # we have operation already extracted, but we mark simply
586 # it's a diff we wont show for binary files
587 # it's a diff we wont show for binary files
587 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
588 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
588 chunks = []
589 chunks = []
589
590
590 # Hide content of deleted node by setting empty chunks
591 # Hide content of deleted node by setting empty chunks
591 if chunks and not self.show_full_diff and op == OPS.DEL:
592 if chunks and not self.show_full_diff and op == OPS.DEL:
592 # if not full diff mode show deleted file contents
593 # if not full diff mode show deleted file contents
593 # TODO: anderson: if the view is not too big, there is no way
594 # TODO: anderson: if the view is not too big, there is no way
594 # to see the content of the file
595 # to see the content of the file
595 chunks = []
596 chunks = []
596
597
597 chunks.insert(
598 chunks.insert(
598 0, [{'old_lineno': '',
599 0, [{'old_lineno': '',
599 'new_lineno': '',
600 'new_lineno': '',
600 'action': Action.CONTEXT,
601 'action': Action.CONTEXT,
601 'line': msg,
602 'line': msg,
602 } for _op, msg in stats['ops'].iteritems()
603 } for _op, msg in stats['ops'].iteritems()
603 if _op not in [MOD_FILENODE]])
604 if _op not in [MOD_FILENODE]])
604
605
605 original_filename = safe_unicode(head['a_path'])
606 original_filename = safe_unicode(head['a_path'])
606 _files.append({
607 _files.append({
607 'original_filename': original_filename,
608 'original_filename': original_filename,
608 'filename': safe_unicode(head['b_path']),
609 'filename': safe_unicode(head['b_path']),
609 'old_revision': head['a_blob_id'],
610 'old_revision': head['a_blob_id'],
610 'new_revision': head['b_blob_id'],
611 'new_revision': head['b_blob_id'],
611 'chunks': chunks,
612 'chunks': chunks,
612 'raw_diff': safe_unicode(raw_diff),
613 'raw_diff': safe_unicode(raw_diff),
613 'operation': op,
614 'operation': op,
614 'stats': stats,
615 'stats': stats,
615 'exceeds_limit': exceeds_limit,
616 'exceeds_limit': exceeds_limit,
616 'is_limited_diff': limited_diff,
617 'is_limited_diff': limited_diff,
617 })
618 })
618
619
619 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
620 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
620 OPS.DEL: 2}.get(info['operation'])
621 OPS.DEL: 2}.get(info['operation'])
621
622
622 return diff_container(sorted(_files, key=sorter))
623 return diff_container(sorted(_files, key=sorter))
623
624
624 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
625 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
625 def _parse_lines(self, diff_iter):
626 def _parse_lines(self, diff_iter):
626 """
627 """
627 Parse the diff an return data for the template.
628 Parse the diff an return data for the template.
628 """
629 """
629
630
630 stats = [0, 0]
631 stats = [0, 0]
631 chunks = []
632 chunks = []
632 raw_diff = []
633 raw_diff = []
633
634
634 try:
635 try:
635 line = diff_iter.next()
636 line = diff_iter.next()
636
637
637 while line:
638 while line:
638 raw_diff.append(line)
639 raw_diff.append(line)
639 lines = []
640 lines = []
640 chunks.append(lines)
641 chunks.append(lines)
641
642
642 match = self._chunk_re.match(line)
643 match = self._chunk_re.match(line)
643
644
644 if not match:
645 if not match:
645 break
646 break
646
647
647 gr = match.groups()
648 gr = match.groups()
648 (old_line, old_end,
649 (old_line, old_end,
649 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
650 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
650 old_line -= 1
651 old_line -= 1
651 new_line -= 1
652 new_line -= 1
652
653
653 context = len(gr) == 5
654 context = len(gr) == 5
654 old_end += old_line
655 old_end += old_line
655 new_end += new_line
656 new_end += new_line
656
657
657 if context:
658 if context:
658 # skip context only if it's first line
659 # skip context only if it's first line
659 if int(gr[0]) > 1:
660 if int(gr[0]) > 1:
660 lines.append({
661 lines.append({
661 'old_lineno': '...',
662 'old_lineno': '...',
662 'new_lineno': '...',
663 'new_lineno': '...',
663 'action': Action.CONTEXT,
664 'action': Action.CONTEXT,
664 'line': line,
665 'line': line,
665 })
666 })
666
667
667 line = diff_iter.next()
668 line = diff_iter.next()
668
669
669 while old_line < old_end or new_line < new_end:
670 while old_line < old_end or new_line < new_end:
670 command = ' '
671 command = ' '
671 if line:
672 if line:
672 command = line[0]
673 command = line[0]
673
674
674 affects_old = affects_new = False
675 affects_old = affects_new = False
675
676
676 # ignore those if we don't expect them
677 # ignore those if we don't expect them
677 if command in '#@':
678 if command in '#@':
678 continue
679 continue
679 elif command == '+':
680 elif command == '+':
680 affects_new = True
681 affects_new = True
681 action = Action.ADD
682 action = Action.ADD
682 stats[0] += 1
683 stats[0] += 1
683 elif command == '-':
684 elif command == '-':
684 affects_old = True
685 affects_old = True
685 action = Action.DELETE
686 action = Action.DELETE
686 stats[1] += 1
687 stats[1] += 1
687 else:
688 else:
688 affects_old = affects_new = True
689 affects_old = affects_new = True
689 action = Action.UNMODIFIED
690 action = Action.UNMODIFIED
690
691
691 if not self._newline_marker.match(line):
692 if not self._newline_marker.match(line):
692 old_line += affects_old
693 old_line += affects_old
693 new_line += affects_new
694 new_line += affects_new
694 lines.append({
695 lines.append({
695 'old_lineno': affects_old and old_line or '',
696 'old_lineno': affects_old and old_line or '',
696 'new_lineno': affects_new and new_line or '',
697 'new_lineno': affects_new and new_line or '',
697 'action': action,
698 'action': action,
698 'line': self._clean_line(line, command)
699 'line': self._clean_line(line, command)
699 })
700 })
700 raw_diff.append(line)
701 raw_diff.append(line)
701
702
702 line = diff_iter.next()
703 line = diff_iter.next()
703
704
704 if self._newline_marker.match(line):
705 if self._newline_marker.match(line):
705 # we need to append to lines, since this is not
706 # we need to append to lines, since this is not
706 # counted in the line specs of diff
707 # counted in the line specs of diff
707 lines.append({
708 lines.append({
708 'old_lineno': '...',
709 'old_lineno': '...',
709 'new_lineno': '...',
710 'new_lineno': '...',
710 'action': Action.CONTEXT,
711 'action': Action.CONTEXT,
711 'line': self._clean_line(line, command)
712 'line': self._clean_line(line, command)
712 })
713 })
713
714
714 except StopIteration:
715 except StopIteration:
715 pass
716 pass
716 return ''.join(raw_diff), chunks, stats
717 return ''.join(raw_diff), chunks, stats
717
718
718 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
719 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
719 def _new_parse_lines(self, diff_iter):
720 def _new_parse_lines(self, diff_iter):
720 """
721 """
721 Parse the diff an return data for the template.
722 Parse the diff an return data for the template.
722 """
723 """
723
724
724 stats = [0, 0]
725 stats = [0, 0]
725 chunks = []
726 chunks = []
726 raw_diff = []
727 raw_diff = []
727
728
728 try:
729 try:
729 line = diff_iter.next()
730 line = diff_iter.next()
730
731
731 while line:
732 while line:
732 raw_diff.append(line)
733 raw_diff.append(line)
733 # match header e.g @@ -0,0 +1 @@\n'
734 # match header e.g @@ -0,0 +1 @@\n'
734 match = self._chunk_re.match(line)
735 match = self._chunk_re.match(line)
735
736
736 if not match:
737 if not match:
737 break
738 break
738
739
739 gr = match.groups()
740 gr = match.groups()
740 (old_line, old_end,
741 (old_line, old_end,
741 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
742 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
742
743
743 lines = []
744 lines = []
744 hunk = {
745 hunk = {
745 'section_header': gr[-1],
746 'section_header': gr[-1],
746 'source_start': old_line,
747 'source_start': old_line,
747 'source_length': old_end,
748 'source_length': old_end,
748 'target_start': new_line,
749 'target_start': new_line,
749 'target_length': new_end,
750 'target_length': new_end,
750 'lines': lines,
751 'lines': lines,
751 }
752 }
752 chunks.append(hunk)
753 chunks.append(hunk)
753
754
754 old_line -= 1
755 old_line -= 1
755 new_line -= 1
756 new_line -= 1
756
757
757 context = len(gr) == 5
758 context = len(gr) == 5
758 old_end += old_line
759 old_end += old_line
759 new_end += new_line
760 new_end += new_line
760
761
761 line = diff_iter.next()
762 line = diff_iter.next()
762
763
763 while old_line < old_end or new_line < new_end:
764 while old_line < old_end or new_line < new_end:
764 command = ' '
765 command = ' '
765 if line:
766 if line:
766 command = line[0]
767 command = line[0]
767
768
768 affects_old = affects_new = False
769 affects_old = affects_new = False
769
770
770 # ignore those if we don't expect them
771 # ignore those if we don't expect them
771 if command in '#@':
772 if command in '#@':
772 continue
773 continue
773 elif command == '+':
774 elif command == '+':
774 affects_new = True
775 affects_new = True
775 action = Action.ADD
776 action = Action.ADD
776 stats[0] += 1
777 stats[0] += 1
777 elif command == '-':
778 elif command == '-':
778 affects_old = True
779 affects_old = True
779 action = Action.DELETE
780 action = Action.DELETE
780 stats[1] += 1
781 stats[1] += 1
781 else:
782 else:
782 affects_old = affects_new = True
783 affects_old = affects_new = True
783 action = Action.UNMODIFIED
784 action = Action.UNMODIFIED
784
785
785 if not self._newline_marker.match(line):
786 if not self._newline_marker.match(line):
786 old_line += affects_old
787 old_line += affects_old
787 new_line += affects_new
788 new_line += affects_new
788 lines.append({
789 lines.append({
789 'old_lineno': affects_old and old_line or '',
790 'old_lineno': affects_old and old_line or '',
790 'new_lineno': affects_new and new_line or '',
791 'new_lineno': affects_new and new_line or '',
791 'action': action,
792 'action': action,
792 'line': self._clean_line(line, command)
793 'line': self._clean_line(line, command)
793 })
794 })
794 raw_diff.append(line)
795 raw_diff.append(line)
795
796
796 line = diff_iter.next()
797 line = diff_iter.next()
797
798
798 if self._newline_marker.match(line):
799 if self._newline_marker.match(line):
799 # we need to append to lines, since this is not
800 # we need to append to lines, since this is not
800 # counted in the line specs of diff
801 # counted in the line specs of diff
801 if affects_old:
802 if affects_old:
802 action = Action.OLD_NO_NL
803 action = Action.OLD_NO_NL
803 elif affects_new:
804 elif affects_new:
804 action = Action.NEW_NO_NL
805 action = Action.NEW_NO_NL
805 else:
806 else:
806 raise Exception('invalid context for no newline')
807 raise Exception('invalid context for no newline')
807
808
808 lines.append({
809 lines.append({
809 'old_lineno': None,
810 'old_lineno': None,
810 'new_lineno': None,
811 'new_lineno': None,
811 'action': action,
812 'action': action,
812 'line': self._clean_line(line, command)
813 'line': self._clean_line(line, command)
813 })
814 })
814
815
815 except StopIteration:
816 except StopIteration:
816 pass
817 pass
817
818
818 return ''.join(raw_diff), chunks, stats
819 return ''.join(raw_diff), chunks, stats
819
820
820 def _safe_id(self, idstring):
821 def _safe_id(self, idstring):
821 """Make a string safe for including in an id attribute.
822 """Make a string safe for including in an id attribute.
822
823
823 The HTML spec says that id attributes 'must begin with
824 The HTML spec says that id attributes 'must begin with
824 a letter ([A-Za-z]) and may be followed by any number
825 a letter ([A-Za-z]) and may be followed by any number
825 of letters, digits ([0-9]), hyphens ("-"), underscores
826 of letters, digits ([0-9]), hyphens ("-"), underscores
826 ("_"), colons (":"), and periods (".")'. These regexps
827 ("_"), colons (":"), and periods (".")'. These regexps
827 are slightly over-zealous, in that they remove colons
828 are slightly over-zealous, in that they remove colons
828 and periods unnecessarily.
829 and periods unnecessarily.
829
830
830 Whitespace is transformed into underscores, and then
831 Whitespace is transformed into underscores, and then
831 anything which is not a hyphen or a character that
832 anything which is not a hyphen or a character that
832 matches \w (alphanumerics and underscore) is removed.
833 matches \w (alphanumerics and underscore) is removed.
833
834
834 """
835 """
835 # Transform all whitespace to underscore
836 # Transform all whitespace to underscore
836 idstring = re.sub(r'\s', "_", '%s' % idstring)
837 idstring = re.sub(r'\s', "_", '%s' % idstring)
837 # Remove everything that is not a hyphen or a member of \w
838 # Remove everything that is not a hyphen or a member of \w
838 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
839 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
839 return idstring
840 return idstring
840
841
841 @classmethod
842 @classmethod
842 def diff_splitter(cls, string):
843 def diff_splitter(cls, string):
843 """
844 """
844 Diff split that emulates .splitlines() but works only on \n
845 Diff split that emulates .splitlines() but works only on \n
845 """
846 """
846 if not string:
847 if not string:
847 return
848 return
848 elif string == '\n':
849 elif string == '\n':
849 yield u'\n'
850 yield u'\n'
850 else:
851 else:
851
852
852 has_newline = string.endswith('\n')
853 has_newline = string.endswith('\n')
853 elements = string.split('\n')
854 elements = string.split('\n')
854 if has_newline:
855 if has_newline:
855 # skip last element as it's empty string from newlines
856 # skip last element as it's empty string from newlines
856 elements = elements[:-1]
857 elements = elements[:-1]
857
858
858 len_elements = len(elements)
859 len_elements = len(elements)
859
860
860 for cnt, line in enumerate(elements, start=1):
861 for cnt, line in enumerate(elements, start=1):
861 last_line = cnt == len_elements
862 last_line = cnt == len_elements
862 if last_line and not has_newline:
863 if last_line and not has_newline:
863 yield safe_unicode(line)
864 yield safe_unicode(line)
864 else:
865 else:
865 yield safe_unicode(line) + '\n'
866 yield safe_unicode(line) + '\n'
866
867
867 def prepare(self, inline_diff=True):
868 def prepare(self, inline_diff=True):
868 """
869 """
869 Prepare the passed udiff for HTML rendering.
870 Prepare the passed udiff for HTML rendering.
870
871
871 :return: A list of dicts with diff information.
872 :return: A list of dicts with diff information.
872 """
873 """
873 parsed = self._parser(inline_diff=inline_diff)
874 parsed = self._parser(inline_diff=inline_diff)
874 self.parsed = True
875 self.parsed = True
875 self.parsed_diff = parsed
876 self.parsed_diff = parsed
876 return parsed
877 return parsed
877
878
878 def as_raw(self, diff_lines=None):
879 def as_raw(self, diff_lines=None):
879 """
880 """
880 Returns raw diff as a byte string
881 Returns raw diff as a byte string
881 """
882 """
882 return self._diff.raw
883 return self._diff.raw
883
884
884 def as_html(self, table_class='code-difftable', line_class='line',
885 def as_html(self, table_class='code-difftable', line_class='line',
885 old_lineno_class='lineno old', new_lineno_class='lineno new',
886 old_lineno_class='lineno old', new_lineno_class='lineno new',
886 code_class='code', enable_comments=False, parsed_lines=None):
887 code_class='code', enable_comments=False, parsed_lines=None):
887 """
888 """
888 Return given diff as html table with customized css classes
889 Return given diff as html table with customized css classes
889 """
890 """
890 # TODO(marcink): not sure how to pass in translator
891 # TODO(marcink): not sure how to pass in translator
891 # here in an efficient way, leave the _ for proper gettext extraction
892 # here in an efficient way, leave the _ for proper gettext extraction
892 _ = lambda s: s
893 _ = lambda s: s
893
894
894 def _link_to_if(condition, label, url):
895 def _link_to_if(condition, label, url):
895 """
896 """
896 Generates a link if condition is meet or just the label if not.
897 Generates a link if condition is meet or just the label if not.
897 """
898 """
898
899
899 if condition:
900 if condition:
900 return '''<a href="%(url)s" class="tooltip"
901 return '''<a href="%(url)s" class="tooltip"
901 title="%(title)s">%(label)s</a>''' % {
902 title="%(title)s">%(label)s</a>''' % {
902 'title': _('Click to select line'),
903 'title': _('Click to select line'),
903 'url': url,
904 'url': url,
904 'label': label
905 'label': label
905 }
906 }
906 else:
907 else:
907 return label
908 return label
908 if not self.parsed:
909 if not self.parsed:
909 self.prepare()
910 self.prepare()
910
911
911 diff_lines = self.parsed_diff
912 diff_lines = self.parsed_diff
912 if parsed_lines:
913 if parsed_lines:
913 diff_lines = parsed_lines
914 diff_lines = parsed_lines
914
915
915 _html_empty = True
916 _html_empty = True
916 _html = []
917 _html = []
917 _html.append('''<table class="%(table_class)s">\n''' % {
918 _html.append('''<table class="%(table_class)s">\n''' % {
918 'table_class': table_class
919 'table_class': table_class
919 })
920 })
920
921
921 for diff in diff_lines:
922 for diff in diff_lines:
922 for line in diff['chunks']:
923 for line in diff['chunks']:
923 _html_empty = False
924 _html_empty = False
924 for change in line:
925 for change in line:
925 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
926 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
926 'lc': line_class,
927 'lc': line_class,
927 'action': change['action']
928 'action': change['action']
928 })
929 })
929 anchor_old_id = ''
930 anchor_old_id = ''
930 anchor_new_id = ''
931 anchor_new_id = ''
931 anchor_old = "%(filename)s_o%(oldline_no)s" % {
932 anchor_old = "%(filename)s_o%(oldline_no)s" % {
932 'filename': self._safe_id(diff['filename']),
933 'filename': self._safe_id(diff['filename']),
933 'oldline_no': change['old_lineno']
934 'oldline_no': change['old_lineno']
934 }
935 }
935 anchor_new = "%(filename)s_n%(oldline_no)s" % {
936 anchor_new = "%(filename)s_n%(oldline_no)s" % {
936 'filename': self._safe_id(diff['filename']),
937 'filename': self._safe_id(diff['filename']),
937 'oldline_no': change['new_lineno']
938 'oldline_no': change['new_lineno']
938 }
939 }
939 cond_old = (change['old_lineno'] != '...' and
940 cond_old = (change['old_lineno'] != '...' and
940 change['old_lineno'])
941 change['old_lineno'])
941 cond_new = (change['new_lineno'] != '...' and
942 cond_new = (change['new_lineno'] != '...' and
942 change['new_lineno'])
943 change['new_lineno'])
943 if cond_old:
944 if cond_old:
944 anchor_old_id = 'id="%s"' % anchor_old
945 anchor_old_id = 'id="%s"' % anchor_old
945 if cond_new:
946 if cond_new:
946 anchor_new_id = 'id="%s"' % anchor_new
947 anchor_new_id = 'id="%s"' % anchor_new
947
948
948 if change['action'] != Action.CONTEXT:
949 if change['action'] != Action.CONTEXT:
949 anchor_link = True
950 anchor_link = True
950 else:
951 else:
951 anchor_link = False
952 anchor_link = False
952
953
953 ###########################################################
954 ###########################################################
954 # COMMENT ICONS
955 # COMMENT ICONS
955 ###########################################################
956 ###########################################################
956 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
957 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
957
958
958 if enable_comments and change['action'] != Action.CONTEXT:
959 if enable_comments and change['action'] != Action.CONTEXT:
959 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
960 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
960
961
961 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
962 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
962
963
963 ###########################################################
964 ###########################################################
964 # OLD LINE NUMBER
965 # OLD LINE NUMBER
965 ###########################################################
966 ###########################################################
966 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
967 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
967 'a_id': anchor_old_id,
968 'a_id': anchor_old_id,
968 'olc': old_lineno_class
969 'olc': old_lineno_class
969 })
970 })
970
971
971 _html.append('''%(link)s''' % {
972 _html.append('''%(link)s''' % {
972 'link': _link_to_if(anchor_link, change['old_lineno'],
973 'link': _link_to_if(anchor_link, change['old_lineno'],
973 '#%s' % anchor_old)
974 '#%s' % anchor_old)
974 })
975 })
975 _html.append('''</td>\n''')
976 _html.append('''</td>\n''')
976 ###########################################################
977 ###########################################################
977 # NEW LINE NUMBER
978 # NEW LINE NUMBER
978 ###########################################################
979 ###########################################################
979
980
980 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
981 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
981 'a_id': anchor_new_id,
982 'a_id': anchor_new_id,
982 'nlc': new_lineno_class
983 'nlc': new_lineno_class
983 })
984 })
984
985
985 _html.append('''%(link)s''' % {
986 _html.append('''%(link)s''' % {
986 'link': _link_to_if(anchor_link, change['new_lineno'],
987 'link': _link_to_if(anchor_link, change['new_lineno'],
987 '#%s' % anchor_new)
988 '#%s' % anchor_new)
988 })
989 })
989 _html.append('''</td>\n''')
990 _html.append('''</td>\n''')
990 ###########################################################
991 ###########################################################
991 # CODE
992 # CODE
992 ###########################################################
993 ###########################################################
993 code_classes = [code_class]
994 code_classes = [code_class]
994 if (not enable_comments or
995 if (not enable_comments or
995 change['action'] == Action.CONTEXT):
996 change['action'] == Action.CONTEXT):
996 code_classes.append('no-comment')
997 code_classes.append('no-comment')
997 _html.append('\t<td class="%s">' % ' '.join(code_classes))
998 _html.append('\t<td class="%s">' % ' '.join(code_classes))
998 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
999 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
999 'code': change['line']
1000 'code': change['line']
1000 })
1001 })
1001
1002
1002 _html.append('''\t</td>''')
1003 _html.append('''\t</td>''')
1003 _html.append('''\n</tr>\n''')
1004 _html.append('''\n</tr>\n''')
1004 _html.append('''</table>''')
1005 _html.append('''</table>''')
1005 if _html_empty:
1006 if _html_empty:
1006 return None
1007 return None
1007 return ''.join(_html)
1008 return ''.join(_html)
1008
1009
1009 def stat(self):
1010 def stat(self):
1010 """
1011 """
1011 Returns tuple of added, and removed lines for this instance
1012 Returns tuple of added, and removed lines for this instance
1012 """
1013 """
1013 return self.adds, self.removes
1014 return self.adds, self.removes
1014
1015
1015 def get_context_of_line(
1016 def get_context_of_line(
1016 self, path, diff_line=None, context_before=3, context_after=3):
1017 self, path, diff_line=None, context_before=3, context_after=3):
1017 """
1018 """
1018 Returns the context lines for the specified diff line.
1019 Returns the context lines for the specified diff line.
1019
1020
1020 :type diff_line: :class:`DiffLineNumber`
1021 :type diff_line: :class:`DiffLineNumber`
1021 """
1022 """
1022 assert self.parsed, "DiffProcessor is not initialized."
1023 assert self.parsed, "DiffProcessor is not initialized."
1023
1024
1024 if None not in diff_line:
1025 if None not in diff_line:
1025 raise ValueError(
1026 raise ValueError(
1026 "Cannot specify both line numbers: {}".format(diff_line))
1027 "Cannot specify both line numbers: {}".format(diff_line))
1027
1028
1028 file_diff = self._get_file_diff(path)
1029 file_diff = self._get_file_diff(path)
1029 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1030 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1030
1031
1031 first_line_to_include = max(idx - context_before, 0)
1032 first_line_to_include = max(idx - context_before, 0)
1032 first_line_after_context = idx + context_after + 1
1033 first_line_after_context = idx + context_after + 1
1033 context_lines = chunk[first_line_to_include:first_line_after_context]
1034 context_lines = chunk[first_line_to_include:first_line_after_context]
1034
1035
1035 line_contents = [
1036 line_contents = [
1036 _context_line(line) for line in context_lines
1037 _context_line(line) for line in context_lines
1037 if _is_diff_content(line)]
1038 if _is_diff_content(line)]
1038 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1039 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1039 # Once they are fixed, we can drop this line here.
1040 # Once they are fixed, we can drop this line here.
1040 if line_contents:
1041 if line_contents:
1041 line_contents[-1] = (
1042 line_contents[-1] = (
1042 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1043 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1043 return line_contents
1044 return line_contents
1044
1045
1045 def find_context(self, path, context, offset=0):
1046 def find_context(self, path, context, offset=0):
1046 """
1047 """
1047 Finds the given `context` inside of the diff.
1048 Finds the given `context` inside of the diff.
1048
1049
1049 Use the parameter `offset` to specify which offset the target line has
1050 Use the parameter `offset` to specify which offset the target line has
1050 inside of the given `context`. This way the correct diff line will be
1051 inside of the given `context`. This way the correct diff line will be
1051 returned.
1052 returned.
1052
1053
1053 :param offset: Shall be used to specify the offset of the main line
1054 :param offset: Shall be used to specify the offset of the main line
1054 within the given `context`.
1055 within the given `context`.
1055 """
1056 """
1056 if offset < 0 or offset >= len(context):
1057 if offset < 0 or offset >= len(context):
1057 raise ValueError(
1058 raise ValueError(
1058 "Only positive values up to the length of the context "
1059 "Only positive values up to the length of the context "
1059 "minus one are allowed.")
1060 "minus one are allowed.")
1060
1061
1061 matches = []
1062 matches = []
1062 file_diff = self._get_file_diff(path)
1063 file_diff = self._get_file_diff(path)
1063
1064
1064 for chunk in file_diff['chunks']:
1065 for chunk in file_diff['chunks']:
1065 context_iter = iter(context)
1066 context_iter = iter(context)
1066 for line_idx, line in enumerate(chunk):
1067 for line_idx, line in enumerate(chunk):
1067 try:
1068 try:
1068 if _context_line(line) == context_iter.next():
1069 if _context_line(line) == context_iter.next():
1069 continue
1070 continue
1070 except StopIteration:
1071 except StopIteration:
1071 matches.append((line_idx, chunk))
1072 matches.append((line_idx, chunk))
1072 context_iter = iter(context)
1073 context_iter = iter(context)
1073
1074
1074 # Increment position and triger StopIteration
1075 # Increment position and triger StopIteration
1075 # if we had a match at the end
1076 # if we had a match at the end
1076 line_idx += 1
1077 line_idx += 1
1077 try:
1078 try:
1078 context_iter.next()
1079 context_iter.next()
1079 except StopIteration:
1080 except StopIteration:
1080 matches.append((line_idx, chunk))
1081 matches.append((line_idx, chunk))
1081
1082
1082 effective_offset = len(context) - offset
1083 effective_offset = len(context) - offset
1083 found_at_diff_lines = [
1084 found_at_diff_lines = [
1084 _line_to_diff_line_number(chunk[idx - effective_offset])
1085 _line_to_diff_line_number(chunk[idx - effective_offset])
1085 for idx, chunk in matches]
1086 for idx, chunk in matches]
1086
1087
1087 return found_at_diff_lines
1088 return found_at_diff_lines
1088
1089
1089 def _get_file_diff(self, path):
1090 def _get_file_diff(self, path):
1090 for file_diff in self.parsed_diff:
1091 for file_diff in self.parsed_diff:
1091 if file_diff['filename'] == path:
1092 if file_diff['filename'] == path:
1092 break
1093 break
1093 else:
1094 else:
1094 raise FileNotInDiffException("File {} not in diff".format(path))
1095 raise FileNotInDiffException("File {} not in diff".format(path))
1095 return file_diff
1096 return file_diff
1096
1097
1097 def _find_chunk_line_index(self, file_diff, diff_line):
1098 def _find_chunk_line_index(self, file_diff, diff_line):
1098 for chunk in file_diff['chunks']:
1099 for chunk in file_diff['chunks']:
1099 for idx, line in enumerate(chunk):
1100 for idx, line in enumerate(chunk):
1100 if line['old_lineno'] == diff_line.old:
1101 if line['old_lineno'] == diff_line.old:
1101 return chunk, idx
1102 return chunk, idx
1102 if line['new_lineno'] == diff_line.new:
1103 if line['new_lineno'] == diff_line.new:
1103 return chunk, idx
1104 return chunk, idx
1104 raise LineNotInDiffException(
1105 raise LineNotInDiffException(
1105 "The line {} is not part of the diff.".format(diff_line))
1106 "The line {} is not part of the diff.".format(diff_line))
1106
1107
1107
1108
def _is_diff_content(line):
    """Return True when *line* carries diff content (context, add or delete)."""
    content_actions = (Action.UNMODIFIED, Action.ADD, Action.DELETE)
    return line['action'] in content_actions
1111
1112
1112
1113
1113 def _context_line(line):
1114 def _context_line(line):
1114 return (line['action'], line['line'])
1115 return (line['action'], line['line'])
1115
1116
1116
1117
# (old, new) line-number pair addressing a single diff line; either side
# may be None when the line exists on only one side of the diff
# (see _line_to_diff_line_number).
DiffLineNumber = collections.namedtuple('DiffLineNumber', 'old new')
1118
1119
1119
1120
def _line_to_diff_line_number(line):
    """Map a parsed diff line to a ``DiffLineNumber`` pair.

    ``or None`` collapses falsy line numbers (e.g. 0 or '') so both
    sides share a single "not present" marker.
    """
    return DiffLineNumber(
        old=line['old_lineno'] or None,
        new=line['new_lineno'] or None)
1124
1125
1125
1126
class FileNotInDiffException(Exception):
    """
    Signals a context request for a file absent from the diff.

    Asking for the context of a line that belongs to a file which the
    given diff does not contain triggers this exception.
    """
1133
1134
1134
1135
class LineNotInDiffException(Exception):
    """
    Signals a context request for a line absent from the diff.

    Asking for the context of a line within a known file, where that
    particular line is not part of the given diff, triggers this
    exception.
    """
1142
1143
1143
1144
class DiffLimitExceeded(Exception):
    """Signals that a configured diff limit was exceeded."""
1146
1147
1147
1148
# NOTE(marcink): if diffs.mako change, probably this
# needs a bump to next version
# Version tag embedded in cached diff structs; load_cached_diff purges any
# cache entry whose stored 'version' does not match this value.
CURRENT_DIFF_VERSION = 'v4'
1151
1152
1152
1153
1153 def _cleanup_cache_file(cached_diff_file):
1154 def _cleanup_cache_file(cached_diff_file):
1154 # cleanup file to not store it "damaged"
1155 # cleanup file to not store it "damaged"
1155 try:
1156 try:
1156 os.remove(cached_diff_file)
1157 os.remove(cached_diff_file)
1157 except Exception:
1158 except Exception:
1158 log.exception('Failed to cleanup path %s', cached_diff_file)
1159 log.exception('Failed to cleanup path %s', cached_diff_file)
1159
1160
1160
1161
1161 def _get_compression_mode(cached_diff_file):
1162 def _get_compression_mode(cached_diff_file):
1162 mode = 'bz2'
1163 mode = 'bz2'
1163 if 'mode:plain' in cached_diff_file:
1164 if 'mode:plain' in cached_diff_file:
1164 mode = 'plain'
1165 mode = 'plain'
1165 elif 'mode:gzip' in cached_diff_file:
1166 elif 'mode:gzip' in cached_diff_file:
1166 mode = 'gzip'
1167 mode = 'gzip'
1167 return mode
1168 return mode
1168
1169
1169
1170
def cache_diff(cached_diff_file, diff, commits):
    """
    Persist *diff* and *commits* to `cached_diff_file`.

    The on-disk format is a pickled dict tagged with CURRENT_DIFF_VERSION;
    the compression (plain/gzip/bz2) is chosen from markers embedded in the
    file name. Failures are logged, the partial file is cleaned up, and
    nothing is raised to the caller.
    """
    compression_mode = _get_compression_mode(cached_diff_file)

    struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': diff,
        'commits': commits
    }

    start = time.time()
    try:
        if compression_mode == 'plain':
            with open(cached_diff_file, 'wb') as f:
                pickle.dump(struct, f)
        elif compression_mode == 'gzip':
            with gzip.GzipFile(cached_diff_file, 'wb') as f:
                pickle.dump(struct, f)
        else:
            with bz2.BZ2File(cached_diff_file, 'wb') as f:
                pickle.dump(struct, f)
    except Exception:
        # `warning`: `Logger.warn` is a deprecated alias
        log.warning('Failed to save cache', exc_info=True)
        _cleanup_cache_file(cached_diff_file)
        # the file was just removed -- do not emit the success message below
        return

    log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)
1195
1196
1196
1197
def load_cached_diff(cached_diff_file):
    """
    Load a previously cached diff structure from `cached_diff_file`.

    Returns a dict with 'version', 'diff' and 'commits' keys. A default
    struct (diff/commits set to None) is returned when the cache file is
    missing, unreadable, not a dict, or tagged with a version other than
    CURRENT_DIFF_VERSION; stale-version files are purged.
    """
    compression_mode = _get_compression_mode(cached_diff_file)

    default_struct = {
        'version': CURRENT_DIFF_VERSION,
        'diff': None,
        'commits': None
    }

    has_cache = os.path.isfile(cached_diff_file)
    if not has_cache:
        log.debug('Reading diff cache file failed %s', cached_diff_file)
        return default_struct

    data = None

    start = time.time()
    try:
        if compression_mode == 'plain':
            with open(cached_diff_file, 'rb') as f:
                data = pickle.load(f)
        elif compression_mode == 'gzip':
            with gzip.GzipFile(cached_diff_file, 'rb') as f:
                data = pickle.load(f)
        else:
            with bz2.BZ2File(cached_diff_file, 'rb') as f:
                data = pickle.load(f)
    except Exception:
        # `warning`: `Logger.warn` is a deprecated alias
        log.warning('Failed to read diff cache file', exc_info=True)

    if not data:
        data = default_struct

    if not isinstance(data, dict):
        # old version of data ?
        data = default_struct

    # check version
    if data.get('version') != CURRENT_DIFF_VERSION:
        # purge cache written by an incompatible version
        _cleanup_cache_file(cached_diff_file)
        return default_struct

    log.debug('Loaded diff cache from %s in %.4fs', cached_diff_file, time.time() - start)

    return data
1243
1244
1244
1245
def generate_diff_cache_key(*args):
    """
    Helper to generate a cache key using arguments.

    Each argument is stringified, '/' characters are replaced with '_'
    (a '/' would allow writing into sub-directories of the cache storage),
    and empty strings collapse to None.
    """
    def arg_mapper(input_param):
        input_param = safe_str(input_param)
        # we cannot allow '/' in arguments since it would allow
        # subdirectory usage
        # str.replace returns a new string -- assign it back, otherwise
        # the '/' characters survive into the key (previous code discarded
        # the result)
        input_param = input_param.replace('/', '_')
        return input_param or None  # prevent empty string arguments

    return '_'.join(['{}'] * len(args)).format(*map(arg_mapper, args))
1258
1259
1259
1260
def diff_cache_exist(cache_storage, *args):
    """
    Based on all generated arguments check and return a cache path.

    Builds the cache key (forcing gzip mode) and joins it under
    `cache_storage`; raises ValueError if the resulting absolute path
    escapes the storage directory.
    """
    args = list(args) + ['mode:gzip']
    cache_key = generate_diff_cache_key(*args)
    cache_file_path = os.path.join(cache_storage, cache_key)
    # prevent path traversal attacks using some param that have e.g '../../'
    # compare against the storage path WITH a trailing separator, otherwise
    # a sibling directory sharing the prefix (e.g. `<storage>2/...`) would
    # pass a bare startswith() check
    storage_prefix = os.path.join(os.path.abspath(cache_storage), '')
    if not os.path.abspath(cache_file_path).startswith(storage_prefix):
        raise ValueError('Final path must be within {}'.format(cache_storage))

    return cache_file_path
General Comments 0
You need to be logged in to leave comments. Login now