diffs: report errors on diff rendering.
milka - r4576:99f87073 stable

@@ -1,793 +1,794 @@
# -*- coding: utf-8 -*-

# Copyright (C) 2011-2020 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import logging
import difflib
from itertools import groupby

from pygments import lex
from pygments.formatters.html import _get_ttype_class as pygment_token_class
from pygments.lexers.special import TextLexer, Token
from pygments.lexers import get_lexer_by_name
from pyramid import compat

from rhodecode.lib.helpers import (
    get_lexer_for_filenode, html_escape, get_custom_lexer)
from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
from rhodecode.lib.vcs.nodes import FileNode
from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
from rhodecode.lib.diff_match_patch import diff_match_patch
from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE


plain_text_lexer = get_lexer_by_name(
    'text', stripall=False, stripnl=False, ensurenl=False)


log = logging.getLogger(__name__)


def filenode_as_lines_tokens(filenode, lexer=None):
    org_lexer = lexer
    lexer = lexer or get_lexer_for_filenode(filenode)
    log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
              lexer, filenode, org_lexer)
    content = filenode.content
    tokens = tokenize_string(content, lexer)
    lines = split_token_stream(tokens, content)
    rv = list(lines)
    return rv


def tokenize_string(content, lexer):
    """
    Use pygments to tokenize some content based on a lexer
    ensuring all original new lines and whitespace are preserved
    """

    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False

    if isinstance(lexer, TextLexer):
        lexed = [(Token.Text, content)]
    else:
        lexed = lex(content, lexer)

    for token_type, token_text in lexed:
        yield pygment_token_class(token_type), token_text
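
# For example, tokenize_string() yields (css_class, text) pairs; lexing
# "x = 1\n" with a Python lexer gives pairs roughly like ('n', 'x'), ('', ' '),
# ('o', '='), ('', ' '), ('mi', '1'), ('', '\n'). The short class names come
# from pygments' HTML formatter and whitespace/newlines are kept verbatim.
# (Illustrative note; the exact classes depend on the pygments version.)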


def split_token_stream(tokens, content):
    """
    Take a list of (TokenType, text) tuples and split them by newlines,
    yielding one list of tokens per line

    split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
    [(TEXT, 'some')],
    [(TEXT, 'text'), (TEXT, 'more')],
    [(TEXT, '')]
    """

    token_buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split('\n')
        for part in parts[:-1]:
            token_buffer.append((token_class, part))
            yield token_buffer
            token_buffer = []

        token_buffer.append((token_class, parts[-1]))

    if token_buffer:
        yield token_buffer
    elif content:
        # this is a special case, we have the content, but tokenization didn't produce
        # any results. This can happen if known file extensions like .css have some bogus
        # unicode content without any newline characters
        yield [(pygment_token_class(Token.Text), content)]


def filenode_as_annotated_lines_tokens(filenode):
    """
    Take a file node and return a list of annotations => lines, if no annotation
    is found, it will be None.

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """

    commit_cache = {}  # cache commit_getter lookups

    def _get_annotation(commit_id, commit_getter):
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_lookup = {
        line_no: _get_annotation(commit_id, commit_getter)
        for line_no, commit_id, commit_getter, line_content
        in filenode.annotate
    }

    annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                         for line_no, tokens
                         in enumerate(filenode_as_lines_tokens(filenode), 1))

    grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

    for annotation, group in grouped_annotations_lines:
        yield (
            annotation, [(line_no, tokens)
                         for (_, line_no, tokens) in group]
        )


def render_tokenstream(tokenstream):
    result = []
    for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

        if token_class:
            result.append(u'<span class="%s">' % token_class)
        else:
            result.append(u'<span>')

        for op_tag, token_text in token_ops_texts:

            if op_tag:
                result.append(u'<%s>' % op_tag)

            # NOTE(marcink): in some cases of mixed encodings, we might run into
            # trouble in html_escape; in that case we force token_text to unicode,
            # which ensures "correct" data at the cost of a possibly lossy rendering
            try:
                escaped_text = html_escape(token_text)
            except TypeError:
                escaped_text = html_escape(safe_unicode(token_text))

            # TODO: dan: investigate showing hidden characters like space/nl/tab
            # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
            # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
            # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')

            result.append(escaped_text)

            if op_tag:
                result.append(u'</%s>' % op_tag)

        result.append(u'</span>')

    html = ''.join(result)
    return html
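
# Illustrative example (not from the original source): a rolled-up stream such as
#   [('k', '', 'def'), ('', '', ' '), ('nf', 'ins', 'foo')]
# renders to
#   <span class="k">def</span><span> </span><span class="nf"><ins>foo</ins></span>
# i.e. one <span> per class group, with <ins>/<del> wrapping only the changed ops.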


def rollup_tokenstream(tokenstream):
    """
    Group a token stream of the format:

        ('class', 'op', 'text')
    or
        ('class', 'text')

    into

        [('class1',
            [('op1', 'text'),
             ('op2', 'text')]),
         ('class2',
            [('op3', 'text')])]

    This is used to get the minimal tags necessary when
    rendering to html, e.g. for a token stream:

    <span class="A"><ins>he</ins>llo</span>
    vs
    <span class="A"><ins>he</ins></span><span class="A">llo</span>

    If a 2 tuple is passed in, the output op will be an empty string.

    eg:

    >>> rollup_tokenstream([('classA', '', 'h'),
                            ('classA', 'del', 'ell'),
                            ('classA', '', 'o'),
                            ('classB', '', ' '),
                            ('classA', '', 'the'),
                            ('classA', '', 're'),
                            ])

    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
     ('classB', [('', ' ')]),
     ('classA', [('', 'there')])]

    """
    if tokenstream and len(tokenstream[0]) == 2:
        tokenstream = ((t[0], '', t[1]) for t in tokenstream)

    result = []
    for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
        ops = []
        for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
            text_buffer = []
            for t_class, t_op, t_text in token_text_list:
                text_buffer.append(t_text)
            ops.append((token_op, ''.join(text_buffer)))
        result.append((token_class, ops))
    return result


def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
    """
    Converts a list of (token_class, token_text) tuples to a list of
    (token_class, token_op, token_text) tuples where token_op is one of
    ('ins', 'del', '')

    :param old_tokens: list of (token_class, token_text) tuples of old line
    :param new_tokens: list of (token_class, token_text) tuples of new line
    :param use_diff_match_patch: boolean, will use google's diff match patch
        library which has options to 'smooth' out the character by character
        differences making nicer ins/del blocks
    """

    old_tokens_result = []
    new_tokens_result = []

    similarity = difflib.SequenceMatcher(None,
        ''.join(token_text for token_class, token_text in old_tokens),
        ''.join(token_text for token_class, token_text in new_tokens)
    ).ratio()

    if similarity < 0.6:  # return, the blocks are too different
        for token_class, token_text in old_tokens:
            old_tokens_result.append((token_class, '', token_text))
        for token_class, token_text in new_tokens:
            new_tokens_result.append((token_class, '', token_text))
        return old_tokens_result, new_tokens_result, similarity

    token_sequence_matcher = difflib.SequenceMatcher(None,
        [x[1] for x in old_tokens],
        [x[1] for x in new_tokens])

    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
        # check the differences by token block types first to give a more
        # nicer "block" level replacement vs character diffs

        if tag == 'equal':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, '', token_text))
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, '', token_text))
        elif tag == 'delete':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, 'del', token_text))
        elif tag == 'insert':
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, 'ins', token_text))
        elif tag == 'replace':
            # if same type token blocks must be replaced, do a diff on the
            # characters in the token blocks to show individual changes

            old_char_tokens = []
            new_char_tokens = []
            for token_class, token_text in old_tokens[o1:o2]:
                for char in token_text:
                    old_char_tokens.append((token_class, char))

            for token_class, token_text in new_tokens[n1:n2]:
                for char in token_text:
                    new_char_tokens.append((token_class, char))

            old_string = ''.join([token_text for
                                  token_class, token_text in old_char_tokens])
            new_string = ''.join([token_text for
                                  token_class, token_text in new_char_tokens])

            char_sequence = difflib.SequenceMatcher(
                None, old_string, new_string)
            copcodes = char_sequence.get_opcodes()
            obuffer, nbuffer = [], []

            if use_diff_match_patch:
                dmp = diff_match_patch()
                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                reps = dmp.diff_main(old_string, new_string)
                dmp.diff_cleanupEfficiency(reps)

                a, b = 0, 0
                for op, rep in reps:
                    l = len(rep)
                    if op == 0:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], '', c))
                            nbuffer.append((new_char_tokens[b+i][0], '', c))
                        a += l
                        b += l
                    elif op == -1:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], 'del', c))
                        a += l
                    elif op == 1:
                        for i, c in enumerate(rep):
                            nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                        b += l
            else:
                for ctag, co1, co2, cn1, cn2 in copcodes:
                    if ctag == 'equal':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, '', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, '', token_text))
                    elif ctag == 'delete':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                    elif ctag == 'insert':
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))
                    elif ctag == 'replace':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))

            old_tokens_result.extend(obuffer)
            new_tokens_result.extend(nbuffer)

    return old_tokens_result, new_tokens_result, similarity
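
# Illustrative example (not from the original source):
#   tokens_diff([('', 'hello')], [('', 'help')])
# returns per-character 3-tuples roughly like
#   old: ('', '', 'h'), ('', '', 'e'), ('', '', 'l'), ('', 'del', 'l'), ('', 'del', 'o')
#   new: ('', '', 'h'), ('', '', 'e'), ('', '', 'l'), ('', 'ins', 'p')
# together with the similarity ratio (~0.67) as the third return value.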


def diffset_node_getter(commit):
    def get_node(fname):
        try:
            return commit.get_node(fname)
        except NodeDoesNotExistError:
            return None

    return get_node


class DiffSet(object):
    """
    An object for parsing the diff result from diffs.DiffProcessor and
    adding highlighting, side by side/unified renderings and line diffs
    """

    HL_REAL = 'REAL'  # highlights using original file, slow
    HL_FAST = 'FAST'  # highlights using just the line, fast but not correct
                      # in the case of multiline code
    HL_NONE = 'NONE'  # no highlighting, fastest

    def __init__(self, highlight_mode=HL_REAL, repo_name=None,
                 source_repo_name=None,
                 source_node_getter=lambda filename: None,
                 target_repo_name=None,
                 target_node_getter=lambda filename: None,
                 source_nodes=None, target_nodes=None,
                 # files over this size will use fast highlighting
                 max_file_size_limit=150 * 1024,
                 ):

        self.highlight_mode = highlight_mode
        self.highlighted_filenodes = {}
        self.source_node_getter = source_node_getter
        self.target_node_getter = target_node_getter
        self.source_nodes = source_nodes or {}
        self.target_nodes = target_nodes or {}
        self.repo_name = repo_name
        self.target_repo_name = target_repo_name or repo_name
        self.source_repo_name = source_repo_name or repo_name
        self.max_file_size_limit = max_file_size_limit
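
    # A minimal wiring sketch (illustrative; `source_commit`/`target_commit`
    # are assumed commit objects, not names defined in this module):
    #
    #   diffset = DiffSet(
    #       highlight_mode=DiffSet.HL_REAL,
    #       repo_name='my-repo',
    #       source_node_getter=diffset_node_getter(source_commit),
    #       target_node_getter=diffset_node_getter(target_commit),
    #   )
    #   rendered = diffset.render_patchset(patchset, source_ref, target_ref)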

    def render_patchset(self, patchset, source_ref=None, target_ref=None):
        diffset = AttributeDict(dict(
            lines_added=0,
            lines_deleted=0,
            changed_files=0,
            files=[],
            file_stats={},
            limited_diff=isinstance(patchset, LimitedDiffContainer),
            repo_name=self.repo_name,
            target_repo_name=self.target_repo_name,
            source_repo_name=self.source_repo_name,
            source_ref=source_ref,
            target_ref=target_ref,
        ))
        for patch in patchset:
            diffset.file_stats[patch['filename']] = patch['stats']
            filediff = self.render_patch(patch)
            filediff.diffset = StrictAttributeDict(dict(
                source_ref=diffset.source_ref,
                target_ref=diffset.target_ref,
                repo_name=diffset.repo_name,
                source_repo_name=diffset.source_repo_name,
                target_repo_name=diffset.target_repo_name,
            ))
            diffset.files.append(filediff)
            diffset.changed_files += 1
            if not patch['stats']['binary']:
                diffset.lines_added += patch['stats']['added']
                diffset.lines_deleted += patch['stats']['deleted']

        return diffset

    _lexer_cache = {}

    def _get_lexer_for_filename(self, filename, filenode=None):
        # cached because we might need to call it twice for source/target
        if filename not in self._lexer_cache:
            if filenode:
                lexer = filenode.lexer
                extension = filenode.extension
            else:
                lexer = FileNode.get_lexer(filename=filename)
                extension = filename.split('.')[-1]

            lexer = get_custom_lexer(extension) or lexer
            self._lexer_cache[filename] = lexer
        return self._lexer_cache[filename]

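    # render_patch() below resolves lexers and FileNodes according to the
    # configured highlight mode, builds a `filediff` AttributeDict, parses each
    # hunk, and for patches without chunks synthesizes a pseudo "ops" hunk so
    # the operation (file removed, binary diff hidden, ...) can still be shown
    # and commented on.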
    def render_patch(self, patch):
        log.debug('rendering diff for %r', patch['filename'])

        source_filename = patch['original_filename']
        target_filename = patch['filename']

        source_lexer = plain_text_lexer
        target_lexer = plain_text_lexer

        if not patch['stats']['binary']:
            node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
            hl_mode = node_hl_mode or self.highlight_mode

            if hl_mode == self.HL_REAL:
                if (source_filename and patch['operation'] in ('D', 'M')
                        and source_filename not in self.source_nodes):
                    self.source_nodes[source_filename] = (
                        self.source_node_getter(source_filename))

                if (target_filename and patch['operation'] in ('A', 'M')
                        and target_filename not in self.target_nodes):
                    self.target_nodes[target_filename] = (
                        self.target_node_getter(target_filename))

            elif hl_mode == self.HL_FAST:
                source_lexer = self._get_lexer_for_filename(source_filename)
                target_lexer = self._get_lexer_for_filename(target_filename)

        source_file = self.source_nodes.get(source_filename, source_filename)
        target_file = self.target_nodes.get(target_filename, target_filename)
        raw_id_uid = ''
        if self.source_nodes.get(source_filename):
            raw_id_uid = self.source_nodes[source_filename].commit.raw_id

        if not raw_id_uid and self.target_nodes.get(target_filename):
            # in case this is a new file we only have it in target
            raw_id_uid = self.target_nodes[target_filename].commit.raw_id

        source_filenode, target_filenode = None, None

        # TODO: dan: FileNode.lexer works on the content of the file - which
        # can be slow - issue #4289 explains a lexer clean up - which once
        # done can allow caching a lexer for a filenode to avoid the file lookup
        if isinstance(source_file, FileNode):
            source_filenode = source_file
            #source_lexer = source_file.lexer
            source_lexer = self._get_lexer_for_filename(source_filename)
            source_file.lexer = source_lexer

        if isinstance(target_file, FileNode):
            target_filenode = target_file
            #target_lexer = target_file.lexer
            target_lexer = self._get_lexer_for_filename(target_filename)
            target_file.lexer = target_lexer

        source_file_path, target_file_path = None, None

        if source_filename != '/dev/null':
            source_file_path = source_filename
        if target_filename != '/dev/null':
            target_file_path = target_filename

        source_file_type = source_lexer.name
        target_file_type = target_lexer.name

        filediff = AttributeDict({
            'source_file_path': source_file_path,
            'target_file_path': target_file_path,
            'source_filenode': source_filenode,
            'target_filenode': target_filenode,
            'source_file_type': target_file_type,
            'target_file_type': source_file_type,
            'patch': {'filename': patch['filename'], 'stats': patch['stats']},
            'operation': patch['operation'],
            'source_mode': patch['stats']['old_mode'],
            'target_mode': patch['stats']['new_mode'],
            'limited_diff': patch['is_limited_diff'],
            'hunks': [],
            'hunk_ops': None,
            'diffset': self,
            'raw_id': raw_id_uid,
        })

        file_chunks = patch['chunks'][1:]
        for i, hunk in enumerate(file_chunks, 1):
            hunkbit = self.parse_hunk(hunk, source_file, target_file)
            hunkbit.source_file_path = source_file_path
            hunkbit.target_file_path = target_file_path
            hunkbit.index = i
            filediff.hunks.append(hunkbit)

        # Simulate hunk on OPS type line which doesn't really contain any diff
        # this allows commenting on those
        if not file_chunks:
            actions = []
            for op_id, op_text in filediff.patch['stats']['ops'].items():
                if op_id == DEL_FILENODE:
                    actions.append(u'file was removed')
                elif op_id == BIN_FILENODE:
                    actions.append(u'binary diff hidden')
                else:
                    actions.append(safe_unicode(op_text))
            action_line = u'NO CONTENT: ' + \
                          u', '.join(actions) or u'UNDEFINED_ACTION'

            hunk_ops = {'source_length': 0, 'source_start': 0,
                        'lines': [
                            {'new_lineno': 0, 'old_lineno': 1,
                             'action': 'unmod-no-hl', 'line': action_line}
                        ],
                        'section_header': u'', 'target_start': 1, 'target_length': 1}

            hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
            hunkbit.source_file_path = source_file_path
            hunkbit.target_file_path = target_file_path
            filediff.hunk_ops = hunkbit
        return filediff

    def parse_hunk(self, hunk, source_file, target_file):
        result = AttributeDict(dict(
            source_start=hunk['source_start'],
            source_length=hunk['source_length'],
            target_start=hunk['target_start'],
            target_length=hunk['target_length'],
            section_header=hunk['section_header'],
            lines=[],
        ))
        before, after = [], []

        for line in hunk['lines']:
            if line['action'] in ['unmod', 'unmod-no-hl']:
                no_hl = line['action'] == 'unmod-no-hl'
                result.lines.extend(
                    self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
                after.append(line)
                before.append(line)
            elif line['action'] == 'add':
                after.append(line)
            elif line['action'] == 'del':
                before.append(line)
            elif line['action'] == 'old-no-nl':
                before.append(line)
            elif line['action'] == 'new-no-nl':
                after.append(line)

        all_actions = [x['action'] for x in after] + [x['action'] for x in before]
        no_hl = {x for x in all_actions} == {'unmod-no-hl'}
        result.lines.extend(
            self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))
        # NOTE(marcink): we must keep list() call here so we can cache the result...
        result.unified = list(self.as_unified(result.lines))
        result.sideside = result.lines

        return result
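
    # Each parsed hunk carries `lines` (rows with paired original/modified
    # data, also exposed as `sideside`) and `unified` (the same rows flattened
    # by as_unified() into unified-diff order).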

    def parse_lines(self, before_lines, after_lines, source_file, target_file,
                    no_hl=False):
        # TODO: dan: investigate doing the diff comparison and fast highlighting
        # on the entire before and after buffered block lines rather than by
        # line, this means we can get better 'fast' highlighting if the context
        # allows it - eg.
        # line 4: """
        # line 5: this gets highlighted as a string
        # line 6: """

        lines = []

        before_newline = AttributeDict()
        after_newline = AttributeDict()
        if before_lines and before_lines[-1]['action'] == 'old-no-nl':
            before_newline_line = before_lines.pop(-1)
            before_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', before_newline_line['line'])]]))

        if after_lines and after_lines[-1]['action'] == 'new-no-nl':
            after_newline_line = after_lines.pop(-1)
            after_newline.content = '\n {}'.format(
                render_tokenstream(
                    [(x[0], '', x[1])
                     for x in [('nonl', after_newline_line['line'])]]))

        while before_lines or after_lines:
            before, after = None, None
            before_tokens, after_tokens = None, None

            if before_lines:
                before = before_lines.pop(0)
            if after_lines:
                after = after_lines.pop(0)

            original = AttributeDict()
            modified = AttributeDict()

            if before:
                if before['action'] == 'old-no-nl':
                    before_tokens = [('nonl', before['line'])]
                else:
                    before_tokens = self.get_line_tokens(
                        line_text=before['line'], line_number=before['old_lineno'],
                        input_file=source_file, no_hl=no_hl)
                original.lineno = before['old_lineno']
                original.content = before['line']
                original.action = self.action_to_op(before['action'])

                original.get_comment_args = (
                    source_file, 'o', before['old_lineno'])

            if after:
                if after['action'] == 'new-no-nl':
                    after_tokens = [('nonl', after['line'])]
                else:
                    after_tokens = self.get_line_tokens(
                        line_text=after['line'], line_number=after['new_lineno'],
                        input_file=target_file, no_hl=no_hl)
                modified.lineno = after['new_lineno']
                modified.content = after['line']
                modified.action = self.action_to_op(after['action'])

                modified.get_comment_args = (target_file, 'n', after['new_lineno'])

            # diff the lines
            if before_tokens and after_tokens:
                o_tokens, m_tokens, similarity = tokens_diff(
                    before_tokens, after_tokens)
                original.content = render_tokenstream(o_tokens)
                modified.content = render_tokenstream(m_tokens)
            elif before_tokens:
                original.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in before_tokens])
            elif after_tokens:
                modified.content = render_tokenstream(
                    [(x[0], '', x[1]) for x in after_tokens])

            if not before_lines and before_newline:
                original.content += before_newline.content
                before_newline = None
            if not after_lines and after_newline:
                modified.content += after_newline.content
                after_newline = None

            lines.append(AttributeDict({
                'original': original,
                'modified': modified,
            }))

        return lines
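
    # Each entry produced by parse_lines() is an AttributeDict with `original`
    # and `modified` sides; a side, when present, holds `lineno`, the rendered
    # `content`, the `action` op (+/-/space) and `get_comment_args`.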

    def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
        filenode = None
        filename = None

        if isinstance(input_file, compat.string_types):
            filename = input_file
        elif isinstance(input_file, FileNode):
            filenode = input_file
            filename = input_file.unicode_path

        hl_mode = self.HL_NONE if no_hl else self.highlight_mode
        if hl_mode == self.HL_REAL and filenode:
            lexer = self._get_lexer_for_filename(filename)
            file_size_allowed = input_file.size < self.max_file_size_limit
            if line_number and file_size_allowed:
                return self.get_tokenized_filenode_line(
                    input_file, line_number, lexer)

        if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
            lexer = self._get_lexer_for_filename(filename)
            return list(tokenize_string(line_text, lexer))

        return list(tokenize_string(line_text, plain_text_lexer))
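
    # get_line_tokens() falls back gradually: HL_REAL with a FileNode below the
    # size limit tokenizes the whole file once (cached), HL_REAL/HL_FAST with
    # just a filename lexes only the given line, and anything else (or no_hl)
    # uses the plain text lexer.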

    def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):

        if filenode not in self.highlighted_filenodes:
            tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
            self.highlighted_filenodes[filenode] = tokenized_lines

        try:
            return self.highlighted_filenodes[filenode][line_number - 1]
        except Exception:
            log.exception('diff rendering error')
            return [('', u'rhodecode diff rendering error')]
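
    # The log.exception() call above is the single line added by this commit
    # ("diffs: report errors on diff rendering."): tokenization failures are
    # now logged with a traceback, while the placeholder token keeps the diff
    # page rendering.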

    def action_to_op(self, action):
        return {
            'add': '+',
            'del': '-',
            'unmod': ' ',
            'unmod-no-hl': ' ',
            'old-no-nl': ' ',
            'new-no-nl': ' ',
        }.get(action, action)

    def as_unified(self, lines):
        """
        Return a generator that yields the lines of a diff in unified order
        """
        def generator():
            buf = []
            for line in lines:

                if buf and not line.original or line.original.action == ' ':
                    for b in buf:
                        yield b
                    buf = []

                if line.original:
                    if line.original.action == ' ':
                        yield (line.original.lineno, line.modified.lineno,
                               line.original.action, line.original.content,
                               line.original.get_comment_args)
                        continue

                    if line.original.action == '-':
                        yield (line.original.lineno, None,
                               line.original.action, line.original.content,
                               line.original.get_comment_args)

                    if line.modified.action == '+':
                        buf.append((
                            None, line.modified.lineno,
                            line.modified.action, line.modified.content,
                            line.modified.get_comment_args))
                        continue

                if line.modified:
                    yield (None, line.modified.lineno,
                           line.modified.action, line.modified.content,
                           line.modified.get_comment_args)

            for b in buf:
                yield b

        return generator()
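
    # as_unified() yields 5-tuples of
    #   (old_lineno, new_lineno, op, rendered_content, get_comment_args),
    # emitting removed lines before the buffered added lines of each changed block.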