diffs: don't use recurred diffset attachment in diffs. This makes this structure much harder to...
marcink
r2682:44d560de default
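The hunk below stops attaching the whole parent diffset to every filediff (a recursive structure, since the diffset in turn holds all of its filediffs) and instead attaches a small snapshot of the few fields that are read back from it. A minimal before/after sketch of that pattern, using only names that appear in the hunk; StrictAttributeDict is assumed to raise on missing attributes rather than silently return None:

    # Before: each filediff pointed back at the full diffset,
    # which itself holds every filediff -- a recursive attachment.
    filediff.diffset = diffset

    # After: attach only the fields the templates need, wrapped in a
    # StrictAttributeDict (assumed to raise AttributeError on unknown keys).
    filediff.diffset = StrictAttributeDict(dict(
        source_ref=diffset.source_ref,
        target_ref=diffset.target_ref,
        repo_name=diffset.repo_name,
        source_repo_name=diffset.source_repo_name,
    ))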
@@ -1,743 +1,748 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2018 RhodeCode GmbH
3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import logging
21 import logging
22 import difflib
22 import difflib
23 from itertools import groupby
23 from itertools import groupby
24
24
25 from pygments import lex
25 from pygments import lex
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 from pygments.lexers.special import TextLexer, Token
27 from pygments.lexers.special import TextLexer, Token
28
28
29 from rhodecode.lib.helpers import (
29 from rhodecode.lib.helpers import (
30 get_lexer_for_filenode, html_escape, get_custom_lexer)
30 get_lexer_for_filenode, html_escape, get_custom_lexer)
31 from rhodecode.lib.utils2 import AttributeDict
31 from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict
32 from rhodecode.lib.vcs.nodes import FileNode
32 from rhodecode.lib.vcs.nodes import FileNode
33 from rhodecode.lib.diff_match_patch import diff_match_patch
33 from rhodecode.lib.diff_match_patch import diff_match_patch
34 from rhodecode.lib.diffs import LimitedDiffContainer
34 from rhodecode.lib.diffs import LimitedDiffContainer
35 from pygments.lexers import get_lexer_by_name
35 from pygments.lexers import get_lexer_by_name
36
36
37 plain_text_lexer = get_lexer_by_name(
37 plain_text_lexer = get_lexer_by_name(
38 'text', stripall=False, stripnl=False, ensurenl=False)
38 'text', stripall=False, stripnl=False, ensurenl=False)
39
39
40
40
41 log = logging.getLogger(__name__)
41 log = logging.getLogger(__name__)
42
42
43
43
44 def filenode_as_lines_tokens(filenode, lexer=None):
44 def filenode_as_lines_tokens(filenode, lexer=None):
45 org_lexer = lexer
45 org_lexer = lexer
46 lexer = lexer or get_lexer_for_filenode(filenode)
46 lexer = lexer or get_lexer_for_filenode(filenode)
47 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
47 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
48 lexer, filenode, org_lexer)
48 lexer, filenode, org_lexer)
49 tokens = tokenize_string(filenode.content, lexer)
49 tokens = tokenize_string(filenode.content, lexer)
50 lines = split_token_stream(tokens)
50 lines = split_token_stream(tokens)
51 rv = list(lines)
51 rv = list(lines)
52 return rv
52 return rv
53
53
54
54
55 def tokenize_string(content, lexer):
55 def tokenize_string(content, lexer):
56 """
56 """
57 Use pygments to tokenize some content based on a lexer
57 Use pygments to tokenize some content based on a lexer
58 ensuring all original new lines and whitespace is preserved
58 ensuring all original new lines and whitespace is preserved
59 """
59 """
60
60
61 lexer.stripall = False
61 lexer.stripall = False
62 lexer.stripnl = False
62 lexer.stripnl = False
63 lexer.ensurenl = False
63 lexer.ensurenl = False
64
64
65 if isinstance(lexer, TextLexer):
65 if isinstance(lexer, TextLexer):
66 lexed = [(Token.Text, content)]
66 lexed = [(Token.Text, content)]
67 else:
67 else:
68 lexed = lex(content, lexer)
68 lexed = lex(content, lexer)
69
69
70 for token_type, token_text in lexed:
70 for token_type, token_text in lexed:
71 yield pygment_token_class(token_type), token_text
71 yield pygment_token_class(token_type), token_text
72
72
73
73
74 def split_token_stream(tokens):
74 def split_token_stream(tokens):
75 """
75 """
76 Take a list of (TokenType, text) tuples and split them by a string
76 Take a list of (TokenType, text) tuples and split them by a string
77
77
78 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
78 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
79 [(TEXT, 'some'), (TEXT, 'text'),
79 [(TEXT, 'some'), (TEXT, 'text'),
80 (TEXT, 'more'), (TEXT, 'text')]
80 (TEXT, 'more'), (TEXT, 'text')]
81 """
81 """
82
82
83 buffer = []
83 buffer = []
84 for token_class, token_text in tokens:
84 for token_class, token_text in tokens:
85 parts = token_text.split('\n')
85 parts = token_text.split('\n')
86 for part in parts[:-1]:
86 for part in parts[:-1]:
87 buffer.append((token_class, part))
87 buffer.append((token_class, part))
88 yield buffer
88 yield buffer
89 buffer = []
89 buffer = []
90
90
91 buffer.append((token_class, parts[-1]))
91 buffer.append((token_class, parts[-1]))
92
92
93 if buffer:
93 if buffer:
94 yield buffer
94 yield buffer
95
95
96
96
97 def filenode_as_annotated_lines_tokens(filenode):
97 def filenode_as_annotated_lines_tokens(filenode):
98 """
98 """
99 Take a file node and return a list of annotations => lines, if no annotation
99 Take a file node and return a list of annotations => lines, if no annotation
100 is found, it will be None.
100 is found, it will be None.
101
101
102 eg:
102 eg:
103
103
104 [
104 [
105 (annotation1, [
105 (annotation1, [
106 (1, line1_tokens_list),
106 (1, line1_tokens_list),
107 (2, line2_tokens_list),
107 (2, line2_tokens_list),
108 ]),
108 ]),
109 (annotation2, [
109 (annotation2, [
110 (3, line1_tokens_list),
110 (3, line1_tokens_list),
111 ]),
111 ]),
112 (None, [
112 (None, [
113 (4, line1_tokens_list),
113 (4, line1_tokens_list),
114 ]),
114 ]),
115 (annotation1, [
115 (annotation1, [
116 (5, line1_tokens_list),
116 (5, line1_tokens_list),
117 (6, line2_tokens_list),
117 (6, line2_tokens_list),
118 ])
118 ])
119 ]
119 ]
120 """
120 """
121
121
122 commit_cache = {} # cache commit_getter lookups
122 commit_cache = {} # cache commit_getter lookups
123
123
124 def _get_annotation(commit_id, commit_getter):
124 def _get_annotation(commit_id, commit_getter):
125 if commit_id not in commit_cache:
125 if commit_id not in commit_cache:
126 commit_cache[commit_id] = commit_getter()
126 commit_cache[commit_id] = commit_getter()
127 return commit_cache[commit_id]
127 return commit_cache[commit_id]
128
128
129 annotation_lookup = {
129 annotation_lookup = {
130 line_no: _get_annotation(commit_id, commit_getter)
130 line_no: _get_annotation(commit_id, commit_getter)
131 for line_no, commit_id, commit_getter, line_content
131 for line_no, commit_id, commit_getter, line_content
132 in filenode.annotate
132 in filenode.annotate
133 }
133 }
134
134
135 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
135 annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
136 for line_no, tokens
136 for line_no, tokens
137 in enumerate(filenode_as_lines_tokens(filenode), 1))
137 in enumerate(filenode_as_lines_tokens(filenode), 1))
138
138
139 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
139 grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])
140
140
141 for annotation, group in grouped_annotations_lines:
141 for annotation, group in grouped_annotations_lines:
142 yield (
142 yield (
143 annotation, [(line_no, tokens)
143 annotation, [(line_no, tokens)
144 for (_, line_no, tokens) in group]
144 for (_, line_no, tokens) in group]
145 )
145 )
146
146
147
147
148 def render_tokenstream(tokenstream):
148 def render_tokenstream(tokenstream):
149 result = []
149 result = []
150 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
150 for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
151
151
152 if token_class:
152 if token_class:
153 result.append(u'<span class="%s">' % token_class)
153 result.append(u'<span class="%s">' % token_class)
154 else:
154 else:
155 result.append(u'<span>')
155 result.append(u'<span>')
156
156
157 for op_tag, token_text in token_ops_texts:
157 for op_tag, token_text in token_ops_texts:
158
158
159 if op_tag:
159 if op_tag:
160 result.append(u'<%s>' % op_tag)
160 result.append(u'<%s>' % op_tag)
161
161
162 escaped_text = html_escape(token_text)
162 escaped_text = html_escape(token_text)
163
163
164 # TODO: dan: investigate showing hidden characters like space/nl/tab
164 # TODO: dan: investigate showing hidden characters like space/nl/tab
165 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
165 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
166 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
166 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
167 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
167 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
168
168
169 result.append(escaped_text)
169 result.append(escaped_text)
170
170
171 if op_tag:
171 if op_tag:
172 result.append(u'</%s>' % op_tag)
172 result.append(u'</%s>' % op_tag)
173
173
174 result.append(u'</span>')
174 result.append(u'</span>')
175
175
176 html = ''.join(result)
176 html = ''.join(result)
177 return html
177 return html
178
178
179
179
180 def rollup_tokenstream(tokenstream):
180 def rollup_tokenstream(tokenstream):
181 """
181 """
182 Group a token stream of the format:
182 Group a token stream of the format:
183
183
184 ('class', 'op', 'text')
184 ('class', 'op', 'text')
185 or
185 or
186 ('class', 'text')
186 ('class', 'text')
187
187
188 into
188 into
189
189
190 [('class1',
190 [('class1',
191 [('op1', 'text'),
191 [('op1', 'text'),
192 ('op2', 'text')]),
192 ('op2', 'text')]),
193 ('class2',
193 ('class2',
194 [('op3', 'text')])]
194 [('op3', 'text')])]
195
195
196 This is used to get the minimal tags necessary when
196 This is used to get the minimal tags necessary when
197 rendering to html eg for a token stream ie.
197 rendering to html eg for a token stream ie.
198
198
199 <span class="A"><ins>he</ins>llo</span>
199 <span class="A"><ins>he</ins>llo</span>
200 vs
200 vs
201 <span class="A"><ins>he</ins></span><span class="A">llo</span>
201 <span class="A"><ins>he</ins></span><span class="A">llo</span>
202
202
203 If a 2 tuple is passed in, the output op will be an empty string.
203 If a 2 tuple is passed in, the output op will be an empty string.
204
204
205 eg:
205 eg:
206
206
207 >>> rollup_tokenstream([('classA', '', 'h'),
207 >>> rollup_tokenstream([('classA', '', 'h'),
208 ('classA', 'del', 'ell'),
208 ('classA', 'del', 'ell'),
209 ('classA', '', 'o'),
209 ('classA', '', 'o'),
210 ('classB', '', ' '),
210 ('classB', '', ' '),
211 ('classA', '', 'the'),
211 ('classA', '', 'the'),
212 ('classA', '', 're'),
212 ('classA', '', 're'),
213 ])
213 ])
214
214
215 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
215 [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')],
216 ('classB', [('', ' ')],
216 ('classB', [('', ' ')],
217 ('classA', [('', 'there')]]
217 ('classA', [('', 'there')]]
218
218
219 """
219 """
220 if tokenstream and len(tokenstream[0]) == 2:
220 if tokenstream and len(tokenstream[0]) == 2:
221 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
221 tokenstream = ((t[0], '', t[1]) for t in tokenstream)
222
222
223 result = []
223 result = []
224 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
224 for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
225 ops = []
225 ops = []
226 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
226 for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
227 text_buffer = []
227 text_buffer = []
228 for t_class, t_op, t_text in token_text_list:
228 for t_class, t_op, t_text in token_text_list:
229 text_buffer.append(t_text)
229 text_buffer.append(t_text)
230 ops.append((token_op, ''.join(text_buffer)))
230 ops.append((token_op, ''.join(text_buffer)))
231 result.append((token_class, ops))
231 result.append((token_class, ops))
232 return result
232 return result
233
233
234
234
235 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
235 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
236 """
236 """
237 Converts a list of (token_class, token_text) tuples to a list of
237 Converts a list of (token_class, token_text) tuples to a list of
238 (token_class, token_op, token_text) tuples where token_op is one of
238 (token_class, token_op, token_text) tuples where token_op is one of
239 ('ins', 'del', '')
239 ('ins', 'del', '')
240
240
241 :param old_tokens: list of (token_class, token_text) tuples of old line
241 :param old_tokens: list of (token_class, token_text) tuples of old line
242 :param new_tokens: list of (token_class, token_text) tuples of new line
242 :param new_tokens: list of (token_class, token_text) tuples of new line
243 :param use_diff_match_patch: boolean, will use google's diff match patch
243 :param use_diff_match_patch: boolean, will use google's diff match patch
244 library which has options to 'smooth' out the character by character
244 library which has options to 'smooth' out the character by character
245 differences making nicer ins/del blocks
245 differences making nicer ins/del blocks
246 """
246 """
247
247
248 old_tokens_result = []
248 old_tokens_result = []
249 new_tokens_result = []
249 new_tokens_result = []
250
250
251 similarity = difflib.SequenceMatcher(None,
251 similarity = difflib.SequenceMatcher(None,
252 ''.join(token_text for token_class, token_text in old_tokens),
252 ''.join(token_text for token_class, token_text in old_tokens),
253 ''.join(token_text for token_class, token_text in new_tokens)
253 ''.join(token_text for token_class, token_text in new_tokens)
254 ).ratio()
254 ).ratio()
255
255
256 if similarity < 0.6: # return, the blocks are too different
256 if similarity < 0.6: # return, the blocks are too different
257 for token_class, token_text in old_tokens:
257 for token_class, token_text in old_tokens:
258 old_tokens_result.append((token_class, '', token_text))
258 old_tokens_result.append((token_class, '', token_text))
259 for token_class, token_text in new_tokens:
259 for token_class, token_text in new_tokens:
260 new_tokens_result.append((token_class, '', token_text))
260 new_tokens_result.append((token_class, '', token_text))
261 return old_tokens_result, new_tokens_result, similarity
261 return old_tokens_result, new_tokens_result, similarity
262
262
263 token_sequence_matcher = difflib.SequenceMatcher(None,
263 token_sequence_matcher = difflib.SequenceMatcher(None,
264 [x[1] for x in old_tokens],
264 [x[1] for x in old_tokens],
265 [x[1] for x in new_tokens])
265 [x[1] for x in new_tokens])
266
266
267 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
267 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
268 # check the differences by token block types first to give a more
268 # check the differences by token block types first to give a more
269 # nicer "block" level replacement vs character diffs
269 # nicer "block" level replacement vs character diffs
270
270
271 if tag == 'equal':
271 if tag == 'equal':
272 for token_class, token_text in old_tokens[o1:o2]:
272 for token_class, token_text in old_tokens[o1:o2]:
273 old_tokens_result.append((token_class, '', token_text))
273 old_tokens_result.append((token_class, '', token_text))
274 for token_class, token_text in new_tokens[n1:n2]:
274 for token_class, token_text in new_tokens[n1:n2]:
275 new_tokens_result.append((token_class, '', token_text))
275 new_tokens_result.append((token_class, '', token_text))
276 elif tag == 'delete':
276 elif tag == 'delete':
277 for token_class, token_text in old_tokens[o1:o2]:
277 for token_class, token_text in old_tokens[o1:o2]:
278 old_tokens_result.append((token_class, 'del', token_text))
278 old_tokens_result.append((token_class, 'del', token_text))
279 elif tag == 'insert':
279 elif tag == 'insert':
280 for token_class, token_text in new_tokens[n1:n2]:
280 for token_class, token_text in new_tokens[n1:n2]:
281 new_tokens_result.append((token_class, 'ins', token_text))
281 new_tokens_result.append((token_class, 'ins', token_text))
282 elif tag == 'replace':
282 elif tag == 'replace':
283 # if same type token blocks must be replaced, do a diff on the
283 # if same type token blocks must be replaced, do a diff on the
284 # characters in the token blocks to show individual changes
284 # characters in the token blocks to show individual changes
285
285
286 old_char_tokens = []
286 old_char_tokens = []
287 new_char_tokens = []
287 new_char_tokens = []
288 for token_class, token_text in old_tokens[o1:o2]:
288 for token_class, token_text in old_tokens[o1:o2]:
289 for char in token_text:
289 for char in token_text:
290 old_char_tokens.append((token_class, char))
290 old_char_tokens.append((token_class, char))
291
291
292 for token_class, token_text in new_tokens[n1:n2]:
292 for token_class, token_text in new_tokens[n1:n2]:
293 for char in token_text:
293 for char in token_text:
294 new_char_tokens.append((token_class, char))
294 new_char_tokens.append((token_class, char))
295
295
296 old_string = ''.join([token_text for
296 old_string = ''.join([token_text for
297 token_class, token_text in old_char_tokens])
297 token_class, token_text in old_char_tokens])
298 new_string = ''.join([token_text for
298 new_string = ''.join([token_text for
299 token_class, token_text in new_char_tokens])
299 token_class, token_text in new_char_tokens])
300
300
301 char_sequence = difflib.SequenceMatcher(
301 char_sequence = difflib.SequenceMatcher(
302 None, old_string, new_string)
302 None, old_string, new_string)
303 copcodes = char_sequence.get_opcodes()
303 copcodes = char_sequence.get_opcodes()
304 obuffer, nbuffer = [], []
304 obuffer, nbuffer = [], []
305
305
306 if use_diff_match_patch:
306 if use_diff_match_patch:
307 dmp = diff_match_patch()
307 dmp = diff_match_patch()
308 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
308 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
309 reps = dmp.diff_main(old_string, new_string)
309 reps = dmp.diff_main(old_string, new_string)
310 dmp.diff_cleanupEfficiency(reps)
310 dmp.diff_cleanupEfficiency(reps)
311
311
312 a, b = 0, 0
312 a, b = 0, 0
313 for op, rep in reps:
313 for op, rep in reps:
314 l = len(rep)
314 l = len(rep)
315 if op == 0:
315 if op == 0:
316 for i, c in enumerate(rep):
316 for i, c in enumerate(rep):
317 obuffer.append((old_char_tokens[a+i][0], '', c))
317 obuffer.append((old_char_tokens[a+i][0], '', c))
318 nbuffer.append((new_char_tokens[b+i][0], '', c))
318 nbuffer.append((new_char_tokens[b+i][0], '', c))
319 a += l
319 a += l
320 b += l
320 b += l
321 elif op == -1:
321 elif op == -1:
322 for i, c in enumerate(rep):
322 for i, c in enumerate(rep):
323 obuffer.append((old_char_tokens[a+i][0], 'del', c))
323 obuffer.append((old_char_tokens[a+i][0], 'del', c))
324 a += l
324 a += l
325 elif op == 1:
325 elif op == 1:
326 for i, c in enumerate(rep):
326 for i, c in enumerate(rep):
327 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
327 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
328 b += l
328 b += l
329 else:
329 else:
330 for ctag, co1, co2, cn1, cn2 in copcodes:
330 for ctag, co1, co2, cn1, cn2 in copcodes:
331 if ctag == 'equal':
331 if ctag == 'equal':
332 for token_class, token_text in old_char_tokens[co1:co2]:
332 for token_class, token_text in old_char_tokens[co1:co2]:
333 obuffer.append((token_class, '', token_text))
333 obuffer.append((token_class, '', token_text))
334 for token_class, token_text in new_char_tokens[cn1:cn2]:
334 for token_class, token_text in new_char_tokens[cn1:cn2]:
335 nbuffer.append((token_class, '', token_text))
335 nbuffer.append((token_class, '', token_text))
336 elif ctag == 'delete':
336 elif ctag == 'delete':
337 for token_class, token_text in old_char_tokens[co1:co2]:
337 for token_class, token_text in old_char_tokens[co1:co2]:
338 obuffer.append((token_class, 'del', token_text))
338 obuffer.append((token_class, 'del', token_text))
339 elif ctag == 'insert':
339 elif ctag == 'insert':
340 for token_class, token_text in new_char_tokens[cn1:cn2]:
340 for token_class, token_text in new_char_tokens[cn1:cn2]:
341 nbuffer.append((token_class, 'ins', token_text))
341 nbuffer.append((token_class, 'ins', token_text))
342 elif ctag == 'replace':
342 elif ctag == 'replace':
343 for token_class, token_text in old_char_tokens[co1:co2]:
343 for token_class, token_text in old_char_tokens[co1:co2]:
344 obuffer.append((token_class, 'del', token_text))
344 obuffer.append((token_class, 'del', token_text))
345 for token_class, token_text in new_char_tokens[cn1:cn2]:
345 for token_class, token_text in new_char_tokens[cn1:cn2]:
346 nbuffer.append((token_class, 'ins', token_text))
346 nbuffer.append((token_class, 'ins', token_text))
347
347
348 old_tokens_result.extend(obuffer)
348 old_tokens_result.extend(obuffer)
349 new_tokens_result.extend(nbuffer)
349 new_tokens_result.extend(nbuffer)
350
350
351 return old_tokens_result, new_tokens_result, similarity
351 return old_tokens_result, new_tokens_result, similarity
352
352
353
353
354 class DiffSet(object):
354 class DiffSet(object):
355 """
355 """
356 An object for parsing the diff result from diffs.DiffProcessor and
356 An object for parsing the diff result from diffs.DiffProcessor and
357 adding highlighting, side by side/unified renderings and line diffs
357 adding highlighting, side by side/unified renderings and line diffs
358 """
358 """
359
359
360 HL_REAL = 'REAL' # highlights using original file, slow
360 HL_REAL = 'REAL' # highlights using original file, slow
361 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
361 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
362 # in the case of multiline code
362 # in the case of multiline code
363 HL_NONE = 'NONE' # no highlighting, fastest
363 HL_NONE = 'NONE' # no highlighting, fastest
364
364
365 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
365 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
366 source_repo_name=None,
366 source_repo_name=None,
367 source_node_getter=lambda filename: None,
367 source_node_getter=lambda filename: None,
368 target_node_getter=lambda filename: None,
368 target_node_getter=lambda filename: None,
369 source_nodes=None, target_nodes=None,
369 source_nodes=None, target_nodes=None,
370 max_file_size_limit=150 * 1024, # files over this size will
370 max_file_size_limit=150 * 1024, # files over this size will
371 # use fast highlighting
371 # use fast highlighting
372 comments=None,
372 comments=None,
373 ):
373 ):
374
374
375 self.highlight_mode = highlight_mode
375 self.highlight_mode = highlight_mode
376 self.highlighted_filenodes = {}
376 self.highlighted_filenodes = {}
377 self.source_node_getter = source_node_getter
377 self.source_node_getter = source_node_getter
378 self.target_node_getter = target_node_getter
378 self.target_node_getter = target_node_getter
379 self.source_nodes = source_nodes or {}
379 self.source_nodes = source_nodes or {}
380 self.target_nodes = target_nodes or {}
380 self.target_nodes = target_nodes or {}
381 self.repo_name = repo_name
381 self.repo_name = repo_name
382 self.source_repo_name = source_repo_name or repo_name
382 self.source_repo_name = source_repo_name or repo_name
383 self.comments = comments or {}
383 self.comments = comments or {}
384 self.comments_store = self.comments.copy()
384 self.comments_store = self.comments.copy()
385 self.max_file_size_limit = max_file_size_limit
385 self.max_file_size_limit = max_file_size_limit
386
386
387 def render_patchset(self, patchset, source_ref=None, target_ref=None):
387 def render_patchset(self, patchset, source_ref=None, target_ref=None):
388 diffset = AttributeDict(dict(
388 diffset = AttributeDict(dict(
389 lines_added=0,
389 lines_added=0,
390 lines_deleted=0,
390 lines_deleted=0,
391 changed_files=0,
391 changed_files=0,
392 files=[],
392 files=[],
393 file_stats={},
393 file_stats={},
394 limited_diff=isinstance(patchset, LimitedDiffContainer),
394 limited_diff=isinstance(patchset, LimitedDiffContainer),
395 repo_name=self.repo_name,
395 repo_name=self.repo_name,
396 source_repo_name=self.source_repo_name,
396 source_repo_name=self.source_repo_name,
397 source_ref=source_ref,
397 source_ref=source_ref,
398 target_ref=target_ref,
398 target_ref=target_ref,
399 ))
399 ))
400 for patch in patchset:
400 for patch in patchset:
401 diffset.file_stats[patch['filename']] = patch['stats']
401 diffset.file_stats[patch['filename']] = patch['stats']
402 filediff = self.render_patch(patch)
402 filediff = self.render_patch(patch)
403 filediff.diffset = diffset
403 filediff.diffset = StrictAttributeDict(dict(
404 source_ref=diffset.source_ref,
405 target_ref=diffset.target_ref,
406 repo_name=diffset.repo_name,
407 source_repo_name=diffset.source_repo_name,
408 ))
404 diffset.files.append(filediff)
409 diffset.files.append(filediff)
405 diffset.changed_files += 1
410 diffset.changed_files += 1
406 if not patch['stats']['binary']:
411 if not patch['stats']['binary']:
407 diffset.lines_added += patch['stats']['added']
412 diffset.lines_added += patch['stats']['added']
408 diffset.lines_deleted += patch['stats']['deleted']
413 diffset.lines_deleted += patch['stats']['deleted']
409
414
410 return diffset
415 return diffset
411
416
412 _lexer_cache = {}
417 _lexer_cache = {}
413
418
414 def _get_lexer_for_filename(self, filename, filenode=None):
419 def _get_lexer_for_filename(self, filename, filenode=None):
415 # cached because we might need to call it twice for source/target
420 # cached because we might need to call it twice for source/target
416 if filename not in self._lexer_cache:
421 if filename not in self._lexer_cache:
417 if filenode:
422 if filenode:
418 lexer = filenode.lexer
423 lexer = filenode.lexer
419 extension = filenode.extension
424 extension = filenode.extension
420 else:
425 else:
421 lexer = FileNode.get_lexer(filename=filename)
426 lexer = FileNode.get_lexer(filename=filename)
422 extension = filename.split('.')[-1]
427 extension = filename.split('.')[-1]
423
428
424 lexer = get_custom_lexer(extension) or lexer
429 lexer = get_custom_lexer(extension) or lexer
425 self._lexer_cache[filename] = lexer
430 self._lexer_cache[filename] = lexer
426 return self._lexer_cache[filename]
431 return self._lexer_cache[filename]
427
432
428 def render_patch(self, patch):
433 def render_patch(self, patch):
429 log.debug('rendering diff for %r' % patch['filename'])
434 log.debug('rendering diff for %r' % patch['filename'])
430
435
431 source_filename = patch['original_filename']
436 source_filename = patch['original_filename']
432 target_filename = patch['filename']
437 target_filename = patch['filename']
433
438
434 source_lexer = plain_text_lexer
439 source_lexer = plain_text_lexer
435 target_lexer = plain_text_lexer
440 target_lexer = plain_text_lexer
436
441
437 if not patch['stats']['binary']:
442 if not patch['stats']['binary']:
438 if self.highlight_mode == self.HL_REAL:
443 if self.highlight_mode == self.HL_REAL:
439 if (source_filename and patch['operation'] in ('D', 'M')
444 if (source_filename and patch['operation'] in ('D', 'M')
440 and source_filename not in self.source_nodes):
445 and source_filename not in self.source_nodes):
441 self.source_nodes[source_filename] = (
446 self.source_nodes[source_filename] = (
442 self.source_node_getter(source_filename))
447 self.source_node_getter(source_filename))
443
448
444 if (target_filename and patch['operation'] in ('A', 'M')
449 if (target_filename and patch['operation'] in ('A', 'M')
445 and target_filename not in self.target_nodes):
450 and target_filename not in self.target_nodes):
446 self.target_nodes[target_filename] = (
451 self.target_nodes[target_filename] = (
447 self.target_node_getter(target_filename))
452 self.target_node_getter(target_filename))
448
453
449 elif self.highlight_mode == self.HL_FAST:
454 elif self.highlight_mode == self.HL_FAST:
450 source_lexer = self._get_lexer_for_filename(source_filename)
455 source_lexer = self._get_lexer_for_filename(source_filename)
451 target_lexer = self._get_lexer_for_filename(target_filename)
456 target_lexer = self._get_lexer_for_filename(target_filename)
452
457
453 source_file = self.source_nodes.get(source_filename, source_filename)
458 source_file = self.source_nodes.get(source_filename, source_filename)
454 target_file = self.target_nodes.get(target_filename, target_filename)
459 target_file = self.target_nodes.get(target_filename, target_filename)
455
460
456 source_filenode, target_filenode = None, None
461 source_filenode, target_filenode = None, None
457
462
458 # TODO: dan: FileNode.lexer works on the content of the file - which
463 # TODO: dan: FileNode.lexer works on the content of the file - which
459 # can be slow - issue #4289 explains a lexer clean up - which once
464 # can be slow - issue #4289 explains a lexer clean up - which once
460 # done can allow caching a lexer for a filenode to avoid the file lookup
465 # done can allow caching a lexer for a filenode to avoid the file lookup
461 if isinstance(source_file, FileNode):
466 if isinstance(source_file, FileNode):
462 source_filenode = source_file
467 source_filenode = source_file
463 #source_lexer = source_file.lexer
468 #source_lexer = source_file.lexer
464 source_lexer = self._get_lexer_for_filename(source_filename)
469 source_lexer = self._get_lexer_for_filename(source_filename)
465 source_file.lexer = source_lexer
470 source_file.lexer = source_lexer
466
471
467 if isinstance(target_file, FileNode):
472 if isinstance(target_file, FileNode):
468 target_filenode = target_file
473 target_filenode = target_file
469 #target_lexer = target_file.lexer
474 #target_lexer = target_file.lexer
470 target_lexer = self._get_lexer_for_filename(target_filename)
475 target_lexer = self._get_lexer_for_filename(target_filename)
471 target_file.lexer = target_lexer
476 target_file.lexer = target_lexer
472
477
473 source_file_path, target_file_path = None, None
478 source_file_path, target_file_path = None, None
474
479
475 if source_filename != '/dev/null':
480 if source_filename != '/dev/null':
476 source_file_path = source_filename
481 source_file_path = source_filename
477 if target_filename != '/dev/null':
482 if target_filename != '/dev/null':
478 target_file_path = target_filename
483 target_file_path = target_filename
479
484
480 source_file_type = source_lexer.name
485 source_file_type = source_lexer.name
481 target_file_type = target_lexer.name
486 target_file_type = target_lexer.name
482
487
483 filediff = AttributeDict({
488 filediff = AttributeDict({
484 'source_file_path': source_file_path,
489 'source_file_path': source_file_path,
485 'target_file_path': target_file_path,
490 'target_file_path': target_file_path,
486 'source_filenode': source_filenode,
491 'source_filenode': source_filenode,
487 'target_filenode': target_filenode,
492 'target_filenode': target_filenode,
488 'source_file_type': target_file_type,
493 'source_file_type': target_file_type,
489 'target_file_type': source_file_type,
494 'target_file_type': source_file_type,
490 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
495 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
491 'operation': patch['operation'],
496 'operation': patch['operation'],
492 'source_mode': patch['stats']['old_mode'],
497 'source_mode': patch['stats']['old_mode'],
493 'target_mode': patch['stats']['new_mode'],
498 'target_mode': patch['stats']['new_mode'],
494 'limited_diff': isinstance(patch, LimitedDiffContainer),
499 'limited_diff': isinstance(patch, LimitedDiffContainer),
495 'hunks': [],
500 'hunks': [],
496 'diffset': self,
501 'diffset': self,
497 })
502 })
498
503
499 for hunk in patch['chunks'][1:]:
504 for hunk in patch['chunks'][1:]:
500 hunkbit = self.parse_hunk(hunk, source_file, target_file)
505 hunkbit = self.parse_hunk(hunk, source_file, target_file)
501 hunkbit.source_file_path = source_file_path
506 hunkbit.source_file_path = source_file_path
502 hunkbit.target_file_path = target_file_path
507 hunkbit.target_file_path = target_file_path
503 filediff.hunks.append(hunkbit)
508 filediff.hunks.append(hunkbit)
504
509
505 left_comments = {}
510 left_comments = {}
506 if source_file_path in self.comments_store:
511 if source_file_path in self.comments_store:
507 for lineno, comments in self.comments_store[source_file_path].items():
512 for lineno, comments in self.comments_store[source_file_path].items():
508 left_comments[lineno] = comments
513 left_comments[lineno] = comments
509
514
510 if target_file_path in self.comments_store:
515 if target_file_path in self.comments_store:
511 for lineno, comments in self.comments_store[target_file_path].items():
516 for lineno, comments in self.comments_store[target_file_path].items():
512 left_comments[lineno] = comments
517 left_comments[lineno] = comments
513 # left comments are one that we couldn't place in diff lines.
518 # left comments are one that we couldn't place in diff lines.
514 # could be outdated, or the diff changed and this line is no
519 # could be outdated, or the diff changed and this line is no
515 # longer available
520 # longer available
516 filediff.left_comments = left_comments
521 filediff.left_comments = left_comments
517
522
518 return filediff
523 return filediff
519
524
520 def parse_hunk(self, hunk, source_file, target_file):
525 def parse_hunk(self, hunk, source_file, target_file):
521 result = AttributeDict(dict(
526 result = AttributeDict(dict(
522 source_start=hunk['source_start'],
527 source_start=hunk['source_start'],
523 source_length=hunk['source_length'],
528 source_length=hunk['source_length'],
524 target_start=hunk['target_start'],
529 target_start=hunk['target_start'],
525 target_length=hunk['target_length'],
530 target_length=hunk['target_length'],
526 section_header=hunk['section_header'],
531 section_header=hunk['section_header'],
527 lines=[],
532 lines=[],
528 ))
533 ))
529 before, after = [], []
534 before, after = [], []
530
535
531 for line in hunk['lines']:
536 for line in hunk['lines']:
532
537
533 if line['action'] == 'unmod':
538 if line['action'] == 'unmod':
534 result.lines.extend(
539 result.lines.extend(
535 self.parse_lines(before, after, source_file, target_file))
540 self.parse_lines(before, after, source_file, target_file))
536 after.append(line)
541 after.append(line)
537 before.append(line)
542 before.append(line)
538 elif line['action'] == 'add':
543 elif line['action'] == 'add':
539 after.append(line)
544 after.append(line)
540 elif line['action'] == 'del':
545 elif line['action'] == 'del':
541 before.append(line)
546 before.append(line)
542 elif line['action'] == 'old-no-nl':
547 elif line['action'] == 'old-no-nl':
543 before.append(line)
548 before.append(line)
544 elif line['action'] == 'new-no-nl':
549 elif line['action'] == 'new-no-nl':
545 after.append(line)
550 after.append(line)
546
551
547 result.lines.extend(
552 result.lines.extend(
548 self.parse_lines(before, after, source_file, target_file))
553 self.parse_lines(before, after, source_file, target_file))
549 result.unified = self.as_unified(result.lines)
554 result.unified = self.as_unified(result.lines)
550 result.sideside = result.lines
555 result.sideside = result.lines
551
556
552 return result
557 return result
553
558
554 def parse_lines(self, before_lines, after_lines, source_file, target_file):
559 def parse_lines(self, before_lines, after_lines, source_file, target_file):
555 # TODO: dan: investigate doing the diff comparison and fast highlighting
560 # TODO: dan: investigate doing the diff comparison and fast highlighting
556 # on the entire before and after buffered block lines rather than by
561 # on the entire before and after buffered block lines rather than by
557 # line, this means we can get better 'fast' highlighting if the context
562 # line, this means we can get better 'fast' highlighting if the context
558 # allows it - eg.
563 # allows it - eg.
559 # line 4: """
564 # line 4: """
560 # line 5: this gets highlighted as a string
565 # line 5: this gets highlighted as a string
561 # line 6: """
566 # line 6: """
562
567
563 lines = []
568 lines = []
564
569
565 before_newline = AttributeDict()
570 before_newline = AttributeDict()
566 after_newline = AttributeDict()
571 after_newline = AttributeDict()
567 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
572 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
568 before_newline_line = before_lines.pop(-1)
573 before_newline_line = before_lines.pop(-1)
569 before_newline.content = '\n {}'.format(
574 before_newline.content = '\n {}'.format(
570 render_tokenstream(
575 render_tokenstream(
571 [(x[0], '', x[1])
576 [(x[0], '', x[1])
572 for x in [('nonl', before_newline_line['line'])]]))
577 for x in [('nonl', before_newline_line['line'])]]))
573
578
574 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
579 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
575 after_newline_line = after_lines.pop(-1)
580 after_newline_line = after_lines.pop(-1)
576 after_newline.content = '\n {}'.format(
581 after_newline.content = '\n {}'.format(
577 render_tokenstream(
582 render_tokenstream(
578 [(x[0], '', x[1])
583 [(x[0], '', x[1])
579 for x in [('nonl', after_newline_line['line'])]]))
584 for x in [('nonl', after_newline_line['line'])]]))
580
585
581 while before_lines or after_lines:
586 while before_lines or after_lines:
582 before, after = None, None
587 before, after = None, None
583 before_tokens, after_tokens = None, None
588 before_tokens, after_tokens = None, None
584
589
585 if before_lines:
590 if before_lines:
586 before = before_lines.pop(0)
591 before = before_lines.pop(0)
587 if after_lines:
592 if after_lines:
588 after = after_lines.pop(0)
593 after = after_lines.pop(0)
589
594
590 original = AttributeDict()
595 original = AttributeDict()
591 modified = AttributeDict()
596 modified = AttributeDict()
592
597
593 if before:
598 if before:
594 if before['action'] == 'old-no-nl':
599 if before['action'] == 'old-no-nl':
595 before_tokens = [('nonl', before['line'])]
600 before_tokens = [('nonl', before['line'])]
596 else:
601 else:
597 before_tokens = self.get_line_tokens(
602 before_tokens = self.get_line_tokens(
598 line_text=before['line'],
603 line_text=before['line'],
599 line_number=before['old_lineno'],
604 line_number=before['old_lineno'],
600 file=source_file)
605 file=source_file)
601 original.lineno = before['old_lineno']
606 original.lineno = before['old_lineno']
602 original.content = before['line']
607 original.content = before['line']
603 original.action = self.action_to_op(before['action'])
608 original.action = self.action_to_op(before['action'])
604 original.comments = self.get_comments_for('old',
609 original.comments = self.get_comments_for('old',
605 source_file, before['old_lineno'])
610 source_file, before['old_lineno'])
606
611
607 if after:
612 if after:
608 if after['action'] == 'new-no-nl':
613 if after['action'] == 'new-no-nl':
609 after_tokens = [('nonl', after['line'])]
614 after_tokens = [('nonl', after['line'])]
610 else:
615 else:
611 after_tokens = self.get_line_tokens(
616 after_tokens = self.get_line_tokens(
612 line_text=after['line'], line_number=after['new_lineno'],
617 line_text=after['line'], line_number=after['new_lineno'],
613 file=target_file)
618 file=target_file)
614 modified.lineno = after['new_lineno']
619 modified.lineno = after['new_lineno']
615 modified.content = after['line']
620 modified.content = after['line']
616 modified.action = self.action_to_op(after['action'])
621 modified.action = self.action_to_op(after['action'])
617 modified.comments = self.get_comments_for('new',
622 modified.comments = self.get_comments_for('new',
618 target_file, after['new_lineno'])
623 target_file, after['new_lineno'])
619
624
620 # diff the lines
625 # diff the lines
621 if before_tokens and after_tokens:
626 if before_tokens and after_tokens:
622 o_tokens, m_tokens, similarity = tokens_diff(
627 o_tokens, m_tokens, similarity = tokens_diff(
623 before_tokens, after_tokens)
628 before_tokens, after_tokens)
624 original.content = render_tokenstream(o_tokens)
629 original.content = render_tokenstream(o_tokens)
625 modified.content = render_tokenstream(m_tokens)
630 modified.content = render_tokenstream(m_tokens)
626 elif before_tokens:
631 elif before_tokens:
627 original.content = render_tokenstream(
632 original.content = render_tokenstream(
628 [(x[0], '', x[1]) for x in before_tokens])
633 [(x[0], '', x[1]) for x in before_tokens])
629 elif after_tokens:
634 elif after_tokens:
630 modified.content = render_tokenstream(
635 modified.content = render_tokenstream(
631 [(x[0], '', x[1]) for x in after_tokens])
636 [(x[0], '', x[1]) for x in after_tokens])
632
637
633 if not before_lines and before_newline:
638 if not before_lines and before_newline:
634 original.content += before_newline.content
639 original.content += before_newline.content
635 before_newline = None
640 before_newline = None
636 if not after_lines and after_newline:
641 if not after_lines and after_newline:
637 modified.content += after_newline.content
642 modified.content += after_newline.content
638 after_newline = None
643 after_newline = None
639
644
640 lines.append(AttributeDict({
645 lines.append(AttributeDict({
641 'original': original,
646 'original': original,
642 'modified': modified,
647 'modified': modified,
643 }))
648 }))
644
649
645 return lines
650 return lines
646
651
647 def get_comments_for(self, version, filename, line_number):
652 def get_comments_for(self, version, filename, line_number):
648 if hasattr(filename, 'unicode_path'):
653 if hasattr(filename, 'unicode_path'):
649 filename = filename.unicode_path
654 filename = filename.unicode_path
650
655
651 if not isinstance(filename, basestring):
656 if not isinstance(filename, basestring):
652 return None
657 return None
653
658
654 line_key = {
659 line_key = {
655 'old': 'o',
660 'old': 'o',
656 'new': 'n',
661 'new': 'n',
657 }[version] + str(line_number)
662 }[version] + str(line_number)
658
663
659 if filename in self.comments_store:
664 if filename in self.comments_store:
660 file_comments = self.comments_store[filename]
665 file_comments = self.comments_store[filename]
661 if line_key in file_comments:
666 if line_key in file_comments:
662 return file_comments.pop(line_key)
667 return file_comments.pop(line_key)
663
668
664 def get_line_tokens(self, line_text, line_number, file=None):
669 def get_line_tokens(self, line_text, line_number, file=None):
665 filenode = None
670 filenode = None
666 filename = None
671 filename = None
667
672
668 if isinstance(file, basestring):
673 if isinstance(file, basestring):
669 filename = file
674 filename = file
670 elif isinstance(file, FileNode):
675 elif isinstance(file, FileNode):
671 filenode = file
676 filenode = file
672 filename = file.unicode_path
677 filename = file.unicode_path
673
678
674 if self.highlight_mode == self.HL_REAL and filenode:
679 if self.highlight_mode == self.HL_REAL and filenode:
675 lexer = self._get_lexer_for_filename(filename)
680 lexer = self._get_lexer_for_filename(filename)
676 file_size_allowed = file.size < self.max_file_size_limit
681 file_size_allowed = file.size < self.max_file_size_limit
677 if line_number and file_size_allowed:
682 if line_number and file_size_allowed:
678 return self.get_tokenized_filenode_line(
683 return self.get_tokenized_filenode_line(
679 file, line_number, lexer)
684 file, line_number, lexer)
680
685
681 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
686 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
682 lexer = self._get_lexer_for_filename(filename)
687 lexer = self._get_lexer_for_filename(filename)
683 return list(tokenize_string(line_text, lexer))
688 return list(tokenize_string(line_text, lexer))
684
689
685 return list(tokenize_string(line_text, plain_text_lexer))
690 return list(tokenize_string(line_text, plain_text_lexer))
686
691
687 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
692 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
688
693
689 if filenode not in self.highlighted_filenodes:
694 if filenode not in self.highlighted_filenodes:
690 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
695 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
691 self.highlighted_filenodes[filenode] = tokenized_lines
696 self.highlighted_filenodes[filenode] = tokenized_lines
692 return self.highlighted_filenodes[filenode][line_number - 1]
697 return self.highlighted_filenodes[filenode][line_number - 1]
693
698
694 def action_to_op(self, action):
699 def action_to_op(self, action):
695 return {
700 return {
696 'add': '+',
701 'add': '+',
697 'del': '-',
702 'del': '-',
698 'unmod': ' ',
703 'unmod': ' ',
699 'old-no-nl': ' ',
704 'old-no-nl': ' ',
700 'new-no-nl': ' ',
705 'new-no-nl': ' ',
701 }.get(action, action)
706 }.get(action, action)
702
707
703 def as_unified(self, lines):
708 def as_unified(self, lines):
704 """
709 """
705 Return a generator that yields the lines of a diff in unified order
710 Return a generator that yields the lines of a diff in unified order
706 """
711 """
707 def generator():
712 def generator():
708 buf = []
713 buf = []
709 for line in lines:
714 for line in lines:
710
715
711 if buf and not line.original or line.original.action == ' ':
716 if buf and not line.original or line.original.action == ' ':
712 for b in buf:
717 for b in buf:
713 yield b
718 yield b
714 buf = []
719 buf = []
715
720
716 if line.original:
721 if line.original:
717 if line.original.action == ' ':
722 if line.original.action == ' ':
718 yield (line.original.lineno, line.modified.lineno,
723 yield (line.original.lineno, line.modified.lineno,
719 line.original.action, line.original.content,
724 line.original.action, line.original.content,
720 line.original.comments)
725 line.original.comments)
721 continue
726 continue
722
727
723 if line.original.action == '-':
728 if line.original.action == '-':
724 yield (line.original.lineno, None,
729 yield (line.original.lineno, None,
725 line.original.action, line.original.content,
730 line.original.action, line.original.content,
726 line.original.comments)
731 line.original.comments)
727
732
728 if line.modified.action == '+':
733 if line.modified.action == '+':
729 buf.append((
734 buf.append((
730 None, line.modified.lineno,
735 None, line.modified.lineno,
731 line.modified.action, line.modified.content,
736 line.modified.action, line.modified.content,
732 line.modified.comments))
737 line.modified.comments))
733 continue
738 continue
734
739
735 if line.modified:
740 if line.modified:
736 yield (None, line.modified.lineno,
741 yield (None, line.modified.lineno,
737 line.modified.action, line.modified.content,
742 line.modified.action, line.modified.content,
738 line.modified.comments)
743 line.modified.comments)
739
744
740 for b in buf:
745 for b in buf:
741 yield b
746 yield b
742
747
743 return generator()
748 return generator()
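For orientation, the DiffSet class shown above is normally constructed with node getters and then fed a parsed patchset. The following is a rough, hypothetical usage sketch rather than code from this changeset: the import path, the empty patchset, and the repo names/refs are placeholder assumptions, while the keyword arguments and result attributes follow the signatures visible in the diff.

    from rhodecode.lib.codeblocks import DiffSet  # import path assumed

    # In real callers the patchset comes from a diffs.DiffProcessor run;
    # an empty list keeps this sketch self-contained.
    patchset = []

    renderer = DiffSet(
        highlight_mode=DiffSet.HL_FAST,          # lex each line on its own (fast, may be
                                                 # wrong across multi-line constructs)
        repo_name='example-repo',                # placeholder
        source_repo_name='example-repo-fork',    # placeholder
        source_node_getter=lambda fname: None,   # real callers resolve FileNodes here
        target_node_getter=lambda fname: None,
    )

    diffset = renderer.render_patchset(
        patchset, source_ref='aaaaaaaaaaaa', target_ref='bbbbbbbbbbbb')

    print(diffset.changed_files, diffset.lines_added, diffset.lines_deleted)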