rhodecode-enterprise-ce Commit - r3102:2cd36dd3

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

3

4

#

4

#

5

# This program is free software: you can redistribute it and/or modify

5

# This program is free software: you can redistribute it and/or modify

6

# it under the terms of the GNU Affero General Public License, version 3

6

# it under the terms of the GNU Affero General Public License, version 3

7

# (only), as published by the Free Software Foundation.

7

# (only), as published by the Free Software Foundation.

8

#

8

#

9

# This program is distributed in the hope that it will be useful,

9

# This program is distributed in the hope that it will be useful,

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12

# GNU General Public License for more details.

12

# GNU General Public License for more details.

13

#

13

#

14

# You should have received a copy of the GNU Affero General Public License

14

# You should have received a copy of the GNU Affero General Public License

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

16

#

16

#

17

# This program is dual-licensed. If you wish to learn more about the

17

# This program is dual-licensed. If you wish to learn more about the

18

# RhodeCode Enterprise Edition, including its added features, Support services,

18

# RhodeCode Enterprise Edition, including its added features, Support services,

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

20

21

import logging

21

import logging

22

import difflib

22

import difflib

23

from itertools import groupby

23

from itertools import groupby

24

25

from pygments import lex

25

from pygments import lex

26

from pygments.formatters.html import _get_ttype_class as pygment_token_class

26

from pygments.formatters.html import _get_ttype_class as pygment_token_class

27

from pygments.lexers.special import TextLexer, Token

27

from pygments.lexers.special import TextLexer, Token

28

from pygments.lexers import get_lexer_by_name

28

from pygments.lexers import get_lexer_by_name

29

30

from rhodecode.lib.helpers import (

30

from rhodecode.lib.helpers import (

31

get_lexer_for_filenode, html_escape, get_custom_lexer)

31

get_lexer_for_filenode, html_escape, get_custom_lexer)

32

from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode

32

from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode

33

from rhodecode.lib.vcs.nodes import FileNode

33

from rhodecode.lib.vcs.nodes import FileNode

34

from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError

34

from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError

35

from rhodecode.lib.diff_match_patch import diff_match_patch

35

from rhodecode.lib.diff_match_patch import diff_match_patch

36

from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE

36

from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE

37

38

39

# Module-wide plain-text lexer. The options disable pygments' stripping of
# whitespace/newlines and its appending of a trailing newline.
plain_text_lexer = get_lexer_by_name(
    'text', stripall=False, stripnl=False, ensurenl=False)


log = logging.getLogger(__name__)

44

45

46

def filenode_as_lines_tokens(filenode, lexer=None):
    """
    Tokenize the content of a file node and group the tokens per line.

    :param filenode: object whose ``content`` attribute is tokenized
    :param lexer: optional lexer; when falsy, the lexer is looked up with
        ``get_lexer_for_filenode(filenode)``
    :returns: list with one item per line, each item being the list of
        (token_class, token_text) tuples produced by ``split_token_stream``
    """
    org_lexer = lexer
    lexer = lexer or get_lexer_for_filenode(filenode)
    log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
              lexer, filenode, org_lexer)
    tokens = tokenize_string(filenode.content, lexer)
    # materialize the generator so callers get a real, re-iterable list
    return list(split_token_stream(tokens))

55

56

57

def tokenize_string(content, lexer):
    """
    Use pygments to tokenize some content based on a lexer
    ensuring all original new lines and whitespace is preserved

    :param content: text to tokenize
    :param lexer: pygments lexer instance; its ``stripall``, ``stripnl`` and
        ``ensurenl`` attributes are overwritten with False
    :returns: generator of (token_css_class, token_text) tuples
    """

    # NOTE: these assignments modify the lexer object passed by the caller
    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False

    if isinstance(lexer, TextLexer):
        # plain text is not run through lex(); it is emitted as one Text token
        lexed = [(Token.Text, content)]
    else:
        lexed = lex(content, lexer)

    for token_type, token_text in lexed:
        yield pygment_token_class(token_type), token_text

74

75

76

def split_token_stream(tokens):
    """
    Take an iterable of (TokenType, text) tuples and split it into lines,
    yielding one list of (TokenType, text) tuples per line. The newline
    characters themselves are dropped.

    >>> list(split_token_stream([(TEXT, 'some\\ntext'), (TEXT, 'more\\n')]))
    [[(TEXT, 'some')],
     [(TEXT, 'text'), (TEXT, 'more')],
     [(TEXT, '')]]

    Text after the last newline (even when empty) always produces a final
    line; an empty token stream yields nothing.
    """

    line_tokens = []
    for token_class, token_text in tokens:
        parts = token_text.split('\n')
        # every part except the last one was terminated by a newline,
        # so it closes the line currently being collected
        for part in parts[:-1]:
            line_tokens.append((token_class, part))
            yield line_tokens
            line_tokens = []

        # the trailing part stays open; following tokens may extend the line
        line_tokens.append((token_class, parts[-1]))

    if line_tokens:
        yield line_tokens

97

98

99

def filenode_as_annotated_lines_tokens(filenode):
    """
    Take a file node and return a list of annotations => lines, if no annotation
    is found, it will be None.

    Consecutive lines sharing the same annotation are grouped together.
    This is a generator; the items it yields look like:

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """

    commit_cache = {}  # cache commit_getter lookups

    def _get_annotation(commit_id, commit_getter):
        # call commit_getter only once per commit_id
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_lookup = {
        line_no: _get_annotation(commit_id, commit_getter)
        for line_no, commit_id, commit_getter, line_content
        in filenode.annotate
    }

    # lines missing from the lookup get ``None`` as their annotation
    annotations_lines = ((annotation_lookup.get(line_no), line_no, tokens)
                         for line_no, tokens
                         in enumerate(filenode_as_lines_tokens(filenode), 1))

    grouped_annotations_lines = groupby(annotations_lines, lambda x: x[0])

    for annotation, group in grouped_annotations_lines:
        yield (
            annotation, [(line_no, tokens)
                         for (_, line_no, tokens) in group]
        )

148

149

150

def render_tokenstream(tokenstream):
    """
    Render a token stream to an html string.

    The stream is first grouped with ``rollup_tokenstream``; every group is
    wrapped in a ``<span>`` (carrying the token class when there is one) and
    every non-empty op inside it is wrapped in a tag named after the op,
    e.g. ``<ins>`` / ``<del>``. Token text is passed through ``html_escape``.

    :param tokenstream: token stream accepted by ``rollup_tokenstream``
    :returns: html string
    """
    result = []
    for token_class, token_ops_texts in rollup_tokenstream(tokenstream):

        # the format string must contain the %s placeholder, otherwise the
        # % operator raises TypeError for any non-empty token_class
        if token_class:
            result.append(u'<span class="%s">' % token_class)
        else:
            result.append(u'<span>')

        for op_tag, token_text in token_ops_texts:

            if op_tag:
                result.append(u'<%s>' % op_tag)

            escaped_text = html_escape(token_text)

            # TODO: dan: investigate showing hidden characters like space/nl/tab
            # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
            # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
            # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')

            result.append(escaped_text)

            if op_tag:
                result.append(u'</%s>' % op_tag)

        result.append(u'</span>')

    html = ''.join(result)
    return html

180

181

182

def rollup_tokenstream(tokenstream):
    """
    Group a token stream of the format:

        ('class', 'op', 'text')
        or
        ('class', 'text')

    into

        [('class1',
            [('op1', 'text'),
             ('op2', 'text')]),
         ('class2',
            [('op3', 'text')])]

    This is used to get the minimal tags necessary when
    rendering to html eg for a token stream ie.

    <span class="A"><ins>he</ins>llo</span>
    vs
    <span class="A"><ins>he</ins></span><span class="A">llo</span>

    If a 2 tuple is passed in, the output op will be an empty string.

    eg:

    >>> rollup_tokenstream([('classA', '',    'h'),
                            ('classA', 'del', 'ell'),
                            ('classA', '',    'o'),
                            ('classB', '',    ' '),
                            ('classA', '',    'the'),
                            ('classA', '',    're'),
                            ])

    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
     ('classB', [('', ' ')]),
     ('classA', [('', 'there')])]

    :param tokenstream: iterable of 2-tuples or 3-tuples as described above
    :returns: list of (token_class, [(op, text), ...]) tuples
    """
    # materialize first so the emptiness test and the peek at the first
    # item also work for iterators/generators, not only for lists
    tokenstream = list(tokenstream)
    if tokenstream and len(tokenstream[0]) == 2:
        tokenstream = ((t[0], '', t[1]) for t in tokenstream)

    result = []
    # outer grouping: consecutive tokens sharing a class
    for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
        ops = []
        # inner grouping: consecutive tokens sharing an op, text concatenated
        for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
            text = ''.join(t_text for _, _, t_text in token_text_list)
            ops.append((token_op, text))
        result.append((token_class, ops))
    return result

235

236

237

def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
    """
    Converts a list of (token_class, token_text) tuples to a list of
    (token_class, token_op, token_text) tuples where token_op is one of
    ('ins', 'del', '')

    :param old_tokens: list of (token_class, token_text) tuples of old line
    :param new_tokens: list of (token_class, token_text) tuples of new line
    :param use_diff_match_patch: boolean, will use google's diff match patch
        library which has options to 'smooth' out the character by character
        differences making nicer ins/del blocks
    :returns: tuple (old_tokens_result, new_tokens_result, similarity) where
        similarity is the difflib ratio of the joined old and new texts
    """

    old_tokens_result = []
    new_tokens_result = []

    similarity = difflib.SequenceMatcher(
        None,
        ''.join(token_text for token_class, token_text in old_tokens),
        ''.join(token_text for token_class, token_text in new_tokens)
    ).ratio()

    if similarity < 0.6:  # return, the blocks are too different
        for token_class, token_text in old_tokens:
            old_tokens_result.append((token_class, '', token_text))
        for token_class, token_text in new_tokens:
            new_tokens_result.append((token_class, '', token_text))
        return old_tokens_result, new_tokens_result, similarity

    token_sequence_matcher = difflib.SequenceMatcher(
        None,
        [x[1] for x in old_tokens],
        [x[1] for x in new_tokens])

    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
        # check the differences by token block types first to give a more
        # nicer "block" level replacement vs character diffs

        if tag == 'equal':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, '', token_text))
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, '', token_text))
        elif tag == 'delete':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, 'del', token_text))
        elif tag == 'insert':
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, 'ins', token_text))
        elif tag == 'replace':
            # if same type token blocks must be replaced, do a diff on the
            # characters in the token blocks to show individual changes

            # explode the tokens into one (token_class, char) pair per
            # character so every character keeps its highlighting class
            old_char_tokens = [
                (token_class, char)
                for token_class, token_text in old_tokens[o1:o2]
                for char in token_text]
            new_char_tokens = [
                (token_class, char)
                for token_class, token_text in new_tokens[n1:n2]
                for char in token_text]

            old_string = ''.join(char for _, char in old_char_tokens)
            new_string = ''.join(char for _, char in new_char_tokens)

            obuffer, nbuffer = [], []

            if use_diff_match_patch:
                dmp = diff_match_patch()
                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                reps = dmp.diff_main(old_string, new_string)
                dmp.diff_cleanupEfficiency(reps)

                # a / b: offsets of the current chunk in the old / new
                # character token lists
                a, b = 0, 0
                for op, rep in reps:
                    rep_len = len(rep)
                    if op == 0:
                        # text present in both old and new
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], '', c))
                            nbuffer.append((new_char_tokens[b+i][0], '', c))
                        a += rep_len
                        b += rep_len
                    elif op == -1:
                        # text only present in old
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], 'del', c))
                        a += rep_len
                    elif op == 1:
                        # text only present in new
                        for i, c in enumerate(rep):
                            nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                        b += rep_len
            else:
                # difflib opcodes are only needed on this branch
                char_sequence = difflib.SequenceMatcher(
                    None, old_string, new_string)
                for ctag, co1, co2, cn1, cn2 in char_sequence.get_opcodes():
                    if ctag == 'equal':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, '', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, '', token_text))
                    elif ctag == 'delete':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                    elif ctag == 'insert':
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))
                    elif ctag == 'replace':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))

            old_tokens_result.extend(obuffer)
            new_tokens_result.extend(nbuffer)

    return old_tokens_result, new_tokens_result, similarity

354

355

356

def diffset_node_getter(commit):
    """
    Build a node lookup function bound to ``commit``.

    :param commit: object exposing ``get_node(fname)``
    :returns: function taking a file name and returning
        ``commit.get_node(fname)``, or None when that call raises
        NodeDoesNotExistError
    """
    def get_node(fname):
        try:
            return commit.get_node(fname)
        except NodeDoesNotExistError:
            return None

    return get_node

364

365

366

class DiffSet(object):
    """
    An object for parsing the diff result from diffs.DiffProcessor and
    adding highlighting, side by side/unified renderings and line diffs
    """

    # available highlight modes
    HL_REAL = 'REAL'  # highlights using original file, slow
    HL_FAST = 'FAST'  # highlights using just the line, fast but not correct
                      # in the case of multiline code
    HL_NONE = 'NONE'  # no highlighting, fastest

376

377

def __init__(self, highlight_mode=HL_REAL, repo_name=None,
             source_repo_name=None,
             source_node_getter=lambda filename: None,
             target_node_getter=lambda filename: None,
             source_nodes=None, target_nodes=None,
             # files over this size will use fast highlighting
             max_file_size_limit=150 * 1024,
             ):
    """
    :param highlight_mode: one of the HL_* constants, defaults to HL_REAL
    :param repo_name: stored as ``self.repo_name``
    :param source_repo_name: stored as ``self.source_repo_name``; falls back
        to ``repo_name`` when falsy
    :param source_node_getter: callable taking a filename; the default
        always returns None
    :param target_node_getter: callable taking a filename; the default
        always returns None
    :param source_nodes: optional mapping stored as ``self.source_nodes``;
        an empty dict is used when falsy
    :param target_nodes: optional mapping stored as ``self.target_nodes``;
        an empty dict is used when falsy
    :param max_file_size_limit: size limit, defaults to 150 * 1024
    """

    self.highlight_mode = highlight_mode
    self.highlighted_filenodes = {}
    self.source_node_getter = source_node_getter
    self.target_node_getter = target_node_getter
    self.source_nodes = source_nodes or {}
    self.target_nodes = target_nodes or {}
    self.repo_name = repo_name
    self.source_repo_name = source_repo_name or repo_name
    self.max_file_size_limit = max_file_size_limit

395

396

def render_patchset(self, patchset, source_ref=None, target_ref=None):
    """
    Render every patch of ``patchset`` and collect the results together
    with summary counters.

    :param patchset: iterable of patch dicts; each one is read for its
        'filename' and 'stats' keys ('binary', 'added', 'deleted') and is
        passed to ``self.render_patch``
    :param source_ref: stored on the result and on every rendered file
    :param target_ref: stored on the result and on every rendered file
    :returns: AttributeDict with lines_added, lines_deleted, changed_files,
        files, file_stats, limited_diff, repo_name, source_repo_name,
        source_ref and target_ref
    """
    diffset = AttributeDict(dict(
        lines_added=0,
        lines_deleted=0,
        changed_files=0,
        files=[],
        file_stats={},
        limited_diff=isinstance(patchset, LimitedDiffContainer),
        repo_name=self.repo_name,
        source_repo_name=self.source_repo_name,
        source_ref=source_ref,
        target_ref=target_ref,
    ))
    for patch in patchset:
        diffset.file_stats[patch['filename']] = patch['stats']
        filediff = self.render_patch(patch)
        # give every rendered file a reference back to the diffset context
        filediff.diffset = StrictAttributeDict(dict(
            source_ref=diffset.source_ref,
            target_ref=diffset.target_ref,
            repo_name=diffset.repo_name,
            source_repo_name=diffset.source_repo_name,
        ))
        diffset.files.append(filediff)
        diffset.changed_files += 1
        # line counters are only accumulated for non-binary patches
        if not patch['stats']['binary']:
            diffset.lines_added += patch['stats']['added']
            diffset.lines_deleted += patch['stats']['deleted']

    return diffset

425

426

# NOTE(review): this dict is defined once, outside any method, and
# _get_lexer_for_filename only mutates it through ``self`` - it looks like
# the cache is shared by every instance; confirm that is intended.
_lexer_cache = {}

def _get_lexer_for_filename(self, filename, filenode=None):
    """
    Return the lexer to use for ``filename``, caching it per filename.

    :param filename: file name, also used as the cache key
    :param filenode: optional node; when truthy its ``lexer`` and
        ``extension`` attributes are used, otherwise the lexer comes from
        ``FileNode.get_lexer(filename=filename)`` and the extension is the
        text after the last '.' in ``filename``
    :returns: ``get_custom_lexer(extension)`` when that is truthy, else the
        lexer found as described above
    """
    # cached because we might need to call it twice for source/target
    if filename not in self._lexer_cache:
        if filenode:
            lexer = filenode.lexer
            extension = filenode.extension
        else:
            lexer = FileNode.get_lexer(filename=filename)
            extension = filename.split('.')[-1]

        # a custom lexer for the extension wins over the detected one
        lexer = get_custom_lexer(extension) or lexer
        self._lexer_cache[filename] = lexer
    return self._lexer_cache[filename]

441

442

def render_patch(self, patch):

442

def render_patch(self, patch):

443

log.debug('rendering diff for %r', patch['filename'])

443

log.debug('rendering diff for %r', patch['filename'])

444

445

source_filename = patch['original_filename']

445

source_filename = patch['original_filename']

446

target_filename = patch['filename']

446

target_filename = patch['filename']

447

448

source_lexer = plain_text_lexer

448

source_lexer = plain_text_lexer

449

target_lexer = plain_text_lexer

449

target_lexer = plain_text_lexer

450

451

if not patch['stats']['binary']:

451

if not patch['stats']['binary']:

452

node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None

452

node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None

453

hl_mode = node_hl_mode or self.highlight_mode

453

hl_mode = node_hl_mode or self.highlight_mode

454

455

if hl_mode == self.HL_REAL:

455

if hl_mode == self.HL_REAL:

456

if (source_filename and patch['operation'] in ('D', 'M')

456

if (source_filename and patch['operation'] in ('D', 'M')

457

and source_filename not in self.source_nodes):

457

and source_filename not in self.source_nodes):

458

self.source_nodes[source_filename] = (

458

self.source_nodes[source_filename] = (

459

self.source_node_getter(source_filename))

459

self.source_node_getter(source_filename))

460

461

if (target_filename and patch['operation'] in ('A', 'M')

461

if (target_filename and patch['operation'] in ('A', 'M')

462

and target_filename not in self.target_nodes):

462

and target_filename not in self.target_nodes):

463

self.target_nodes[target_filename] = (

463

self.target_nodes[target_filename] = (

464

self.target_node_getter(target_filename))

464

self.target_node_getter(target_filename))

465

466

elif hl_mode == self.HL_FAST:

466

elif hl_mode == self.HL_FAST:

467

source_lexer = self._get_lexer_for_filename(source_filename)

467

source_lexer = self._get_lexer_for_filename(source_filename)

468

target_lexer = self._get_lexer_for_filename(target_filename)

468

target_lexer = self._get_lexer_for_filename(target_filename)

469

470

source_file = self.source_nodes.get(source_filename, source_filename)

470

source_file = self.source_nodes.get(source_filename, source_filename)

471

target_file = self.target_nodes.get(target_filename, target_filename)

471

target_file = self.target_nodes.get(target_filename, target_filename)

472

473

source_filenode, target_filenode = None, None

473

source_filenode, target_filenode = None, None

474

475

# TODO: dan: FileNode.lexer works on the content of the file - which

475

# TODO: dan: FileNode.lexer works on the content of the file - which

476

# can be slow - issue #4289 explains a lexer clean up - which once

476

# can be slow - issue #4289 explains a lexer clean up - which once

477

# done can allow caching a lexer for a filenode to avoid the file lookup

477

# done can allow caching a lexer for a filenode to avoid the file lookup

478

if isinstance(source_file, FileNode):

478

if isinstance(source_file, FileNode):

479

source_filenode = source_file

479

source_filenode = source_file

480

#source_lexer = source_file.lexer

480

#source_lexer = source_file.lexer

481

source_lexer = self._get_lexer_for_filename(source_filename)

481

source_lexer = self._get_lexer_for_filename(source_filename)

482

source_file.lexer = source_lexer

482

source_file.lexer = source_lexer

483

484

if isinstance(target_file, FileNode):

484

if isinstance(target_file, FileNode):

485

target_filenode = target_file

485

target_filenode = target_file

486

#target_lexer = target_file.lexer

486

#target_lexer = target_file.lexer

487

target_lexer = self._get_lexer_for_filename(target_filename)

487

target_lexer = self._get_lexer_for_filename(target_filename)

488

target_file.lexer = target_lexer

488

target_file.lexer = target_lexer

489

490

source_file_path, target_file_path = None, None

490

source_file_path, target_file_path = None, None

491

492

if source_filename != '/dev/null':

492

if source_filename != '/dev/null':

493

source_file_path = source_filename

493

source_file_path = source_filename

494

if target_filename != '/dev/null':

494

if target_filename != '/dev/null':

495

target_file_path = target_filename

495

target_file_path = target_filename

496

497

source_file_type = source_lexer.name

497

source_file_type = source_lexer.name

498

target_file_type = target_lexer.name

498

target_file_type = target_lexer.name

499

500

filediff = AttributeDict({

500

filediff = AttributeDict({

501

'source_file_path': source_file_path,

501

'source_file_path': source_file_path,

502

'target_file_path': target_file_path,

502

'target_file_path': target_file_path,

503

'source_filenode': source_filenode,

503

'source_filenode': source_filenode,

504

'target_filenode': target_filenode,

504

'target_filenode': target_filenode,

505

'source_file_type': target_file_type,

505

'source_file_type': target_file_type,

506

'target_file_type': source_file_type,

506

'target_file_type': source_file_type,

507

'patch': {'filename': patch['filename'], 'stats': patch['stats']},

507

'patch': {'filename': patch['filename'], 'stats': patch['stats']},

508

'operation': patch['operation'],

508

'operation': patch['operation'],

509

'source_mode': patch['stats']['old_mode'],

509

'source_mode': patch['stats']['old_mode'],

510

'target_mode': patch['stats']['new_mode'],

510

'target_mode': patch['stats']['new_mode'],

511

'limited_diff': isinstance(patch, LimitedDiffContainer),

511

'limited_diff': isinstance(patch, LimitedDiffContainer),

512

'hunks': [],

512

'hunks': [],

513

'hunk_ops': None,

513

'hunk_ops': None,

514

'diffset': self,

514

'diffset': self,

515

})

515

})

516

file_chunks = patch['chunks'][1:]

516

file_chunks = patch['chunks'][1:]

517

for hunk in file_chunks:

517

for hunk in file_chunks:

518

hunkbit = self.parse_hunk(hunk, source_file, target_file)

518

hunkbit = self.parse_hunk(hunk, source_file, target_file)

519

hunkbit.source_file_path = source_file_path

519

hunkbit.source_file_path = source_file_path

520

hunkbit.target_file_path = target_file_path

520

hunkbit.target_file_path = target_file_path

521

filediff.hunks.append(hunkbit)

521

filediff.hunks.append(hunkbit)

522

523

# Simulate hunk on OPS type line which doesn't really contain any diff

523

# Simulate hunk on OPS type line which doesn't really contain any diff

524

# this allows commenting on those

524

# this allows commenting on those

525

if not file_chunks:

525

if not file_chunks:

526

actions = []

526

actions = []

527

for op_id, op_text in filediff.patch['stats']['ops'].items():

527

for op_id, op_text in filediff.patch['stats']['ops'].items():

528

if op_id == DEL_FILENODE:

528

if op_id == DEL_FILENODE:

529

actions.append(u'file was deleted')

529

actions.append(u'file was removed')

530

elif op_id == BIN_FILENODE:

530

elif op_id == BIN_FILENODE:

531

actions.append(u'binary diff hidden')

531

actions.append(u'binary diff hidden')

532

else:

532

else:

533

actions.append(safe_unicode(op_text))

533

actions.append(safe_unicode(op_text))

534

action_line = u'NO CONTENT: ' + \

534

action_line = u'NO CONTENT: ' + \

535

u', '.join(actions) or u'UNDEFINED_ACTION'

535

u', '.join(actions) or u'UNDEFINED_ACTION'

536

537

hunk_ops = {'source_length': 0, 'source_start': 0,

537

hunk_ops = {'source_length': 0, 'source_start': 0,

538

'lines': [

538

'lines': [

539

{'new_lineno': 0, 'old_lineno': 1,

539

{'new_lineno': 0, 'old_lineno': 1,

540

'action': 'unmod-no-hl', 'line': action_line}

540

'action': 'unmod-no-hl', 'line': action_line}

541

],

541

],

542

'section_header': u'', 'target_start': 1, 'target_length': 1}

542

'section_header': u'', 'target_start': 1, 'target_length': 1}

543

544

hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)

544

hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)

545

hunkbit.source_file_path = source_file_path

545

hunkbit.source_file_path = source_file_path

546

hunkbit.target_file_path = target_file_path

546

hunkbit.target_file_path = target_file_path

547

filediff.hunk_ops = hunkbit

547

filediff.hunk_ops = hunkbit

548

return filediff

548

return filediff

549

550

def parse_hunk(self, hunk, source_file, target_file):

550

def parse_hunk(self, hunk, source_file, target_file):

551

result = AttributeDict(dict(

551

result = AttributeDict(dict(

552

source_start=hunk['source_start'],

552

source_start=hunk['source_start'],

553

source_length=hunk['source_length'],

553

source_length=hunk['source_length'],

554

target_start=hunk['target_start'],

554

target_start=hunk['target_start'],

555

target_length=hunk['target_length'],

555

target_length=hunk['target_length'],

556

section_header=hunk['section_header'],

556

section_header=hunk['section_header'],

557

lines=[],

557

lines=[],

558

))

558

))

559

before, after = [], []

559

before, after = [], []

560

561

for line in hunk['lines']:

561

for line in hunk['lines']:

562

if line['action'] in ['unmod', 'unmod-no-hl']:

562

if line['action'] in ['unmod', 'unmod-no-hl']:

563

no_hl = line['action'] == 'unmod-no-hl'

563

no_hl = line['action'] == 'unmod-no-hl'

564

result.lines.extend(

564

result.lines.extend(

565

self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))

565

self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))

566

after.append(line)

566

after.append(line)

567

before.append(line)

567

before.append(line)

568

elif line['action'] == 'add':

568

elif line['action'] == 'add':

569

after.append(line)

569

after.append(line)

570

elif line['action'] == 'del':

570

elif line['action'] == 'del':

571

before.append(line)

571

before.append(line)

572

elif line['action'] == 'old-no-nl':

572

elif line['action'] == 'old-no-nl':

573

before.append(line)

573

before.append(line)

574

elif line['action'] == 'new-no-nl':

574

elif line['action'] == 'new-no-nl':

575

after.append(line)

575

after.append(line)

576

577

all_actions = [x['action'] for x in after] + [x['action'] for x in before]

577

all_actions = [x['action'] for x in after] + [x['action'] for x in before]

578

no_hl = {x for x in all_actions} == {'unmod-no-hl'}

578

no_hl = {x for x in all_actions} == {'unmod-no-hl'}

579

result.lines.extend(

579

result.lines.extend(

580

self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))

580

self.parse_lines(before, after, source_file, target_file, no_hl=no_hl))

581

# NOTE(marcink): we must keep list() call here so we can cache the result...

581

# NOTE(marcink): we must keep list() call here so we can cache the result...

582

result.unified = list(self.as_unified(result.lines))

582

result.unified = list(self.as_unified(result.lines))

583

result.sideside = result.lines

583

result.sideside = result.lines

584

585

return result

585

return result

586

587

def parse_lines(self, before_lines, after_lines, source_file, target_file,

587

def parse_lines(self, before_lines, after_lines, source_file, target_file,

588

no_hl=False):

588

no_hl=False):

589

# TODO: dan: investigate doing the diff comparison and fast highlighting

589

# TODO: dan: investigate doing the diff comparison and fast highlighting

590

# on the entire before and after buffered block lines rather than by

590

# on the entire before and after buffered block lines rather than by

591

# line, this means we can get better 'fast' highlighting if the context

591

# line, this means we can get better 'fast' highlighting if the context

592

# allows it - eg.

592

# allows it - eg.

593

# line 4: """

593

# line 4: """

594

# line 5: this gets highlighted as a string

594

# line 5: this gets highlighted as a string

595

# line 6: """

595

# line 6: """

596

597

lines = []

597

lines = []

598

599

before_newline = AttributeDict()

599

before_newline = AttributeDict()

600

after_newline = AttributeDict()

600

after_newline = AttributeDict()

601

if before_lines and before_lines[-1]['action'] == 'old-no-nl':

601

if before_lines and before_lines[-1]['action'] == 'old-no-nl':

602

before_newline_line = before_lines.pop(-1)

602

before_newline_line = before_lines.pop(-1)

603

before_newline.content = '\n {}'.format(

603

before_newline.content = '\n {}'.format(

604

render_tokenstream(

604

render_tokenstream(

605

[(x[0], '', x[1])

605

[(x[0], '', x[1])

606

for x in [('nonl', before_newline_line['line'])]]))

606

for x in [('nonl', before_newline_line['line'])]]))

607

608

if after_lines and after_lines[-1]['action'] == 'new-no-nl':

608

if after_lines and after_lines[-1]['action'] == 'new-no-nl':

609

after_newline_line = after_lines.pop(-1)

609

after_newline_line = after_lines.pop(-1)

610

after_newline.content = '\n {}'.format(

610

after_newline.content = '\n {}'.format(

611

render_tokenstream(

611

render_tokenstream(

612

[(x[0], '', x[1])

612

[(x[0], '', x[1])

613

for x in [('nonl', after_newline_line['line'])]]))

613

for x in [('nonl', after_newline_line['line'])]]))

614

615

while before_lines or after_lines:

615

while before_lines or after_lines:

616

before, after = None, None

616

before, after = None, None

617

before_tokens, after_tokens = None, None

617

before_tokens, after_tokens = None, None

618

619

if before_lines:

619

if before_lines:

620

before = before_lines.pop(0)

620

before = before_lines.pop(0)

621

if after_lines:

621

if after_lines:

622

after = after_lines.pop(0)

622

after = after_lines.pop(0)

623

624

original = AttributeDict()

624

original = AttributeDict()

625

modified = AttributeDict()

625

modified = AttributeDict()

626

627

if before:

627

if before:

628

if before['action'] == 'old-no-nl':

628

if before['action'] == 'old-no-nl':

629

before_tokens = [('nonl', before['line'])]

629

before_tokens = [('nonl', before['line'])]

630

else:

630

else:

631

before_tokens = self.get_line_tokens(

631

before_tokens = self.get_line_tokens(

632

line_text=before['line'], line_number=before['old_lineno'],

632

line_text=before['line'], line_number=before['old_lineno'],

633

input_file=source_file, no_hl=no_hl)

633

input_file=source_file, no_hl=no_hl)

634

original.lineno = before['old_lineno']

634

original.lineno = before['old_lineno']

635

original.content = before['line']

635

original.content = before['line']

636

original.action = self.action_to_op(before['action'])

636

original.action = self.action_to_op(before['action'])

637

638

original.get_comment_args = (

638

original.get_comment_args = (

639

source_file, 'o', before['old_lineno'])

639

source_file, 'o', before['old_lineno'])

640

641

if after:

641

if after:

642

if after['action'] == 'new-no-nl':

642

if after['action'] == 'new-no-nl':

643

after_tokens = [('nonl', after['line'])]

643

after_tokens = [('nonl', after['line'])]

644

else:

644

else:

645

after_tokens = self.get_line_tokens(

645

after_tokens = self.get_line_tokens(

646

line_text=after['line'], line_number=after['new_lineno'],

646

line_text=after['line'], line_number=after['new_lineno'],

647

input_file=target_file, no_hl=no_hl)

647

input_file=target_file, no_hl=no_hl)

648

modified.lineno = after['new_lineno']

648

modified.lineno = after['new_lineno']

649

modified.content = after['line']

649

modified.content = after['line']

650

modified.action = self.action_to_op(after['action'])

650

modified.action = self.action_to_op(after['action'])

651

652

modified.get_comment_args = (target_file, 'n', after['new_lineno'])

652

modified.get_comment_args = (target_file, 'n', after['new_lineno'])

653

654

# diff the lines

654

# diff the lines

655

if before_tokens and after_tokens:

655

if before_tokens and after_tokens:

656

o_tokens, m_tokens, similarity = tokens_diff(

656

o_tokens, m_tokens, similarity = tokens_diff(

657

before_tokens, after_tokens)

657

before_tokens, after_tokens)

658

original.content = render_tokenstream(o_tokens)

658

original.content = render_tokenstream(o_tokens)

659

modified.content = render_tokenstream(m_tokens)

659

modified.content = render_tokenstream(m_tokens)

660

elif before_tokens:

660

elif before_tokens:

661

original.content = render_tokenstream(

661

original.content = render_tokenstream(

662

[(x[0], '', x[1]) for x in before_tokens])

662

[(x[0], '', x[1]) for x in before_tokens])

663

elif after_tokens:

663

elif after_tokens:

664

modified.content = render_tokenstream(

664

modified.content = render_tokenstream(

665

[(x[0], '', x[1]) for x in after_tokens])

665

[(x[0], '', x[1]) for x in after_tokens])

666

667

if not before_lines and before_newline:

667

if not before_lines and before_newline:

668

original.content += before_newline.content

668

original.content += before_newline.content

669

before_newline = None

669

before_newline = None

670

if not after_lines and after_newline:

670

if not after_lines and after_newline:

671

modified.content += after_newline.content

671

modified.content += after_newline.content

672

after_newline = None

672

after_newline = None

673

674

lines.append(AttributeDict({

674

lines.append(AttributeDict({

675

'original': original,

675

'original': original,

676

'modified': modified,

676

'modified': modified,

677

}))

677

}))

678

679

return lines

679

return lines

680

681

def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):

681

def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):

682

filenode = None

682

filenode = None

683

filename = None

683

filename = None

684

685

if isinstance(input_file, basestring):

685

if isinstance(input_file, basestring):

686

filename = input_file

686

filename = input_file

687

elif isinstance(input_file, FileNode):

687

elif isinstance(input_file, FileNode):

688

filenode = input_file

688

filenode = input_file

689

filename = input_file.unicode_path

689

filename = input_file.unicode_path

690

691

hl_mode = self.HL_NONE if no_hl else self.highlight_mode

691

hl_mode = self.HL_NONE if no_hl else self.highlight_mode

692

if hl_mode == self.HL_REAL and filenode:

692

if hl_mode == self.HL_REAL and filenode:

693

lexer = self._get_lexer_for_filename(filename)

693

lexer = self._get_lexer_for_filename(filename)

694

file_size_allowed = input_file.size < self.max_file_size_limit

694

file_size_allowed = input_file.size < self.max_file_size_limit

695

if line_number and file_size_allowed:

695

if line_number and file_size_allowed:

696

return self.get_tokenized_filenode_line(

696

return self.get_tokenized_filenode_line(

697

input_file, line_number, lexer)

697

input_file, line_number, lexer)

698

699

if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:

699

if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:

700

lexer = self._get_lexer_for_filename(filename)

700

lexer = self._get_lexer_for_filename(filename)

701

return list(tokenize_string(line_text, lexer))

701

return list(tokenize_string(line_text, lexer))

702

703

return list(tokenize_string(line_text, plain_text_lexer))

703

return list(tokenize_string(line_text, plain_text_lexer))

704

705

def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):

705

def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):

706

707

if filenode not in self.highlighted_filenodes:

707

if filenode not in self.highlighted_filenodes:

708

tokenized_lines = filenode_as_lines_tokens(filenode, lexer)

708

tokenized_lines = filenode_as_lines_tokens(filenode, lexer)

709

self.highlighted_filenodes[filenode] = tokenized_lines

709

self.highlighted_filenodes[filenode] = tokenized_lines

710

return self.highlighted_filenodes[filenode][line_number - 1]

710

return self.highlighted_filenodes[filenode][line_number - 1]

711

712

def action_to_op(self, action):

712

def action_to_op(self, action):

713

return {

713

return {

714

'add': '+',

714

'add': '+',

715

'del': '-',

715

'del': '-',

716

'unmod': ' ',

716

'unmod': ' ',

717

'unmod-no-hl': ' ',

717

'unmod-no-hl': ' ',

718

'old-no-nl': ' ',

718

'old-no-nl': ' ',

719

'new-no-nl': ' ',

719

'new-no-nl': ' ',

720

}.get(action, action)

720

}.get(action, action)

721

722

def as_unified(self, lines):

722

def as_unified(self, lines):

723

"""

723

"""

724

Return a generator that yields the lines of a diff in unified order

724

Return a generator that yields the lines of a diff in unified order

725

"""

725

"""

726

def generator():

726

def generator():

727

buf = []

727

buf = []

728

for line in lines:

728

for line in lines:

729

730

if buf and not line.original or line.original.action == ' ':

730

if buf and not line.original or line.original.action == ' ':

731

for b in buf:

731

for b in buf:

732

yield b

732

yield b

733

buf = []

733

buf = []

734

735

if line.original:

735

if line.original:

736

if line.original.action == ' ':

736

if line.original.action == ' ':

737

yield (line.original.lineno, line.modified.lineno,

737

yield (line.original.lineno, line.modified.lineno,

738

line.original.action, line.original.content,

738

line.original.action, line.original.content,

739

line.original.get_comment_args)

739

line.original.get_comment_args)

740

continue

740

continue

741

742

if line.original.action == '-':

742

if line.original.action == '-':

743

yield (line.original.lineno, None,

743

yield (line.original.lineno, None,

744

line.original.action, line.original.content,

744

line.original.action, line.original.content,

745

line.original.get_comment_args)

745

line.original.get_comment_args)

746

747

if line.modified.action == '+':

747

if line.modified.action == '+':

748

buf.append((

748

buf.append((

749

None, line.modified.lineno,

749

None, line.modified.lineno,

750

line.modified.action, line.modified.content,

750

line.modified.action, line.modified.content,

751

line.modified.get_comment_args))

751

line.modified.get_comment_args))

752

continue

752

continue

753

754

if line.modified:

754

if line.modified:

755

yield (None, line.modified.lineno,

755

yield (None, line.modified.lineno,

756

line.modified.action, line.modified.content,

756

line.modified.action, line.modified.content,

757

line.modified.get_comment_args)

757

line.modified.get_comment_args)

758

759

for b in buf:

759

for b in buf:

760

yield b

760

yield b

761

762

return generator()

762

return generator()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             # Copyright (C) 2011-2018 RhodeCode GmbH
             #
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU Affero General Public License, version 3
             # (only), as published by the Free Software Foundation.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU Affero General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             #
             # This program is dual-licensed. If you wish to learn more about the
             # RhodeCode Enterprise Edition, including its added features, Support services,
             # and proprietary license terms, please see https://rhodecode.com/licenses/
             import logging
             import difflib
             from itertools import groupby
             from pygments import lex
             from pygments.formatters.html import _get_ttype_class as pygment_token_class
             from pygments.lexers.special import TextLexer, Token
             from pygments.lexers import get_lexer_by_name
             from rhodecode.lib.helpers import (
                 get_lexer_for_filenode, html_escape, get_custom_lexer)
             from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict, safe_unicode
             from rhodecode.lib.vcs.nodes import FileNode
             from rhodecode.lib.vcs.exceptions import VCSError, NodeDoesNotExistError
             from rhodecode.lib.diff_match_patch import diff_match_patch
             from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE
             # Shared pygments 'text' lexer, created with stripall/stripnl/ensurenl all
             # disabled so the input is passed through without whitespace or newline
             # normalisation.
             plain_text_lexer = get_lexer_by_name(
                 'text', stripall=False, stripnl=False, ensurenl=False)
             # Module-level logger named after this module.
             log = logging.getLogger(__name__)
             def filenode_as_lines_tokens(filenode, lexer=None):
                 """
                 Tokenize the content of ``filenode`` and return it split into lines.

                 :param filenode: node whose ``content`` attribute gets tokenized
                 :param lexer: lexer to tokenize with; when falsy, one is looked up
                     through ``get_lexer_for_filenode``
                 :return: list with one entry per line, each entry being the list of
                     token tuples produced by ``tokenize_string`` for that line
                 """
                 requested_lexer = lexer
                 if not lexer:
                     lexer = get_lexer_for_filenode(filenode)

                 log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
                           lexer, filenode, requested_lexer)

                 token_stream = tokenize_string(filenode.content, lexer)
                 return list(split_token_stream(token_stream))
             def tokenize_string(content, lexer):
                 """
                 Lazily tokenize ``content`` with the given pygments ``lexer``.

                 The lexer's ``stripall``, ``stripnl`` and ``ensurenl`` options are
                 switched off on the passed-in lexer object, so that all original new
                 lines and whitespace are preserved.

                 :param content: text to tokenize
                 :param lexer: pygments lexer instance (its options are modified)
                 :return: generator of ``(token_class, token_text)`` tuples, where
                     ``token_class`` is the value ``pygment_token_class`` returns for
                     the pygments token type
                 """
                 for option in ('stripall', 'stripnl', 'ensurenl'):
                     setattr(lexer, option, False)

                 if not isinstance(lexer, TextLexer):
                     raw_tokens = lex(content, lexer)
                 else:
                     # plain text is not lexed: the whole content is one Text token
                     raw_tokens = [(Token.Text, content)]

                 for ttype, text in raw_tokens:
                     yield pygment_token_class(ttype), text
             def split_token_stream(tokens):
                 """
                 Split a stream of ``(token_class, text)`` tuples on new lines and
                 yield one list of tuples per line. A token whose text spans several
                 lines is cut into one tuple per line, keeping its class.

                 eg::

                     split_token_stream([(TEXT, 'some\\ntext'), (TEXT, 'more\\n')])

                 yields::

                     [(TEXT, 'some')]
                     [(TEXT, 'text'), (TEXT, 'more')]
                     [(TEXT, '')]
                 """
                 current_line = []
                 for token_class, token_text in tokens:
                     pieces = token_text.split('\n')
                     # the last piece stays open: the line it belongs to is not finished
                     unfinished = pieces.pop()
                     for piece in pieces:
                         current_line.append((token_class, piece))
                         yield current_line
                         current_line = []
                     current_line.append((token_class, unfinished))

                 if current_line:
                     yield current_line
             def filenode_as_annotated_lines_tokens(filenode):
                 """
                 Yield ``(annotation, lines)`` pairs for a file node.

                 Consecutive lines that share the same annotation are grouped together;
                 ``lines`` is a list of ``(line_no, line_tokens)`` tuples. Lines for
                 which no annotation is found are grouped under ``None``.

                 eg::

                     [
                         (annotation1, [
                             (1, line1_tokens_list),
                             (2, line2_tokens_list),
                         ]),
                         (annotation2, [
                             (3, line1_tokens_list),
                         ]),
                         (None, [
                             (4, line1_tokens_list),
                         ]),
                         (annotation1, [
                             (5, line1_tokens_list),
                             (6, line2_tokens_list),
                         ])
                     ]
                 """
                 resolved_commits = {}  # commit_id -> result of its commit_getter()
                 line_annotations = {}
                 for line_no, commit_id, commit_getter, _content in filenode.annotate:
                     # call each commit getter only once per commit id
                     if commit_id not in resolved_commits:
                         resolved_commits[commit_id] = commit_getter()
                     line_annotations[line_no] = resolved_commits[commit_id]

                 tokenized_lines = filenode_as_lines_tokens(filenode)

                 def annotated_lines():
                     for line_no, tokens in enumerate(tokenized_lines, 1):
                         yield line_annotations.get(line_no), line_no, tokens

                 for annotation, members in groupby(annotated_lines(), lambda row: row[0]):
                     yield (
                         annotation,
                         [(line_no, tokens) for (_, line_no, tokens) in members]
                     )
             def render_tokenstream(tokenstream):
                 """
                 Render a token stream as an html string.

                 The stream is first grouped with ``rollup_tokenstream``; every class
                 group becomes one ``<span>`` (with a ``class`` attribute when the class
                 is truthy), and every text with an op is wrapped in a tag named after
                 that op. Token texts are passed through ``html_escape``.

                 :param tokenstream: token stream accepted by ``rollup_tokenstream``
                 :return: the joined html
                 """
                 html_parts = []
                 for css_class, op_text_pairs in rollup_tokenstream(tokenstream):
                     if css_class:
                         opening = u'<span class="%s">' % css_class
                     else:
                         opening = u'<span>'
                     html_parts.append(opening)

                     for op_tag, text in op_text_pairs:
                         # TODO: dan: investigate showing hidden characters like
                         # space/nl/tab by wrapping them in dedicated tags
                         escaped = html_escape(text)
                         if op_tag:
                             html_parts.extend(
                                 (u'<%s>' % op_tag, escaped, u'</%s>' % op_tag))
                         else:
                             html_parts.append(escaped)

                     html_parts.append(u'</span>')

                 return ''.join(html_parts)
             def rollup_tokenstream(tokenstream):
                 """
                 Group a token stream of ``('class', 'op', 'text')`` tuples (or of
                 ``('class', 'text')`` tuples) by consecutive class, and within each
                 class by consecutive op, joining the texts of each op run::

                     [('class1',
                         [('op1', 'text'),
                          ('op2', 'text')]),
                      ('class2',
                         [('op3', 'text')])]

                 This gives the minimal tags necessary when rendering to html, ie.

                     <span class="A"><ins>he</ins>llo</span>
                 vs
                     <span class="A"><ins>he</ins></span><span class="A">llo</span>

                 The tuple size is decided from the first element only; for 2-tuples
                 the output op is an empty string.

                 eg::

                     rollup_tokenstream([('classA', '',      'h'),
                                         ('classA', 'del',   'ell'),
                                         ('classA', '',      'o'),
                                         ('classB', '',      ' '),
                                         ('classA', '',      'the'),
                                         ('classA', '',      're'),
                                         ])

                     [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
                      ('classB', [('', ' ')]),
                      ('classA', [('', 'there')])]
                 """
                 if tokenstream and len(tokenstream[0]) == 2:
                     # normalise (class, text) to (class, '', text)
                     tokenstream = ((tok[0], '', tok[1]) for tok in tokenstream)

                 rolled = []
                 for token_class, class_run in groupby(tokenstream, lambda tok: tok[0]):
                     ops = [
                         (token_op, ''.join(text for _cls, _op, text in op_run))
                         for token_op, op_run in groupby(class_run, lambda tok: tok[1])
                     ]
                     rolled.append((token_class, ops))
                 return rolled
             def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
                 """
                 Converts a list of (token_class, token_text) tuples to a list of
                 (token_class, token_op, token_text) tuples where token_op is one of
                 ('ins', 'del', '')
                 :param old_tokens: list of (token_class, token_text) tuples of old line
                 :param new_tokens: list of (token_class, token_text) tuples of new line
                 :param use_diff_match_patch: boolean, will use google's diff match patch
                     library which has options to 'smooth' out the character by character
                     differences making nicer ins/del blocks
                 """
                 old_tokens_result = []
                 new_tokens_result = []
                 similarity = difflib.SequenceMatcher(None,
                     ''.join(token_text for token_class, token_text in old_tokens),
                     ''.join(token_text for token_class, token_text in new_tokens)
                 ).ratio()
                 if similarity < 0.6: # return, the blocks are too different
                     for token_class, token_text in old_tokens:
                         old_tokens_result.append((token_class, '', token_text))
                     for token_class, token_text in new_tokens:
                         new_tokens_result.append((token_class, '', token_text))
                     return old_tokens_result, new_tokens_result, similarity
                 token_sequence_matcher = difflib.SequenceMatcher(None,
                     [x[1] for x in old_tokens],
                     [x[1] for x in new_tokens])
                 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
                     # check the differences by token block types first to give a more
                     # nicer "block" level replacement vs character diffs
                     if tag == 'equal':
                         for token_class, token_text in old_tokens[o1:o2]:
                             old_tokens_result.append((token_class, '', token_text))
                         for token_class, token_text in new_tokens[n1:n2]:
                             new_tokens_result.append((token_class, '', token_text))
                     elif tag == 'delete':
                         for token_class, token_text in old_tokens[o1:o2]:
                             old_tokens_result.append((token_class, 'del', token_text))
                     elif tag == 'insert':
                         for token_class, token_text in new_tokens[n1:n2]:
                             new_tokens_result.append((token_class, 'ins', token_text))
                     elif tag == 'replace':
                         # if same type token blocks must be replaced, do a diff on the
                         # characters in the token blocks to show individual changes
                         old_char_tokens = []
                         new_char_tokens = []
                         for token_class, token_text in old_tokens[o1:o2]:
                             for char in token_text:
                                 old_char_tokens.append((token_class, char))
                         for token_class, token_text in new_tokens[n1:n2]:
                             for char in token_text:
                                 new_char_tokens.append((token_class, char))
                         old_string = ''.join([token_text for
                             token_class, token_text in old_char_tokens])
                         new_string = ''.join([token_text for
                             token_class, token_text in new_char_tokens])
                         char_sequence = difflib.SequenceMatcher(
                             None, old_string, new_string)
                         copcodes = char_sequence.get_opcodes()
                         obuffer, nbuffer = [], []
                         if use_diff_match_patch:
                             dmp = diff_match_patch()
                             dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                             reps = dmp.diff_main(old_string, new_string)
                             dmp.diff_cleanupEfficiency(reps)
                             a, b = 0, 0
                             for op, rep in reps:
                                 l = len(rep)
                                 if op == 0:
                                     for i, c in enumerate(rep):
                                         obuffer.append((old_char_tokens[a+i][0], '', c))
                                         nbuffer.append((new_char_tokens[b+i][0], '', c))
                                     a += l
                                     b += l
                                 elif op == -1:
                                     for i, c in enumerate(rep):
                                         obuffer.append((old_char_tokens[a+i][0], 'del', c))
                                     a += l
                                 elif op == 1:
                                     for i, c in enumerate(rep):
                                         nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                                     b += l
                         else:
                             for ctag, co1, co2, cn1, cn2 in copcodes:
                                 if ctag == 'equal':
                                     for token_class, token_text in old_char_tokens[co1:co2]:
                                         obuffer.append((token_class, '', token_text))
                                     for token_class, token_text in new_char_tokens[cn1:cn2]:
                                         nbuffer.append((token_class, '', token_text))
                                 elif ctag == 'delete':
                                     for token_class, token_text in old_char_tokens[co1:co2]:
                                         obuffer.append((token_class, 'del', token_text))
                                 elif ctag == 'insert':
                                     for token_class, token_text in new_char_tokens[cn1:cn2]:
                                         nbuffer.append((token_class, 'ins', token_text))
                                 elif ctag == 'replace':
                                     for token_class, token_text in old_char_tokens[co1:co2]:
                                         obuffer.append((token_class, 'del', token_text))
                                     for token_class, token_text in new_char_tokens[cn1:cn2]:
                                         nbuffer.append((token_class, 'ins', token_text))
                         old_tokens_result.extend(obuffer)
                         new_tokens_result.extend(nbuffer)
                 return old_tokens_result, new_tokens_result, similarity
             def diffset_node_getter(commit):
                 """
                 Build a file-name lookup callable bound to `commit`.

                 :param commit: object exposing `get_node(fname)`
                 :return: callable taking a file name and returning whatever
                     `commit.get_node` returns, or None when that call raises
                     NodeDoesNotExistError
                 """
                 def lookup(fname):
                     try:
                         node = commit.get_node(fname)
                     except NodeDoesNotExistError:
                         # a missing file is reported as None instead of an error
                         node = None
                     return node
                 return lookup
             class DiffSet(object):
                 """
                 An object for parsing the diff result from diffs.DiffProcessor and
                 adding highlighting, side by side/unified renderings and line diffs
                 """
                 HL_REAL = 'REAL'  # highlights using original file, slow
                 HL_FAST = 'FAST'  # highlights using just the line, fast but not correct
                                   # in the case of multiline code
                 HL_NONE = 'NONE'  # no highlighting, fastest
                 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
                              source_repo_name=None,
                              source_node_getter=lambda filename: None,
                              target_node_getter=lambda filename: None,
                              source_nodes=None, target_nodes=None,
                              # files over this size will use fast highlighting
                              max_file_size_limit=150 * 1024,
                              ):
                     self.highlight_mode = highlight_mode
                     self.highlighted_filenodes = {}
                     self.source_node_getter = source_node_getter
                     self.target_node_getter = target_node_getter
                     self.source_nodes = source_nodes or {}
                     self.target_nodes = target_nodes or {}
                     self.repo_name = repo_name
                     self.source_repo_name = source_repo_name or repo_name
                     self.max_file_size_limit = max_file_size_limit
                 def render_patchset(self, patchset, source_ref=None, target_ref=None):
                     """
                     Render every patch of `patchset` and collect the totals.

                     :param patchset: iterable of patch mappings; each one is passed to
                         `render_patch` and must provide 'filename' and 'stats'
                     :param source_ref: stored on the result and on each file diff
                     :param target_ref: stored on the result and on each file diff
                     :return: AttributeDict with the rendered `files`, per-file
                         `file_stats`, `changed_files` and added/deleted line counts
                     """
                     diffset = AttributeDict({
                         'lines_added': 0,
                         'lines_deleted': 0,
                         'changed_files': 0,
                         'files': [],
                         'file_stats': {},
                         'limited_diff': isinstance(patchset, LimitedDiffContainer),
                         'repo_name': self.repo_name,
                         'source_repo_name': self.source_repo_name,
                         'source_ref': source_ref,
                         'target_ref': target_ref,
                     })

                     for patch in patchset:
                         stats = patch['stats']
                         diffset.file_stats[patch['filename']] = stats

                         rendered = self.render_patch(patch)
                         # every file diff carries the refs/repo names of its diff set
                         rendered.diffset = StrictAttributeDict({
                             'source_ref': diffset.source_ref,
                             'target_ref': diffset.target_ref,
                             'repo_name': diffset.repo_name,
                             'source_repo_name': diffset.source_repo_name,
                         })
                         diffset.files.append(rendered)
                         diffset.changed_files += 1

                         # binary files count as changed but add no line totals
                         if stats['binary']:
                             continue
                         diffset.lines_added += stats['added']
                         diffset.lines_deleted += stats['deleted']

                     return diffset
                 _lexer_cache = {}
                 def _get_lexer_for_filename(self, filename, filenode=None):
                     # cached because we might need to call it twice for source/target
                     if filename not in self._lexer_cache:
                         if filenode:
                             lexer = filenode.lexer
                             extension = filenode.extension
                         else:
                             lexer = FileNode.get_lexer(filename=filename)
                             extension = filename.split('.')[-1]
                         lexer = get_custom_lexer(extension) or lexer
                         self._lexer_cache[filename] = lexer
                     return self._lexer_cache[filename]
                 def render_patch(self, patch):
                     """
                     Build the renderable file-diff structure for one parsed patch.

                     :param patch: mapping describing a single file of the diff; the
                         keys read here are 'filename', 'original_filename',
                         'operation', 'stats' and 'chunks'
                     :return: AttributeDict holding the source/target paths, file
                         nodes, lexer names and file modes, the parsed `hunks`, and
                         `hunk_ops` - a synthetic one-line hunk that is only set when
                         the patch has no content hunks
                     """
                     log.debug('rendering diff for %r', patch['filename'])

                     source_filename = patch['original_filename']
                     target_filename = patch['filename']

                     source_lexer = plain_text_lexer
                     target_lexer = plain_text_lexer

                     if not patch['stats']['binary']:
                         # a patch without any chunks has nothing to highlight
                         node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
                         hl_mode = node_hl_mode or self.highlight_mode

                         if hl_mode == self.HL_REAL:
                             # fetch the file nodes lazily, and only for the sides
                             # that take part in this operation
                             if (source_filename and patch['operation'] in ('D', 'M')
                                     and source_filename not in self.source_nodes):
                                 self.source_nodes[source_filename] = (
                                     self.source_node_getter(source_filename))

                             if (target_filename and patch['operation'] in ('A', 'M')
                                     and target_filename not in self.target_nodes):
                                 self.target_nodes[target_filename] = (
                                     self.target_node_getter(target_filename))

                         elif hl_mode == self.HL_FAST:
                             source_lexer = self._get_lexer_for_filename(source_filename)
                             target_lexer = self._get_lexer_for_filename(target_filename)

                     # fall back to the plain file name when no node is known
                     source_file = self.source_nodes.get(source_filename, source_filename)
                     target_file = self.target_nodes.get(target_filename, target_filename)

                     source_filenode, target_filenode = None, None

                     # TODO: dan: FileNode.lexer works on the content of the file - which
                     # can be slow - issue #4289 explains a lexer clean up - which once
                     # done can allow caching a lexer for a filenode to avoid the file lookup
                     if isinstance(source_file, FileNode):
                         source_filenode = source_file
                         source_lexer = self._get_lexer_for_filename(source_filename)
                         source_file.lexer = source_lexer

                     if isinstance(target_file, FileNode):
                         target_filenode = target_file
                         target_lexer = self._get_lexer_for_filename(target_filename)
                         target_file.lexer = target_lexer

                     # '/dev/null' marks a side that has no file, report it as None
                     source_file_path, target_file_path = None, None

                     if source_filename != '/dev/null':
                         source_file_path = source_filename
                     if target_filename != '/dev/null':
                         target_file_path = target_filename

                     source_file_type = source_lexer.name
                     target_file_type = target_lexer.name

                     filediff = AttributeDict({
                         'source_file_path': source_file_path,
                         'target_file_path': target_file_path,
                         'source_filenode': source_filenode,
                         'target_filenode': target_filenode,
                         # each side reports the name of its own lexer (these two
                         # values used to be swapped)
                         'source_file_type': source_file_type,
                         'target_file_type': target_file_type,
                         'patch': {'filename': patch['filename'], 'stats': patch['stats']},
                         'operation': patch['operation'],
                         'source_mode': patch['stats']['old_mode'],
                         'target_mode': patch['stats']['new_mode'],
                         'limited_diff': isinstance(patch, LimitedDiffContainer),
                         'hunks': [],
                         'hunk_ops': None,
                         'diffset': self,
                     })

                     # the first chunk is skipped; only the remaining ones are
                     # parsed as content hunks
                     file_chunks = patch['chunks'][1:]
                     for hunk in file_chunks:
                         hunkbit = self.parse_hunk(hunk, source_file, target_file)
                         hunkbit.source_file_path = source_file_path
                         hunkbit.target_file_path = target_file_path
                         filediff.hunks.append(hunkbit)

                     # Simulate hunk on OPS type line which doesn't really contain any diff
                     # this allows commenting on those
                     if not file_chunks:
                         actions = []
                         for op_id, op_text in filediff.patch['stats']['ops'].items():
                             if op_id == DEL_FILENODE:
                                 actions.append(u'file was removed')
                             elif op_id == BIN_FILENODE:
                                 actions.append(u'binary diff hidden')
                             else:
                                 actions.append(safe_unicode(op_text))

                         # parenthesised so the fallback text is really used when
                         # there are no actions: `+` binds tighter than `or`
                         action_line = u'NO CONTENT: ' + (
                             u', '.join(actions) or u'UNDEFINED_ACTION')

                         hunk_ops = {'source_length': 0, 'source_start': 0,
                                     'lines': [
                                         {'new_lineno': 0, 'old_lineno': 1,
                                          'action': 'unmod-no-hl', 'line': action_line}
                                     ],
                                     'section_header': u'', 'target_start': 1, 'target_length': 1}

                         hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
                         hunkbit.source_file_path = source_file_path
                         hunkbit.target_file_path = target_file_path
                         filediff.hunk_ops = hunkbit
                     return filediff
                 def parse_hunk(self, hunk, source_file, target_file):
                     """
                     Turn one raw hunk into rendered side-by-side and unified lines.

                     Changed lines are buffered per side; every unmodified line
                     flushes the buffers through `parse_lines` before being queued
                     itself on both sides.

                     :param hunk: mapping with 'source_start', 'source_length',
                         'target_start', 'target_length', 'section_header' and 'lines'
                     :param source_file: passed through to `parse_lines`
                     :param target_file: passed through to `parse_lines`
                     :return: AttributeDict with the hunk header values, `lines`,
                         `sideside` (same list as `lines`) and `unified`
                     """
                     parsed = AttributeDict({
                         'source_start': hunk['source_start'],
                         'source_length': hunk['source_length'],
                         'target_start': hunk['target_start'],
                         'target_length': hunk['target_length'],
                         'section_header': hunk['section_header'],
                         'lines': [],
                     })

                     old_side, new_side = [], []
                     for entry in hunk['lines']:
                         action = entry['action']
                         if action in ('unmod', 'unmod-no-hl'):
                             # context line: emit what was buffered so far, then
                             # queue the context line on both sides
                             parsed.lines.extend(self.parse_lines(
                                 old_side, new_side, source_file, target_file,
                                 no_hl=(action == 'unmod-no-hl')))
                             new_side.append(entry)
                             old_side.append(entry)
                         elif action in ('add', 'new-no-nl'):
                             new_side.append(entry)
                         elif action in ('del', 'old-no-nl'):
                             old_side.append(entry)

                     # highlighting is skipped only when everything still buffered
                     # is a 'unmod-no-hl' line
                     leftover_actions = set(
                         item['action'] for item in new_side + old_side)
                     parsed.lines.extend(self.parse_lines(
                         old_side, new_side, source_file, target_file,
                         no_hl=(leftover_actions == {'unmod-no-hl'})))

                     # NOTE(marcink): we must keep list() call here so we can cache the result...
                     parsed.unified = list(self.as_unified(parsed.lines))
                     parsed.sideside = parsed.lines
                     return parsed
                 def parse_lines(self, before_lines, after_lines, source_file, target_file,
                                 no_hl=False):
                     """
                     Pair buffered old/new lines into rendered side-by-side rows.

                     Both input lists are consumed: entries are popped until the two
                     lists are empty. A trailing 'old-no-nl' / 'new-no-nl' entry is
                     not rendered as its own row, its text is appended to the content
                     of the last row of that side.

                     :param before_lines: list of old-side line mappings (mutated)
                     :param after_lines: list of new-side line mappings (mutated)
                     :param source_file: input file for old-side tokenizing
                     :param target_file: input file for new-side tokenizing
                     :param no_hl: passed to `get_line_tokens` to disable highlighting
                     :return: list of AttributeDict rows with `original` and `modified`
                     """
                     # TODO: dan: investigate doing the diff comparison and fast highlighting
                     # on the entire before and after buffered block lines rather than by
                     # line, this means we can get better 'fast' highlighting if the context
                     # allows it - eg.
                     # line 4: """
                     # line 5: this gets highlighted as a string
                     # line 6: """

                     old_trailer = AttributeDict()
                     new_trailer = AttributeDict()

                     if before_lines and before_lines[-1]['action'] == 'old-no-nl':
                         marker = before_lines.pop()
                         old_trailer.content = '\n {}'.format(
                             render_tokenstream([('nonl', '', marker['line'])]))

                     if after_lines and after_lines[-1]['action'] == 'new-no-nl':
                         marker = after_lines.pop()
                         new_trailer.content = '\n {}'.format(
                             render_tokenstream([('nonl', '', marker['line'])]))

                     rows = []
                     while before_lines or after_lines:
                         before = before_lines.pop(0) if before_lines else None
                         after = after_lines.pop(0) if after_lines else None
                         before_tokens = after_tokens = None

                         original, modified = AttributeDict(), AttributeDict()

                         if before:
                             if before['action'] == 'old-no-nl':
                                 before_tokens = [('nonl', before['line'])]
                             else:
                                 before_tokens = self.get_line_tokens(
                                     line_text=before['line'],
                                     line_number=before['old_lineno'],
                                     input_file=source_file, no_hl=no_hl)
                             original.lineno = before['old_lineno']
                             original.content = before['line']
                             original.action = self.action_to_op(before['action'])
                             original.get_comment_args = (
                                 source_file, 'o', before['old_lineno'])

                         if after:
                             if after['action'] == 'new-no-nl':
                                 after_tokens = [('nonl', after['line'])]
                             else:
                                 after_tokens = self.get_line_tokens(
                                     line_text=after['line'],
                                     line_number=after['new_lineno'],
                                     input_file=target_file, no_hl=no_hl)
                             modified.lineno = after['new_lineno']
                             modified.content = after['line']
                             modified.action = self.action_to_op(after['action'])
                             modified.get_comment_args = (
                                 target_file, 'n', after['new_lineno'])

                         # diff the lines: with tokens on both sides mark the
                         # differences, otherwise render the one side unmarked
                         if before_tokens and after_tokens:
                             old_marked, new_marked, _ = tokens_diff(
                                 before_tokens, after_tokens)
                             original.content = render_tokenstream(old_marked)
                             modified.content = render_tokenstream(new_marked)
                         elif before_tokens:
                             original.content = render_tokenstream(
                                 [(tok[0], '', tok[1]) for tok in before_tokens])
                         elif after_tokens:
                             modified.content = render_tokenstream(
                                 [(tok[0], '', tok[1]) for tok in after_tokens])

                         # once a side is exhausted, attach its trailer (only once)
                         if not before_lines and old_trailer:
                             original.content += old_trailer.content
                             old_trailer = None
                         if not after_lines and new_trailer:
                             modified.content += new_trailer.content
                             new_trailer = None

                         rows.append(AttributeDict({
                             'original': original,
                             'modified': modified,
                         }))

                     return rows
                 def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False):
                     """
                     Tokenize a single diff line according to the highlight mode.

                     :param line_text: text of the line, used when the line is
                         tokenized on its own
                     :param line_number: 1-based line number inside `input_file`, used
                         when the tokens are taken from the fully tokenized file
                     :param input_file: a file name string, a FileNode, or None
                     :param no_hl: when true, HL_NONE is used instead of the
                         configured highlight mode
                     :return: list of tokens for the line
                     """
                     filenode = filename = None
                     if isinstance(input_file, basestring):
                         filename = input_file
                     elif isinstance(input_file, FileNode):
                         filenode, filename = input_file, input_file.unicode_path

                     hl_mode = self.highlight_mode
                     if no_hl:
                         hl_mode = self.HL_NONE

                     if hl_mode == self.HL_REAL and filenode:
                         lexer = self._get_lexer_for_filename(filename)
                         file_size_allowed = input_file.size < self.max_file_size_limit
                         # whole-file tokens are only used for files below the
                         # size limit and when a line number is known
                         if line_number and file_size_allowed:
                             return self.get_tokenized_filenode_line(
                                 input_file, line_number, lexer)

                     # otherwise tokenize just this line: with the file's lexer when
                     # highlighting is on and a name is known, else as plain text
                     if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
                         lexer = self._get_lexer_for_filename(filename)
                     else:
                         lexer = plain_text_lexer
                     return list(tokenize_string(line_text, lexer))
                 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
                     if filenode not in self.highlighted_filenodes:
                         tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
                         self.highlighted_filenodes[filenode] = tokenized_lines
                     return self.highlighted_filenodes[filenode][line_number - 1]
                 def action_to_op(self, action):
                     """
                     Map a diff line action to its one-character marker.

                     'add' becomes '+', 'del' becomes '-', the unmodified and
                     no-newline actions become ' '; any other value is returned
                     unchanged.
                     """
                     markers = dict.fromkeys(
                         ('unmod', 'unmod-no-hl', 'old-no-nl', 'new-no-nl'), ' ')
                     markers['add'] = '+'
                     markers['del'] = '-'
                     return markers.get(action, action)
                 def as_unified(self, lines):
                     """
                     Return a generator that yields the lines of a diff in unified order

                     Each yielded item is a tuple of (old line number, new line number,
                     action, content, comment args). Added lines that belong to a
                     changed row are held back so that they come out after the
                     removed lines of the same run.

                     :param lines: rows with `original` and `modified` sides
                     """
                     pending = []
                     for row in lines:
                         old, new = row.original, row.modified

                         # release the held-back additions when a row has no old
                         # side, or when an unchanged row is reached
                         if (pending and not old) or old.action == ' ':
                             for item in pending:
                                 yield item
                             pending = []

                         if old:
                             if old.action == ' ':
                                 yield (old.lineno, new.lineno,
                                        old.action, old.content,
                                        old.get_comment_args)
                                 continue

                             if old.action == '-':
                                 yield (old.lineno, None,
                                        old.action, old.content,
                                        old.get_comment_args)

                             if new.action == '+':
                                 pending.append((
                                     None, new.lineno,
                                     new.action, new.content,
                                     new.get_comment_args))
                                 continue

                         if new:
                             yield (None, new.lineno,
                                    new.action, new.content,
                                    new.get_comment_args)

                     for item in pending:
                         yield item