u/pc/rhodecode-enterprise-ce-fork-pc Commit - r1833:56150ab5

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

3

4

#

4

#

5

# This program is free software: you can redistribute it and/or modify

5

# This program is free software: you can redistribute it and/or modify

6

# it under the terms of the GNU Affero General Public License, version 3

6

# it under the terms of the GNU Affero General Public License, version 3

7

# (only), as published by the Free Software Foundation.

7

# (only), as published by the Free Software Foundation.

8

#

8

#

9

# This program is distributed in the hope that it will be useful,

9

# This program is distributed in the hope that it will be useful,

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12

# GNU General Public License for more details.

12

# GNU General Public License for more details.

13

#

13

#

14

# You should have received a copy of the GNU Affero General Public License

14

# You should have received a copy of the GNU Affero General Public License

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

16

#

16

#

17

# This program is dual-licensed. If you wish to learn more about the

17

# This program is dual-licensed. If you wish to learn more about the

18

# RhodeCode Enterprise Edition, including its added features, Support services,

18

# RhodeCode Enterprise Edition, including its added features, Support services,

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

20

21

22

"""

22

"""

23

Renderer for markup languages with ability to parse using rst or markdown

23

Renderer for markup languages with ability to parse using rst or markdown

24

"""

24

"""

25

26

import re

26

import re

27

import os

27

import os

28

import lxml

28

import lxml

29

import logging

29

import logging

30

import urlparse

30

import urlparse

31

32

from mako.lookup import TemplateLookup

32

from mako.lookup import TemplateLookup

33

from mako.template import Template as MakoTemplate

33

from mako.template import Template as MakoTemplate

34

35

from docutils.core import publish_parts

35

from docutils.core import publish_parts

36

from docutils.parsers.rst import directives

36

from docutils.parsers.rst import directives

37

from docutils import writers

38

from docutils.writers import html4css1

37

import markdown

39

import markdown

38

40

39

from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension

41

from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension

40

from rhodecode.lib.utils2 import (

42

from rhodecode.lib.utils2 import (

41

safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)

43

safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)

42

44

43

log = logging.getLogger(__name__)

45

log = logging.getLogger(__name__)

44

46

45

# default renderer used to generate automated comments

47

# default renderer used to generate automated comments

46

DEFAULT_COMMENTS_RENDERER = 'rst'

48

DEFAULT_COMMENTS_RENDERER = 'rst'

47

49

48

50

51

class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    Custom HTML Translator used for sandboxing potential
    JS injections in ref links
    """

    def visit_reference(self, node):
        """
        Replace ``javascript:`` reference targets with a harmless
        placeholder, then delegate to the stock html4css1 translator.

        :param node: docutils reference node; its ``refuri`` attribute, when
            present, is inspected and possibly overwritten in place.
        """
        if 'refuri' in node.attributes:
            refuri = node['refuri']
            if ':' in refuri:
                scheme = refuri.split(':', 1)[0]
                # Browsers match URL schemes case-insensitively and drop
                # whitespace/control characters around and inside them, so
                # normalise before comparing; a plain
                # ``prefix == 'javascript'`` test lets ``JavaScript:`` or
                # ``java\tscript:`` slip through.
                scheme = ''.join(
                    ch for ch in scheme if ord(ch) > 0x20).lower()
                if scheme == 'javascript':
                    # we don't allow javascript type of refs...
                    node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # old style class requires this...
        return html4css1.HTMLTranslator.visit_reference(self, node)

68

69

70

class RhodeCodeWriter(writers.html4css1.Writer):
    """
    html4css1 writer that renders through ``CustomHTMLTranslator``.
    """

    def __init__(self):
        # run the generic docutils Writer initialiser, then plug in the
        # sandboxing translator
        writers.Writer.__init__(self)
        self.translator_class = CustomHTMLTranslator

74

75

49

def relative_links(html_source, server_path):
    """
    Rewrite link targets found in an HTML fragment relative to server_path.

    ``src`` of ``img``/``video`` elements and ``href`` of ``a`` elements
    (except those with the ``gfm`` class) are passed through
    :func:`relative_path`.

    :param html_source: HTML markup; falsy values are returned unchanged.
    :param server_path: current server path handed to ``relative_path``.
    :return: serialized HTML, or ``html_source`` untouched when it is empty
        or cannot be parsed.
    """
    if not html_source:
        return html_source

    # ``import lxml`` alone does not load the ``lxml.html`` sub-module; import
    # it explicitly so a missing attribute is not silently swallowed by the
    # parse-error handler below.
    import lxml.html

    try:
        doc = lxml.html.fromstring(html_source)
    except Exception:
        return html_source

    rewrite_rules = (
        ('img, video', 'src'),
        ('a:not(.gfm)', 'href'),
    )
    for selector, attr_name in rewrite_rules:
        for el in doc.cssselect(selector):
            # elements may legitimately lack the attribute (e.g. named
            # anchors, <video> with <source> children); indexing would raise
            # KeyError for those
            target = el.attrib.get(attr_name)
            if target:
                el.attrib[attr_name] = relative_path(target, server_path)

    return lxml.html.tostring(doc)

69

96

70

97

71

def relative_path(path, request_path, is_repo_file=None):
    """
    Resolve a relative link against the current server path.

    ``path`` is the (relative) link and ``request_path`` is the current
    server path (not absolute), e.g.::

        path = '../logo.png'
        request_path= '/repo/files/path/file.md'
        produces: '/repo/files/logo.png'

    :param path: link to resolve; when empty ``request_path`` is returned.
    :param request_path: server path the link is relative to.
    :param is_repo_file: optional callable taking ``request_path``; a truthy
        result means the path points at a file, so resolution starts one
        level up. Defaults to a check that always answers True.
    :return: unicode path starting with ``/``, or ``path`` itself for data,
        javascript, anchor, invalid and absolute links.
    """
    # TODO(marcink): unicode/str support ?
    # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))

    if not is_repo_file:
        # assume default is a valid file path
        is_repo_file = lambda p: True

    if not path:
        return request_path

    path = safe_unicode(path)
    request_path = safe_unicode(request_path)

    # skip data, anchor, invalid links
    untouched_prefixes = (u'data:', u'javascript:', u'#', u':')
    if path.startswith(untouched_prefixes):
        return path

    # links carrying a network location are absolute, leave them alone
    if urlparse.urlparse(path).netloc:
        return path

    if not request_path:
        return path

    if path.startswith(u'/'):
        path = path[1:]

    if path.startswith(u'./'):
        path = path[2:]

    # compute how deep we need to traverse the request_path; if request path
    # is a VALID file, we use a relative path with one level up
    levels_up = 1 if is_repo_file(request_path) else 0

    while path.startswith(u'../'):
        levels_up += 1
        path = path[3:]

    segments = request_path.split('/')
    if levels_up > 0:
        segments = segments[:-levels_up]
    segments.append(path)

    return u'/' + u'/'.join(segments).lstrip(u'/')

132

159

133

160

134

class MarkupRenderer(object):
    """
    Renders markup sources (markdown, rst, jupyter, plain) to HTML.
    """

    # directive names that get registered as ``None`` when rendering rst
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    # filename patterns used to pick a renderer
    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    # shared markdown converters: plain one, and one with the github
    # flavored extension added
    extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
    markdown_renderer = markdown.Markdown(
        extensions, safe_mode=True, enable_attributes=False)

    markdown_renderer_flavored = markdown.Markdown(
        extensions + [GithubFlavoredMarkdownExtension()], safe_mode=True,
        enable_attributes=False)

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2),
        ('.TEXT', 2),
        ('.txt', 3),
        ('.TXT', 3),
    ]

    RST_EXTS = [
        ('.rst', 1),
        ('.rest', 1),
        ('.RST', 2),
        ('.REST', 2),
    ]

    MARKDOWN_EXTS = [
        ('.md', 1),
        ('.MD', 1),
        ('.mkdn', 2),
        ('.MKDN', 2),
        ('.mdown', 3),
        ('.MDOWN', 3),
        ('.markdown', 4),
        ('.MARKDOWN', 4),
    ]

170

197

171

def _detect_renderer(self, source, filename=None):
    """
    Pick the rendering callable to use for ``filename``.

    The filename is matched against the markdown, rst and jupyter extension
    patterns in that order; anything else, including a missing filename,
    falls back to the plain renderer.

    NOTE(review): an earlier docstring said the filename may also be an
    explicit renderer name; this code only matches by pattern - confirm
    with callers whether that is still expected.

    :param source: markup source; not inspected here.
    :param filename: name of the file being rendered, may be None.
    :return: the matching ``MarkupRenderer`` rendering method.
    """
    # ``render()`` defaults filename to None; regex matching on None raises
    # TypeError, so normalise to an empty string (which renders as plain)
    filename = filename or ''

    if MarkupRenderer.MARKDOWN_PAT.search(filename):
        detected_renderer = 'markdown'
    elif MarkupRenderer.RST_PAT.search(filename):
        detected_renderer = 'rst'
    elif MarkupRenderer.JUPYTER_PAT.search(filename):
        detected_renderer = 'jupyter'
    else:
        # README names without extension and unknown files alike
        detected_renderer = 'plain'

    return getattr(MarkupRenderer, detected_renderer)

194

221

195

@classmethod
def renderer_from_filename(cls, filename, exclude):
    """
    Map a filename to ``'markdown'`` or ``'rst'`` by its extension.

    Extensions listed in ``exclude`` (when it is a list or tuple) are not
    considered. This is mostly used in helpers.

    :param filename: file name to check.
    :param exclude: list/tuple of extensions to ignore; any other value
        disables filtering.
    :return: renderer name, or None when no renderer can be detected.
    """
    def _usable_exts(ext_weights):
        # drop empty extensions, then whatever the caller excluded
        exts = [ext for ext, _weight in ext_weights if ext]
        if isinstance(exclude, (list, tuple)):
            exts = [ext for ext in exts if ext not in exclude]
        return tuple(exts)

    candidates = (
        ('markdown', cls.MARKDOWN_EXTS),
        ('rst', cls.RST_EXTS),
    )
    for renderer_name, ext_weights in candidates:
        if filename.endswith(_usable_exts(ext_weights)):
            return renderer_name

    return None

214

241

215

def render(self, source, filename=None):
    """
    Render ``source`` with the renderer detected for ``filename``.

    The renderer is chosen by ``_detect_renderer`` and called with the
    source as its only argument.

    :param source: markup source to render.
    :param filename: file name used for renderer detection.
    :return: whatever the detected renderer returns for ``source``.
    """
    render_func = self._detect_renderer(source, filename)
    return render_func(source)

228

255

229

@classmethod
def _flavored_markdown(cls, text):
    """
    Github style flavored markdown pre-processing.

    ``<pre>`` blocks are swapped for digest placeholders, underscores are
    escaped in a leading multi-underscore word, and the ``<pre>`` blocks are
    put back, each preceded by two newlines.

    :param text: markdown source.
    :return: pre-processed markdown source.
    """
    stashed_blocks = {}

    def _stash_pre(match):
        # remember the block under its digest and leave a marker behind
        block = match.group(0)
        key = md5_safe(block)
        stashed_blocks[key] = block
        return "{gfm-extraction-%s}" % key

    pre_block_re = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
    text = pre_block_re.sub(_stash_pre, text)

    def _escape_underscores(match):
        # Prevent foo_bar_baz from ending up with an italic word in the
        # middle.
        word = match.group(0)
        if word.count('_') < 2:
            return word
        return word.replace('_', r'\_')

    # NOTE(review): ``^`` is used without re.MULTILINE here, so only a word
    # at the very start of the text can match.
    text = re.sub(
        r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', _escape_underscores, text)

    def _restore_pre(match):
        return '\n\n' + stashed_blocks[match.group(1)]

    return re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}', _restore_pre, text)

262

289

263

@classmethod
def urlify_text(cls, text):
    """
    Wrap every http(s) URL found in ``text`` in an ``<a>`` tag.

    The URL is used both as the ``href`` and as the link text.

    :param text: text to scan for URLs.
    :return: text with the URLs turned into HTML links.
    """
    link_re = re.compile(
        r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
        r'|[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    def _as_anchor(match):
        found_url = match.group(1)
        return '<a href="%(url)s">%(url)s</a>' % {'url': found_url}

    return link_re.sub(_as_anchor, text)

273

300

274

@classmethod
def plain(cls, source, universal_newline=True):
    """
    Render ``source`` as plain text: URLs become links and newlines become
    ``<br />`` tags, with one extra ``<br />`` at the front.

    :param source: text to render.
    :param universal_newline: when true, all line endings are normalised to
        ``\\n`` before rendering.
    :return: HTML string.
    """
    source = safe_unicode(source)
    if universal_newline:
        source = '\n'.join(source.splitlines())

    with_links = cls.urlify_text(source)
    return '<br />' + with_links.replace("\n", '<br />')

283

310

284

@classmethod
def markdown(cls, source, safe=True, flavored=True, mentions=False):
    """
    Render markdown ``source`` to HTML.

    It does not allow to insert inline HTML. In presence of HTML tags, it
    will replace them instead with [HTML_REMOVED]. This is controlled by
    the safe_mode=True parameter of the markdown converters.

    :param source: markdown source.
    :param safe: when true, rendering errors fall back to the plain
        renderer; otherwise the error is re-raised.
    :param flavored: use the github flavored converter and pre-processing.
    :param mentions: highlight ``@user`` mentions as bold text before
        rendering.
    :return: rendered HTML.
    """
    if mentions:
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}

        mention_hl = mention_pat.sub(wrapp, source).strip()
        # we extracted mentions render with this using Mentions false
        return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                            mentions=False)

    if flavored:
        markdown_renderer = cls.markdown_renderer_flavored
    else:
        markdown_renderer = cls.markdown_renderer

    source = safe_unicode(source)
    try:
        if flavored:
            source = cls._flavored_markdown(source)
        # the converter instances are shared on the class; clear state left
        # over from a previously converted document before reusing one
        markdown_renderer.reset()
        return markdown_renderer.convert(source)
    except Exception:
        log.exception('Error when rendering Markdown')
        if safe:
            log.debug('Fallback to render in plain mode')
            return cls.plain(source)
        else:
            raise

318

345

319

@classmethod
def rst(cls, source, safe=True, mentions=False):
    """
    Render reStructuredText ``source`` to HTML.

    :param source: rst source.
    :param safe: when true, rendering errors fall back to the plain
        renderer; otherwise the error is re-raised.
    :param mentions: highlight ``@user`` mentions as bold text before
        rendering.
    :return: the ``html_title`` and ``fragment`` parts produced by docutils,
        concatenated.
    """
    if mentions:
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}

        mention_hl = mention_pat.sub(wrapp, source).strip()
        # we extracted mentions render with this using Mentions false
        return cls.rst(mention_hl, safe=safe, mentions=False)

    source = safe_unicode(source)
    try:
        disallowed = cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES

        # the directive aliases are kept in the overrides as before, so
        # publish_parts keeps receiving those keys
        docutils_settings = dict((alias, None) for alias in disallowed)
        docutils_settings.update({
            'input_encoding': 'unicode',
            'report_level': 4,
            # rendered sources are user supplied: forbid reading files or
            # URLs (e.g. ``csv-table`` with ``:file:``) and raw passthrough
            'file_insertion_enabled': False,
            'raw_enabled': False,
        })

        # only directive names are registered; plain settings keys such as
        # 'report_level' must not end up in the directive registry
        for alias in disallowed:
            directives.register_directive(alias, None)

        parts = publish_parts(source=source,
                              writer=RhodeCodeWriter(),
                              settings_overrides=docutils_settings)

        return parts['html_title'] + parts["fragment"]
    except Exception:
        log.exception('Error when rendering RST')
        if safe:
            log.debug('Fallbacking to render in plain mode')
            return cls.plain(source)
        else:
            raise

355

382

356

@classmethod

383

@classmethod

357

def jupyter(cls, source, safe=True):

384

def jupyter(cls, source, safe=True):

358

from rhodecode.lib import helpers

385

from rhodecode.lib import helpers

359

386

360

from traitlets.config import Config

387

from traitlets.config import Config

361

import nbformat

388

import nbformat

362

from nbconvert import HTMLExporter

389

from nbconvert import HTMLExporter

363

from nbconvert.preprocessors import Preprocessor

390

from nbconvert.preprocessors import Preprocessor

364

391

365

class CustomHTMLExporter(HTMLExporter):

392

class CustomHTMLExporter(HTMLExporter):

366

def _template_file_default(self):

393

def _template_file_default(self):

367

return 'basic'

394

return 'basic'

368

395

369

class Sandbox(Preprocessor):

396

class Sandbox(Preprocessor):

370

397

371

def preprocess(self, nb, resources):

398

def preprocess(self, nb, resources):

372

sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'

399

sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'

373

for cell in nb['cells']:

400

for cell in nb['cells']:

374

if safe and 'outputs' in cell:

401

if safe and 'outputs' in cell:

375

for cell_output in cell['outputs']:

402

for cell_output in cell['outputs']:

376

if 'data' in cell_output:

403

if 'data' in cell_output:

377

if 'application/javascript' in cell_output['data']:

404

if 'application/javascript' in cell_output['data']:

378

cell_output['data']['text/plain'] = sandbox_text

405

cell_output['data']['text/plain'] = sandbox_text

379

cell_output['data'].pop('application/javascript', None)

406

cell_output['data'].pop('application/javascript', None)

380

return nb, resources

407

return nb, resources

381

408

382

def _sanitize_resources(resources):

409

def _sanitize_resources(resources):

383

"""

410

"""

384

Skip/sanitize some of the CSS generated and included in jupyter

411

Skip/sanitize some of the CSS generated and included in jupyter

385

so it doesn't messes up UI so much

412

so it doesn't messes up UI so much

386

"""

413

"""

387

414

388

# TODO(marcink): probably we should replace this with whole custom

415

# TODO(marcink): probably we should replace this with whole custom

389

# CSS set that doesn't screw up, but jupyter generated html has some

416

# CSS set that doesn't screw up, but jupyter generated html has some

390

# special markers, so it requires Custom HTML exporter template with

417

# special markers, so it requires Custom HTML exporter template with

391

# _default_template_path_default, to achieve that

418

# _default_template_path_default, to achieve that

392

419

393

# strip the reset CSS

420

# strip the reset CSS

394

resources[0] = resources[0][resources[0].find('/*! Source'):]

421

resources[0] = resources[0][resources[0].find('/*! Source'):]

395

return resources

422

return resources

396

423

397

def as_html(notebook):

424

def as_html(notebook):

398

conf = Config()

425

conf = Config()

399

conf.CustomHTMLExporter.preprocessors = [Sandbox]

426

conf.CustomHTMLExporter.preprocessors = [Sandbox]

400

html_exporter = CustomHTMLExporter(config=conf)

427

html_exporter = CustomHTMLExporter(config=conf)

401

428

402

(body, resources) = html_exporter.from_notebook_node(notebook)

429

(body, resources) = html_exporter.from_notebook_node(notebook)

403

header = ''

430

header = ''

404

js = MakoTemplate(r'''

431

js = MakoTemplate(r'''

405

432

406

433

407

434

408

MathJax.Hub.Config({

435

MathJax.Hub.Config({

409

jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],

436

jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],

410

extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],

437

extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],

411

TeX: {

438

TeX: {

412

extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]

439

extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]

413

},

440

},

414

tex2jax: {

441

tex2jax: {

415

inlineMath: [ ['$','$'], ["\$","\$"] ],

442

inlineMath: [ ['$','$'], ["\$","\$"] ],

416

displayMath: [ ['$$','$$'], ["\\[","\\]"] ],

443

displayMath: [ ['$$','$$'], ["\\[","\\]"] ],

417

processEscapes: true,

444

processEscapes: true,

418

processEnvironments: true

445

processEnvironments: true

419

},

446

},

420

// Center justify equations in code and markdown cells. Elsewhere

447

// Center justify equations in code and markdown cells. Elsewhere

421

// we use CSS to left justify single line equations in code cells.

448

// we use CSS to left justify single line equations in code cells.

422

displayAlign: 'center',

449

displayAlign: 'center',

423

"HTML-CSS": {

450

"HTML-CSS": {

424

styles: {'.MathJax_Display': {"margin": 0}},

451

styles: {'.MathJax_Display': {"margin": 0}},

425

linebreaks: { automatic: true },

452

linebreaks: { automatic: true },

426

availableFonts: ["STIX", "TeX"]

453

availableFonts: ["STIX", "TeX"]

427

},

454

},

428

showMathMenu: false

455

showMathMenu: false

429

});

456

});

430

</script>

457

</script>

431

458

432

459

433

''').render(h=helpers)

460

''').render(h=helpers)

434

461

435

css = '<style>{}</style>'.format(

462

css = '<style>{}</style>'.format(

436

''.join(_sanitize_resources(resources['inlining']['css'])))

463

''.join(_sanitize_resources(resources['inlining']['css'])))

437

464

438

body = '\n'.join([header, css, js, body])

465

body = '\n'.join([header, css, js, body])

439

return body, resources

466

return body, resources

440

467

441

notebook = nbformat.reads(source, as_version=4)

468

notebook = nbformat.reads(source, as_version=4)

442

(body, resources) = as_html(notebook)

469

(body, resources) = as_html(notebook)

443

return body

470

return body

444

471

445

472

446

class RstTemplateRenderer(object):
    """
    Renders Mako templates looked up in the ``templates/rst_templates``
    directory located one level above this module's directory.
    """

    def __init__(self):
        module_dir = os.path.dirname(__file__)
        base = os.path.abspath(os.path.dirname(module_dir))
        templates_dir = os.path.join(base, 'templates', 'rst_templates')

        self.template_store = TemplateLookup(
            directories=[templates_dir],
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """Fetch a template from the lookup store by name."""
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """
        Render the named template.

        :param template_name: name of the template to look up.
        :param kwargs: passed to the template's ``render`` call.
        :return: result of the template's ``render`` call.
        """
        return self._get_template(template_name).render(**kwargs)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             # Copyright (C) 2011-2017 RhodeCode GmbH
             #
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU Affero General Public License, version 3
             # (only), as published by the Free Software Foundation.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU Affero General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             #
             # This program is dual-licensed. If you wish to learn more about the
             # RhodeCode Enterprise Edition, including its added features, Support services,
             # and proprietary license terms, please see https://rhodecode.com/licenses/
             """
             Renderer for markup languages with ability to parse using rst or markdown
             """
             import re
             import os
             import lxml
             import logging
             import urlparse
             from mako.lookup import TemplateLookup
             from mako.template import Template as MakoTemplate
             from docutils.core import publish_parts
             from docutils.parsers.rst import directives
+            from docutils import writers
+            from docutils.writers import html4css1
             import markdown
             from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
             from rhodecode.lib.utils2 import (
                 safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)
             log = logging.getLogger(__name__)
             # default renderer used to generate automated comments
             DEFAULT_COMMENTS_RENDERER = 'rst'
+            class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
+                """
+                Custom HTML Translator used for sandboxing potential
+                JS injections in ref links
+                """
+                def visit_reference(self, node):
+                    if 'refuri' in node.attributes:
+                        refuri = node['refuri']
+                        if ':' in refuri:
+                            prefix, link = refuri.lstrip().split(':', 1)
+                            if prefix == 'javascript':
+                                # we don't allow javascript type of refs...
+                                node['refuri'] = 'javascript:alert("SandBoxedJavascript")'
+                    # old style class requires this...
+                    return html4css1.HTMLTranslator.visit_reference(self, node)
+            class RhodeCodeWriter(writers.html4css1.Writer):
+                """
+                html4css1 writer that renders documents through
+                ``CustomHTMLTranslator``.
+                """
+                def __init__(self):
+                    # run the generic Writer initialiser, then select the
+                    # translator class explicitly
+                    writers.Writer.__init__(self)
+                    self.translator_class = CustomHTMLTranslator
             def relative_links(html_source, server_path):
                 """
                 Rewrite link targets inside rendered html so that they are resolved
                 against ``server_path``.

                 ``src`` of ``img``/``video`` elements and ``href`` of ``a`` elements
                 (except those carrying the ``gfm`` class) are passed through
                 ``relative_path``. Elements without the attribute, or with an empty
                 one, are left untouched.

                 :param html_source: html markup; returned as-is when empty or when
                     it cannot be parsed
                 :param server_path: current server path the links are relative to
                 :return: serialized html with rewritten links
                 """
                 if not html_source:
                     return html_source
                 try:
                     doc = lxml.html.fromstring(html_source)
                 except Exception:
                     # best effort: unparsable markup is returned unchanged
                     return html_source
                 for el in doc.cssselect('img, video'):
                     # the attribute is optional, e.g. <video> using <source> children
                     src = el.attrib.get('src')
                     if src:
                         el.attrib['src'] = relative_path(src, server_path)
                 for el in doc.cssselect('a:not(.gfm)'):
                     # named anchors (<a name="...">) carry no href
                     href = el.attrib.get('href')
                     if href:
                         el.attrib['href'] = relative_path(href, server_path)
                 return lxml.html.tostring(doc)
             def relative_path(path, request_path, is_repo_file=None):
                 """
                 relative link support, path is a rel path, and request_path is current
                 server path (not absolute)
                 e.g.
                 path = '../logo.png'
                 request_path= '/repo/files/path/file.md'
                 produces: '/repo/files/logo.png'

                 :param path: link target found in the rendered markup
                 :param request_path: server path of the page being rendered
                 :param is_repo_file: optional callable taking ``request_path``; a true
                     result makes the link resolve one level above ``request_path``.
                     When omitted every ``request_path`` is treated that way.
                 :return: ``request_path`` when ``path`` is empty; ``path`` itself for
                     data/javascript/mailto/anchor links, for links with a network
                     location and when ``request_path`` is empty; otherwise a
                     '/'-prefixed unicode path
                 """
                 # TODO(marcink): unicode/str support ?
                 # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))
                 def dummy_check(p):
                     return True  # assume default is a valid file path
                 is_repo_file = is_repo_file or dummy_check
                 if not path:
                     return request_path
                 path = safe_unicode(path)
                 request_path = safe_unicode(request_path)
                 # mailto: has no network location, so without this guard it would
                 # be mistaken for a relative path and glued onto request_path
                 if path.startswith(
                         (u'data:', u'javascript:', u'mailto:', u'#', u':')):
                     # skip data, mail, anchor, invalid links
                     return path
                 is_absolute = bool(urlparse.urlparse(path).netloc)
                 if is_absolute:
                     return path
                 if not request_path:
                     return path
                 if path.startswith(u'/'):
                     path = path[1:]
                 if path.startswith(u'./'):
                     path = path[2:]
                 parts = request_path.split('/')
                 # compute how deep we need to traverse the request_path
                 depth = 0
                 if is_repo_file(request_path):
                     # if request path is a VALID file, we use a relative path with
                     # one level up
                     depth += 1
                 while path.startswith(u'../'):
                     depth += 1
                     path = path[3:]
                 if depth > 0:
                     parts = parts[:-depth]
                 parts.append(path)
                 final_path = u'/'.join(parts).lstrip(u'/')
                 return u'/' + final_path
             class MarkupRenderer(object):
                 # directive names that rst() registers with a None implementation
                 RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
                 # filename patterns consulted by _detect_renderer, all case-insensitive
                 MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
                 RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
                 JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
                 PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)
                 # markdown extensions shared by both renderer instances below
                 extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
                 # renderer instances are created once, at class definition time
                 markdown_renderer = markdown.Markdown(
                     extensions, safe_mode=True, enable_attributes=False)
                 # same configuration plus the Github flavored markdown extension
                 markdown_renderer_flavored = markdown.Markdown(
                     extensions + [GithubFlavoredMarkdownExtension()], safe_mode=True,
                     enable_attributes=False)
                 # (extension, weight) pairs; a lower weight comes first, which controls
                 # how extensions are attached to readme names.
                 # NOTE(review): within this class only the extension part is read
                 # (renderer_from_filename) - presumably the weights are used by callers
                 PLAIN_EXTS = [
                     # prefer no extension
                     ('', 0),  # special case that renders READMES names without extension
                     ('.text', 2), ('.TEXT', 2),
                     ('.txt', 3), ('.TXT', 3)
                 ]
                 RST_EXTS = [
                     ('.rst', 1), ('.rest', 1),
                     ('.RST', 2), ('.REST', 2)
                 ]
                 MARKDOWN_EXTS = [
                     ('.md', 1), ('.MD', 1),
                     ('.mkdn', 2), ('.MKDN', 2),
                     ('.mdown', 3), ('.MDOWN', 3),
                     ('.markdown', 4), ('.MARKDOWN', 4)
                 ]
                 def _detect_renderer(self, source, filename=None):
                     """
                     runs detection of what renderer should be used for generating html
                     from a markup language

                     The decision is based on the extension of ``filename`` only.

                     :param source: markup source; not inspected by the detection
                     :param filename: name of the file to render. When it is empty or
                         None, or matches no known markup extension, the plain
                         renderer is selected.
                     :return: the ``MarkupRenderer`` rendering callable (``markdown``,
                         ``rst``, ``jupyter`` or ``plain``)
                     """
                     # filename defaults to None and matching a pattern against None
                     # raises TypeError, so normalise it first
                     filename = filename or ''
                     if MarkupRenderer.MARKDOWN_PAT.search(filename):
                         detected_renderer = 'markdown'
                     elif MarkupRenderer.RST_PAT.search(filename):
                         detected_renderer = 'rst'
                     elif MarkupRenderer.JUPYTER_PAT.search(filename):
                         detected_renderer = 'jupyter'
                     else:
                         # readme files without extension and anything unrecognised
                         detected_renderer = 'plain'
                     return getattr(MarkupRenderer, detected_renderer)
                 @classmethod
                 def renderer_from_filename(cls, filename, exclude):
                     """
                     Map ``filename`` to a renderer name based on its extension.

                     Markdown extensions are checked before rst ones. This is mostly
                     used in helpers.

                     :param filename: file name to inspect
                     :param exclude: list or tuple of extensions to ignore; any other
                         value disables the filtering
                     :return: 'markdown', 'rst', or None when no renderer is detected
                     """
                     honour_exclude = isinstance(exclude, (list, tuple))

                     def _suffixes(weighted_exts):
                         # non-empty extensions that are not excluded, as a tuple
                         # suitable for str.endswith
                         return tuple(
                             ext for ext in [pair[0] for pair in weighted_exts]
                             if ext and not (honour_exclude and ext in exclude))

                     candidates = (
                         ('markdown', cls.MARKDOWN_EXTS),
                         ('rst', cls.RST_EXTS),
                     )
                     for renderer_name, weighted_exts in candidates:
                         if filename.endswith(_suffixes(weighted_exts)):
                             return renderer_name
                     return None
                 def render(self, source, filename=None):
                     """
                     Render ``source`` with the renderer that ``_detect_renderer``
                     selects for ``filename``.

                     :param source: markup source to render
                     :param filename: name of the file the source comes from
                     :return: whatever the selected renderer returns for ``source``
                     """
                     selected = self._detect_renderer(source, filename)
                     return selected(source)
                 @classmethod
                 def _flavored_markdown(cls, text):
                     """
                     Pre-process ``text`` in the Github flavored markdown style.

                     ``<pre>`` blocks are swapped for placeholders, underscores in a
                     word with two or more of them are escaped, and the ``<pre>``
                     blocks are put back, each preceded by an empty line.

                     :param text: markdown source
                     :return: the pre-processed source
                     """
                     stashed_blocks = {}

                     def stash_pre(found):
                         # keep the block under the md5 of its content
                         block = found.group(0)
                         key = md5_safe(block)
                         stashed_blocks[key] = block
                         return "{gfm-extraction-%s}" % key

                     pre_block = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
                     text = pre_block.sub(stash_pre, text)

                     # Prevent foo_bar_baz from ending up with an italic word in the middle.
                     def escape_underscores(found):
                         word = found.group(0)
                         if word.count('_') >= 2:
                             return word.replace('_', r'\_')
                         return word

                     # the pattern is anchored with ^ and compiled without MULTILINE
                     text = re.sub(
                         r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', escape_underscores, text)

                     def restore_pre(found):
                         return '\n\n' + stashed_blocks[found.group(1)]

                     return re.sub(
                         r'\{gfm-extraction-([0-9a-f]{32})\}', restore_pre, text)
                 @classmethod
                 def urlify_text(cls, text):
                     """
                     Wrap every http/https url found in ``text`` in an ``<a>`` tag.

                     :param text: text to scan
                     :return: ``text`` with each matched url replaced by a link whose
                         href and label are both the url
                     """
                     link_tmpl = '<a href="%(url)s">%(url)s</a>'
                     url_re = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
                     return url_re.sub(
                         lambda found: link_tmpl % {'url': found.group(1)}, text)
                 @classmethod
                 def plain(cls, source, universal_newline=True):
                     """
                     Render ``source`` as plain text: urls become links and new lines
                     become ``<br />``.

                     :param source: text to render
                     :param universal_newline: when true, all line endings are
                         normalised to ``\\n`` before rendering
                     :return: html string starting with ``<br />``
                     """
                     text = safe_unicode(source)
                     if universal_newline:
                         text = '\n'.join(text.splitlines())
                     linked = cls.urlify_text(text)
                     return '<br />' + linked.replace("\n", '<br />')
                 @classmethod
                 def markdown(cls, source, safe=True, flavored=True, mentions=False):
                     """
                     Render markdown ``source`` to html.

                     :param source: markdown markup
                     :param safe: when true a rendering error is logged and the source
                         is rendered with ``plain``; when false the error is re-raised
                     :param flavored: use the Github flavored renderer and
                         pre-processing
                     :param mentions: when true, text matching MENTIONS_REGEX is
                         emphasised before the actual rendering
                     """
                     if mentions:
                         def emphasize(found):
                             return ' **@%(uname)s** ' % {'uname': found.group(1)}
                         highlighted = re.compile(MENTIONS_REGEX).sub(
                             emphasize, source).strip()
                         # mentions are already marked up, so render the result
                         # with mentions switched off
                         return cls.markdown(
                             highlighted, safe=safe, flavored=flavored, mentions=False)
                     # It does not allow to insert inline HTML. In presence of HTML tags, it
                     # will replace them instead with [HTML_REMOVED]. This is controlled by
                     # the safe_mode=True parameter of the markdown method.
                     converter = (
                         cls.markdown_renderer_flavored if flavored
                         else cls.markdown_renderer)
                     source = safe_unicode(source)
                     try:
                         if flavored:
                             source = cls._flavored_markdown(source)
                         return converter.convert(source)
                     except Exception:
                         log.exception('Error when rendering Markdown')
                         if not safe:
                             raise
                         log.debug('Fallback to render in plain mode')
                         return cls.plain(source)
                 @classmethod
                 def rst(cls, source, safe=True, mentions=False):
                     """
                     Render reStructuredText ``source`` to an html fragment.

                     :param source: rst markup
                     :param safe: when true a rendering error is logged and the source
                         is rendered with ``plain`` instead; when false the error is
                         re-raised
                     :param mentions: when true, text matching MENTIONS_REGEX is
                         emphasised before the actual rendering
                     :return: the ``html_title`` and ``fragment`` parts produced by
                         docutils, concatenated
                     """
                     if mentions:
                         mention_pat = re.compile(MENTIONS_REGEX)
                         def wrapp(match_obj):
                             uname = match_obj.groups()[0]
                             return ' **@%(uname)s** ' % {'uname': uname}
                         mention_hl = mention_pat.sub(wrapp, source).strip()
                         # we extracted mentions render with this using Mentions false
                         return cls.rst(mention_hl, safe=safe, mentions=False)
                     source = safe_unicode(source)
                     try:
                         # every disallowed directive name is mapped to None
                         docutils_settings = dict(
                             [(alias, None) for alias in
                              cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])
                         docutils_settings.update({'input_encoding': 'unicode',
                                                   'report_level': 4})
                         # NOTE(review): this loop also hands 'input_encoding' and
                         # 'report_level' to register_directive, and the directive
                         # names end up in settings_overrides - confirm this is intended
                         for k, v in docutils_settings.iteritems():
                             directives.register_directive(k, v)
                         parts = publish_parts(source=source,
-                                              writer_name="html4css1",
+                                              writer=RhodeCodeWriter(),
                                               settings_overrides=docutils_settings)
                         return parts['html_title'] + parts["fragment"]
                     except Exception:
                         log.exception('Error when rendering RST')
                         if safe:
                             log.debug('Fallbacking to render in plain mode')
                             return cls.plain(source)
                         else:
                             raise
                 @classmethod
                 def jupyter(cls, source, safe=True):
                     """
                     Render a Jupyter notebook given as ``source`` to html.

                     :param source: notebook content, read with nbformat as version 4
                     :param safe: when true, ``application/javascript`` cell outputs are
                         replaced by a plain text placeholder
                     :return: html made of a marker comment, the inlined css, the
                         MathJax setup and the exported notebook body
                     """
                     # imported lazily, only when a notebook is actually rendered
                     from rhodecode.lib import helpers
                     from traitlets.config import Config
                     import nbformat
                     from nbconvert import HTMLExporter
                     from nbconvert.preprocessors import Preprocessor
                     class CustomHTMLExporter(HTMLExporter):
                         # exporter whose default template file is 'basic'
                         def _template_file_default(self):
                             return 'basic'
                     class Sandbox(Preprocessor):
                         # swaps javascript outputs for a text placeholder when `safe`
                         def preprocess(self, nb, resources):
                             sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                             for cell in nb['cells']:
                                 if safe and 'outputs' in cell:
                                     for cell_output in cell['outputs']:
                                         if 'data' in cell_output:
                                             if 'application/javascript' in cell_output['data']:
                                                 cell_output['data']['text/plain'] = sandbox_text
                                                 cell_output['data'].pop('application/javascript', None)
                             return nb, resources
                     def _sanitize_resources(resources):
                         """
                         Skip/sanitize some of the CSS generated and included in jupyter
                         so it doesn't mess up the UI so much
                         """
                         # TODO(marcink): probably we should replace this with whole custom
                         # CSS set that doesn't screw up, but jupyter generated html has some
                         # special markers, so it requires Custom HTML exporter template with
                         # _default_template_path_default, to achieve that
                         # strip the reset CSS
                         # NOTE(review): find() returns -1 when the marker is absent, which
                         # would keep only the last character - confirm the marker is
                         # always present
                         resources[0] = resources[0][resources[0].find('/*! Source'):]
                         return resources
                     def as_html(notebook):
                         # export with the Sandbox preprocessor enabled
                         conf = Config()
                         conf.CustomHTMLExporter.preprocessors = [Sandbox]
                         html_exporter = CustomHTMLExporter(config=conf)
                         (body, resources) = html_exporter.from_notebook_node(notebook)
                         header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
                         # MathJax configuration and loader, rendered through Mako so the
                         # asset url is resolved with the helpers module
                         js = MakoTemplate(r'''
                         <!-- Load mathjax -->
                             <!-- MathJax configuration -->
                             <script type="text/x-mathjax-config">
                             MathJax.Hub.Config({
                                 jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                                 extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                                 TeX: {
                                     extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                                 },
                                 tex2jax: {
                                     inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                                     displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                                     processEscapes: true,
                                     processEnvironments: true
                                 },
                                 // Center justify equations in code and markdown cells. Elsewhere
                                 // we use CSS to left justify single line equations in code cells.
                                 displayAlign: 'center',
                                 "HTML-CSS": {
                                     styles: {'.MathJax_Display': {"margin": 0}},
                                     linebreaks: { automatic: true },
                                     availableFonts: ["STIX", "TeX"]
                                 },
                                 showMathMenu: false
                             });
                             </script>
                             <!-- End of mathjax configuration -->
                             <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
                         ''').render(h=helpers)
                         # inline the sanitized css resources in a single <style> tag
                         css = '<style>{}</style>'.format(
                             ''.join(_sanitize_resources(resources['inlining']['css'])))
                         body = '\n'.join([header, css, js, body])
                         return body, resources
                     notebook = nbformat.reads(source, as_version=4)
                     # only the html body is returned; resources are discarded
                     (body, resources) = as_html(notebook)
                     return body
             class RstTemplateRenderer(object):
                 """
                 Renders Mako templates stored in the ``templates/rst_templates``
                 directory located one level above this module's directory.
                 """
                 def __init__(self):
                     module_dir = os.path.dirname(__file__)
                     package_root = os.path.abspath(os.path.dirname(module_dir))
                     lookup_options = {
                         'directories': [
                             os.path.join(package_root, 'templates', 'rst_templates')],
                         'input_encoding': 'utf-8',
                         # import line handed to every template via the lookup
                         'imports': ['from rhodecode.lib import helpers as h'],
                     }
                     self.template_store = TemplateLookup(**lookup_options)
                 def _get_template(self, templatename):
                     """Return the template named ``templatename`` from the lookup."""
                     store = self.template_store
                     return store.get_template(templatename)
                 def render(self, template_name, **kwargs):
                     """
                     Render ``template_name`` with ``kwargs`` as the template context.
                     """
                     return self._get_template(template_name).render(**kwargs)