rhodecode-enterprise-ce Commit - r1840:05beb7b6

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

3

4

#

4

#

5

# This program is free software: you can redistribute it and/or modify

5

# This program is free software: you can redistribute it and/or modify

6

# it under the terms of the GNU Affero General Public License, version 3

6

# it under the terms of the GNU Affero General Public License, version 3

7

# (only), as published by the Free Software Foundation.

7

# (only), as published by the Free Software Foundation.

8

#

8

#

9

# This program is distributed in the hope that it will be useful,

9

# This program is distributed in the hope that it will be useful,

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# but WITHOUT ANY WARRANTY; without even the implied warranty of

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12

# GNU General Public License for more details.

12

# GNU General Public License for more details.

13

#

13

#

14

# You should have received a copy of the GNU Affero General Public License

14

# You should have received a copy of the GNU Affero General Public License

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

15

# along with this program. If not, see <http://www.gnu.org/licenses/>.

16

#

16

#

17

# This program is dual-licensed. If you wish to learn more about the

17

# This program is dual-licensed. If you wish to learn more about the

18

# RhodeCode Enterprise Edition, including its added features, Support services,

18

# RhodeCode Enterprise Edition, including its added features, Support services,

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

19

# and proprietary license terms, please see https://rhodecode.com/licenses/

20

21

22

"""

22

"""

23

Renderer for markup languages with ability to parse using rst or markdown

23

Renderer for markup languages with ability to parse using rst or markdown

24

"""

24

"""

25

26

import re

26

import re

27

import os

27

import os

28

import lxml

28

import lxml

29

import logging

29

import logging

30

import urlparse

30

import urlparse

31

32

from mako.lookup import TemplateLookup

32

from mako.lookup import TemplateLookup

33

from mako.template import Template as MakoTemplate

33

from mako.template import Template as MakoTemplate

34

35

from docutils.core import publish_parts

35

from docutils.core import publish_parts

36

from docutils.parsers.rst import directives

36

from docutils.parsers.rst import directives

37

from docutils import writers

37

from docutils import writers

38

from docutils.writers import html4css1

38

from docutils.writers import html4css1

39

import markdown

39

import markdown

40

41

from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension

41

from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension

42

from rhodecode.lib.utils2 import (

42

from rhodecode.lib.utils2 import (

43

safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)

43

safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)

44

45

log = logging.getLogger(__name__)

45

log = logging.getLogger(__name__)

46

47

# default renderer used to generate automated comments

47

# default renderer used to generate automated comments

48

DEFAULT_COMMENTS_RENDERER = 'rst'

48

DEFAULT_COMMENTS_RENDERER = 'rst'

49

50

51

class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    Custom HTML Translator used for sandboxing potential
    JS injections in ref links
    """

    def visit_reference(self, node):
        """
        Replace ``javascript:`` reference targets with a harmless marker and
        then delegate to the stock html4css1 implementation.

        :param node: docutils reference node; its ``refuri`` attribute is
            overwritten in place when it uses the javascript scheme.
        :return: whatever ``html4css1.HTMLTranslator.visit_reference`` returns
        """
        if 'refuri' in node.attributes:
            refuri = node['refuri']
            if ':' in refuri:
                prefix, _link = refuri.lstrip().split(':', 1)
                # URL schemes are case-insensitive and browsers ignore
                # tab/newline characters inside them, so normalise before
                # comparing; a plain `==` lets e.g. `JavaScript:` through.
                scheme = ''.join(prefix.split()).lower()
                if scheme == 'javascript':
                    # we don't allow javascript type of refs...
                    node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # old style class requires this...
        return html4css1.HTMLTranslator.visit_reference(self, node)

68

69

70

class RhodeCodeWriter(writers.html4css1.Writer):
    """
    html4css1 writer that renders through :class:`CustomHTMLTranslator`.
    """

    def __init__(self):
        # Only the generic docutils Writer initialiser is invoked here (not
        # the html4css1 one); the translator is then assigned explicitly.
        writers.Writer.__init__(self)
        self.translator_class = CustomHTMLTranslator

74

75

76

def relative_links(html_source, server_path):
    """
    Rewrite link targets inside an HTML fragment via :func:`relative_path`.

    The ``src`` of ``<img>``/``<video>`` elements and the ``href`` of ``<a>``
    elements not carrying the ``gfm`` class are each passed through
    ``relative_path(value, server_path)``.

    :param html_source: HTML markup; a falsy value is returned unchanged.
    :param server_path: current request path, forwarded to ``relative_path``.
    :return: the serialised document, or ``html_source`` untouched when it is
        empty or cannot be parsed.
    """
    if not html_source:
        return html_source

    # `import lxml` alone does not load the `html` submodule; without this
    # the AttributeError would be swallowed below and links never rewritten.
    import lxml.html

    try:
        doc = lxml.html.fromstring(html_source)
    except Exception:
        # best effort: unparsable markup is handed back as-is
        log.debug('Unable to parse html source, skipping relative links',
                  exc_info=True)
        return html_source

    rewrite_rules = (
        ('img, video', 'src'),
        ('a:not(.gfm)', 'href'),
    )
    for selector, attr_name in rewrite_rules:
        for el in doc.cssselect(selector):
            # .get() because the attribute is optional, e.g. a <video> using
            # <source> children or a named anchor without href.
            target = el.attrib.get(attr_name)
            if target:
                el.attrib[attr_name] = relative_path(target, server_path)

    return lxml.html.tostring(doc)

96

97

98

def relative_path(path, request_path, is_repo_file=None):
    """
    relative link support, path is a rel path, and request_path is current
    server path (not absolute)

    e.g.

    path = '../logo.png'
    request_path= '/repo/files/path/file.md'
    produces: '/repo/files/logo.png'

    :param path: link target found in the rendered document. An empty value
        yields ``request_path``; ``data:``, ``javascript:``, ``mailto:``,
        anchor (``#``) and ``:``-prefixed targets, as well as URLs that carry
        a network location, are returned without rewriting.
    :param request_path: server path of the document being rendered; when
        empty, ``path`` is returned as is.
    :param is_repo_file: optional callable taking ``request_path`` and
        returning whether it points at a file. Defaults to a check that
        always answers True.
    :return: the resolved path, always starting with ``/`` when rewritten.
    """
    # TODO(marcink): unicode/str support ?
    # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))

    def dummy_check(p):
        return True  # assume default is a valid file path

    is_repo_file = is_repo_file or dummy_check
    if not path:
        return request_path

    path = safe_unicode(path)
    request_path = safe_unicode(request_path)

    # skip data, mail, anchor, invalid links; `mailto:` has no network
    # location, so without this entry it would be mangled into a repo path
    non_rewritable = (u'data:', u'javascript:', u'mailto:', u'#', u':')
    if path.startswith(non_rewritable):
        return path

    is_absolute = bool(urlparse.urlparse(path).netloc)
    if is_absolute:
        return path

    if not request_path:
        return path

    if path.startswith(u'/'):
        path = path[1:]

    if path.startswith(u'./'):
        path = path[2:]

    parts = request_path.split('/')
    # compute how deep we need to traverse the request_path
    depth = 0

    if is_repo_file(request_path):
        # if request path is a VALID file, we use a relative path with
        # one level up
        depth += 1

    # every leading '../' climbs one more segment
    while path.startswith(u'../'):
        depth += 1
        path = path[3:]

    if depth > 0:
        parts = parts[:-depth]

    parts.append(path)
    final_path = u'/'.join(parts).lstrip(u'/')

    return u'/' + final_path

159

160

161

class MarkupRenderer(object):
    # directive names that rst() registers as None in docutils
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    # filename patterns used by _detect_renderer()
    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']

    # `extensions` is passed by keyword: positional arguments to
    # markdown.Markdown are deprecated by the library.
    markdown_renderer = markdown.Markdown(
        extensions=extensions, safe_mode=True, enable_attributes=False)

    markdown_renderer_flavored = markdown.Markdown(
        extensions=extensions + [GithubFlavoredMarkdownExtension()],
        safe_mode=True, enable_attributes=False)

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

197

198

def _detect_renderer(self, source, filename=None):

198

def _detect_renderer(self, source, filename=None):

199

"""

199

"""

200

runs detection of what renderer should be used for generating html

200

runs detection of what renderer should be used for generating html

201

from a markup language

201

from a markup language

202

203

filename can be also explicitly a renderer name

203

filename can be also explicitly a renderer name

204

205

:param source:

205

:param source:

206

:param filename:

206

:param filename:

207

"""

207

"""

208

209

if MarkupRenderer.MARKDOWN_PAT.findall(filename):

209

if MarkupRenderer.MARKDOWN_PAT.findall(filename):

210

detected_renderer = 'markdown'

210

detected_renderer = 'markdown'

211

elif MarkupRenderer.RST_PAT.findall(filename):

211

elif MarkupRenderer.RST_PAT.findall(filename):

212

detected_renderer = 'rst'

212

detected_renderer = 'rst'

213

elif MarkupRenderer.JUPYTER_PAT.findall(filename):

213

elif MarkupRenderer.JUPYTER_PAT.findall(filename):

214

detected_renderer = 'jupyter'

214

detected_renderer = 'jupyter'

215

elif MarkupRenderer.PLAIN_PAT.findall(filename):

215

elif MarkupRenderer.PLAIN_PAT.findall(filename):

216

detected_renderer = 'plain'

216

detected_renderer = 'plain'

217

else:

217

else:

218

detected_renderer = 'plain'

218

detected_renderer = 'plain'

219

220

return getattr(MarkupRenderer, detected_renderer)

220

return getattr(MarkupRenderer, detected_renderer)

221

222

@classmethod

222

@classmethod

223

def renderer_from_filename(cls, filename, exclude):

223

def renderer_from_filename(cls, filename, exclude):

224

"""

224

"""

225

Detect renderer markdown/rst from filename and optionally use exclude

225

Detect renderer markdown/rst from filename and optionally use exclude

226

list to remove some options. This is mostly used in helpers.

226

list to remove some options. This is mostly used in helpers.

227

Returns None when no renderer can be detected.

227

Returns None when no renderer can be detected.

228

"""

228

"""

229

def _filter(elements):

229

def _filter(elements):

230

if isinstance(exclude, (list, tuple)):

230

if isinstance(exclude, (list, tuple)):

231

return [x for x in elements if x not in exclude]

231

return [x for x in elements if x not in exclude]

232

return elements

232

return elements

233

234

if filename.endswith(

234

if filename.endswith(

235

tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):

235

tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):

236

return 'markdown'

236

return 'markdown'

237

if filename.endswith(tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):

237

if filename.endswith(tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):

238

return 'rst'

238

return 'rst'

239

240

return None

240

return None

241

242

def render(self, source, filename=None):
    """
    Render ``source`` with the renderer picked by ``_detect_renderer``.

    :param source: markup text to render
    :param filename: file name handed to the renderer detection
    :return: the output of the selected renderer
    """
    render_func = self._detect_renderer(source, filename)
    return render_func(source)

255

256

@classmethod
def _flavored_markdown(cls, text):
    """
    Github style flavored markdown

    Pre-processes ``text`` before it is handed to the markdown converter:
    ``<pre>`` blocks are swapped for placeholders, underscores of a leading
    ``foo_bar_baz`` style word are escaped, then the ``<pre>`` blocks are
    put back (each preceded by a blank line).

    :param text: markdown source
    :return: the pre-processed markdown source
    """

    # Extract pre blocks.
    extractions = {}

    def pre_extraction_callback(matchobj):
        digest = md5_safe(matchobj.group(0))
        extractions[digest] = matchobj.group(0)
        return "{gfm-extraction-%s}" % digest
    pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
    text = re.sub(pattern, pre_extraction_callback, text)

    # Prevent foo_bar_baz from ending up with an italic word in the middle.
    def italic_callback(matchobj):
        s = matchobj.group(0)
        if s.count('_') >= 2:
            return s.replace('_', r'\_')
        return s
    # NOTE: no re.MULTILINE here, so `^` anchors at the start of the text
    # only and just a leading word is affected.
    text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

    # Insert pre block extractions.
    def pre_insert_callback(matchobj):
        extracted = extractions.get(matchobj.group(1))
        if extracted is None:
            # a look-alike placeholder written by the author, not one of
            # ours: leave it untouched instead of failing with KeyError
            return matchobj.group(0)
        return '\n\n' + extracted
    text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                  pre_insert_callback, text)

    return text

289

290

@classmethod

290

@classmethod

291

def urlify_text(cls, text):

291

def urlify_text(cls, text):

292

url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'

292

url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'

293

r'|[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

293

r'|[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

294

295

def url_func(match_obj):

295

def url_func(match_obj):

296

url_full = match_obj.groups()[0]

296

url_full = match_obj.groups()[0]

297

return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

297

return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

298

299

return url_pat.sub(url_func, text)

299

return url_pat.sub(url_func, text)

300

301

@classmethod
def plain(cls, source, universal_newline=True):
    """
    Render ``source`` as plain text: URLs become anchors and every newline
    becomes ``<br />``; the result is also prefixed with ``<br />``.

    :param source: text to render
    :param universal_newline: when true, line endings are first normalised
        to ``\\n`` via ``splitlines()``
    :return: the resulting HTML string
    """
    text = safe_unicode(source)
    if universal_newline:
        text = '\n'.join(text.splitlines())

    linked = cls.urlify_text(text)
    return '<br />' + linked.replace("\n", '<br />')

310

311

@classmethod
def markdown(cls, source, safe=True, flavored=True, mentions=False):
    """
    Render markdown ``source`` to HTML.

    :param source: markdown text
    :param safe: when true, a rendering error is logged and the text is
        rendered with :meth:`plain` instead; when false the error is
        re-raised
    :param flavored: use the Github flavored renderer and pre-processing
    :param mentions: wrap matches of ``MENTIONS_REGEX`` in ``**@name**``
        before rendering
    :return: rendered HTML
    """
    # It does not allow to insert inline HTML. In presence of HTML tags, it
    # will replace them instead with [HTML_REMOVED]. This is controlled by
    # the safe_mode=True parameter of the markdown method.

    if mentions:
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}
        mention_hl = mention_pat.sub(wrapp, source).strip()
        # we extracted mentions render with this using Mentions false
        return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                            mentions=False)

    if flavored:
        markdown_renderer = cls.markdown_renderer_flavored
    else:
        markdown_renderer = cls.markdown_renderer

    source = safe_unicode(source)
    try:
        if flavored:
            source = cls._flavored_markdown(source)
        # The renderer objects are class attributes shared by every call;
        # reset() clears state kept from the previous document so it cannot
        # leak into this conversion.
        markdown_renderer.reset()
        return markdown_renderer.convert(source)
    except Exception:
        log.exception('Error when rendering Markdown')
        if safe:
            log.debug('Fallback to render in plain mode')
            return cls.plain(source)
        else:
            raise

345

346

@classmethod
def rst(cls, source, safe=True, mentions=False):
    """
    Render reStructuredText ``source`` to HTML through docutils.

    :param source: rst text
    :param safe: when true, a rendering error is logged and the text is
        rendered with :meth:`plain` instead; when false the error is
        re-raised
    :param mentions: wrap matches of ``MENTIONS_REGEX`` in ``**@name**``
        before rendering
    :return: the ``html_title`` and ``fragment`` parts joined together
    """
    if mentions:
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}
        mention_hl = mention_pat.sub(wrapp, source).strip()
        # we extracted mentions render with this using Mentions false
        return cls.rst(mention_hl, safe=safe, mentions=False)

    source = safe_unicode(source)
    try:
        docutils_settings = {
            'input_encoding': 'unicode',
            'report_level': 4,
            # the markup is user supplied: also switch off the docutils
            # features that read external files or emit raw output
            'file_insertion_enabled': False,
            'raw_enabled': False,
        }

        # Registering None under a directive name disables that directive.
        # Only the disallowed names are registered; the settings keys above
        # are not directives and must not end up in the registry.
        for directive_name in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
            directives.register_directive(directive_name, None)

        parts = publish_parts(source=source,
                              writer=RhodeCodeWriter(),
                              settings_overrides=docutils_settings)

        return parts['html_title'] + parts["fragment"]
    except Exception:
        log.exception('Error when rendering RST')
        if safe:
            log.debug('Fallbacking to render in plain mode')
            return cls.plain(source)
        else:
            raise

382

383

@classmethod

383

@classmethod

384

def jupyter(cls, source, safe=True):

384

def jupyter(cls, source, safe=True):

385

from rhodecode.lib import helpers

385

from rhodecode.lib import helpers

386

387

from traitlets.config import Config

387

from traitlets.config import Config

388

import nbformat

388

import nbformat

389

from nbconvert import HTMLExporter

389

from nbconvert import HTMLExporter

390

from nbconvert.preprocessors import Preprocessor

390

from nbconvert.preprocessors import Preprocessor

391

392

class CustomHTMLExporter(HTMLExporter):

392

class CustomHTMLExporter(HTMLExporter):

393

def _template_file_default(self):

393

def _template_file_default(self):

394

return 'basic'

394

return 'basic'

395

396

class Sandbox(Preprocessor):

396

class Sandbox(Preprocessor):

397

398

def preprocess(self, nb, resources):

398

def preprocess(self, nb, resources):

399

sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'

399

sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'

400

for cell in nb['cells']:

400

for cell in nb['cells']:

401

if safe and 'outputs' in cell:

401

if safe and 'outputs' in cell:

402

for cell_output in cell['outputs']:

402

for cell_output in cell['outputs']:

403

if 'data' in cell_output:

403

if 'data' in cell_output:

404

if 'application/javascript' in cell_output['data']:

404

if 'application/javascript' in cell_output['data']:

405

cell_output['data']['text/plain'] = sandbox_text

405

cell_output['data']['text/plain'] = sandbox_text

406

cell_output['data'].pop('application/javascript', None)

406

cell_output['data'].pop('application/javascript', None)

407

return nb, resources

407

return nb, resources

408

409

def _sanitize_resources(resources):

409

def _sanitize_resources(resources):

410

"""

410

"""

411

Skip/sanitize some of the CSS generated and included in jupyter

411

Skip/sanitize some of the CSS generated and included in jupyter

412

so it doesn't messes up UI so much

412

so it doesn't messes up UI so much

413

"""

413

"""

414

415

# TODO(marcink): probably we should replace this with whole custom

415

# TODO(marcink): probably we should replace this with whole custom

416

# CSS set that doesn't screw up, but jupyter generated html has some

416

# CSS set that doesn't screw up, but jupyter generated html has some

417

# special markers, so it requires Custom HTML exporter template with

417

# special markers, so it requires Custom HTML exporter template with

418

# _default_template_path_default, to achieve that

418

# _default_template_path_default, to achieve that

419

420

# strip the reset CSS

420

# strip the reset CSS

421

resources[0] = resources[0][resources[0].find('/*! Source'):]

421

resources[0] = resources[0][resources[0].find('/*! Source'):]

422

return resources

422

return resources

423

424

def as_html(notebook):

424

def as_html(notebook):

425

conf = Config()

425

conf = Config()

426

conf.CustomHTMLExporter.preprocessors = [Sandbox]

426

conf.CustomHTMLExporter.preprocessors = [Sandbox]

427

html_exporter = CustomHTMLExporter(config=conf)

427

html_exporter = CustomHTMLExporter(config=conf)

428

429

(body, resources) = html_exporter.from_notebook_node(notebook)

429

(body, resources) = html_exporter.from_notebook_node(notebook)

430

header = ''

430

header = ''

431

js = MakoTemplate(r'''

431

js = MakoTemplate(r'''

432

432

433

433

434

434

435

MathJax.Hub.Config({

435

MathJax.Hub.Config({

436

jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],

436

jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],

437

extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],

437

extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],

438

TeX: {

438

TeX: {

439

extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]

439

extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]

440

},

440

},

441

tex2jax: {

441

tex2jax: {

442

inlineMath: [ ['$','$'], ["\$","\$"] ],

442

inlineMath: [ ['$','$'], ["\$","\$"] ],

443

displayMath: [ ['$$','$$'], ["\\[","\\]"] ],

443

displayMath: [ ['$$','$$'], ["\\[","\\]"] ],

444

processEscapes: true,

444

processEscapes: true,

445

processEnvironments: true

445

processEnvironments: true

446

},

446

},

447

// Center justify equations in code and markdown cells. Elsewhere

447

// Center justify equations in code and markdown cells. Elsewhere

448

// we use CSS to left justify single line equations in code cells.

448

// we use CSS to left justify single line equations in code cells.

449

displayAlign: 'center',

449

displayAlign: 'center',

450

"HTML-CSS": {

450

"HTML-CSS": {

451

styles: {'.MathJax_Display': {"margin": 0}},

451

styles: {'.MathJax_Display': {"margin": 0}},

452

linebreaks: { automatic: true },

452

linebreaks: { automatic: true },

453

availableFonts: ["STIX", "TeX"]

453

availableFonts: ["STIX", "TeX"]

454

},

454

},

455

showMathMenu: false

455

showMathMenu: false

456

});

456

});

457

</script>

457

</script>

458

458

459

459

460

''').render(h=helpers)

460

''').render(h=helpers)

461

462

css = '<style>{}</style>'.format(

462

css = '<style>{}</style>'.format(

463

''.join(_sanitize_resources(resources['inlining']['css'])))

463

''.join(_sanitize_resources(resources['inlining']['css'])))

464

465

body = '\n'.join([header, css, js, body])

465

body = '\n'.join([header, css, js, body])

466

return body, resources

466

return body, resources

467

468

notebook = nbformat.reads(source, as_version=4)

468

notebook = nbformat.reads(source, as_version=4)

469

(body, resources) = as_html(notebook)

469

(body, resources) = as_html(notebook)

470

return body

470

return body

471

472

473

class RstTemplateRenderer(object):
    """
    Renders Mako templates looked up in the ``templates/rst_templates``
    directory located one level above this module's directory.
    """

    def __init__(self):
        module_dir = os.path.dirname(__file__)
        package_root = os.path.abspath(os.path.dirname(module_dir))
        template_dir = os.path.join(
            package_root, 'templates', 'rst_templates')

        self.template_store = TemplateLookup(
            directories=[template_dir],
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """Fetch ``templatename`` from the template store."""
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """
        Render the named template.

        :param template_name: name of the template inside the store
        :param kwargs: variables passed to the template
        :return: the rendered template
        """
        return self._get_template(template_name).render(**kwargs)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             # Copyright (C) 2011-2017 RhodeCode GmbH
             #
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU Affero General Public License, version 3
             # (only), as published by the Free Software Foundation.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU Affero General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             #
             # This program is dual-licensed. If you wish to learn more about the
             # RhodeCode Enterprise Edition, including its added features, Support services,
             # and proprietary license terms, please see https://rhodecode.com/licenses/
             """
             Renderer for markup languages with ability to parse using rst or markdown
             """
             import re
             import os
             import lxml
             import logging
             import urlparse
             from mako.lookup import TemplateLookup
             from mako.template import Template as MakoTemplate
             from docutils.core import publish_parts
             from docutils.parsers.rst import directives
             from docutils import writers
             from docutils.writers import html4css1
             import markdown
             from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
             from rhodecode.lib.utils2 import (
                 safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)
             # module-level logger, named after this module
             log = logging.getLogger(__name__)
             # default renderer used to generate automated comments
             DEFAULT_COMMENTS_RENDERER = 'rst'
             class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
                 """
                 Custom HTML Translator used for sandboxing potential
                 JS injections in ref links
                 """
                 def visit_reference(self, node):
                     """
                     Replace ``javascript:`` reference URIs with a harmless alert and
                     then delegate to the stock html4css1 translator.
                     :param node: reference node being visited
                     """
                     if 'refuri' in node.attributes:
                         refuri = node['refuri']
                         if ':' in refuri:
                             prefix = refuri.lstrip().split(':', 1)[0]
                             # URI schemes compare case-insensitively, and browsers are
                             # documented to ignore control characters/spaces inside
                             # them, so normalise before comparing; a plain equality
                             # check lets e.g. `JavaScript:` through
                             scheme = re.sub(r'[\x00-\x20]+', '', prefix).lower()
                             if scheme == 'javascript':
                                 # we don't allow javascript type of refs...
                                 node['refuri'] = 'javascript:alert("SandBoxedJavascript")'
                     # old style class requires this...
                     return html4css1.HTMLTranslator.visit_reference(self, node)
             class RhodeCodeWriter(writers.html4css1.Writer):
                 """
                 html4css1 based writer that translates documents with
                 CustomHTMLTranslator.
                 """
                 def __init__(self):
                     # initialise through the generic docutils Writer directly
                     writers.Writer.__init__(self)
                     # translator class this writer uses to produce html
                     self.translator_class = CustomHTMLTranslator
             def relative_links(html_source, server_path):
                 """
                 Pass the `src` of img/video elements and the `href` of anchors
                 without the `gfm` class through `relative_path`, using `server_path`
                 as the request path.
                 :param html_source: html markup to process
                 :param server_path: current server path handed to `relative_path`
                 :returns: the serialized, rewritten document; `html_source` itself
                     when it is empty or could not be parsed
                 """
                 if not html_source:
                     return html_source
                 # NOTE(review): only `import lxml` is visible at the top of the file;
                 # if `lxml.html` is not loaded elsewhere the attribute access fails and
                 # the except below silently returns the input unchanged -- confirm
                 try:
                     doc = lxml.html.fromstring(html_source)
                 except Exception:
                     # unparsable markup is returned as-is
                     return html_source
                 for el in doc.cssselect('img, video'):
                     # the -/+ pair below is the change of this commit: `.get()` yields
                     # None instead of raising KeyError for elements without `src`
-                    src = el.attrib['src']
+                    src = el.attrib.get('src')
                     if src:
                         el.attrib['src'] = relative_path(src, server_path)
                 for el in doc.cssselect('a:not(.gfm)'):
                     # same change for anchors that carry no `href`
-                    src = el.attrib['href']
+                    src = el.attrib.get('href')
                     if src:
                         el.attrib['href'] = relative_path(src, server_path)
                 return lxml.html.tostring(doc)
             def relative_path(path, request_path, is_repo_file=None):
                 """
                 Resolve a link found in rendered markup against the current
                 (non absolute) server path.
                 Example:
                 path = '../logo.png'
                 request_path= '/repo/files/path/file.md'
                 gives: '/repo/files/logo.png'
                 `is_repo_file` is an optional callable receiving `request_path`;
                 a true result means the request path points at a file, so its last
                 segment is dropped. Without it every request path counts as a file.
                 """
                 # TODO(marcink): unicode/str support ?
                 # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))
                 file_check = is_repo_file or (lambda p: True)
                 if not path:
                     return request_path
                 link = safe_unicode(path)
                 base_path = safe_unicode(request_path)
                 # data, javascript, anchor and invalid links are handed back untouched
                 untouched_prefixes = (u'data:', u'javascript:', u'#', u':')
                 if link.startswith(untouched_prefixes):
                     return link
                 # links with a network location are absolute; without a request
                 # path there is nothing to resolve against
                 if urlparse.urlparse(link).netloc or not base_path:
                     return link
                 # drop one leading '/' and then one leading './'
                 for leading in (u'/', u'./'):
                     if link.startswith(leading):
                         link = link[len(leading):]
                 segments = base_path.split('/')
                 # how many trailing segments of the request path must go: one when
                 # it is a file, plus one per leading '../' of the link
                 levels_up = 1 if file_check(base_path) else 0
                 while link.startswith(u'../'):
                     link = link[3:]
                     levels_up += 1
                 if levels_up > 0:
                     segments = segments[:-levels_up]
                 segments.append(link)
                 return u'/' + u'/'.join(segments).lstrip(u'/')
             class MarkupRenderer(object):
                 """
                 Renders markup sources (markdown, rst, jupyter notebooks, plain text)
                 to html, picking the renderer from the file name.
                 """
                 # rst directives that get registered as None before rendering rst
                 RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
                 # .md / .mkd / .mkdn / .mdown / .markdown, any letter case
                 MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
                 # .rst / .rest, any letter case
                 RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
                 # .ipynb, any letter case
                 JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
                 # a name that is exactly "readme", any letter case
                 PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)
                 # markdown extensions enabled on both renderer instances below
                 extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
                 # renderer instances are created once, at class definition time, and
                 # shared by every call of `markdown()`
                 markdown_renderer = markdown.Markdown(
                     extensions, safe_mode=True, enable_attributes=False)
                 # same as above plus the github flavored markdown extension
                 markdown_renderer_flavored = markdown.Markdown(
                     extensions + [GithubFlavoredMarkdownExtension()], safe_mode=True,
                     enable_attributes=False)
                 # Extensions paired with weights; a lower weight comes first. The
                 # weights control the order in which extensions are attached to
                 # readme names.
                 PLAIN_EXTS = [
                     # prefer no extension
                     ('', 0),  # special case that renders READMES names without extension
                     ('.text', 2), ('.TEXT', 2),
                     ('.txt', 3), ('.TXT', 3)
                 ]
                 # (extension, weight) pairs for reStructuredText files
                 RST_EXTS = [
                     ('.rst', 1), ('.rest', 1),
                     ('.RST', 2), ('.REST', 2)
                 ]
                 # (extension, weight) pairs for markdown files
                 MARKDOWN_EXTS = [
                     ('.md', 1), ('.MD', 1),
                     ('.mkdn', 2), ('.MKDN', 2),
                     ('.mdown', 3), ('.MDOWN', 3),
                     ('.markdown', 4), ('.MARKDOWN', 4)
                 ]
                 def _detect_renderer(self, source, filename=None):
                     """
                     Runs detection of what renderer should be used for generating html
                     from a markup language, based on the extension of `filename`.
                     :param source: markup source; not inspected by the detection
                     :param filename: name of the file to render; a missing or empty
                         name, or an unrecognised extension, selects the plain renderer
                     :returns: the `markdown`, `rst`, `jupyter` or `plain` renderer of
                         MarkupRenderer
                     """
                     detected_renderer = 'plain'
                     # without a filename there is nothing to match, and the compiled
                     # patterns raise TypeError when handed None
                     if filename:
                         candidates = (
                             ('markdown', MarkupRenderer.MARKDOWN_PAT),
                             ('rst', MarkupRenderer.RST_PAT),
                             ('jupyter', MarkupRenderer.JUPYTER_PAT),
                         )
                         # first matching pattern wins; readme-like and unknown names
                         # keep the plain default
                         for renderer_name, pattern in candidates:
                             if pattern.search(filename):
                                 detected_renderer = renderer_name
                                 break
                     return getattr(MarkupRenderer, detected_renderer)
                 @classmethod
                 def renderer_from_filename(cls, filename, exclude):
                     """
                     Tell which renderer ('markdown' or 'rst') fits `filename`, judging
                     by its extension. Extensions listed in `exclude` are ignored when
                     `exclude` is a list or tuple. This is mostly used in helpers.
                     Returns None when no renderer can be detected.
                     """
                     skipped = exclude if isinstance(exclude, (list, tuple)) else ()
                     def _suffixes(weighted_exts):
                         # non-empty extensions that were not excluded
                         return tuple(
                             ext for ext, _weight in weighted_exts
                             if ext and ext not in skipped)
                     if filename.endswith(_suffixes(cls.MARKDOWN_EXTS)):
                         return 'markdown'
                     if filename.endswith(_suffixes(cls.RST_EXTS)):
                         return 'rst'
                     return None
                 def render(self, source, filename=None):
                     """
                     Render `source` with the renderer that `_detect_renderer` picks
                     for `filename` and return the produced html.
                     :param source: markup source to render
                     :param filename: file name used for the renderer detection
                     """
                     return self._detect_renderer(source, filename)(source)
                 @classmethod
                 def _flavored_markdown(cls, text):
                     """
                     Github style flavored markdown pre-processing.
                     `<pre>` blocks are taken out while underscores get escaped and are
                     put back afterwards.
                     :param text: markdown source
                     :returns: the pre-processed markdown source
                     """
                     # Extract pre blocks.
                     extractions = {}
                     def pre_extraction_callback(matchobj):
                         digest = md5_safe(matchobj.group(0))
                         extractions[digest] = matchobj.group(0)
                         return "{gfm-extraction-%s}" % digest
                     pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
                     text = re.sub(pattern, pre_extraction_callback, text)
                     # Prevent foo_bar_baz from ending up with an italic word in the middle.
                     def italic_callback(matchobj):
                         s = matchobj.group(0)
                         if s.count('_') >= 2:
                             return s.replace('_', r'\_')
                         return s
                     # NOTE(review): no re.MULTILINE here, so `^` anchors only at the
                     # very start of the text -- confirm this is intended
                     text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
                     # Insert pre block extractions.
                     def pre_insert_callback(matchobj):
                         digest = matchobj.group(1)
                         if digest not in extractions:
                             # a placeholder look-alike that was already part of the
                             # input; keep it verbatim instead of failing with KeyError
                             return matchobj.group(0)
                         return '\n\n' + extractions[digest]
                     text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                                   pre_insert_callback, text)
                     return text
                 @classmethod
                 def urlify_text(cls, text):
                     """
                     Wrap every http(s) url found in `text` into an html anchor that
                     points at that url.
                     """
                     def _as_anchor(match_obj):
                         url = match_obj.group(1)
                         return '<a href="%s">%s</a>' % (url, url)
                     return re.sub(
                         r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)',
                         _as_anchor, text)
                 @classmethod
                 def plain(cls, source, universal_newline=True):
                     """
                     Render `source` as plain text: urls become anchors and every
                     newline becomes `<br />`, with one more `<br />` in front.
                     :param source: text to render
                     :param universal_newline: normalise all line endings to `\\n` first
                     """
                     text = safe_unicode(source)
                     if universal_newline:
                         text = '\n'.join(text.splitlines())
                     linked = cls.urlify_text(text)
                     return '<br />' + linked.replace("\n", '<br />')
                 @classmethod
                 def markdown(cls, source, safe=True, flavored=True, mentions=False):
                     """
                     Render markdown `source` to html.
                     :param source: markdown source
                     :param safe: on a rendering error fall back to the plain renderer
                         instead of re-raising
                     :param flavored: use the github flavored renderer and pre-processing
                     :param mentions: wrap matches of MENTIONS_REGEX in bold before
                         rendering
                     """
                     # It does not allow to insert inline HTML. In presence of HTML tags, it
                     # will replace them instead with [HTML_REMOVED]. This is controlled by
                     # the safe_mode=True parameter of the markdown method.
                     if flavored:
                         markdown_renderer = cls.markdown_renderer_flavored
                     else:
                         markdown_renderer = cls.markdown_renderer
                     if mentions:
                         mention_pat = re.compile(MENTIONS_REGEX)
                         def wrapp(match_obj):
                             uname = match_obj.groups()[0]
                             return ' **@%(uname)s** ' % {'uname': uname}
                         mention_hl = mention_pat.sub(wrapp, source).strip()
                         # we extracted mentions render with this using Mentions false
                         return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                             mentions=False)
                     source = safe_unicode(source)
                     try:
                         if flavored:
                             source = cls._flavored_markdown(source)
                         # the renderer instances live on the class and are reused for
                         # every document; reset them so state collected while
                         # converting a previous document (link references, footnotes)
                         # cannot leak into this one
                         markdown_renderer.reset()
                         return markdown_renderer.convert(source)
                     except Exception:
                         log.exception('Error when rendering Markdown')
                         if safe:
                             log.debug('Fallback to render in plain mode')
                             return cls.plain(source)
                         else:
                             raise
                 @classmethod
                 def rst(cls, source, safe=True, mentions=False):
                     """
                     Render reStructuredText `source` to html.
                     :param source: rst source
                     :param safe: on a rendering error fall back to the plain renderer
                         instead of re-raising
                     :param mentions: wrap matches of MENTIONS_REGEX in bold before
                         rendering
                     :returns: the html title followed by the html fragment
                     """
                     if mentions:
                         mention_pat = re.compile(MENTIONS_REGEX)
                         def wrapp(match_obj):
                             uname = match_obj.groups()[0]
                             return ' **@%(uname)s** ' % {'uname': uname}
                         mention_hl = mention_pat.sub(wrapp, source).strip()
                         # we extracted mentions render with this using Mentions false
                         return cls.rst(mention_hl, safe=safe, mentions=False)
                     source = safe_unicode(source)
                     try:
                         # register only the disallowed directives (as None); the
                         # settings keys below are not directive names and must not
                         # end up in the docutils directive registry
                         for directive_name in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                             directives.register_directive(directive_name, None)
                         docutils_settings = dict(
                             [(alias, None) for alias in
                              cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])
                         docutils_settings.update({'input_encoding': 'unicode',
                                                   'report_level': 4})
                         parts = publish_parts(source=source,
                                               writer=RhodeCodeWriter(),
                                               settings_overrides=docutils_settings)
                         return parts['html_title'] + parts["fragment"]
                     except Exception:
                         log.exception('Error when rendering RST')
                         if safe:
                             log.debug('Fallbacking to render in plain mode')
                             return cls.plain(source)
                         else:
                             raise
                 @classmethod
                 def jupyter(cls, source, safe=True):
                     """
                     Render a Jupyter notebook to html.
                     :param source: notebook document, read with
                         `nbformat.reads(source, as_version=4)`
                     :param safe: replace `application/javascript` cell outputs with a
                         plain text placeholder before exporting
                     :returns: html made of a marker comment, the inlined CSS, the
                         MathJax bootstrap and the exported notebook body
                     """
                     from rhodecode.lib import helpers
                     from traitlets.config import Config
                     import nbformat
                     from nbconvert import HTMLExporter
                     from nbconvert.preprocessors import Preprocessor
                     class CustomHTMLExporter(HTMLExporter):
                         def _template_file_default(self):
                             return 'basic'
                     class Sandbox(Preprocessor):
                         def preprocess(self, nb, resources):
                             sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                             for cell in nb['cells']:
                                 if safe and 'outputs' in cell:
                                     for cell_output in cell['outputs']:
                                         if 'data' in cell_output:
                                             if 'application/javascript' in cell_output['data']:
                                                 cell_output['data']['text/plain'] = sandbox_text
                                                 cell_output['data'].pop('application/javascript', None)
                             return nb, resources
                     def _sanitize_resources(resources):
                         """
                         Skip/sanitize some of the CSS generated and included in jupyter
                         so it doesn't mess up the UI so much
                         """
                         # TODO(marcink): probably we should replace this with whole custom
                         # CSS set that doesn't screw up, but jupyter generated html has some
                         # special markers, so it requires Custom HTML exporter template with
                         # _default_template_path_default, to achieve that
                         if resources:
                             # strip the reset CSS, i.e. everything in front of the
                             # marker; when the marker is missing keep the CSS as is,
                             # since slicing from find()'s -1 would leave only the
                             # last character
                             marker_pos = resources[0].find('/*! Source')
                             if marker_pos != -1:
                                 resources[0] = resources[0][marker_pos:]
                         return resources
                     def as_html(notebook):
                         conf = Config()
                         conf.CustomHTMLExporter.preprocessors = [Sandbox]
                         html_exporter = CustomHTMLExporter(config=conf)
                         (body, resources) = html_exporter.from_notebook_node(notebook)
                         header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
                         js = MakoTemplate(r'''
                         <!-- Load mathjax -->
                             <!-- MathJax configuration -->
                             <script type="text/x-mathjax-config">
                             MathJax.Hub.Config({
                                 jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                                 extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                                 TeX: {
                                     extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                                 },
                                 tex2jax: {
                                     inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                                     displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                                     processEscapes: true,
                                     processEnvironments: true
                                 },
                                 // Center justify equations in code and markdown cells. Elsewhere
                                 // we use CSS to left justify single line equations in code cells.
                                 displayAlign: 'center',
                                 "HTML-CSS": {
                                     styles: {'.MathJax_Display': {"margin": 0}},
                                     linebreaks: { automatic: true },
                                     availableFonts: ["STIX", "TeX"]
                                 },
                                 showMathMenu: false
                             });
                             </script>
                             <!-- End of mathjax configuration -->
                             <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
                         ''').render(h=helpers)
                         css = '<style>{}</style>'.format(
                             ''.join(_sanitize_resources(resources['inlining']['css'])))
                         body = '\n'.join([header, css, js, body])
                         return body, resources
                     notebook = nbformat.reads(source, as_version=4)
                     (body, resources) = as_html(notebook)
                     return body
             class RstTemplateRenderer(object):
                 """
                 Renders the templates stored in the `templates/rst_templates`
                 directory, located one level above the directory of this module.
                 """
                 def __init__(self):
                     parent_dir = os.path.dirname(os.path.dirname(__file__))
                     templates_dir = os.path.join(
                         os.path.abspath(parent_dir), 'templates', 'rst_templates')
                     # templates get the rhodecode helpers imported as `h`
                     self.template_store = TemplateLookup(
                         directories=[templates_dir],
                         input_encoding='utf-8',
                         imports=['from rhodecode.lib import helpers as h'])
                 def _get_template(self, templatename):
                     """Look `templatename` up in the template store."""
                     return self.template_store.get_template(templatename)
                 def render(self, template_name, **kwargs):
                     """Render the template `template_name`, passing `kwargs` to it."""
                     return self._get_template(template_name).render(**kwargs)