upstream/ipython Commit - r6537:60b2da9c

1

2

//

3

// Licensed under the Apache License, Version 2.0 (the "License");

4

// you may not use this file except in compliance with the License.

5

// You may obtain a copy of the License at

6

//

7

// http://www.apache.org/licenses/LICENSE-2.0

8

//

9

// Unless required by applicable law or agreed to in writing, software

10

// distributed under the License is distributed on an "AS IS" BASIS,

11

// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12

// See the License for the specific language governing permissions and

13

// limitations under the License.

14

15

16

/**

17

* @fileoverview

18

* some functions for browser-side pretty printing of code contained in html.

19

*

20

*

21

* For a fairly comprehensive set of languages see the

22

* <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a>

23

* file that came with this source. At a minimum, the lexer should work on a

24

* number of languages including C and friends, Java, Python, Bash, SQL, HTML,

25

* XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk

26

* and a subset of Perl, but, because of commenting conventions, doesn't work on

27

* Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.

28

*

29

* Usage: <ol>

30

* <li> include this source file in an html page via

31

* {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}

32

* <li> define style rules. See the example page for examples.

33

* <li> mark the {@code <pre>} and {@code <code>} tags in your source with

34

* {@code class=prettyprint.}

35

* You can also use the (html deprecated) {@code <xmp>} tag, but the pretty

36

* printer needs to do more substantial DOM manipulations to support that, so

37

* some css styles may not be preserved.

38

* </ol>

39

* That's it. I wanted to keep the API as simple as possible, so there's no

40

* need to specify which language the code is in, but if you wish, you can add

41

* another class to the {@code <pre>} or {@code <code>} element to specify the

42

* language, as in {@code <pre class="prettyprint lang-java">}. Any class that

43

* starts with "lang-" followed by a file extension, specifies the file type.

44

* See the "lang-*.js" files in this directory for code that implements

45

* per-language file handlers.

46

*

47

* Change log:

48

* cbeust, 2006/08/22

49

* <blockquote>

50

* Java annotations (start with "@") are now captured as literals ("lit")

51

* </blockquote>

52

* @requires console

53

*/

54

55

// JSLint declarations

56

/*global console, document, navigator, setTimeout, window */

57

58

/**

59

* Split {@code prettyPrint} into multiple timeouts so as not to interfere with

60

* UI events.

61

* If set to {@code false}, {@code prettyPrint()} is synchronous.

62

*/

63

// Default is true: prettyPrint() runs in continuation (multi-timeout) mode.
window['PR_SHOULD_USE_CONTINUATION'] = true;

64

65

(function () {

66

// Keyword lists for various languages.
// We use things that coerce to strings to make them compact when minified
// and to defeat aggressive optimizers that fold large string constants.
//
// Each list is either a comma-separated string or an array whose elements are
// such strings / other lists.  Coercing an array to a string joins its
// elements with ",", so placing a parent list first "inherits" its keywords.
var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS,"auto,case,char,const,default," +
    "double,enum,extern,float,goto,int,long,register,short,signed,sizeof," +
    "static,struct,switch,typedef,union,unsigned,void,volatile"];
var COMMON_KEYWORDS = [C_KEYWORDS,"catch,class,delete,false,import," +
    "new,operator,private,protected,public,this,throw,true,try,typeof"];
var CPP_KEYWORDS = [COMMON_KEYWORDS,"alignof,align_union,asm,axiom,bool," +
    "concept,concept_map,const_cast,constexpr,decltype," +
    "dynamic_cast,explicit,export,friend,inline,late_check," +
    "mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast," +
    "template,typeid,typename,using,virtual,where"];
var JAVA_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,boolean,byte,extends,final,finally,implements,import," +
    "instanceof,null,native,package,strictfp,super,synchronized,throws," +
    "transient"];
var CSHARP_KEYWORDS = [JAVA_KEYWORDS,
    "as,base,by,checked,decimal,delegate,descending,dynamic,event," +
    "fixed,foreach,from,group,implicit,in,interface,internal,into,is,lock," +
    "object,out,override,orderby,params,partial,readonly,ref,sbyte,sealed," +
    "stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort,var"];
var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
    "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
    "true,try,unless,until,when,while,yes";
var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
    "debugger,eval,export,function,get,null,set,undefined,var,with," +
    "Infinity,NaN"];
var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
    "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
    "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
    "elif,except,exec,finally,from,global,import,in,is,lambda," +
    "nonlocal,not,or,pass,print,raise,try,with,yield," +
    "False,True,None"];
var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
    "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
    "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
    "BEGIN,END"];
var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
    "function,in,local,set,then,until"];
// Union of the per-language lists.  The elements must be separated by commas:
// joining PERL_KEYWORDS and PYTHON_KEYWORDS with "+" would concatenate their
// string forms and fuse "END" and "break" into the bogus keyword "ENDbreak".
var ALL_KEYWORDS = [
    CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
    PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];

111

112

113

// Token style names.  Each value corresponds to a CSS class.

/** Style for a string literal token. @const */
var PR_STRING = 'str';

/** Style for a keyword token. @const */
var PR_KEYWORD = 'kwd';

/** Style for a comment token. @const */
var PR_COMMENT = 'com';

/** Style for a type name token. @const */
var PR_TYPE = 'typ';

/** Style for a literal value token, e.g. 1, null, true. @const */
var PR_LITERAL = 'lit';

/** Style for a punctuation string token. @const */
var PR_PUNCTUATION = 'pun';

/** Style for plain text, i.e. tokens with no more specific style. @const */
var PR_PLAIN = 'pln';

/** Style for an sgml tag. @const */
var PR_TAG = 'tag';

/** Style for a markup declaration such as a DOCTYPE. @const */
var PR_DECLARATION = 'dec';

/** Style for embedded source. @const */
var PR_SOURCE = 'src';

/** Style for an sgml attribute name. @const */
var PR_ATTRIB_NAME = 'atn';

/** Style for an sgml attribute value. @const */
var PR_ATTRIB_VALUE = 'atv';

/**
 * Class name marking a section of markup that is not code, e.g. to allow
 * line numbers to be embedded within code listings.
 * @const
 */
var PR_NOCODE = 'nocode';

182

183

184

185

/**

186

* A set of tokens that can precede a regular expression literal in

187

* javascript

188

* http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html

189

* has the full list, but I've removed ones that might be problematic when

190

* seen in languages that don't support regular expression literals.

191

*

192

* Specifically, I've removed any keywords that can't precede a regexp

193

* literal in a syntactically legal javascript program, and I've removed the

194

* "in" keyword since it's not a keyword in many languages, and might be used

195

* as a count of inches.

196

*

197

* The link above does not accurately describe EcmaScript rules since

198

* it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works

199

* very well in practice.

200

*

201

* @private

202

* @const

203

*/

204

// Kept as regex source text (a string, not a RegExp) so that it can be
// concatenated into a larger pattern.  The first alternative starts with "^^"
// on purpose: combinePrefixPatterns strips a lone "^" but leaves the second
// "^" of a "^^" pair in place, so that alternative stays anchored.
var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|\\!|\\!=|\\!==|\\#|\\%|\\%=|&|&&|&&=|&=|\\(|\\*|\\*=|\\+=|\\,|\\-=|\\->|\\/|\\/=|:|::|\\;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\@|\\[|\\^|\\^=|\\^\\^|\\^\\^=|\\{|\\||\\|=|\\|\\||\\|\\|=|\\~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*';

205

206

// CAVEAT: this does not properly handle the case where a regular

207

// expression immediately follows another since a regular expression may

208

// have flags for case-sensitivity and the like. Having regexp tokens

209

// adjacent is not valid in any language I'm aware of, so I'm punting.

210

// TODO: maybe style special characters inside a regexp as punctuation.

211

212

213

/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExp.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * While combining, capturing groups that are never back-referenced are turned
 * into non-capturing groups, and the remaining groups and their
 * back-references are renumbered so that they stay consistent in the combined
 * pattern.  When case-insensitive inputs are mixed with case-sensitive ones
 * that contain letters, the case-insensitive inputs are expanded letter by
 * letter (Latin letters only) and the result is case-sensitive.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input regex is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Count of capturing groups emitted so far; shared by all rewritten inputs
  // so that group numbers are unique within the combined pattern.
  var capturedGroupIndex = 0;

  // Decide whether the combined regex can simply carry the "i" flag, or
  // whether case-insensitive inputs must be expanded by hand because a
  // case-sensitive input contains letters.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  // Code units for the single-letter control escapes (\b, \t, ...).
  var escapeCharToCodeUnit = {
    'b': 8,
    't': 9,
    'n': 0xa,
    'v': 0xb,
    'f': 0xc,
    'r': 0xd
  };

  /**
   * @param {string} charsetPart one element of a character set: a literal
   *     character or an escape sequence.
   * @return {number} the code unit that the element denotes.
   */
  function decodeEscape(charsetPart) {
    var cc0 = charsetPart.charCodeAt(0);
    if (cc0 !== 92 /* \\ */) {
      return cc0;
    }
    var c1 = charsetPart.charAt(1);
    cc0 = escapeCharToCodeUnit[c1];
    if (cc0) {
      return cc0;
    } else if ('0' <= c1 && c1 <= '7') {
      return parseInt(charsetPart.substring(1), 8);
    } else if (c1 === 'u' || c1 === 'x') {
      return parseInt(charsetPart.substring(2), 16);
    } else {
      return charsetPart.charCodeAt(1);
    }
  }

  /**
   * @param {number} charCode a code unit.
   * @return {string} text that denotes that code unit inside a character set.
   */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    // '^' must be escaped as well: if it ended up first in the rebuilt set it
    // would silently negate the whole set.
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']' || ch === '^') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a character set so that it matches both cases of every Latin
   * letter it contains.
   * @param {string} charSet a character set including the square brackets.
   * @return {string} the equivalent case-folded character set.
   */
  function caseFoldCharset(charSet) {
    // match() yields null for an empty set "[]"; treat that as no parts.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
        groups.push(p);
      } else {
        var start = decodeEscape(p);
        var end;
        if (i + 2 < n && '-' === charsetParts[i + 1]) {
          end = decodeEscape(charsetParts[i + 2]);
          i += 2;
        } else {
          end = start;
        }
        ranges.push([start, end]);
        // If the range might intersect letters, then expand it.
        // This case handling is too simplistic.
        // It does not deal with non-latin case folding.
        // It works for latin source code identifiers though.
        if (!(end < 65 || start > 122)) {
          if (!(end < 65 || start > 90)) {
            ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
          }
          if (!(end < 97 || start > 122)) {
            ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
          }
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        // Two adjacent code units need no dash: "ab" is the same as "a-b".
        if (range[1] > range[0] + 1) { out.push('-'); }
        out.push(encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * Rewrites one input regex: drops prefix anchors, renumbers its groups
   * relative to the combined pattern and, if required, expands letters so that
   * the pattern is case-insensitive without the "i" flag.
   * @param {RegExp} regex
   * @return {string} regex source text.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look up the group the back-reference NAMES (decimalValue), not
          // the most recently opened group.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}

448

449

450

/**
 * Split markup into a string of source code and an array mapping ranges in
 * that string to the text nodes in which they appear.
 *
 * <p>
 * The HTML DOM structure:</p>
 * <pre>
 * (Element   "p"
 *   (Element "b"
 *     (Text  "print "))       ; #1
 *   (Text    "'Hello '")      ; #2
 *   (Element "br")            ; #3
 *   (Text    "  + 'World';")) ; #4
 * </pre>
 * <p>
 * corresponds to the HTML
 * {@code <p><b>print </b>'Hello '<br>  + 'World';</p>}.</p>
 *
 * <p>
 * It will produce the output:</p>
 * <pre>
 * {
 *   sourceCode: "print 'Hello '\n  + 'World';",
 *   //                 1         2
 *   //       012345678901234 5678901234567
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 * <p>
 * where #1 is a reference to the {@code "print "} text node above, and so
 * on for the other text nodes.
 * </p>
 *
 * <p>
 * The {@code spans} array is an array of pairs.  Even elements are the start
 * indices of substrings, and odd elements are the text nodes (or BR elements)
 * that contain the text for those substrings.
 * Substrings continue until the next index or the end of the source.
 * </p>
 *
 * Elements whose class attribute contains the word "nocode" are skipped
 * together with everything inside them.  Unless the root node's white-space
 * style starts with "pre", runs of spaces, tabs and newlines in text are
 * collapsed to a single space; otherwise newlines are normalized to "\n".
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @return {Object} source code and the text nodes in which they occur.
 */
function extractSourceSpans(node) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;

  var chunks = [];  // Text of each recorded node, in document order.
  var length = 0;   // Total length of the chunks recorded so far.
  var spans = [];   // Flattened (start index, node) pairs.
  var k = 0;        // Number of chunks recorded so far.

  var whitespace;
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    whitespace = document.defaultView.getComputedStyle(node, null)
        .getPropertyValue('white-space');
  }
  var isPreformatted = whitespace && 'pre' === whitespace.substring(0, 3);

  function walk(node) {
    switch (node.nodeType) {
      case 1:  // Element
        if (nocode.test(node.className)) { return; }
        for (var child = node.firstChild; child; child = child.nextSibling) {
          walk(child);
        }
        var nodeName = node.nodeName;
        if ('BR' === nodeName || 'LI' === nodeName) {
          // A line break contributes one newline that maps to the element.
          chunks[k] = '\n';
          spans[k << 1] = length++;
          spans[(k++ << 1) | 1] = node;
        }
        break;
      case 3: case 4:  // Text
        var text = node.nodeValue;
        if (text.length) {
          if (!isPreformatted) {
            text = text.replace(/[ \t\r\n]+/g, ' ');
          } else {
            text = text.replace(/\r\n?/g, '\n');  // Normalize newlines.
          }
          // TODO: handle tabs here?
          chunks[k] = text;
          spans[k << 1] = length;
          length += text.length;
          spans[(k++ << 1) | 1] = node;
        }
        break;
    }
  }

  walk(node);

  return {
    // A single trailing newline is dropped from the joined text.
    sourceCode: chunks.join('').replace(/\n$/, ''),
    spans: spans
  };
}

549

550

551

/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.  Does nothing when sourceCode is empty.
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object holding
 *    {@code sourceCode} and {@code basePos}; whatever it stores in
 *    {@code job.decorations} is appended to out.
 * @param {Array} out the decoration list to extend in place.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (!sourceCode) { return; }
  var job = {
    sourceCode: sourceCode,
    basePos: basePos
  };
  langHandler(job);
  out.push.apply(out, job.decorations);
}

566

567

// Matches any string that contains at least one non-whitespace character.
var notWs = /\S/;

/**
 * Given an element, if it contains only one child element and any text nodes
 * it contains contain only space characters, return the sole child element.
 * Otherwise returns undefined.
 * <p>
 * This is meant to return the CODE element in {@code <pre><code ...>} when
 * there is a single child element that contains all the non-space textual
 * content, but not to return anything where there are multiple child elements
 * as in {@code <pre><code>...</code><code>...</code></pre>} or when there
 * is textual content.
 *
 * @param {Node} element the element whose children are inspected.
 * @return {Node|undefined} the sole child element, or undefined.
 */
function childContentWrapper(element) {
  // While scanning, wrapper is undefined (no child element seen yet), the
  // single child element seen so far, or element itself, which serves as a
  // sentinel meaning "disqualified".
  var wrapper = undefined;
  for (var c = element.firstChild; c; c = c.nextSibling) {
    var type = c.nodeType;
    wrapper = (type === 1)  // Element Node
        ? (wrapper ? element : c)
        : (type === 3)  // Text Node
        ? (notWs.test(c.nodeValue) ? element : wrapper)
        : wrapper;
  }
  return wrapper === element ? undefined : wrapper;
}

592

593

/** Given triples of [style, pattern, context] returns a lexing function,
 * The lexing function interprets the patterns to find token boundaries and
 * returns a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * The stylePatterns is a list whose elements have the form
 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * Style is a style constant like PR_PLAIN, or can be a string of the
 * form 'lang-FOO', where FOO is a language extension describing the
 * language of the portion of the token in $1 after pattern executes.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that portion of the token will be passed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 will be restyled using this decorator
 * so decorators should take care that this doesn't result in infinite
 * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
 * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
 * '<script>foo()<\/script>', which would cause the current decorator to
 * be called with '<script>' which would not match the same rule since
 * group 1 must not be empty, so it would be instead styled as PR_TAG by
 * the generic tag rule.  The handler registered for the 'js' extension would
 * then be called with 'foo()', and finally, the current decorator would
 * be called with '<\/script>' which would not match the original rule and
 * so the generic tag rule would identify it as a tag.
 *
 * Pattern must only match prefixes, and if it matches a prefix, then that
 * match is considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (Object)} a
 *   function that takes source code and returns a list of decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a token's first character to the pattern entry that claims it.
  var shortcuts = {};
  // Global regex that splits source text into tokens.
  var tokenizer;
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var allRegexs = [];
    // Set of regex source texts already added, so duplicates are skipped.
    var regexKeys = {};
    for (var i = 0, n = allPatterns.length; i < n; ++i) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
      var regex = patternParts[1];
      var k = '' + regex;
      if (!regexKeys.hasOwnProperty(k)) {
        allRegexs.push(regex);
        regexKeys[k] = null;
      }
    }
    // Catch-all so that every code unit belongs to some token.
    allRegexs.push(/[\0-\uffff]/);
    tokenizer = combinePrefixPatterns(allRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.sourceCode and produces an output array job.decorations of
   * style classes preceded by the position at which they start in
   * job.sourceCode in order.
   *
   * @param {Object} job an object like <pre>{
   *    sourceCode: {string} sourceText plain text,
   *    basePos: {int} position of job.sourceCode in the larger chunk of
   *        sourceCode.
   * }</pre>
   */
  var decorate = function (job) {
    var sourceCode = job.sourceCode, basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var pos = 0;  // index into sourceCode
    var tokens = sourceCode.match(tokenizer) || [];
    // Remembers the style of token texts already classified in this job;
    // embedded-language tokens are never cached.
    var styleCache = {};

    for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
      var token = tokens[ti];
      var style = styleCache[token];
      var match = void 0;

      var isEmbedded;
      if (typeof style === 'string') {
        isEmbedded = false;
      } else {
        var patternParts = shortcuts[token.charAt(0)];
        if (patternParts) {
          match = token.match(patternParts[1]);
          style = patternParts[0];
        } else {
          for (var i = 0; i < nPatterns; ++i) {
            patternParts = fallthroughStylePatterns[i];
            match = token.match(patternParts[1]);
            if (match) {
              style = patternParts[0];
              break;
            }
          }

          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // A 'lang-*' style without a usable group 1 has nothing to hand
          // to a language handler, so style the whole token as source.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      var tokenStart = pos;
      pos += token.length;

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
      } else {  // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        if (match[2]) {
          // If embeddedSource can be blank, then it would match at the
          // beginning which would cause us to infinitely recurse on the
          // entire token, so we catch the right context in match[2].
          embeddedSourceEnd = token.length - match[2].length;
          embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
        }
        var lang = style.substring(5);
        // Decorate the left of the embedded source
        appendDecorations(
            basePos + tokenStart,
            token.substring(0, embeddedSourceStart),
            decorate, decorations);
        // Decorate the embedded source
        appendDecorations(
            basePos + tokenStart + embeddedSourceStart,
            embeddedSource,
            langHandlerForExtension(lang, embeddedSource),
            decorations);
        // Decorate the right of the embedded section
        appendDecorations(
            basePos + tokenStart + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate, decorations);
      }
    }
    job.decorations = decorations;
  };
  return decorate;
}

766

767

/** returns a function that produces a list of decorations from source text.
 *
 * This code treats ", ', and ` as string delimiters, and \ as a string
 * escape.  It does not recognize perl's qq() style strings.
 * It has no special handling for double delimiter escapes as in basic, or
 * the tripled delimiters used in python, but should work on those regardless
 * although in those cases a single string literal may be broken up into
 * multiple adjacent string literals.
 *
 * It recognizes C, C++, and shell style comments.
 *
 * @param {Object} options a set of optional parameters.  The keys read are:
 *   'tripleQuotedStrings', 'multiLineStrings' (the first truthy one selects
 *   the string pattern), 'verbatimStrings', 'hashComments' (a value greater
 *   than 1 selects multi-line ### comments when combined with
 *   'cStyleComments'), 'cStyleComments', 'regexLiterals', 'types' (used as
 *   the pattern for PR_TYPE tokens, so presumably a RegExp), and 'keywords'
 *   (anything that coerces to a comma- or space-separated string).
 * @return {function (Object)} a function that examines the source code
 *     in the input job and builds the decoration list.
 */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        // NOTE(review): the first element of this entry was missing from the
        // captured source (only "null, '#']" survived); it is restored here
        // from the upstream prettify.js text -- confirm against upstream.
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // Header names in angle brackets (as after #include) read as strings.
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    /**
     * @const
     */
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  var types = options['types'];
  if (types) {
    fallthroughStylePatterns.push([PR_TYPE, types]);
  }

  // Coerce to a string (keyword lists may be nested arrays) and strip a
  // single leading/trailing space.
  var keywords = ("" + options['keywords']).replace(/^ | $/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
         null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);
  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number,
           // NOTE(review): "\\.\\d\\+" requires a literal '+' after the
           // digit; "\\.\\d+" (e.g. ".5") was probably intended.  Left
           // unchanged pending confirmation.
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.  See issue 144.
      [PR_PLAIN,       /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}

892

893

// Decorator built from ALL_KEYWORDS with hash comments, C-style comments,
// multi-line strings and regex literals all switched on.  It is registered
// further down as the 'default-code' language handler.
var decorateSource = sourceDecorator({
  'keywords': ALL_KEYWORDS,
  'hashComments': true,
  'cStyleComments': true,
  'multiLineStrings': true,
  'regexLiterals': true
});

900

901

/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|boolean=} opt_startLineNum when this is an integer it is
 *     written to the {@code value} attribute of the first list item and
 *     offsets the {@code L0}..{@code L9} row classes; any other value
 *     leaves the first item without a {@code value} attribute.
 */
function numberLines(node, opt_startLineNum) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  var whitespace;
  // Ask the node's own document for its view instead of the global window,
  // and tolerate documents that have no view at all.
  var view = document.defaultView;
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else if (view && view.getComputedStyle) {
    whitespace = view.getComputedStyle(node, null)
        .getPropertyValue('white-space');
  }
  // If it's preformatted, then we need to split lines on line breaks
  // in addition to <BR>s.
  var isPreformatted = whitespace && 'pre' === whitespace.substring(0, 3);

  var li = document.createElement('LI');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  function walk(node) {
    switch (node.nodeType) {
      case 1:  // Element
        if (nocode.test(node.className)) { break; }
        if ('BR' === node.nodeName) {
          breakAfter(node);
          // Discard the <BR> since it is now flush against a </LI>.
          if (node.parentNode) {
            node.parentNode.removeChild(node);
          }
        } else {
          for (var child = node.firstChild; child; child = child.nextSibling) {
            walk(child);
          }
        }
        break;
      case 3: case 4:  // Text
        if (isPreformatted) {
          var text = node.nodeValue;
          var match = text.match(lineBreak);
          if (match) {
            var firstLine = text.substring(0, match.index);
            node.nodeValue = firstLine;
            var tail = text.substring(match.index + match[0].length);
            if (tail) {
              var parent = node.parentNode;
              parent.insertBefore(
                  document.createTextNode(tail), node.nextSibling);
            }
            breakAfter(node);
            if (!firstLine) {
              // Don't leave blank text nodes in the DOM.
              node.parentNode.removeChild(node);
            }
          }
        }
        break;
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross lines.
        // E.g. in <b>Foo<br>Bar</b>
        // should be rewritten to <li><b>Foo</b></li><li><b>Bar</b></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  var i;
  // Split lines while there are lines left to split.
  for (i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum|0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var ol = document.createElement('OL');
  ol.className = 'linenums';
  var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
  for (i = 0; i < listItems.length; ++i) {
    li = listItems[i];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    li.className = 'L' + ((i + offset) % 10);
    if (!li.firstChild) {
      // Give an otherwise empty line a non-breaking space as content.
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}

1048

1049

/**
 * Breaks {@code job.sourceCode} around style boundaries in
 * {@code job.decorations} and modifies {@code job.sourceNode} in place.
 * @param {Object} job like <pre>{
 *    sourceCode: {string} source as plain text,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to that
 *       span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.sourceCode in order
 * }</pre>
 * @private
 */
function recombineTagsAndDecorations(job) {
  var isIE = /\bMSIE\b/.test(navigator.userAgent);
  var newlineRe = /\n/g;

  var source = job.sourceCode;
  var sourceLength = source.length;
  // Index into source after the last code-unit recombined.
  var sourceIndex = 0;

  var spans = job.spans;
  var nSpans = spans.length;
  // Index into spans after the last span which ends at or before sourceIndex.
  var spanIndex = 0;

  var decorations = job.decorations;
  var nDecorations = decorations.length;
  // Index into decorations after the last decoration which ends at or before
  // sourceIndex.
  var decorationIndex = 0;

  // Remove all zero-length decorations.  The sentinel written one past the
  // end lets the last real decoration be compared against the source length.
  decorations[nDecorations] = sourceLength;
  var decPos, i;
  for (i = decPos = 0; i < nDecorations;) {
    if (decorations[i] !== decorations[i + 2]) {
      decorations[decPos++] = decorations[i++];
      decorations[decPos++] = decorations[i++];
    } else {
      i += 2;
    }
  }
  nDecorations = decPos;

  // Simplify decorations.
  for (i = decPos = 0; i < nDecorations;) {
    var startPos = decorations[i];
    // Conflate all adjacent decorations that use the same style.
    var startDec = decorations[i + 1];
    var runEnd = i + 2;
    while (runEnd + 2 <= nDecorations && decorations[runEnd + 1] === startDec) {
      runEnd += 2;
    }
    decorations[decPos++] = startPos;
    decorations[decPos++] = startDec;
    i = runEnd;
  }

  // Truncate the array to the compacted entries (this also drops the sentinel).
  decorations.length = decPos;

  while (spanIndex < nSpans) {
    var spanEnd = spans[spanIndex + 2] || sourceLength;
    var decEnd = decorations[decorationIndex + 2] || sourceLength;

    var end = Math.min(spanEnd, decEnd);

    var textNode = spans[spanIndex + 1];
    var styledText;
    if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
        // Don't introduce spans around empty text nodes.
        && (styledText = source.substring(sourceIndex, end))) {
      // This may seem bizarre, and it is.  Emitting LF on IE causes the
      // code to display with spaces instead of line breaks.
      // Emitting Windows standard issue linebreaks (CRLF) causes a blank
      // space to appear at the beginning of every line but the first.
      // Emitting an old Mac OS 9 line separator makes everything spiffy.
      if (isIE) { styledText = styledText.replace(newlineRe, '\r'); }
      textNode.nodeValue = styledText;
      var document = textNode.ownerDocument;
      var span = document.createElement('SPAN');
      span.className = decorations[decorationIndex + 1];
      var parentNode = textNode.parentNode;
      parentNode.replaceChild(span, textNode);
      span.appendChild(textNode);
      // Split off a text node only when part of this span is still
      // unstyled; otherwise an empty text node would be left in the DOM.
      if (end < spanEnd) {
        spans[spanIndex + 1] = textNode
            // TODO: Possibly optimize by using '' if there's no flicker.
            = document.createTextNode(source.substring(end, spanEnd));
        parentNode.insertBefore(textNode, span.nextSibling);
      }
    }

    sourceIndex = end;

    if (sourceIndex >= spanEnd) {
      spanIndex += 2;
    }
    if (sourceIndex >= decEnd) {
      decorationIndex += 2;
    }
  }
}

1157

1158

1159

/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/** Register a language handler for the given file extensions.
  * An extension that already has a handler is left untouched; a warning is
  * written through {@code console.warn} when {@code window.console} exists.
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        sourceCode: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.sourceCode in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  // Borrow hasOwnProperty from Object.prototype: the registry is keyed by
  // arbitrary extension names, so an extension called 'hasOwnProperty'
  // would otherwise shadow the method and corrupt every later lookup.
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    if (!hasOwn.call(langHandlerRegistry, ext)) {
      langHandlerRegistry[ext] = handler;
    } else if (window['console']) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}

1187

/**
 * Looks up the handler registered for a language extension.  When the
 * extension is empty or has no registered handler, the source text is
 * sniffed to choose between the 'default-markup' and 'default-code' entries.
 * @param {string} extension language extension such as 'cpp'; may be falsy.
 * @param {string} source plain-text source, inspected only for the fallback.
 * @return the value stored in langHandlerRegistry for the chosen key.
 */
function langHandlerForExtension(extension, source) {
  // Use the prototype's hasOwnProperty so that a handler registered under
  // the name 'hasOwnProperty' cannot shadow the lookup.
  var hasOwn = Object.prototype.hasOwnProperty;
  if (!(extension && hasOwn.call(langHandlerRegistry, extension))) {
    // Treat it as markup if the first non-whitespace character is a <.
    extension = /^\s*</.test(source)
        ? 'default-markup'
        : 'default-code';
  }
  return langHandlerRegistry[extension];
}

1197

// Built-in language handlers.  registerLangHandler keeps the first handler
// registered for an extension, so the order of these calls matters.
registerLangHandler(decorateSource, ['default-code']);
registerLangHandler(
    createSimpleLexer(
        [],
        [
         [PR_PLAIN,       /^[^<?]+/],
         [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
         [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
         // Unescaped content in an unknown language
         ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
         ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
         [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
         ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
         // Unescaped content in javascript.  (Or possibly vbscript).
         ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
         // Contains unescaped stylesheet content
         ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
         ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
        ]),
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);
// Lexer for the inside of a single tag, reached through 'lang-in.tag' above.
registerLangHandler(
    createSimpleLexer(
        [
         [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
         [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
         ],
        [
         [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
         [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
         ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
         [PR_PUNCTUATION,  /^[=<>\/]+/],
         ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
         ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
         ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
         ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
         ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
         ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
         ]),
    ['in.tag']);
// Unquoted attribute values, reached through 'lang-uq.val' above.
registerLangHandler(
    createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);
registerLangHandler(sourceDecorator({
        'keywords': CPP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'types': C_TYPES
      }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
        'keywords': 'null,true,false'
      }), ['json']);
registerLangHandler(sourceDecorator({
        'keywords': CSHARP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'verbatimStrings': true,
        'types': C_TYPES
      }), ['cs']);
registerLangHandler(sourceDecorator({
        'keywords': JAVA_KEYWORDS,
        'cStyleComments': true
      }), ['java']);
registerLangHandler(sourceDecorator({
        'keywords': SH_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true
      }), ['bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
        'keywords': PYTHON_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'tripleQuotedStrings': true
      }), ['cv', 'py']);
registerLangHandler(sourceDecorator({
        'keywords': PERL_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
        'keywords': RUBY_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['rb']);
registerLangHandler(sourceDecorator({
        'keywords': JSCRIPT_KEYWORDS,
        'cStyleComments': true,
        'regexLiterals': true
      }), ['js']);
registerLangHandler(sourceDecorator({
        'keywords': COFFEE_KEYWORDS,
        'hashComments': 3,  // ### style block comments
        'cStyleComments': true,
        // Spelled 'multiLineStrings' like every other registration; the
        // previous 'multilineStrings' key did not match that spelling.
        'multiLineStrings': true,
        'tripleQuotedStrings': true,
        'regexLiterals': true
      }), ['coffee']);
// Bodies of regex literals, reached through 'lang-regex', are styled as strings.
registerLangHandler(createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);

1295

1296

/**
 * Runs one pretty-printing job end to end: extracts plain text and spans
 * from {@code job.sourceNode}, hands the job to the language handler chosen
 * for {@code job.langExtension}, then writes the result back into the DOM.
 * Any exception is caught and, when {@code window} has a console, logged.
 * @param {Object} job carries langExtension and sourceNode on entry;
 *     sourceCode, spans and basePos are filled in here.
 */
function applyDecorator(job) {
  var requestedLang = job.langExtension;

  try {
    // Flatten the source node into plain text plus its span table.
    var extracted = extractSourceSpans(job.sourceNode);
    var plainText = extracted.sourceCode;
    job.sourceCode = plainText;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Choose the language handler and run it on the job.
    var handler = langHandlerForExtension(requestedLang, plainText);
    handler(job);

    // Fold the decorations and the original tags back into the source
    // node, which is modified in place.
    recombineTagsAndDecorations(job);
  } catch (err) {
    if ('console' in window) {
      var report = err;
      if (err && err['stack']) {
        report = err['stack'];
      }
      console['log'](report);
    }
  }
}

1320

1321

/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml The HTML to pretty print.
 * @param {string} opt_langExtension The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param {number|boolean} opt_numberLines True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the innerHTML of the scratch PRE element after
 *     decoration.
 */
function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  var container = document.createElement('PRE');
  // This could cause images to load and onload listeners to fire.
  // E.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // We assume that the inner HTML is from a trusted source.
  container.innerHTML = sourceCodeHtml;
  if (opt_numberLines) {
    numberLines(container, opt_numberLines);
  }

  var job = {
    langExtension: opt_langExtension,
    numberLines: opt_numberLines,
    sourceNode: container
  };
  applyDecorator(job);
  return container.innerHTML;
}

1346

1347

/**
 * Collects every {@code <pre>}, {@code <code>} and {@code <xmp>} element in
 * the document and prettifies those whose class contains 'prettyprint',
 * skipping any that sit inside another such element.
 *
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
function prettyPrint(opt_whenDone) {
  function byTagName(tn) { return document.getElementsByTagName(tn); }
  // fetch a list of nodes to rewrite
  var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return +(new Date); } };
  }

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;
  var prettyPrintingJob;

  var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
  // tagName is upper-case for HTML elements and keeps its source case in
  // XML documents, so the ancestor test must ignore case.
  var preCodeXmpRe = /^(?:pre|code|xmp)$/i;

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                   clock['now']() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock['now']() < endTime; k++) {
      var cs = elements[k];
      var className = cs.className;
      if (className.indexOf('prettyprint') >= 0) {
        // If the classes includes a language extensions, use it.
        // Language extensions can be specified like
        //     <pre class="prettyprint lang-cpp">
        // the language extension "cpp" is used to find a language handler as
        // passed to PR.registerLangHandler.
        // HTML5 recommends that a language be specified using "language-"
        // as the prefix instead.  Google Code Prettify supports both.
        // http://dev.w3.org/html5/spec-author-view/the-code-element.html
        var langExtension = className.match(langExtensionRe);
        // Support <pre class="prettyprint"><code class="language-c">
        var wrapper;
        if (!langExtension && (wrapper = childContentWrapper(cs))
            && "CODE" === wrapper.tagName) {
          langExtension = wrapper.className.match(langExtensionRe);
        }

        if (langExtension) {
          langExtension = langExtension[1];
        }

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p; p = p.parentNode) {
          if (preCodeXmpRe.test(p.tagName) &&
              p.className && p.className.indexOf('prettyprint') >= 0) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // Look for a class like linenums or linenums:<n> where <n> is the
          // 1-indexed number of the first line.
          var lineNums = cs.className.match(/\blinenums\b(?::(\d+))?/);
          lineNums = lineNums
                ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true
                : false;
          if (lineNums) { numberLines(cs, lineNums); }

          // do the pretty printing
          prettyPrintingJob = {
            langExtension: langExtension,
            sourceNode: cs,
            numberLines: lineNums
          };
          applyDecorator(prettyPrintingJob);
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}

1439

1440

/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
window['prettyPrintOne'] = prettyPrintOne;
/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 *
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
window['prettyPrint'] = prettyPrint;
/**
 * Contains functions for creating and registering new language handlers.
 * @type {Object}
 */
window['PR'] = {
      'createSimpleLexer': createSimpleLexer,
      'registerLangHandler': registerLangHandler,
      'sourceDecorator': sourceDecorator,
      'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
      'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
      'PR_COMMENT': PR_COMMENT,
      'PR_DECLARATION': PR_DECLARATION,
      'PR_KEYWORD': PR_KEYWORD,
      'PR_LITERAL': PR_LITERAL,
      'PR_NOCODE': PR_NOCODE,
      'PR_PLAIN': PR_PLAIN,
      'PR_PUNCTUATION': PR_PUNCTUATION,
      'PR_SOURCE': PR_SOURCE,
      'PR_STRING': PR_STRING,
      'PR_TAG': PR_TAG,
      'PR_TYPE': PR_TYPE
    };

1477

})();

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages