upstream/ipython Commit - r7273:055b9cfd

1

# -*- coding: utf-8 -*-

2

# module pyparsing.py

3

#

4

5

#

6

# Permission is hereby granted, free of charge, to any person obtaining

7

# a copy of this software and associated documentation files (the

8

# "Software"), to deal in the Software without restriction, including

9

# without limitation the rights to use, copy, modify, merge, publish,

10

# distribute, sublicense, and/or sell copies of the Software, and to

11

# permit persons to whom the Software is furnished to do so, subject to

12

# the following conditions:

13

#

14

# The above copyright notice and this permission notice shall be

15

# included in all copies or substantial portions of the Software.

16

#

17

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

18

# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

19

# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

20

# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

21

# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

22

# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

23

# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

24

#

25

#from __future__ import generators

26

27

__doc__ = \

28

"""

29

pyparsing module - Classes and methods to define and execute parsing grammars

30

31

The pyparsing module is an alternative approach to creating and executing simple grammars,

32

vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you

33

don't need to learn a new syntax for defining grammars or matching expressions - the parsing module

34

provides a library of classes that you use to construct the grammar directly in Python.

35

36

Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::

37

38

from pyparsing import Word, alphas

39

40

# define grammar of a greeting

41

greet = Word( alphas ) + "," + Word( alphas ) + "!"

42

43

hello = "Hello, World!"

44

print hello, "->", greet.parseString( hello )

45

46

The program outputs the following::

47

48

Hello, World! -> ['Hello', ',', 'World', '!']

49

50

The Python representation of the grammar is quite readable, owing to the self-explanatory

51

class names, and the use of '+', '|' and '^' operators.

52

53

The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an

54

object with named attributes.

55

56

The pyparsing module handles some of the problems that are typically vexing when writing text parsers:

57

- extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)

58

- quoted strings

59

- embedded comments

60

"""

61

62

__version__ = "1.5.2"

63

__versionTime__ = "17 February 2009 19:45"

64

__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

65

66

import string

67

from weakref import ref as wkref

68

import copy

69

import sys

70

import warnings

71

import re

72

import sre_constants

73

#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )

74

75

# Public API of the module: names exported by "from pyparsing import *".
__all__ = [
    # expression and exception classes
    'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
    'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
    'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
    'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
    'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
    'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
    'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
    # constants, helper functions and predefined expressions
    'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
    'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
    'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
    'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
    'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
    'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
    'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
    'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
    'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
    'indentedBlock', 'originalTextFor',
    ]

94

95

96

"""

97

Detect if we are running version 3.X and make appropriate changes

98

Robert A. Clark

99

"""

100

if sys.version_info[0] > 2:

101

_PY3K = True

102

_MAX_INT = sys.maxsize

103

basestring = str

104

else:

105

_PY3K = False

106

_MAX_INT = sys.maxint

107

108

if _PY3K:
    # str is already a Unicode type on Python 3, so no wrapper is needed
    _ustr = str
    unichr = chr
else:
    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

           A unicode object is returned unchanged.  Anything else is first
           converted with str(obj); only if that fails with a
           UnicodeEncodeError is unicode(obj) returned instead.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # When this succeeds, _ustr(obj) behaves exactly like str(obj),
            # so existing callers are unaffected.
            text = str(obj)
        except UnicodeEncodeError:
            # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
            # say "The return value must be a string object"; a unicode
            # object is returned here on the reading that it qualifies.
            # Alternatives that were considered but are not used: encoding
            # with sys.getdefaultencoding() and replacing unprintable
            # characters with escape codes or with question marks.
            text = unicode(obj)
        return text

138

139

if _PY3K:
    # a set gives the same fast membership test on Python 3
    _str2dict = set
else:
    def _str2dict(strg):
        """Return a dict keyed by the characters of strg (every value is 0)."""
        return dict.fromkeys(strg, 0)

144

145

def _xml_escape(data):

146

"""Escape &, <, >, ", ', etc. in a string of data."""

147

148

# ampersand must be replaced first

149

from_symbols = '&><"\''

150

to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]

151

for from_,to_ in zip(from_symbols, to_symbols):

152

data = data.replace(from_, to_)

153

return data

154

155

class _Constants(object):

156

pass

157

158

# Character-set constants used to build Word-style expressions.
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas = string.lowercase + string.uppercase
nums       = string.digits
hexnums    = nums + "ABCDEFabcdef"
alphanums  = alphas + nums
_bslash    = chr(92)     # a single backslash character
# every printable character that is not whitespace
printables = "".join( [ ch for ch in string.printable
                            if ch not in string.whitespace ] )

167

168

class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

       Instance attributes:
        - loc - position in the parsed string where the error was detected
        - msg - the error message
        - pstr - the string being parsed ("" when only a message was given)
        - parserElement - the expression that raised the exception, or None
       Computed on demand (see __getattr__): lineno, col/column and line.
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is None:
            # single-argument form: the lone argument is the message
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other missing attribute raises AttributeError.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
           Returns the marked line with surrounding whitespace stripped;
           an empty markerString leaves the line unmarked.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # The names listed here must match the real attribute and method
        # names: the method is spelled markInputline, and "column" is an
        # accepted alias of "col".
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()

215

216

class ParseException(ParseBaseException):
    """Exception raised when a parse expression does not match the input.

       Besides the attributes stored by ParseBaseException, these are
       available by name:
        - lineno - the line number of the exception text
        - col - the column number of the exception text
        - line - the line containing the exception text
    """

224

225

class ParseFatalException(ParseBaseException):
    """User-throwable exception, to be raised when inconsistent parse
       content is found; stops all parsing immediately."""

229

230

class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an
       ErrorStop indicates that parsing must stop immediately because
       an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # copy the details of the exception being promoted
        details = ( pe.pstr, pe.loc, pe.msg, pe.parserElement )
        super(ParseSyntaxException, self).__init__( *details )

237

238

#~ class ReparseException(ParseBaseException):

239

#~ """Experimental class - parse actions can raise this exception to cause

240

#~ pyparsing to reparse the input string:

241

#~ - with a modified input string, and/or

242

#~ - with a modified start location

243

#~ Set the values of the ReparseException in the constructor, and raise the

244

#~ exception in a parse action to cause pyparsing to use the new string/location.

245

#~ Setting the values as None causes no change to be made.

246

#~ """

247

#~ def __init_( self, newstring, restartLoc ):

248

#~ self.newParseText = newstring

249

#~ self.reparseLoc = restartLoc

250

251

class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive

       The sequence of parse elements given to the constructor is kept in
       the parseElementTrace attribute.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right-hand side
        # of % would be unpacked as the format arguments and raise TypeError.
        return "RecursiveGrammarException: %s" % ( self.parseElementTrace, )

258

259

class _ParseResultsWithOffset(object):

260

def __init__(self,p1,p2):

261

self.tup = (p1,p2)

262

def __getitem__(self,i):

263

return self.tup[i]

264

def __repr__(self):

265

return repr(self.tup)

266

def setOffset(self,i):

267

self.tup = (self.tup[0],i)

268

269

class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

       Tokens are kept in an internal list; named results are kept in an
       internal dict that maps each name to a list of (value, position)
       records.
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults returns that same object
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            # first initialisation only; skipped when __new__ handed back
            # an already initialised object
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names accumulate every match (see __getitem__)
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # modal name: report only the last match
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so the child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at position index and adjusts
           the positions recorded for the named results."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        # dict.values() exists on both Python 2 and 3 (itervalues() does not)
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # Only reached for names not found by normal lookup: an unknown
        # results name yields "" rather than raising AttributeError.
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(pos):
                # Negative positions map to the start of the appended
                # tokens.  Written as an explicit test because the former
                # "a<0 and offset or a+offset" idiom gave the wrong answer
                # whenever offset was 0.
                if pos < 0:
                    return offset
                return pos + offset
            # items() rather than iteritems(), which Python 3 dicts lack
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; other tokens are
           included unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict([(v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        # NOTE(review): whitespace inside this literal may have been
        # collapsed when the file was extracted - confirm the intended width
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which sub is stored in this object
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, hence list()
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # dir(type(self)) lists the attributes of this class; the results
        # names are added so they show up as attributes too.  list() is
        # needed because keys() is a view on Python 3.
        return dir(type(self)) + list(self.keys())

619

620

def col (loc,strg):
    """Return the column of position loc within strg; the first column is 1.

       Newlines separate lines, and a position that sits on a newline
       character itself is reported as column 1.

       Note: by default tabs in the input string are expanded before parsing
       starts.  See ParserElement.parseString for more information on parsing
       strings containing <TAB>s, and on keeping the parsed string, the parse
       location, and the line and column positions consistent.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the last newline before loc (rfind gives -1 if none)
    return loc - strg.rfind("\n", 0, loc)

631

632

def lineno(loc,strg):
    """Return the line number of position loc within strg; the first line is 1.

       Newlines separate lines.

       Note: by default tabs in the input string are expanded before parsing
       starts.  See ParserElement.parseString for more information on parsing
       strings containing <TAB>s, and on keeping the parsed string, the parse
       location, and the line and column positions consistent.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1

643

644

def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "not found" as -1; index 0 is a real match (the string
    # starts with a newline and loc is 0), so the test must be >= 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]

653

654

def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action: print the expression about to be matched and
       the location (with line and column) where matching starts."""
    pieces = [ "Match ", _ustr(expr), " at loc ", _ustr(loc),
               "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ) ]
    print ("".join(pieces))

656

657

def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action: print the matched expression and its tokens."""
    matched = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched)

659

660

def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action: print the exception raised during matching."""
    message = "Exception raised:" + _ustr(exc)
    print (message)

662

663

def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and returns None."""
    return None

666

667

class ParserElement(object):

668

"""Abstract base level parser element class."""

669

DEFAULT_WHITE_CHARS = " \n\t\r"

670

671

def setDefaultWhitespaceChars( chars ):
    """Overrides the default whitespace chars.

       Stores chars in the class attribute ParserElement.DEFAULT_WHITE_CHARS.
    """
    setattr( ParserElement, "DEFAULT_WHITE_CHARS", chars )
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)

676

677

def __init__( self, savelist=False ):
    """Set every parser element attribute to its default value;
       savelist is stored as saveAsList."""
    # naming and results handling
    #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.errmsg = ""

    # actions
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False

    # whitespace and ignorable expressions
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # parsing behaviour flags
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None

    # debugging
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions

699

700

def copy( self ):
    """Make a copy of this ParserElement.  Useful for defining different parse actions
       for the same parsing pattern, using copies of the original parse element.

       The copy is shallow, except that the parse action and ignore
       expression lists are duplicated so the two elements can be changed
       independently."""
    clone = copy.copy( self )
    clone.parseAction = self.parseAction[:]
    clone.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        # pick up the whitespace default in effect at copy time
        clone.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return clone

709

710

def setName( self, name ):
    """Define name for this expression, for use in debugging.

       Also sets errmsg to "Expected <name>" and, if this element has an
       exception attribute, updates that exception's message.  Returns self."""
    self.name = name
    self.errmsg = "Expected " + self.name
    if hasattr(self,"exception"):
        self.exception.msg = self.errmsg
    return self

717

718

def setResultsName( self, name, listAllMatches=False ):
    """Define name for referencing matching tokens as a nested attribute
       of the returned parse results.

       NOTE: this returns a *copy* of the original ParserElement object,
       so that the client can define a basic element, such as an integer,
       and reference it in multiple places with different names.

       With listAllMatches set, the copy is marked as non-modal.
    """
    named = self.copy()
    named.resultsName = name
    named.modalResults = not listAllMatches
    return named

729

730

def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set breakFlag to True to enable, False to
       disable.  Returns self.
    """
    if not breakFlag:
        # put back the wrapped method, if a breakpoint wrapper is installed
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember the wrapped method so the breakpoint can be removed again
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self

747

748

def _normalizeParseActionArgs( f ):

749

"""Internal method used to decorate parse actions that take fewer than 3 arguments,

750

so that all parse actions can be called as f(s,l,t)."""

751

STAR_ARGS = 4

752

753

try:

754

restore = None

755

if isinstance(f,type):

756

restore = f

757

f = f.__init__

758

if not _PY3K:

759

codeObj = f.func_code

760

else:

761

codeObj = f.code

762

if codeObj.co_flags & STAR_ARGS:

763

return f

764

numargs = codeObj.co_argcount

765

if not _PY3K:

766

if hasattr(f,"im_self"):

767

numargs -= 1

768

else:

769

if hasattr(f,"__self__"):

770

numargs -= 1

771

if restore:

772

f = restore

773

except AttributeError:

774

try:

775

if not _PY3K:

776

call_im_func_code = f.__call__.im_func.func_code

777

else:

778

call_im_func_code = f.__code__

779

780

# not a function, must be a callable object, get info from the

781

# im_func binding of its bound __call__ method

782

if call_im_func_code.co_flags & STAR_ARGS:

783

return f

784

numargs = call_im_func_code.co_argcount

785

if not _PY3K:

786

if hasattr(f.__call__,"im_self"):

787

numargs -= 1

788

else:

789

if hasattr(f.__call__,"__self__"):

790

numargs -= 0

791

except AttributeError:

792

if not _PY3K:

793

call_func_code = f.__call__.func_code

794

else:

795

call_func_code = f.__call__.__code__

796

# not a bound method, get info directly from __call__ method

797

if call_func_code.co_flags & STAR_ARGS:

798

return f

799

numargs = call_func_code.co_argcount

800

if not _PY3K:

801

if hasattr(f.__call__,"im_self"):

802

numargs -= 1

803

else:

804

if hasattr(f.__call__,"__self__"):

805

numargs -= 1

806

807

808

#~ print ("adding function %s with %d args" % (f.func_name,numargs))

809

if numargs == 3:

810

return f

811

else:

812

if numargs > 3:

813

def tmp(s,l,t):

814

return f(f.__call__.__self__, s,l,t)

815

if numargs == 2:

816

def tmp(s,l,t):

817

return f(l,t)

818

elif numargs == 1:

819

def tmp(s,l,t):

820

return f(t)

821

else: #~ numargs == 0:

822

def tmp(s,l,t):

823

return f()

824

try:

825

tmp.__name__ = f.__name__

826

except (AttributeError,TypeError):

827

# no need for special handling if attribute doesnt exist

828

pass

829

try:

830

tmp.__doc__ = f.__doc__

831

except (AttributeError,TypeError):

832

# no need for special handling if attribute doesnt exist

833

pass

834

try:

835

tmp.__dict__.update(f.__dict__)

836

except (AttributeError,TypeError):

837

# no need for special handling if attribute doesnt exist

838

pass

839

return tmp

840

_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)

841

842

def setParseAction( self, *fns, **kwargs ):
    """Define action to perform when successfully matching parse element definition.
       Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
       fn(loc,toks), fn(toks), or just fn(), where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       If the functions in fns modify the tokens, they can return them as the return
       value from fn, and the modified list of tokens will replace the original.
       Otherwise, fn does not need to return any value.

       Any previously defined parse actions are replaced.  The optional
       keyword argument callDuringTry is stored on the element.  Returns self.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See parseString for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    self.parseAction = [ self._normalizeParseActionArgs(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self

862

863

def addParseAction( self, *fns, **kwargs ):
    """Add parse action to expression's list of parse actions. See setParseAction.

       Existing parse actions are kept.  callDuringTry stays set once it
       has been enabled.  Returns self."""
    self.parseAction += [ self._normalizeParseActionArgs(fn) for fn in fns ]
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

868

869

def setFailAction( self, fn ):
    """Register a callback to run when parsing fails at this expression.

    The fail action fn is called as fn(s,loc,expr,err) where:
     - s    = string being parsed
     - loc  = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err  = the exception thrown
    Its return value is ignored.  It may raise ParseFatalException
    to stop parsing immediately.

    Returns self.
    """
    self.failAction = fn
    return self

881

882

def _skipIgnorables( self, instring, loc ):
    """Advance loc past every leading match of the ignorable expressions.

    Keeps sweeping over self.ignoreExprs until a full pass matches
    nothing, then returns the resulting location.
    """
    ignorables = self.ignoreExprs
    while True:
        advanced = False
        for ignorable in ignorables:
            try:
                # consume as many consecutive matches of this expression as possible
                while True:
                    loc, _unused = ignorable._parse( instring, loc )
                    advanced = True
            except ParseException:
                pass
        if not advanced:
            return loc

894

895

def preParse( self, instring, loc ):
    """Return loc advanced past ignorable expressions and leading whitespace.

    Ignorables are skipped only when self.ignoreExprs is non-empty, and
    whitespace (characters in self.whiteChars) only when
    self.skipWhitespace is set.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc

906

907

def parseImpl( self, instring, loc, doActions=True ):
    """Base matching hook: consume nothing and return (loc, empty token list)."""
    no_tokens = []
    return loc, no_tokens

909

910

def postParse( self, instring, loc, tokenlist ):
    """Base post-processing hook: hand the matched tokens back unchanged."""
    return tokenlist

912

913

#~ @profile

914

def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):

915

debugging = ( self.debug ) #and doActions )

916

917

if debugging or self.failAction:

918

#~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))

919

if (self.debugActions[0] ):

920

self.debugActions[0]( instring, loc, self )

921

if callPreParse and self.callPreparse:

922

preloc = self.preParse( instring, loc )

923

else:

924

preloc = loc

925

tokensStart = loc

926

try:

927

try:

928

loc,tokens = self.parseImpl( instring, preloc, doActions )

929

except IndexError:

930

raise ParseException( instring, len(instring), self.errmsg, self )

931

except ParseBaseException, err:

932

#~ print ("Exception raised:", err)

933

if self.debugActions[2]:

934

self.debugActions[2]( instring, tokensStart, self, err )

935

if self.failAction:

936

self.failAction( instring, tokensStart, self, err )

937

raise

938

else:

939

if callPreParse and self.callPreparse:

940

preloc = self.preParse( instring, loc )

941

else:

942

preloc = loc

943

tokensStart = loc

944

if self.mayIndexError or loc >= len(instring):

945

try:

946

loc,tokens = self.parseImpl( instring, preloc, doActions )

947

except IndexError:

948

raise ParseException( instring, len(instring), self.errmsg, self )

949

else:

950

loc,tokens = self.parseImpl( instring, preloc, doActions )

951

952

tokens = self.postParse( instring, loc, tokens )

953

954

retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )

955

if self.parseAction and (doActions or self.callDuringTry):

956

if debugging:

957

try:

958

for fn in self.parseAction:

959

tokens = fn( instring, tokensStart, retTokens )

960

if tokens is not None:

961

retTokens = ParseResults( tokens,

962

self.resultsName,

963

asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),

964

modal=self.modalResults )

965

except ParseBaseException, err:

966

#~ print "Exception raised in user parse action:", err

967

if (self.debugActions[2] ):

968

self.debugActions[2]( instring, tokensStart, self, err )

969

raise

970

else:

971

for fn in self.parseAction:

972

tokens = fn( instring, tokensStart, retTokens )

973

if tokens is not None:

974

retTokens = ParseResults( tokens,

975

self.resultsName,

976

asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),

977

modal=self.modalResults )

978

979

if debugging:

980

#~ print ("Matched",self,"->",retTokens.asList())

981

if (self.debugActions[1] ):

982

self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

983

984

return loc, retTokens

985

986

def tryParse( self, instring, loc ):
    """Attempt a match at loc with parse actions disabled; return the end location.

    A ParseFatalException from the attempt is replaced by an ordinary
    ParseException at loc.
    """
    try:
        endloc = self._parse( instring, loc, doActions=False )[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return endloc

991

992

# this method gets repeatedly called during backtracking with the same arguments -

993

# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

994

def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):

995

lookup = (self,instring,loc,callPreParse,doActions)

996

if lookup in ParserElement._exprArgCache:

997

value = ParserElement._exprArgCache[ lookup ]

998

if isinstance(value,Exception):

999

raise value

1000

return value

1001

else:

1002

try:

1003

value = self._parseNoCache( instring, loc, doActions, callPreParse )

1004

ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())

1005

return value

1006

except ParseBaseException, pe:

1007

ParserElement._exprArgCache[ lookup ] = pe

1008

raise

1009

1010

# uncached parsing is the default entry point; enablePackrat() rebinds _parse to _parseCache
_parse = _parseNoCache

# memo table for _parseCache, shared by all expressions
_exprArgCache = {}
def resetCache():
    """Empty the shared memo table used by _parseCache."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)

1017

1018

_packratEnabled = False
def enablePackrat():
    """Turn on "packrat" parsing, i.e. memoization of parse attempts.

    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can then immediately return a cached
    value instead of re-executing parsing/validating code.  Both valid
    results and parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method ParserElement.enablePackrat().  If
    your program uses psyco to "compile as you go", you must call
    enablePackrat before calling psyco.full().  If you do not do this,
    Python will crash.  For best results, call enablePackrat() immediately
    after importing pyparsing.

    Calling it again once enabled does nothing.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)

1039

1040

def parseString( self, instring, parseAll=False ):

1041

"""Execute the parse expression with the given string.

1042

This is the main interface to the client code, once the complete

1043

expression has been built.

1044

1045

If you want the grammar to require that the entire input string be

1046

successfully parsed, then set parseAll to True (equivalent to ending

1047

the grammar with StringEnd()).

1048

1049

Note: parseString implicitly calls expandtabs() on the input string,

1050

in order to report proper column numbers in parse actions.

1051

If the input string contains tabs and

1052

the grammar uses parse actions that use the loc argument to index into the

1053

string being parsed, you can ensure you have a consistent view of the input

1054

string by:

1055

- calling parseWithTabs on your grammar before calling parseString

1056

(see L{I{parseWithTabs}<parseWithTabs>})

1057

- define your parse action using the full (s,loc,toks) signature, and

1058

reference the input string using the parse action's s argument

1059

- explictly expand the tabs in your input string before calling

1060

parseString

1061

"""

1062

ParserElement.resetCache()

1063

if not self.streamlined:

1064

self.streamline()

1065

#~ self.saveAsList = True

1066

for e in self.ignoreExprs:

1067

e.streamline()

1068

if not self.keepTabs:

1069

instring = instring.expandtabs()

1070

try:

1071

loc, tokens = self._parse( instring, 0 )

1072

if parseAll:

1073

loc = self.preParse( instring, loc )

1074

StringEnd()._parse( instring, loc )

1075

except ParseBaseException, exc:

1076

# catch and re-raise exception from here, clears out pyparsing internal stack trace

1077

raise exc

1078

else:

1079

return tokens

1080

1081

def scanString( self, instring, maxMatches=_MAX_INT ):

1082

"""Scan the input string for expression matches. Each match will return the

1083

matching tokens, start location, and end location. May be called with optional

1084

maxMatches argument, to clip scanning after 'n' matches are found.

1085

1086

Note that the start and end locations are reported relative to the string

1087

being parsed. See L{I{parseString}<parseString>} for more information on parsing

1088

strings with embedded tabs."""

1089

if not self.streamlined:

1090

self.streamline()

1091

for e in self.ignoreExprs:

1092

e.streamline()

1093

1094

if not self.keepTabs:

1095

instring = _ustr(instring).expandtabs()

1096

instrlen = len(instring)

1097

loc = 0

1098

preparseFn = self.preParse

1099

parseFn = self._parse

1100

ParserElement.resetCache()

1101

matches = 0

1102

try:

1103

while loc <= instrlen and matches < maxMatches:

1104

try:

1105

preloc = preparseFn( instring, loc )

1106

nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )

1107

except ParseException:

1108

loc = preloc+1

1109

else:

1110

matches += 1

1111

yield tokens, preloc, nextLoc

1112

loc = nextLoc

1113

except ParseBaseException, pe:

1114

raise pe

1115

1116

def transformString( self, instring ):

1117

"""Extension to scanString, to modify matching text with modified tokens that may

1118

be returned from a parse action. To use transformString, define a grammar and

1119

attach a parse action to it that modifies the returned token list.

1120

Invoking transformString() on a target string will then scan for matches,

1121

and replace the matched text patterns according to the logic in the parse

1122

action. transformString() returns the resulting transformed string."""

1123

out = []

1124

lastE = 0

1125

# force preservation of <TAB>s, to minimize unwanted transformation of string, and to

1126

# keep string locs straight between transformString and scanString

1127

self.keepTabs = True

1128

try:

1129

for t,s,e in self.scanString( instring ):

1130

out.append( instring[lastE:s] )

1131

if t:

1132

if isinstance(t,ParseResults):

1133

out += t.asList()

1134

elif isinstance(t,list):

1135

out += t

1136

else:

1137

out.append(t)

1138

lastE = e

1139

out.append(instring[lastE:])

1140

return "".join(map(_ustr,out))

1141

except ParseBaseException, pe:

1142

raise pe

1143

1144

def searchString( self, instring, maxMatches=_MAX_INT ):

1145

"""Another extension to scanString, simplifying the access to the tokens found

1146

to match the given parse expression. May be called with optional

1147

maxMatches argument, to clip searching after 'n' matches are found.

1148

"""

1149

try:

1150

return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])

1151

except ParseBaseException, pe:

1152

raise pe

1153

1154

def __add__(self, other ):
    """Implementation of + operator - returns And.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1163

1164

def __radd__(self, other ):
    """Implementation of + operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1173

1174

def __sub__(self, other):
    """Implementation of - operator, returns And with error stop.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1183

1184

def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1193

1194

def __mul__(self,other):
    """Implementation of * operator - repeat this expression.

    other may be:
     - an int n: exactly n repetitions
     - a tuple (n, None) or (n,): at least n repetitions
       (n == 0 gives ZeroOrMore, n == 1 gives OneOrMore)
     - a tuple (None, m): between 0 and m repetitions
     - a tuple (n, m): between n and m repetitions

    Raises TypeError for any other operand type, and ValueError for a
    negative count, for m < n, or for a multiplier of 0 or (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad to exactly two entries so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nests Optionals so that up to n further repetitions can match
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret

1242

1243

def __rmul__(self, other):

1244

return self.__mul__(other)

1245

1246

def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1255

1256

def __ror__(self, other ):
    """Implementation of | operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1265

1266

def __xor__(self, other ):
    """Implementation of ^ operator - returns Or.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1275

1276

def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1285

1286

def __and__(self, other ):
    """Implementation of & operator - returns Each.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1295

1296

def __rand__(self, other ):
    """Implementation of & operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1305

1306

def __invert__( self ):
    """Implementation of ~ operator - wraps this expression in a NotAny."""
    negated = NotAny( self )
    return negated

1309

1310

def __call__(self, name):

1311

"""Shortcut for setResultsName, with listAllMatches=default::

1312

userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")

1313

could be written as::

1314

userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

1315

"""

1316

return self.setResultsName(name)

1317

1318

def suppress( self ):
    """Return this expression wrapped in a Suppress.

    Suppresses the output of this ParserElement; useful to keep
    punctuation from cluttering up returned output.
    """
    suppressed = Suppress( self )
    return suppressed

1323

1324

def leaveWhitespace( self ):
    """Turn off whitespace skipping for this expression and return self.

    Disables the skipping of whitespace before matching the characters in
    the ParserElement's defined pattern.  This is normally only used
    internally by the pyparsing module, but may be needed in some
    whitespace-sensitive grammars.
    """
    setattr(self, "skipWhitespace", False)
    return self

1331

1332

def setWhitespaceChars( self, chars ):
    """Override the default whitespace chars for this expression.

    Also re-enables whitespace skipping and clears
    copyDefaultWhiteChars.  Returns self.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self

1339

1340

def parseWithTabs( self ):
    """Keep <TAB>s in the input instead of expanding them to spaces before parsing.

    Must be called before parseString when the input grammar contains
    elements that match <TAB> characters.  Returns self.
    """
    setattr(self, "keepTabs", True)
    return self

1346

1347

def ignore( self, other ):
    """Register an expression to be ignored (e.g., comments) while matching.

    May be called repeatedly, to define multiple comment or other
    ignorable patterns.  An expression that is not already a Suppress is
    wrapped in one; a Suppress is only added if it is not already in the
    list.  Returns self.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other )
    return self

1358

1359

def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable debugging with custom start, success and exception callbacks.

    Any callback passed as a false value is replaced by the module's
    default debug action for that slot.  Returns self.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self

1366

1367

def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.

    Set flag to True to enable (installing the default debug actions),
    False to disable.  Returns self.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self

1375

1376

def __str__( self ):

1377

return self.name

1378

1379

def __repr__( self ):
    """Return the same text as the string conversion, via _ustr."""
    text = _ustr(self)
    return text

1381

1382

def streamline( self ):
    """Mark the expression as streamlined, drop the cached strRepr, and return self."""
    self.strRepr = None
    self.streamlined = True
    return self

1386

1387

def checkRecursion( self, parseElementList ):
    """Recursion-check hook; the base implementation does nothing."""
    return None

1389

1390

def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    The base implementation ignores validateTrace (so the shared default
    list is never mutated here) and starts checkRecursion with a fresh
    empty list.
    """
    seen = []
    self.checkRecursion( seen )

1393

1394

def parseFile( self, file_or_filename, parseAll=False ):

1395

"""Execute the parse expression on the given file or filename.

1396

If a filename is specified (instead of a file object),

1397

the entire file is opened, read, and closed before parsing.

1398

"""

1399

try:

1400

file_contents = file_or_filename.read()

1401

except AttributeError:

1402

f = open(file_or_filename, "rb")

1403

file_contents = f.read()

1404

f.close()

1405

try:

1406

return self.parseString(file_contents, parseAll)

1407

except ParseBaseException, exc:

1408

# catch and re-raise exception from here, clears out pyparsing internal stack trace

1409

raise exc

1410

1411

def getException(self):
    """Build a ParseException for this expression with an empty string, location 0 and self.errmsg."""
    exc = ParseException("",0,self.errmsg,self)
    return exc

1413

1414

def __getattr__(self,aname):

1415

if aname == "myException":

1416

self.myException = ret = self.getException();

1417

return ret;

1418

else:

1419

raise AttributeError("no such attribute " + aname)

1420

1421

def __eq__(self,other):
    """Compare with another expression or test a string against this one.

     - another ParserElement: equal when it is the same object or the two
       instance dictionaries are equal
     - a string: equal when the whole string parses with this expression
     - anything else: the result of comparing super(ParserElement,self)
       with other
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other

1432

1433

def __ne__(self,other):

1434

return not (self == other)

1435

1436

def __hash__(self):

1437

return hash(id(self))

1438

1439

def __req__(self,other):

1440

return self == other

1441

1442

def __rne__(self,other):

1443

return not (self == other)

1444

1445

1446

class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name via the base class and refresh errmsg to "Expected <name>"."""
        renamed = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed

1457

1458

1459

class Empty(Token):
    """An empty token, will always match."""

    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # matches without consuming input, so it cannot index past the end
        self.mayIndexError = False
        self.mayReturnEmpty = True

1466

1467

1468

class NoMatch(Token):
    """A token that will never match."""

    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this expression's exception, positioned at loc."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

1483

1484

1485

class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty literal is turned into an Empty, with a warning
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, match string) if instring has the literal at loc.

        Otherwise raises this expression's exception positioned at loc.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen==1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal

1517

1518

class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done in upper case
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, keyword) if the keyword stands alone at loc.

        The keyword must match at loc and must not be adjacent, on either
        side, to a character from identChars.  Otherwise raises this
        expression's exception positioned at loc.
        """
        end = loc + self.matchLen
        if self.caseless:
            matched = ( (instring[ loc:end ].upper() == self.caselessmatch) and
                        (end >= len(instring) or instring[end].upper() not in self.identChars) and
                        (loc == 0 or instring[loc-1].upper() not in self.identChars) )
        else:
            matched = ( instring[loc] == self.firstMatchChar and
                        (self.matchLen==1 or instring.startswith(self.match,loc)) and
                        (end >= len(instring) or instring[end] not in self.identChars) and
                        (loc == 0 or instring[loc-1] not in self.identChars) )
        if matched:
            return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy via the base class; the copy's identChars is reset to DEFAULT_KEYWORD_CHARS."""
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)

1578

1579

class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__( self, matchString ):
        # the base Literal holds the upper-cased form used for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, defining string) on a case-insensitive match at loc.

        Otherwise raises this expression's exception positioned at loc.
        """
        end = loc + self.matchLen
        if instring[ loc:end ].upper() == self.match:
            return end, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

1600

1601

class CaselessKeyword(Keyword):
    """Keyword that is always constructed with caseless=True."""

    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, keyword) on a case-insensitive keyword match at loc.

        Only the character following the match is checked against
        identChars.  Otherwise raises this expression's exception
        positioned at loc.
        """
        end = loc + self.matchLen
        if instring[ loc:end ].upper() == self.caselessmatch:
            if end >= len(instring) or instring[end].upper() not in self.identChars:
                return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

1614

1615

class Word(Token):

1616

"""Token for matching words composed of allowed character sets.

1617

Defined with string containing all allowed initial characters,

1618

an optional string containing allowed body characters (if omitted,

1619

defaults to the initial character set), and an optional minimum,

1620

maximum, and/or exact length. The default value for min is 1 (a

1621

minimum value < 1 is not valid); the default values for max and exact

1622

are 0, meaning no maximum or exact length restriction.

1623

"""

1624

def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):

1625

super(Word,self).__init__()

1626

self.initCharsOrig = initChars

1627

self.initChars = _str2dict(initChars)

1628

if bodyChars :

1629

self.bodyCharsOrig = bodyChars

1630

self.bodyChars = _str2dict(bodyChars)

1631

else:

1632

self.bodyCharsOrig = initChars

1633

self.bodyChars = _str2dict(initChars)

1634

1635

self.maxSpecified = max > 0

1636

1637

if min < 1:

1638

raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

1639

1640

self.minLen = min

1641

1642

if max > 0:

1643

self.maxLen = max

1644

else:

1645

self.maxLen = _MAX_INT

1646

1647

if exact > 0:

1648

self.maxLen = exact

1649

self.minLen = exact

1650

1651

self.name = _ustr(self)

1652

self.errmsg = "Expected " + self.name

1653

#self.myException.msg = self.errmsg

1654

self.mayIndexError = False

1655

self.asKeyword = asKeyword

1656

1657

if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):

1658

if self.bodyCharsOrig == self.initCharsOrig:

1659

self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)

1660

elif len(self.bodyCharsOrig) == 1:

1661

self.reString = "%s[%s]*" % \

1662

(re.escape(self.initCharsOrig),

1663

_escapeRegexRangeChars(self.bodyCharsOrig),)

1664

else:

1665

self.reString = "[%s][%s]*" % \

1666

(_escapeRegexRangeChars(self.initCharsOrig),

1667

_escapeRegexRangeChars(self.bodyCharsOrig),)

1668

if self.asKeyword:

1669

self.reString = r"\b"+self.reString+r"\b"

1670

try:

1671

self.re = re.compile( self.reString )

1672

except:

1673

self.re = None

1674

1675

def parseImpl( self, instring, loc, doActions=True ):

1676

if self.re:

1677

result = self.re.match(instring,loc)

1678

if not result:

1679

exc = self.myException

1680

exc.loc = loc

1681

exc.pstr = instring

1682

raise exc

1683

1684

loc = result.end()

1685

return loc,result.group()

1686

1687

if not(instring[ loc ] in self.initChars):

1688

#~ raise ParseException( instring, loc, self.errmsg )

1689

exc = self.myException

1690

exc.loc = loc

1691

exc.pstr = instring

1692

raise exc

1693

start = loc

1694

loc += 1

1695

instrlen = len(instring)

1696

bodychars = self.bodyChars

1697

maxloc = start + self.maxLen

1698

maxloc = min( maxloc, instrlen )

1699

while loc < maxloc and instring[loc] in bodychars:

1700

loc += 1

1701

1702

throwException = False

1703

if loc - start < self.minLen:

1704

throwException = True

1705

if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:

1706

throwException = True

1707

if self.asKeyword:

1708

if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):

1709

throwException = True

1710

1711

if throwException:

1712

#~ raise ParseException( instring, loc, self.errmsg )

1713

exc = self.myException

1714

exc.loc = loc

1715

exc.pstr = instring

1716

raise exc

1717

1718

return loc, instring[start:loc]

1719

1720

def __str__( self ):
    """Return a printable description of this Word.

    The base-class __str__ is tried first; if it raises for any reason,
    a short form "W:(init)" or "W:(init,body)" is built from the original
    character sets (each cut to 4 characters plus "..." when longer) and
    cached in self.strRepr.
    """
    try:
        return super(Word,self).__str__()
    except:
        # fall through to the generated representation
        pass

    if self.strRepr is not None:
        return self.strRepr

    def abbreviate(chars):
        # keep long character sets readable
        if len(chars) > 4:
            return chars[:4]+"..."
        return chars

    if self.initCharsOrig == self.bodyCharsOrig:
        self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
    else:
        self.strRepr = "W:(%s,%s)" % ( abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig) )

    return self.strRepr

1741

1742

1743

class Regex(Token):
    """Token that matches input text against a regular expression.
       The expression is given as a string in the syntax of Python's built-in re module.
    """
    def __init__( self, pattern, flags=0):
        """pattern and flags are handed unchanged to re.compile(); see the Python re module
           for the accepted patterns and flags.  An empty pattern only produces a
           SyntaxWarning; a pattern that does not compile produces a SyntaxWarning and the
           compile error is re-raised."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(pattern, flags)
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = self.pattern

        # the name comes from __str__, which needs self.pattern to be set already
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the compiled pattern at loc.  Returns (end_location, ParseResults) where the
           results hold the whole matched text plus one named entry per named group of the
           pattern.  On failure the exception stored in self.myException is updated and raised."""
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        # expose named groups as named results
        for groupName, groupText in found.groupdict().items():
            tokens[groupName] = groupText
        return found.end(), tokens

    def __str__( self ):
        """Return the base-class string if that works, else a cached "Re:(<pattern repr>)"."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

1798

1799

1800

class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) when quoteChar or endQuoteChar is
           empty once surrounding whitespace has been stripped, and re-raises the re
           compile error (after a SyntaxWarning) if the generated pattern is invalid.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # the escape character (if any) is excluded from the "ordinary character" class
        escCharClass = ''
        if escChar is not None:
            escCharClass = _escapeRegexRangeChars(escChar) or ''

        # single-line strings additionally exclude CR and LF from the body
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            bodyStart = r'%s(?:[^%s%s]'
        else:
            self.flags = 0
            bodyStart = r'%s(?:[^%s\n\r%s]'
        self.pattern = bodyStart % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              escCharClass )

        # for a multi-character end quote, allow proper prefixes of it that are
        # not followed by the next end-quote character
        if len(self.endQuoteChar) > 1:
            partials = [ "%s[^%s]" % ( re.escape(self.endQuoteChar[:i]),
                                       _escapeRegexRangeChars(self.endQuoteChar[i]) )
                         for i in range(len(self.endQuoteChar)-1,0,-1) ]
            self.pattern += '|(?:' + ')|(?:'.join(partials) + ')'
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # the pattern is generated here, not supplied by the caller
            warnings.warn("invalid pattern (%s) generated for QuotedString" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc and return (end_location, text).

           With unquoteResults set, the delimiters are removed, escChar-escaped
           characters are replaced by the bare character and every escQuote
           sequence is replaced by endQuoteChar.  On failure the exception stored
           in self.myException is updated and raised.
        """
        # cheap first-character test before running the regular expression
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid string-literal escape
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base-class string if that works, else a cached description
           naming the start and end quote strings."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr

1914

1915

1916

class CharsNotIn(Token):
    """Token for matching words made only of characters that are *not* in a given set.
       Constructed from a string holding every disallowed character, plus an optional
       minimum, maximum, and/or exact length.  min defaults to 1 (values below 1 are
       rejected); max and exact default to 0, meaning no maximum or exact length
       restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and the length limits.
           Raises ValueError when min < 1.  A positive exact overrides both min and max."""
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            shortest = exact
            longest = exact
        else:
            shortest = min
            if max > 0:
                longest = max
            else:
                longest = _MAX_INT
        self.minLen = shortest
        self.maxLen = longest

        # the name is derived from __str__, which reads self.notChars
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        #self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from loc while they are not in self.notChars, up to
           self.maxLen characters.  Returns (end_location, matched_text); raises the
           exception stored in self.myException when the first character is excluded
           or fewer than self.minLen characters were consumed."""
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        limit = min( begin+self.maxLen, len(instring) )
        pos = begin + 1
        while pos < limit and instring[pos] not in excluded:
            pos += 1

        if pos - begin < self.minLen:
            failure = self.myException
            failure.loc = pos
            failure.pstr = instring
            raise failure

        return pos, instring[begin:pos]

    def __str__( self ):
        """Return the base-class string if that works, else a cached "!W:(chars)" form
           showing at most the first 4 excluded characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr

1986

1987

class White(Token):
    """Special matching class for matching whitespace.  Whitespace is normally skipped
       by pyparsing grammars; use this class where particular whitespace is significant.
       Define with a string containing the whitespace characters to be matched; default
       is " \\t\\r\\n".  Also takes optional min, max, and exact arguments, as defined
       for the Word class."""
    # display names used to build the element name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Store the whitespace characters to match and the length limits.
           The characters in ws are removed from this element's own skippable
           whitespace so that they can be matched.  A positive exact overrides
           both min and max."""
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = [ ch for ch in self.whiteChars if ch not in self.matchWhite ]
        self.setWhitespaceChars( "".join(skippable) )
        #~ self.leaveWhitespace()
        labels = [ White.whiteStrs[ch] for ch in self.matchWhite ]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg

        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters of self.matchWhite starting at loc, at most self.maxLen of
           them.  Returns (end_location, matched_text); raises the exception stored in
           self.myException when the first character does not match or fewer than
           self.minLen characters were consumed."""
        if instring[loc] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        limit = min( begin + self.maxLen, len(instring) )
        pos = begin + 1
        while pos < limit and instring[pos] in self.matchWhite:
            pos += 1

        if pos - begin < self.minLen:
            failure = self.myException
            failure.loc = pos
            failure.pstr = instring
            raise failure

        return pos, instring[begin:pos]

2043

2044

2045

class _PositionToken(Token):
    """Private base class for the position-testing tokens defined after it.
       Names the element after its concrete class and marks it as possibly
       returning an empty match and as not raising IndexError."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__

2051

2052

class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        """colno is the target column, compared against the value returned by col()."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Advance loc towards the target column: skip ignorable expressions (if any
           are registered), then step over whitespace until the target column or a
           non-space character is reached.  Returns the new location."""
        if col(loc,instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (new_location, skipped_text) where new_location lies in the target
           column.  Raises ParseException when loc is already past that column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]

2074

2075

class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newline must not be skipped as whitespace, it is what marks a line start
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"
        #self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        """Run the inherited preParse; if the position it reports holds a newline,
           return loc + 1, otherwise return loc unchanged."""
        afterSkip = super(LineStart,self).preParse(instring,loc)
        if instring[afterSkip] == "\n":
            return loc + 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) when loc is 0, equals the pre-parsed start of the
           string, or directly follows a newline; otherwise raise the exception stored
           in self.myException."""
        atLineStart = ( loc == 0 or
                        loc == self.preParse( instring, 0 ) or
                        instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

2099

2100

class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # keep newline out of the skippable whitespace so it can be matched here
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc+1, "\\n") when a newline is at loc, (loc+1, []) when loc is
           exactly the end of the string; in every other case raise the exception
           stored in self.myException."""
        end = len(instring)
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == end:
            return loc+1, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

2125

2126

class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) when loc is 0 or when everything before loc is
           consumed by preParse from position 0; otherwise raise the exception
           stored in self.myException."""
        # a non-zero loc is still "the start" if preParse from 0 lands exactly on it
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []

2143

2144

class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed only at or beyond the end of instring.

           Returns (loc+1, []) when loc is exactly len(instring) and (loc, [])
           when loc is already past it.  When loc is inside the string, the
           exception stored in self.myException is updated and raised.
        """
        end = len(instring)
        if loc < end:
            #~ raise ParseException( instring, loc, "Expected end of text" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == end:
            return loc+1, []
        # loc > end is the only remaining case
        return loc, []

2167

2168

class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.errmsg = "Not at the start of a word"
        self.wordChars = _str2dict(wordChars)

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with (loc, []) at position 0, or when the previous character is
           not a word character and the current one is; otherwise raise the
           exception stored in self.myException."""
        if loc == 0:
            return loc, []

        wordChars = self.wordChars
        if instring[loc-1] in wordChars or instring[loc] not in wordChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []

2189

2190

class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        # whitespace is not skipped: the test applies to the exact position given
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with (loc, []) when loc is at or past the end of the string, or
           when the current character is not a word character and the previous one
           is; otherwise raise the exception stored in self.myException."""
        total = len(instring)
        if total > 0 and loc < total:
            wordChars = self.wordChars
            if instring[loc] in wordChars or instring[loc-1] not in wordChars:
                failure = self.myException
                failure.loc = loc
                failure.pstr = instring
                raise failure
        return loc, []

2214

2215

2216

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Store the contained expressions in self.exprs.  A list is kept as given,
           a string becomes a single Literal, any other iterable is copied into a
           list, and a non-iterable object is wrapped in a one-element list."""
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            contained = exprs
        elif isinstance( exprs, basestring ):
            contained = [ Literal( exprs ) ]
        else:
            try:
                contained = list( exprs )
            except TypeError:
                contained = [ exprs ]
        self.exprs = contained
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions, drop the cached string form and
           return self."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions (on copies of them, which replace the originals here)."""
        self.skipWhitespace = False
        copies = [ e.copy() for e in self.exprs ]
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on every contained expression.
           A Suppress instance that is already registered is not added again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string if that works, else a cached
           "ClassName:(exprs)" form."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this element and its contents, then flatten a nested
           expression of the same class sitting in first or last position."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        def absorbable( candidate ):
            return ( isinstance( candidate, self.__class__ ) and
                     not(candidate.parseAction) and
                     candidate.resultsName is None and
                     not candidate.debug )

        if len(self.exprs) == 2:
            first = self.exprs[0]
            if absorbable( first ):
                self.exprs = first.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError  |= first.mayIndexError

            # looked up only now: the list may just have been rebuilt above
            last = self.exprs[-1]
            if absorbable( last ):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError  |= last.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression with this element appended to the
           trace, then run checkRecursion from an empty list."""
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )

2311

2312

class And(ParseExpression):

2313

"""Requires all given ParseExpressions to be found in the given order.

2314

Expressions may be separated by whitespace.

2315

May be constructed using the '+' operator.

2316

"""

2317

2318

class _ErrorStop(Empty):

2319

def __init__(self, *args, **kwargs):

2320

super(Empty,self).__init__(*args, **kwargs)

2321

self.leaveWhitespace()

2322

2323

def __init__( self, exprs, savelist = True ):

2324

super(And,self).__init__(exprs, savelist)

2325

self.mayReturnEmpty = True

2326

for e in self.exprs:

2327

if not e.mayReturnEmpty:

2328

self.mayReturnEmpty = False

2329

break

2330

self.setWhitespaceChars( exprs[0].whiteChars )

2331

self.skipWhitespace = exprs[0].skipWhitespace

2332

self.callPreparse = True

2333

2334

def parseImpl( self, instring, loc, doActions=True ):

2335

# pass False as last arg to _parse for first element, since we already

2336

# pre-parsed the string as part of our And pre-parsing

2337

loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )

2338

errorStop = False

2339

for e in self.exprs[1:]:

2340

if isinstance(e, And._ErrorStop):

2341

errorStop = True

2342

continue

2343

if errorStop:

2344

try:

2345

loc, exprtokens = e._parse( instring, loc, doActions )

2346

except ParseSyntaxException:

2347

raise

2348

except ParseBaseException, pe:

2349

raise ParseSyntaxException(pe)

2350

except IndexError, ie:

2351

raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )

2352

else:

2353

loc, exprtokens = e._parse( instring, loc, doActions )

2354

if exprtokens or exprtokens.keys():

2355

resultlist += exprtokens

2356

return loc, resultlist

2357

2358

def __iadd__(self, other ):

2359

if isinstance( other, basestring ):

2360

other = Literal( other )

2361

return self.append( other ) #And( [ self, other ] )

2362

2363

def checkRecursion( self, parseElementList ):

2364

subRecCheckList = parseElementList[:] + [ self ]

2365

for e in self.exprs:

2366

e.checkRecursion( subRecCheckList )

2367

if not e.mayReturnEmpty:

2368

break

2369

2370

def __str__( self ):

2371

if hasattr(self,"name"):

2372

return self.name

2373

2374

if self.strRepr is None:

2375

self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

2376

2377

return self.strRepr

2378

2379

2380

class Or(ParseExpression):

2381

"""Requires that at least one ParseExpression is found.

2382

If two expressions match, the expression that matches the longest string will be used.

2383

May be constructed using the '^' operator.

2384

"""

2385

def __init__( self, exprs, savelist = False ):

2386

super(Or,self).__init__(exprs, savelist)

2387

self.mayReturnEmpty = False

2388

for e in self.exprs:

2389

if e.mayReturnEmpty:

2390

self.mayReturnEmpty = True

2391

break

2392

2393

def parseImpl( self, instring, loc, doActions=True ):

2394

maxExcLoc = -1

2395

maxMatchLoc = -1

2396

maxException = None

2397

for e in self.exprs:

2398

try:

2399

loc2 = e.tryParse( instring, loc )

2400

except ParseException, err:

2401

if err.loc > maxExcLoc:

2402

maxException = err

2403

maxExcLoc = err.loc

2404

except IndexError:

2405

if len(instring) > maxExcLoc:

2406

maxException = ParseException(instring,len(instring),e.errmsg,self)

2407

maxExcLoc = len(instring)

2408

else:

2409

if loc2 > maxMatchLoc:

2410

maxMatchLoc = loc2

2411

maxMatchExp = e

2412

2413

if maxMatchLoc < 0:

2414

if maxException is not None:

2415

raise maxException

2416

else:

2417

raise ParseException(instring, loc, "no defined alternatives to match", self)

2418

2419

return maxMatchExp._parse( instring, loc, doActions )

2420

2421

def __ixor__(self, other ):

2422

if isinstance( other, basestring ):

2423

other = Literal( other )

2424

return self.append( other ) #Or( [ self, other ] )

2425

2426

def __str__( self ):

2427

if hasattr(self,"name"):

2428

return self.name

2429

2430

if self.strRepr is None:

2431

self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

2432

2433

return self.strRepr

2434

2435

def checkRecursion( self, parseElementList ):

2436

subRecCheckList = parseElementList[:] + [ self ]

2437

for e in self.exprs:

2438

e.checkRecursion( subRecCheckList )

2439

2440

2441

class MatchFirst(ParseExpression):

2442

"""Requires that at least one ParseExpression is found.

2443

If two expressions match, the first one listed is the one that will match.

2444

May be constructed using the '|' operator.

2445

"""

2446

def __init__( self, exprs, savelist = False ):

2447

super(MatchFirst,self).__init__(exprs, savelist)

2448

if exprs:

2449

self.mayReturnEmpty = False

2450

for e in self.exprs:

2451

if e.mayReturnEmpty:

2452

self.mayReturnEmpty = True

2453

break

2454

else:

2455

self.mayReturnEmpty = True

2456

2457

def parseImpl( self, instring, loc, doActions=True ):

2458

maxExcLoc = -1

2459

maxException = None

2460

for e in self.exprs:

2461

try:

2462

ret = e._parse( instring, loc, doActions )

2463

return ret

2464

except ParseException, err:

2465

if err.loc > maxExcLoc:

2466

maxException = err

2467

maxExcLoc = err.loc

2468

except IndexError:

2469

if len(instring) > maxExcLoc:

2470

maxException = ParseException(instring,len(instring),e.errmsg,self)

2471

maxExcLoc = len(instring)

2472

2473

# only got here if no expression matched, raise exception for match that made it the furthest

2474

else:

2475

if maxException is not None:

2476

raise maxException

2477

else:

2478

raise ParseException(instring, loc, "no defined alternatives to match", self)

2479

2480

def __ior__(self, other ):

2481

if isinstance( other, basestring ):

2482

other = Literal( other )

2483

return self.append( other ) #MatchFirst( [ self, other ] )

2484

2485

def __str__( self ):

2486

if hasattr(self,"name"):

2487

return self.name

2488

2489

if self.strRepr is None:

2490

self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

2491

2492

return self.strRepr

2493

2494

def checkRecursion( self, parseElementList ):

2495

subRecCheckList = parseElementList[:] + [ self ]

2496

for e in self.exprs:

2497

e.checkRecursion( subRecCheckList )

2498

2499

2500

class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """exprs are the expressions that must all be found.  The element may
           return empty only if every contained expression may."""
        super(Each,self).__init__(exprs, savelist)
        allMayBeEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                allMayBeEmpty = False
                break
        self.mayReturnEmpty = allMayBeEmpty
        self.skipWhitespace = True
        # the expression groups are built lazily on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the contained expressions match, then parse them
           in that order and merge their results.  Returns (end_location, results);
           raises ParseException when a required expression was never matched."""
        if self.initExprGroups:
            # sort the contained expressions by wrapper kind, once
            self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        # trial passes: repeat until a full pass matches nothing at all
        while True:
            candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            failed = []
            for e in candidates:
                try:
                    probeLoc = e.tryParse( instring, probeLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    if e in pendingRequired:
                        pendingRequired.remove(e)
                    elif e in pendingOptional:
                        pendingOptional.remove(e)
            if len(failed) == len(candidates):
                break

        if pendingRequired:
            missing = ", ".join( [ _ustr(e) for e in pendingRequired ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOptional)

        # real parse, in the order found above
        collected = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            collected.append(results)

        finalResults = ParseResults([])
        for r in collected:
            # names already present are rebuilt as old value plus new value
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    combined = ParseResults(finalResults[k])
                    combined += ParseResults(r[k])
                    dups[k] = combined
            finalResults += ParseResults(r)
            for k,v in dups.iteritems():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if there is one, else a cached
           "{a & b & c}" listing of the contained expressions."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " & ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run checkRecursion on every contained expression with self added to the list."""
        visited = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( visited )

2584

2585

2586

class ParseElementEnhance(ParserElement):

2587

"""Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

2588

def __init__( self, expr, savelist=False ):

2589

super(ParseElementEnhance,self).__init__(savelist)

2590

if isinstance( expr, basestring ):

2591

expr = Literal(expr)

2592

self.expr = expr

2593

self.strRepr = None

2594

if expr is not None:

2595

self.mayIndexError = expr.mayIndexError

2596

self.mayReturnEmpty = expr.mayReturnEmpty

2597

self.setWhitespaceChars( expr.whiteChars )

2598

self.skipWhitespace = expr.skipWhitespace

2599

self.saveAsList = expr.saveAsList

2600

self.callPreparse = expr.callPreparse

2601

self.ignoreExprs.extend(expr.ignoreExprs)

2602

2603

def parseImpl( self, instring, loc, doActions=True ):

2604

if self.expr is not None:

2605

return self.expr._parse( instring, loc, doActions, callPreParse=False )

2606

else:

2607

raise ParseException("",loc,self.errmsg,self)

2608

2609

def leaveWhitespace( self ):

2610

self.skipWhitespace = False

2611

self.expr = self.expr.copy()

2612

if self.expr is not None:

2613

self.expr.leaveWhitespace()

2614

return self

2615

2616

def ignore( self, other ):

2617

if isinstance( other, Suppress ):

2618

if other not in self.ignoreExprs:

2619

super( ParseElementEnhance, self).ignore( other )

2620

if self.expr is not None:

2621

self.expr.ignore( self.ignoreExprs[-1] )

2622

else:

2623

super( ParseElementEnhance, self).ignore( other )

2624

if self.expr is not None:

2625

self.expr.ignore( self.ignoreExprs[-1] )

2626

return self

2627

2628

def streamline( self ):

2629

super(ParseElementEnhance,self).streamline()

2630

if self.expr is not None:

2631

self.expr.streamline()

2632

return self

2633

2634

def checkRecursion( self, parseElementList ):

2635

if self in parseElementList:

2636

raise RecursiveGrammarException( parseElementList+[self] )

2637

subRecCheckList = parseElementList[:] + [ self ]

2638

if self.expr is not None:

2639

self.expr.checkRecursion( subRecCheckList )

2640

2641

def validate( self, validateTrace=[] ):
    """Validate the contained expression, then check this element for recursion.

    validateTrace is never modified; a new list with self appended is
    passed down to the contained expression.
    """
    inner = self.expr
    if inner is not None:
        inner.validate( validateTrace[:]+[self] )
    self.checkRecursion( [] )

2646

2647

def __str__( self ):
    """Return the base class string if available, else 'ClassName:(expr)'.

    The fallback text is cached in self.strRepr.
    """
    try:
        return super(ParseElementEnhance,self).__str__()
    except:
        # any failure in the base implementation falls through to the
        # locally built representation
        pass

    if self.strRepr is None and self.expr is not None:
        className = self.__class__.__name__
        self.strRepr = "%s:(%s)" % ( className, _ustr(self.expr) )
    return self.strRepr

2656

2657

2658

class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.

    FollowedBy does *not* advance the parsing position within the input
    string; it only verifies that the given expression matches at the
    current position.  It always returns a null token list.
    """
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # a failing tryParse raises; on success its result is discarded
        self.expr.tryParse( instring, loc )
        return loc, []

2670

2671

2672

class NotAny(ParseElementEnhance):
    """Negative lookahead for the given parse expression.

    NotAny does *not* advance the parsing position within the input
    string; it only verifies that the given expression does *not* match
    at the current position.  NotAny also does *not* skip over leading
    whitespace, and always returns a null token list.  May be
    constructed using the '~' operator.
    """
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace() here, the setting must not
        # propagate to the contained expressions
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent, so the lookahead succeeds
            return loc, []
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr

2707

2708

2709

class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        tokens = []
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipFirst = ( len(self.ignoreExprs) > 0 )
            # keep matching until the contained expression fails; the
            # resulting exception is what ends the loop
            while True:
                preloc = loc
                if skipFirst:
                    preloc = self._skipIgnorables( instring, loc )
                loc, tmptokens = self.expr._parse( instring, preloc, doActions )
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and mark the returned element saveAsList."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named

2746

2747

2748

class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        # the first match is mandatory, so it sits outside the try block
        # and a failure propagates to the caller
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            skipFirst = ( len(self.ignoreExprs) > 0 )
            # further matches are optional; the first failure ends the loop
            while True:
                preloc = loc
                if skipFirst:
                    preloc = self._skipIgnorables( instring, loc )
                loc, tmptokens = self.expr._parse( instring, preloc, doActions )
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and mark the returned element saveAsList."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named

2781

2782

class _NullToken(object):

2783

def __bool__(self):

2784

return False

2785

__nonzero__ = __bool__

2786

def __str__(self):

2787

return ""

2788

2789

_optionalNotMatched = _NullToken()

2790

class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    A default return value can also be specified, to be returned when
    the optional expression is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default was supplied
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the contained expression's results name too
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr

2822

2823

2824

class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    If include is set to true, the matched expression is also parsed
    (the skipped text and matched expression are returned as a 2-element
    list).  The ignore argument is used to define grammars (typically
    quoted strings and comments) that might contain false matches.
    If failOn is given (an expression, or a string that is converted to
    a Literal), the skip fails as soon as failOn matches at a position
    being examined.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # set only while the failOn exception is propagating, so the
        # outer handler re-raises it instead of advancing
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # step over every consecutive ignorable match
                    # (a leftover debugging print used to write to stdout here)
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    # no match at this position, try the next one
                    loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

2889

2890

class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the Forward variable
    using the '<<' operator.

    Note: take care when assigning to Forward not to overlook precedence
    of operators.  Specifically, '|' has a lower precedence than '<<',
    so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is
    recommended that you explicitly group the values inserted into the
    Forward::
        fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign the real expression and copy its settings onto this element.  Returns None."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None
        self.mayIndexError = other.mayIndexError
        self.setWhitespaceChars( other.whiteChars )
        self.skipWhitespace = other.skipWhitespace
        self.saveAsList = other.saveAsList
        self.ignoreExprs.extend(other.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        # only this element is changed; the contained expression is not touched
        self.skipWhitespace = False
        return self

    def streamline( self ):
        if self.streamlined:
            return self
        # set the flag before descending into the contained expression
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        if self not in validateTrace:
            if self.expr is not None:
                self.expr.validate( validateTrace[:]+[self] )
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # Temporarily turn this instance into a _ForwardNoRecurse, whose
        # __str__ returns "...", while the contained expression is
        # rendered; the original class is always restored afterwards.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            retString = "None"
            if self.expr is not None:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        if self.expr is not None:
            return super(Forward,self).copy()
        # not defined yet: return a new Forward that refers back to this one
        ret = Forward()
        ret << self
        return ret

2961

2962

class _ForwardNoRecurse(Forward):
    """Forward variant whose string form is a fixed '...' placeholder."""
    def __str__( self ):
        return "..."

2965

2966

class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False

2971

2972

class Upcase(TokenConverter):
    """Converter to upper case all matching tokens (deprecated)."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn(
            "Upcase class is deprecated, use upcaseTokens parse action instead",
            DeprecationWarning,
            stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        return [ string.upper(tok) for tok in tokenlist ]

2981

2982

2983

class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.

    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying 'adjacent=False'
    in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore( self, other ):
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            # use the base ParserElement behaviour directly
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # start from a copy of the results, emptied of its tokens, then
        # add the single joined string
        retToks = tokenlist.copy()
        del retToks[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        return retToks

3013

3014

class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the whole token list as one element
        return [ tokenlist ]

3022

3023

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.

    Each element can also be referenced using the first token in the
    expression as its key.  Useful for tabular report scraping when the
    first column can be used as an item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for i,tok in enumerate(tokenlist):
            ntoks = len(tok)
            if ntoks == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey,int):
                # integer keys are stored under their stripped string form
                ikey = _ustr(tok[0]).strip()
            if ntoks == 1:
                # a key with no value maps to the empty string
                value = ""
            elif ntoks == 2 and not isinstance(tok[1],ParseResults):
                value = tok[1]
            else:
                dictvalue = tok.copy()
                del dictvalue[0]
                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
                    value = dictvalue
                else:
                    value = dictvalue[0]
            tokenlist[ikey] = _ParseResultsWithOffset(value,i)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist

3055

3056

3057

class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # the matched tokens are dropped
        return []

    def suppress( self ):
        # already a Suppress, so return this same instance
        return self

3064

3065

3066

class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # the flag is set only after the wrapped call returns without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called once more."""
        self.called = False

3079

3080

def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line and a "leaving" line
    (with either the return value or the exception) to sys.stderr
    around every call of the parse action.
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        thisFunc = f.func_name
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is taken to be the owning
            # instance; prefix the name with its class name
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

3101

3102

#

3103

# global helpers

3104

#

3105

def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.

    By default, the list elements and delimiters can have intervening
    whitespace and comments, but this can be overridden by passing
    'combine=True'.  If combine is set to True, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)

3118

3119

def countedArray( expr ):
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as a list of the expr tokens - the
    leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # redefine the array expression for the count that was just parsed
        n = int(t[0])
        arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
        return []

    countField = Word(nums).setName("arrayLen")
    countField = countField.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( countField + arrayExpr )

3132

3133

def _flatten(L):

3134

if type(L) is not list: return [L]

3135

if L == []: return L

3136

return _flatten(L[0]) + _flatten(L[1:])

3137

3138

def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches a
    previous literal, it will also match the leading "1:1" in "1:10".
    If this is not desired, use matchPreviousExpr.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens and require each one, in order, as a literal
            tflat = _flatten(t.asList())
            rep << And( [ Literal(tt) for tt in tflat ] )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep

3163

3164

def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches by
    expressions, it will *not* match the leading "1:1" in "1:10";
    the expressions are evaluated first, and then compared, so
    "1" is compared with "10".
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s,l,t):
        # remember what the first expression matched ...
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            # ... and reject a repeat whose tokens differ from it
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep

3189

3190

def _escapeRegexRangeChars(s):
    """Backslash-escape the characters \\ ^ - ] in s and spell newline and tab as \\n and \\t."""
    # the backslash comes first, so backslashes added for the other
    # characters are not escaped again
    for special in r"\^-]":
        s = s.replace(special,_bslash+special)
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)

3197

3198

def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a MatchFirst for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a MatchFirst object (if caseless=True, or
       if creating a Regex raises an exception)

    Any other type for strs produces a SyntaxWarning and is treated as
    an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # `symbols` used to stay unbound on this path, so the loop below
        # raised UnboundLocalError right after the warning
        symbols = []

    # remove duplicates, and move any symbol that is masked by an
    # earlier, shorter prefix in front of that prefix
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    # an empty symbol list cannot form a valid character class, so skip
    # the Regex attempt for it
    if symbols and not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # was a bare except, which also swallowed KeyboardInterrupt
            # and SystemExit
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )

3256

3257

def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the
    respective patterns for the key and value.

    Takes care of defining the Dict, ZeroOrMore, and Group tokens in the
    proper order.  The key pattern can include delimiting markers or
    punctuation, as long as they are suppressed, thereby leaving the
    significant key text.  The value pattern can include named results,
    so that the Dict results can include named token fields.
    """
    entry = Group ( key + value )
    return Dict( ZeroOrMore( entry ) )

3266

3267

def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.

    Useful to restore the parsed fields of an HTML start tag into the
    raw tag text itself, or to revert separate tokens with intervening
    whitespace back to the original matching input text.  Simpler to use
    than the parse action keepOriginalText, and does not require the
    inspect module to chase up the call stack.  By default, returns a
    string containing the original parsed text.

    If the optional asString argument is passed as False, then the
    return value is a ParseResults containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string.  So if the expression passed to
    originalTextFor contains expressions with defined results names, you
    must set asString to False if you want to preserve those results
    name values.
    """
    def markLocation(s,loc,t):
        return loc

    # empty expressions that report the location before and after expr
    locMarker = Empty().setParseAction(markLocation)
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the tokens by the raw text, keep other results
            # names, and drop the two helper markers
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    matchExpr.setParseAction(extractText)
    return matchExpr

3293

3294

# convenience constants for positional expressions;
# each is a named instance of the matching positional class
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

3300

3301

# grammar for the bracketed character-set strings accepted by srange()

# a backslash followed by one punctuation character; yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# escaped hex / octal character codes, converted to the character itself
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = (
    _escapedPunc
    | _escapedHexChar
    | _escapedOctChar
    | Word(_printables_less_backslash,exact=1)
)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Optional("^").setResultsName("negate")
    + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body")
    + "]"
)

def _expanded(p):
    # A ParseResults part is a grouped range: expand it to every
    # character from its first to its second element, inclusive.
    # Anything else, or a range that expands to nothing, is returned
    # unchanged.
    if isinstance(p,ParseResults):
        chars = ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ])
        if chars:
            return chars
    return p

3310

3311

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
       srange("[0-9]")   -> "0123456789"
       srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
       srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be::
       a single character
       an escaped character with a leading backslash (such as \- or \])
       an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
       an escaped octal character with a leading '\0' (\041, which is a '!' character)
       a range of any of the above, separated by a dash ('a-z', etc.)
       any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
    An empty string is returned when s cannot be processed.
    """
    try:
        parts = _reBracketExpr.parseString(s).body
        return "".join([_expanded(part) for part in parts])
    except:
        # any failure results in an empty string
        return ""

3331

3332

def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a
    specific column in the input text.

    Returns a parse action that raises ParseException when the match
    location is not at column n.
    """
    def verifyCol(strg,locn,toks):
        actual = col(locn,strg)
        if actual != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol

3340

3341

def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.

    Especially useful when used with transformString().  The returned
    parse action ignores its arguments and returns [replStr].
    """
    def _replFunc(*args):
        replacement = [replStr]
        return replacement
    return _replFunc

3348

3349

def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.

    To use, add this parse action to a quoted string using::
        quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    # drop the first and the last character of the first token
    return quoted[1:-1]

3355

3356

def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ _ustr(tok).upper() for tok in t ]

3359

3360

def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ _ustr(tok).lower() for tok in t ]

3363

3364

def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
    overriding any nested parse actions.

    The tokens in t are replaced by the slice of s from startLoc to the
    end location reported by getTokensEndLoc().
    """
    try:
        # must be called directly from here: getTokensEndLoc walks the
        # call stack relative to its caller
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t

3374

3375

def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
    location of the parsed tokens.

    Raises ParseFatalException when no "_parseNoCache" frame is found on
    the call stack.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers)
        # for the correct calling routine, skipping this frame and its
        # direct caller
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the frame records
        del fstack

3390

3391

def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr,basestring):
        resname = tagStr
        # the tag keyword is matched caselessly unless building XML tags
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # every attribute is name="value" with a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # attribute names are lower-cased; the value is optional and may
        # be quoted or a bare word without '>'
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) + Optional( Suppress("=") + tagAttrValue ) )

    # optional trailing "/", reported under "empty" as True or False
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(tagAttr)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" plus the tag name, title-cased
    # with any ':' separators removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag

3418

3419

def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    htmlTags = _makeTags( tagStr, False )
    return htmlTags

3422

3423

def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    xmlTags = _makeTags( tagStr, True )
    return xmlTags

3426

3427

def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
    with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    <TD> or <DIV>.

    Call withAttribute with a series of attribute names and values.  Specify the list
    of filter attribute names and values as:
     - keyword arguments, as in (class="Customer",align="right"), or
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    To verify that the attribute exists, but without specifying a value, pass
    withAttribute.ANY_VALUE as the value.
    """
    # positional name-value pairs take precedence over keyword arguments
    if args:
        pairs = args[:]
    else:
        pairs = attrDict.items()
    attrs = [(k,v) for k,v in pairs]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue == withAttribute.ANY_VALUE:
                # the attribute only has to be present
                continue
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa

# unique marker meaning "attribute must be present, any value"
withAttribute.ANY_VALUE = object()

3457

3458

# Namespace object carrying the two associativity markers that
# operatorPrecedence compares against; each marker is a distinct sentinel.
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()

3461

3462

def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary,
       binary or ternary, left- or right-associative.  Parse actions can also
       be attached to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression grammar; the complete expression enclosed in
          "(" and ")" is accepted wherever baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar, processed in list order so that each level is
          built on top of the previous one; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms; if numTerms is 2, opExpr
              may be None to match terms placed next to each other with no
              operator in between
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for a ternary operator whose opExpr is not a pair,
       for an unknown associativity, or for a numTerms other than 1, 2 or 3.
    """
    ret = Forward()
    # operand of the level currently being built; starts with the atoms
    operand = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so that the parse action member may be left out
        opExpr, arity, rightLeftAssoc, action = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()
        # For every case two separate expressions are built: a lookahead that
        # must succeed before the level is attempted, and the grouped body.
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                lookahead = operand + opExpr
                body = operand + OneOrMore( opExpr )
            elif arity == 2:
                if opExpr is not None:
                    lookahead = operand + opExpr + operand
                    body = operand + OneOrMore( opExpr + operand )
                else:
                    lookahead = operand + operand
                    body = operand + OneOrMore( operand )
            elif arity == 3:
                lookahead = operand + opExpr1 + operand + opExpr2 + operand
                body = operand + opExpr1 + operand + opExpr2 + operand
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = opExpr.expr + thisExpr
                body = opExpr + thisExpr
            elif arity == 2:
                if opExpr is not None:
                    lookahead = operand + opExpr + thisExpr
                    body = operand + OneOrMore( opExpr + thisExpr )
                else:
                    lookahead = operand + thisExpr
                    body = operand + OneOrMore( thisExpr )
            elif arity == 3:
                lookahead = operand + opExpr1 + thisExpr + opExpr2 + thisExpr
                body = operand + opExpr1 + thisExpr + opExpr2 + thisExpr
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        matchExpr = FollowedBy(lookahead) + Group( body )
        if action:
            matchExpr.setParseAction( action )
        # fall back to the lower level when no operator of this level is present
        thisExpr << ( matchExpr | operand )
        operand = thisExpr
    ret << operand
    return ret

3532

3533

dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")

3534

sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")

3535

quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")

3536

unicodeString = Combine(_L('u') + quotedString.copy())

3537

3538

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.  In that case opener and closer must both
       be strings.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.

       Raises ValueError if opener equals closer, or if content is omitted
       while opener or closer is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t: t[0].strip()
        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can simply be excluded character-wise
            notDelimOrWhite = opener + closer + whiteChars
            if ignoreExpr is not None:
                content = Combine(
                    OneOrMore( ~ignoreExpr + CharsNotIn(notDelimOrWhite, exact=1) )
                ).setParseAction(stripToken)
            else:
                # here the parse action is attached to the CharsNotIn alone
                content = empty + CharsNotIn(notDelimOrWhite).setParseAction(stripToken)
        else:
            # multi-character delimiters need explicit negative lookaheads
            if ignoreExpr is not None:
                content = Combine(
                    OneOrMore( ~ignoreExpr + ~Literal(opener) + ~Literal(closer) +
                               CharsNotIn(whiteChars, exact=1) )
                ).setParseAction(stripToken)
            else:
                content = Combine(
                    OneOrMore( ~Literal(opener) + ~Literal(closer) +
                               CharsNotIn(whiteChars, exact=1) )
                ).setParseAction(stripToken)
    ret = Forward()
    # one element inside the delimiters: ignorable text (if any), a nested
    # list, or a plain content item - tried in that order
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret

3589

3590

def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack); the checks below read
            indentStack[-1], so the list needs at least one entry, and columns
            are appended to / popped from it while parsing
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Note that ignore(_bslash + LineEnd()) is called on the blockStatementExpr
       passed in.
    """
    def checkPeerIndent(s,l,t):
        # nothing to compare once the end of the input is reached
        if l >= len(s):
            return
        curCol = col(l,s)
        expected = indentStack[-1]
        if curCol > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        if curCol != expected:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the enclosing block: open a new indentation level
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    lineEnd = LineEnd().setWhitespaceChars("\t ")
    NL = OneOrMore(lineEnd.suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    # one or more statements, each required to sit at the current indent level
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

3641

3642

# character sets expanded by srange from the given range specifications
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic open/close tag expressions, for any tag name made of a letter
# followed by letters, digits, "_" or ":"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&name;" for the five entity names below; the name is stored as "entity"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = {
    "gt"   : ">",
    "lt"   : "<",
    "amp"  : "&",
    "nbsp" : " ",
    "quot" : '"',
    }
# parse action: replacement character for a known entity, None otherwise
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

3649

3650

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# /* ... */ block comment
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... --> comment, possibly spanning several lines.  The pattern had been
# reduced to an empty string, which can never recognise a comment; the
# non-greedy [\s\S]*? stops at the first closing "-->".
htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to (not including) the end of the current line, whitespace kept
restOfLine = Regex(r".*").leaveWhitespace()
# // comment; a backslash-newline continues the comment onto the next line
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a /* ... */ block comment or a // line comment
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# Java comments use the same expression object as C++ comments
javaStyleComment = cppStyleComment
# "#" through the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

3660

# every character of printables except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# One item of a comma separated list: runs of non-comma characters, where a
# run may be followed by blanks/tabs as long as neither a comma nor the end
# of the line comes next; everything is combined into a single token.
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")
# Comma-delimited list whose items are quoted strings or plain items; an
# absent item yields the default "".
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="" )
).setName("commaSeparatedList")

3665

3666

3667

if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with simpleSQL and print the outcome.

        On success the token list, the named results "columns" and "tables"
        and an XML rendering are printed; on a parse error the offending line
        is printed with a caret under the error column, followed by the error.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->"   + str(tokenlist))
            print ("tokens = "         + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = "  + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        # "except X as err" is accepted by Python 2.6+ and Python 3, whereas
        # the older "except X,err" spelling is a syntax error on Python 3
        except ParseBaseException as err:
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # blank separator line; a bare print() would output "()" on Python 2
        print ("")

    # a minimal "SELECT columns FROM tables" grammar used as a smoke test
    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2   " )

	@@ -1,4 +0,0 b''
	1	try:
	2	from pyparsing import *
	3	except ImportError:
	4	from _pyparsing import *

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

                 The transformers are instances of :class:`PrefilterTransformer` and have
                 a single method :meth:`transform` that takes a line and returns a
                 transformed line.  The transformation can be accomplished using any
-                tool, but our current ones use regular expressions for speed.  We also
+                tool, but our current ones use regular expressions for speed.
-                ship :mod:`pyparsing` in :mod:`IPython.external` for use in transformers.
                 After all the transformers have been run, the line is fed to the checkers,
                 which are instances of :class:`PrefilterChecker`.  The line is passed to