upstream/ipython Commit - r2272:aee8f62a

1

# module pyparsing.py

2

#

3

4

#

5

# Permission is hereby granted, free of charge, to any person obtaining

6

# a copy of this software and associated documentation files (the

7

# "Software"), to deal in the Software without restriction, including

8

# without limitation the rights to use, copy, modify, merge, publish,

9

# distribute, sublicense, and/or sell copies of the Software, and to

10

# permit persons to whom the Software is furnished to do so, subject to

11

# the following conditions:

12

#

13

# The above copyright notice and this permission notice shall be

14

# included in all copies or substantial portions of the Software.

15

#

16

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

17

# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

18

# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

19

# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

20

# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

21

# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

22

# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

23

#

24

#from __future__ import generators

25

26

__doc__ = \
"""
pyparsing module - Classes and methods to define and execute parsing grammars

The pyparsing module is an alternative approach to creating and executing simple grammars,
vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.

Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::

    from pyparsing import Word, alphas

    # define grammar of a greeting
    greet = Word( alphas ) + "," + Word( alphas ) + "!"

    hello = "Hello, World!"
    print hello, "->", greet.parseString( hello )

The program outputs the following::

    Hello, World! -> ['Hello', ',', 'World', '!']

The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.

The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
object with named attributes.

The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
 - quoted strings
 - embedded comments
"""

60

61

# Release metadata for this copy of the module.
__version__ = "1.5.2"
__versionTime__ = "17 February 2009 19:45"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

64

65

import string

66

from weakref import ref as wkref

67

import copy

68

import sys

69

import warnings

70

import re

71

import sre_constants

72

#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )

73

74

# Names exported by "from pyparsing import *".
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor',
]

93

94

95

"""

96

Detect if we are running version 3.X and make appropriate changes

97

Robert A. Clark

98

"""

99

# Detect whether we are running under Python 3.X and define the names that
# differ between the two major versions.
_PY3K = sys.version_info[0] > 2
if _PY3K:
    _MAX_INT = sys.maxsize
    # Python 3 has no basestring; str is the only text type
    basestring = str
else:
    _MAX_INT = sys.maxint

if not _PY3K:
    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

           A unicode object is returned unchanged.  Otherwise str(obj) is tried
           first; if that raises UnicodeEncodeError, unicode(obj) is returned
           instead.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
            # state that "The return value must be a string object". However, does a
            # unicode object (being a subclass of basestring) count as a "string
            # object"?
            # If so, then return a unicode object:
            return unicode(obj)
            # Else encode it... but how? There are many choices... :)
            # Replace unprintables with escape codes?
            #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
            # Replace unprintables with question marks?
            #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
            # ...
else:
    _ustr = str
    unichr = chr

if not _PY3K:
    def _str2dict(strg):
        """Return a dict whose keys are the characters of strg (every value is 0),
           for use in membership tests."""
        return dict.fromkeys(strg, 0)
else:
    # on Python 3 a set of the characters serves the same membership tests
    _str2dict = set

143

144

def _xml_escape(data):
    """Escape &, <, >, ", ' in a string of data.

       Returns a copy of data in which each of those characters is replaced by
       its named entity (&amp; &lt; &gt; &quot; &apos;).
    """
    # ampersand must be replaced first, otherwise the '&' that starts every
    # other entity would itself be escaped
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
        )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data

153

154

class _Constants(object):
    """Empty class; it defines no attributes or methods of its own."""
    pass

156

157

# Character-set constants.
if not _PY3K:
    alphas = string.lowercase + string.uppercase
else:
    # string.lowercase / string.uppercase do not exist on Python 3
    alphas = string.ascii_lowercase + string.ascii_uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

166

167

class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Store the parse string, location, message and failing element.

           When msg is omitted, pstr is taken to be the message and the
           stored parse string is empty.
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text

           "column" is accepted as a synonym for "col".  Any other name
           raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        elif aname in ("col", "column"):
            return col( self.loc, self.pstr )
        elif aname == "line":
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           Returns the stripped line; if markerString is empty the line is
           returned without a marker.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # the method is spelled markInputline (lower-case 'l'); list it under
        # the name it can actually be looked up by
        return "loc msg pstr parserElement lineno col line " \
               "markInputline __str__ __repr__".split()

214

215

class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
    pass

223

224

class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
    pass

228

229

class ParseSyntaxException(ParseFatalException):
    """just like ParseFatalException, but thrown internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # built from an existing parse exception: copy its parse string,
        # location, message and parser element
        super(ParseSyntaxException, self).__init__(
                                    pe.pstr, pe.loc, pe.msg, pe.parserElement)

236

237

#~ class ReparseException(ParseBaseException):

238

#~ """Experimental class - parse actions can raise this exception to cause

239

#~ pyparsing to reparse the input string:

240

#~ - with a modified input string, and/or

241

#~ - with a modified start location

242

#~ Set the values of the ReparseException in the constructor, and raise the

243

#~ exception in a parse action to cause pyparsing to use the new string/location.

244

#~ Setting the values as None causes no change to be made.

245

#~ """

246

#~ def __init_( self, newstring, restartLoc ):

247

#~ self.newParseText = newstring

248

#~ self.reparseLoc = restartLoc

249

250

class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the elements passed in by the raiser, kept for the message
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple so that a tuple-valued trace is formatted
        # as a single value instead of being unpacked as format arguments
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)

257

258

class _ParseResultsWithOffset(object):
    """Holds a pair (p1, p2) that can be indexed like a 2-tuple; the second
       member can be replaced with setOffset()."""
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        return self.tup[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        """Replace the second member of the pair, keeping the first."""
        self.tup = (self.tup[0],i)

267

268

class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # an existing ParseResults is handed back as-is rather than wrapped;
        # __doinit tells __init__ whether the instance still needs setting up
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names report every match, not only the last one
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices index the token list; anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at index and adjusts the stored
           positions of the named results."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # only reached when normal attribute lookup fails: a results name
        # yields its value, any other non-slot name yields ""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(a):
                # written as an explicit branch: the "a<0 and offset or ..."
                # idiom falls through to the negative value when offset is 0
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        # nested results are rendered recursively, plain tokens with repr()
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append( _ustr(i) )
            else:
                parts.append( repr(i) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; the tokens themselves are
           returned unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

           The outer tag is doctag if given, else this object's results name,
           else "ITEM"; with namedItemsOnly set and no tag available an empty
           string is returned.  formatted=False drops newlines and indentation.
        """
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                        for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored here
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, so go through list()
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    #~ out.append('\n')
                    out.append( v.dump(indent,depth+1) )
                    #~ out.append('\n')
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        #~ out.append('\n')
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the parent is stored as the object itself (or None), because the
        # weak reference held in __parent cannot be pickled
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # keys() is a dict view on Python 3 and cannot be added to a list directly
        return dir(super(ParseResults,self)) + list(self.keys())

618

619

def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
       The first column is number 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    # distance from the last newline before loc; rfind gives -1 when there is
    # none, so columns on the first line start at 1 too.  A loc that sits on a
    # newline is counted like any other character of its line instead of being
    # reported as column 1.
    return loc - strg.rfind("\n", 0, loc)

630

631

def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
       The first line is number 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    # one more than the number of newlines found before loc
    newlinesBefore = strg.count("\n",0,loc)
    return newlinesBefore + 1

642

643

def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() returns -1 when there is no further newline; index 0 is a valid
    # hit (loc on a newline that starts the string) and must not be treated
    # as "not found"
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]

652

653

def _defaultStartDebugAction( instring, loc, expr ):
    """Print a "Match <expr> at loc <loc>(line,col)" message for the given location."""
    print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))

655

656

def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print a "Matched <expr> -> <tokens>" message; toks must provide asList()."""
    # _ustr rather than str, to match the other default debug actions
    print ("Matched " + _ustr(expr) + " -> " + _ustr(toks.asList()))

658

659

def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print an "Exception raised:" message followed by the text of exc."""
    print ("Exception raised:" + _ustr(exc))

661

662

def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and returns None."""
    pass

665

666

class ParserElement(object):

667

"""Abstract base level parser element class."""

668

DEFAULT_WHITE_CHARS = " \n\t\r"

669

670

def setDefaultWhitespaceChars( chars ):

671

"""Overrides the default whitespace chars

672

"""

673

ParserElement.DEFAULT_WHITE_CHARS = chars

674

setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)

675

676

def __init__( self, savelist=False ):

677

self.parseAction = list()

678

self.failAction = None

679

#~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall

680

self.strRepr = None

681

self.resultsName = None

682

self.saveAsList = savelist

683

self.skipWhitespace = True

684

self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS

685

self.copyDefaultWhiteChars = True

686

self.mayReturnEmpty = False # used when checking for left-recursion

687

self.keepTabs = False

688

self.ignoreExprs = list()

689

self.debug = False

690

self.streamlined = False

691

self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index

692

self.errmsg = ""

693

self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)

694

self.debugActions = ( None, None, None ) #custom debug actions

695

self.re = None

696

self.callPreparse = True # used to avoid redundant calls to preParse

697

self.callDuringTry = False

698

699

def copy( self ):

700

"""Make a copy of this ParserElement. Useful for defining different parse actions

701

for the same parsing pattern, using copies of the original parse element."""

702

cpy = copy.copy( self )

703

cpy.parseAction = self.parseAction[:]

704

cpy.ignoreExprs = self.ignoreExprs[:]

705

if self.copyDefaultWhiteChars:

706

cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS

707

return cpy

708

709

def setName( self, name ):

710

"""Define name for this expression, for use in debugging."""

711

self.name = name

712

self.errmsg = "Expected " + self.name

713

if hasattr(self,"exception"):

714

self.exception.msg = self.errmsg

715

return self

716

717

def setResultsName( self, name, listAllMatches=False ):

718

"""Define name for referencing matching tokens as a nested attribute

719

of the returned parse results.

720

NOTE: this returns a *copy* of the original ParserElement object;

721

this is so that the client can define a basic element, such as an

722

integer, and reference it in multiple places with different names.

723

"""

724

newself = self.copy()

725

newself.resultsName = name

726

newself.modalResults = not listAllMatches

727

return newself

728

729

def setBreak(self,breakFlag = True):

730

"""Method to invoke the Python pdb debugger when this element is

731

about to be parsed. Set breakFlag to True to enable, False to

732

disable.

733

"""

734

if breakFlag:

735

_parseMethod = self._parse

736

def breaker(instring, loc, doActions=True, callPreParse=True):

737

import pdb

738

pdb.set_trace()

739

return _parseMethod( instring, loc, doActions, callPreParse )

740

breaker._originalParseMethod = _parseMethod

741

self._parse = breaker

742

else:

743

if hasattr(self._parse,"_originalParseMethod"):

744

self._parse = self._parse._originalParseMethod

745

return self

746

747

def _normalizeParseActionArgs( f ):

748

"""Internal method used to decorate parse actions that take fewer than 3 arguments,

749

so that all parse actions can be called as f(s,l,t)."""

750

STAR_ARGS = 4

751

752

try:

753

restore = None

754

if isinstance(f,type):

755

restore = f

756

f = f.__init__

757

if not _PY3K:

758

codeObj = f.func_code

759

else:

760

codeObj = f.code

761

if codeObj.co_flags & STAR_ARGS:

762

return f

763

numargs = codeObj.co_argcount

764

if not _PY3K:

765

if hasattr(f,"im_self"):

766

numargs -= 1

767

else:

768

if hasattr(f,"__self__"):

769

numargs -= 1

770

if restore:

771

f = restore

772

except AttributeError:

773

try:

774

if not _PY3K:

775

call_im_func_code = f.__call__.im_func.func_code

776

else:

777

call_im_func_code = f.__code__

778

779

# not a function, must be a callable object, get info from the

780

# im_func binding of its bound __call__ method

781

if call_im_func_code.co_flags & STAR_ARGS:

782

return f

783

numargs = call_im_func_code.co_argcount

784

if not _PY3K:

785

if hasattr(f.__call__,"im_self"):

786

numargs -= 1

787

else:

788

if hasattr(f.__call__,"__self__"):

789

numargs -= 0

790

except AttributeError:

791

if not _PY3K:

792

call_func_code = f.__call__.func_code

793

else:

794

call_func_code = f.__call__.__code__

795

# not a bound method, get info directly from __call__ method

796

if call_func_code.co_flags & STAR_ARGS:

797

return f

798

numargs = call_func_code.co_argcount

799

if not _PY3K:

800

if hasattr(f.__call__,"im_self"):

801

numargs -= 1

802

else:

803

if hasattr(f.__call__,"__self__"):

804

numargs -= 1

805

806

807

#~ print ("adding function %s with %d args" % (f.func_name,numargs))

808

if numargs == 3:

809

return f

810

else:

811

if numargs > 3:

812

def tmp(s,l,t):

813

return f(f.__call__.__self__, s,l,t)

814

if numargs == 2:

815

def tmp(s,l,t):

816

return f(l,t)

817

elif numargs == 1:

818

def tmp(s,l,t):

819

return f(t)

820

else: #~ numargs == 0:

821

def tmp(s,l,t):

822

return f()

823

try:

824

tmp.__name__ = f.__name__

825

except (AttributeError,TypeError):

826

# no need for special handling if attribute doesnt exist

827

pass

828

try:

829

tmp.__doc__ = f.__doc__

830

except (AttributeError,TypeError):

831

# no need for special handling if attribute doesnt exist

832

pass

833

try:

834

tmp.__dict__.update(f.__dict__)

835

except (AttributeError,TypeError):

836

# no need for special handling if attribute doesnt exist

837

pass

838

return tmp

839

_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)

840

841

def setParseAction( self, *fns, **kwargs ):

842

"""Define action to perform when successfully matching parse element definition.

843

Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),

844

fn(loc,toks), fn(toks), or just fn(), where:

845

- s = the original string being parsed (see note below)

846

- loc = the location of the matching substring

847

- toks = a list of the matched tokens, packaged as a ParseResults object

848

If the functions in fns modify the tokens, they can return them as the return

849

value from fn, and the modified list of tokens will replace the original.

850

Otherwise, fn does not need to return any value.

851

852

Note: the default parsing behavior is to expand tabs in the input string

853

before starting the parsing process. See L{I{parseString}<parseString>} for more information

854

on parsing strings containing <TAB>s, and suggested methods to maintain a

855

consistent view of the parsed string, the parse location, and line and column

856

positions within the parsed string.

857

"""

858

self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))

859

self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])

860

return self

861

862

def addParseAction( self, *fns, **kwargs ):

863

"""Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""

864

self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))

865

self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])

866

return self

867

868

def setFailAction( self, fn ):
    """Register the callable to invoke when parsing fails at this expression.

    The fail action is called as fn(s,loc,expr,err) where:
     - s    = string being parsed
     - loc  = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err  = the exception thrown

    The function returns no value.  It may throw ParseFatalException if it is
    desired to stop parsing immediately.

    Returns self, so calls can be chained.
    """
    self.failAction = fn
    return self

880

881

def _skipIgnorables( self, instring, loc ):
    """Advance loc past every leading match of the expressions in self.ignoreExprs.

    The ignore list is swept repeatedly until a complete pass matches nothing,
    so ignorable expressions may appear in any order and any number of times.
    Returns the new location.
    """
    matchedAny = True
    while matchedAny:
        matchedAny = False
        for ignorable in self.ignoreExprs:
            try:
                # consume consecutive matches; the loop only ends when the
                # expression stops matching and raises ParseException
                while True:
                    loc, _unused = ignorable._parse( instring, loc )
                    matchedAny = True
            except ParseException:
                pass
    return loc

893

894

def preParse( self, instring, loc ):
    """Return loc advanced past ignorable expressions and leading whitespace.

    Ignorables are skipped only when self.ignoreExprs is non-empty; whitespace
    (the characters in self.whiteChars) only when self.skipWhitespace is set.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc

905

906

def parseImpl( self, instring, loc, doActions=True ):
    """Default match implementation: consume nothing and produce no tokens.

    Returns (loc, []) with loc unchanged.
    """
    noTokens = []
    return loc, noTokens

908

909

def postParse( self, instring, loc, tokenlist ):
    """Default post-processing hook: return the token list unchanged."""
    result = tokenlist
    return result

911

912

#~ @profile

913

def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this expression at loc, then run post-processing and parse actions.

    Steps, in order:
     - optionally call preParse (when both callPreParse and self.callPreparse
       are true) to skip ignorables/whitespace
     - call parseImpl to do the actual match; an IndexError raised by parseImpl
       is converted to a ParseException located at the end of the string
     - call postParse on the tokens and wrap them in a ParseResults
     - if there are parse actions and (doActions or self.callDuringTry), call
       each one as fn(instring, tokensStart, retTokens); a non-None return
       value replaces the results

    When self.debug is set or a fail action is defined, the start/exception
    debug actions and the fail action are invoked around the match; the
    success debug action is invoked only when self.debug is set.

    Returns (new location, ParseResults).  Exceptions derived from
    ParseBaseException propagate to the caller.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # sys.exc_info() instead of "except X, err" / "except X as err" so
            # the same source is valid on both Python 2 and Python 3
            import sys
            err = sys.exc_info()[1]
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # only pay for the try block when an IndexError is actually possible
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                import sys
                err = sys.exc_info()[1]
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens

984

985

def tryParse( self, instring, loc ):
    """Attempt a match at loc with doActions=False and return the end location.

    A ParseFatalException raised by the attempt is replaced by an ordinary
    ParseException at loc; other exceptions propagate unchanged.
    """
    try:
        endloc = self._parse( instring, loc, doActions=False )[0]
        return endloc
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)

990

991

# this method gets repeatedly called during backtracking with the same arguments -

992

# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

993

def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around _parseNoCache.

    Results are stored in ParserElement._exprArgCache, keyed on
    (self, instring, loc, callPreParse, doActions).  Both successful results
    and ParseBaseException instances are cached; a cached exception is
    re-raised on a later hit.

    Returns (new location, ParseResults).
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in ParserElement._exprArgCache:
        value = ParserElement._exprArgCache[ lookup ]
        if isinstance(value,Exception):
            raise value
        # return a copy: handing out the cached ParseResults itself would let
        # a caller that modifies its result corrupt later cache hits
        return (value[0],value[1].copy())
    else:
        try:
            value = self._parseNoCache( instring, loc, doActions, callPreParse )
            ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
            return value
        except ParseBaseException:
            # sys.exc_info() keeps this valid on both Python 2 and Python 3
            import sys
            ParserElement._exprArgCache[ lookup ] = sys.exc_info()[1]
            raise

1008

1009

_parse = _parseNoCache

1010

1011

# argument cache for optimizing repeated calls when backtracking through recursive expressions

1012

_exprArgCache = {}

1013

def resetCache():
    """Discard every entry in the shared ParserElement._exprArgCache."""
    cache = ParserElement._exprArgCache
    cache.clear()
resetCache = staticmethod(resetCache)

1016

1017

_packratEnabled = False

1018

def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.

    Repeated parse attempts at the same string location (which happen often
    in many complex grammars) can then immediately return a cached value
    instead of re-executing parsing/validating code.  Both valid results and
    parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that have
    side-effects.  For this reason, packrat parsing is disabled when you first
    import pyparsing.  To activate the packrat feature, your program must call
    the class method ParserElement.enablePackrat().  If your program uses
    psyco to "compile as you go", you must call enablePackrat before calling
    psyco.full().  If you do not do this, Python will crash.  For best
    results, call enablePackrat() immediately after importing pyparsing.

    Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    # route every _parse call through the memoizing implementation
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)

1038

1039

def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.

    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set parseAll to True (equivalent to ending
    the grammar with StringEnd()).

    Note: parseString implicitly calls expandtabs() on the input string
    (unless self.keepTabs is set), in order to report proper column numbers
    in parse actions.  If the input string contains tabs and the grammar uses
    parse actions that use the loc argument to index into the string being
    parsed, you can ensure you have a consistent view of the input string by:
     - calling parseWithTabs on your grammar before calling parseString
       (see L{I{parseWithTabs}<parseWithTabs>})
     - defining your parse action using the full (s,loc,toks) signature, and
       referencing the input string using the parse action's s argument
     - explicitly expanding the tabs in your input string before calling
       parseString

    Returns the parsed tokens.  Raises the ParseBaseException produced by a
    failed match.
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            loc = self.preParse( instring, loc )
            StringEnd()._parse( instring, loc )
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace;
        # sys.exc_info() keeps this valid on both Python 2 and Python 3
        import sys
        exc = sys.exc_info()[1]
        raise exc
    else:
        return tokens

1079

1080

def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for expression matches.  Each match yields a
    (tokens, start location, end location) tuple.  May be called with optional
    maxMatches argument, to clip scanning after 'n' matches are found.

    A match that does not advance past the current scan position (a
    zero-length match) is not reported; scanning resumes one character
    further on, so the generator always terminates.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on
    parsing strings with embedded tabs.
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    loc = nextLoc
                else:
                    # no progress was made; step forward so an expression
                    # that matches the empty string cannot loop forever
                    loc = preloc+1
    except ParseBaseException:
        # re-raise from here to trim the internal stack trace;
        # sys.exc_info() keeps this valid on both Python 2 and Python 3
        import sys
        pe = sys.exc_info()[1]
        raise pe

1114

1115

def transformString( self, instring ):
    """Extension to scanString, to modify matching text with modified tokens
    that may be returned from a parse action.  To use transformString, define
    a grammar and attach a parse action to it that modifies the returned
    token list.  Invoking transformString() on a target string will then scan
    for matches, and replace the matched text patterns according to the logic
    in the parse action.  transformString() returns the resulting transformed
    string.

    Side effect: sets self.keepTabs to True, and leaves it set.
    """
    out = []
    lastE = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for t,s,e in self.scanString( instring ):
            # copy the unmatched text between the previous match and this one
            out.append( instring[lastE:s] )
            if t:
                if isinstance(t,ParseResults):
                    out += t.asList()
                elif isinstance(t,list):
                    out += t
                else:
                    out.append(t)
            lastE = e
        out.append(instring[lastE:])
        return "".join(map(_ustr,out))
    except ParseBaseException:
        # re-raise from here to trim the internal stack trace;
        # sys.exc_info() keeps this valid on both Python 2 and Python 3
        import sys
        pe = sys.exc_info()[1]
        raise pe

1142

1143

def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to scanString, simplifying the access to the tokens
    found to match the given parse expression.  May be called with optional
    maxMatches argument, to clip searching after 'n' matches are found.

    Returns a ParseResults holding the tokens of each match, in order.
    """
    try:
        return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
    except ParseBaseException:
        # re-raise from here to trim the internal stack trace;
        # sys.exc_info() keeps this valid on both Python 2 and Python 3
        import sys
        pe = sys.exc_info()[1]
        raise pe

1152

1153

def __add__(self, other ):
    """Implementation of + operator - returns And.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return And( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None

1162

1163

def __radd__(self, other ):
    """Implementation of + operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None

1172

1173

def __sub__(self, other):
    """Implementation of - operator, returns And with error stop.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return And( [ self, And._ErrorStop(), rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None

1182

1183

def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None

1192

1193

def __mul__(self,other):
    """Implementation of * operator - repeats this expression.

    other may be:
     - an int n: exactly n repetitions
     - a tuple (min, max) of ints: between min and max repetitions
     - a tuple (n, None) or (n,): at least n repetitions (n of 0 gives
       ZeroOrMore, n of 1 gives OneOrMore)
     - a tuple (None, n): equivalent to (0, n)

    Raises TypeError for any other operand type, and ValueError for a
    negative count, a max smaller than min, or a total of zero repetitions.
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so both positions can be inspected
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            optElements -= minElements
        else:
            # apply the % formatting; passing the types as extra exception
            # arguments left the message unformatted
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each extra repetition is only tried if the
            # previous one matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret

1241

1242

def __rmul__(self, other):
    """Implementation of * operator when the left operand is not a
    ParserElement; delegates to __mul__ with the same operand."""
    product = self.__mul__(other)
    return product

1244

1245

def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return MatchFirst( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None

1254

1255

def __ror__(self, other ):
    """Implementation of | operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None

1264

1265

def __xor__(self, other ):
    """Implementation of ^ operator - returns Or.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return Or( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None

1274

1275

def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None

1284

1285

def __and__(self, other ):
    """Implementation of & operator - returns Each.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return Each( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None

1294

1295

def __rand__(self, other ):
    """Implementation of & operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None

1304

1305

def __invert__( self ):
    """Implementation of ~ operator - returns NotAny wrapping this expression."""
    negated = NotAny( self )
    return negated

1308

1309

def __call__(self, name):
    """Shortcut for setResultsName, called with only the name argument, so::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

    Returns whatever setResultsName returns.
    """
    named = self.setResultsName(name)
    return named

1316

1317

def suppress( self ):
    """Return this expression wrapped in Suppress, so that its output is
    suppressed; useful to keep punctuation from cluttering up returned output.
    """
    wrapped = Suppress( self )
    return wrapped

1322

1323

def leaveWhitespace( self ):
    """Turn off whitespace skipping for this expression (clears
    self.skipWhitespace, which preParse consults before matching).

    This is normally only used internally by the pyparsing module, but may be
    needed in some whitespace-sensitive grammars.  Returns self.
    """
    setattr( self, "skipWhitespace", False )
    return self

1330

1331

def setWhitespaceChars( self, chars ):
    """Override the default whitespace characters for this expression.

    Enables whitespace skipping, stores chars as the characters to skip, and
    clears self.copyDefaultWhiteChars.  Returns self.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self

1338

1339

def parseWithTabs( self ):
    """Override the default behavior of expanding <TAB>s to spaces before
    parsing the input string, by setting self.keepTabs.

    Must be called before parseString when the input grammar contains elements
    that match <TAB> characters.  Returns self.
    """
    setattr( self, "keepTabs", True )
    return self

1345

1346

def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    An expression that is already a Suppress is added only if it is not
    already in the ignore list; anything else is wrapped in Suppress and
    always appended.  Returns self.
    """
    ignorables = self.ignoreExprs
    if not isinstance( other, Suppress ):
        ignorables.append( Suppress( other ) )
    elif other not in ignorables:
        ignorables.append( other )
    return self

1357

1358

def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
    using the given callbacks.

    Any callback passed as a false value (e.g. None) is replaced by the
    corresponding module default.  Sets self.debug to True and returns self.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self

1365

1366

def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
    Set flag to True to enable (installing the default debug actions),
    False to disable.  Returns self.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self

1374

1375

def __str__( self ):
    """Return this expression's name attribute."""
    label = self.name
    return label

1377

1378

def __repr__( self ):
    """Return the same text as the string conversion (via _ustr)."""
    text = _ustr(self)
    return text

1380

1381

def streamline( self ):
    """Mark this expression as streamlined and clear its cached string
    representation.  Returns self.
    """
    self.strRepr = None
    self.streamlined = True
    return self

1385

1386

def checkRecursion( self, parseElementList ):
    """Recursion check hook; this implementation does nothing."""
    return None

1388

1389

def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite
    recursive definitions.

    This implementation starts a recursion check with an empty element list;
    validateTrace is not used here.
    """
    seenElements = []
    self.checkRecursion( seenElements )

1392

1393

def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.

    If file_or_filename has a read() method it is used directly; otherwise
    it is treated as a filename, and the file is opened in binary mode, read
    entirely, and closed before parsing.

    Returns the result of parseString(contents, parseAll).  Raises the
    ParseBaseException produced by a failed match.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # close the handle even when read() fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace;
        # sys.exc_info() keeps this valid on both Python 2 and Python 3
        import sys
        exc = sys.exc_info()[1]
        raise exc

1409

1410

def getException(self):
    """Build a ParseException for this expression, with an empty string,
    location 0, and this expression's error message."""
    exc = ParseException("",0,self.errmsg,self)
    return exc

1412

1413

def __getattr__(self,aname):
    """Create the myException attribute on first access.

    The exception comes from getException() and is stored on the instance, so
    later reads find it as a normal attribute.  Any other missing attribute
    raises AttributeError.
    """
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    ret = self.getException()
    self.myException = ret
    return ret

1419

1420

def __eq__(self,other):
    """Compare this expression with another object.

     - another ParserElement: equal when it is the same object or has an
       equal __dict__
     - a string: equal when this expression parses the whole string
       (parseString with parseAll=True succeeds)
     - anything else: the result of comparing super(ParserElement,self)
       with other
    """
    if isinstance(other, ParserElement):
        if self is other:
            return True
        return self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        else:
            return True
    return super(ParserElement,self)==other

1431

1432

def __ne__(self,other):
    """Return the negation of self == other."""
    isEqual = (self == other)
    return not isEqual

1434

1435

def __hash__(self):
    """Hash on object identity (the hash of id(self))."""
    identity = id(self)
    return hash(identity)

1437

1438

def __req__(self,other):
    """Return the result of self == other."""
    isEqual = (self == other)
    return isEqual

1440

1441

def __rne__(self,other):
    """Return the negation of self == other."""
    isEqual = (self == other)
    return not isEqual

1443

1444

1445

class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__( self ):
        # tokens are initialized with savelist turned off
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name via the base class, then rebuild the error message
        as "Expected <name>".  Returns what the base setName returns."""
        renamed = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed

1456

1457

1458

class Empty(Token):
    """An empty token, will always match."""

    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # matching consumes no input, so an empty result is possible and
        # the match does not need IndexError protection
        self.mayIndexError = False
        self.mayReturnEmpty = True

1465

1466

1467

class NoMatch(Token):
    """A token that will never match."""

    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: update this expression's myException with the
        current location and input string, then raise it."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

1482

1483

1484

class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string degrades this instance to an Empty token
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal text at loc.

        Returns (end location, the match string) on success; otherwise
        updates and raises this expression's myException.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

1515

_L = Literal

1516

1517

class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done on upper-cased text
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that the characters just
        before and just after the match are not identifier characters.

        Returns (end location, the match string) on success; otherwise
        updates and raises this expression's myException.
        """
        matchEnd = loc + self.matchLen
        identChars = self.identChars
        if self.caseless:
            if ( (instring[ loc:matchEnd ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in identChars) and
                 (loc == 0 or instring[loc-1].upper() not in identChars) ):
                return matchEnd, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[matchEnd] not in identChars) and
                (loc == 0 or instring[loc-1] not in identChars) ):
                return matchEnd, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy via the base class, then set the copy's identChars to
        Keyword.DEFAULT_KEYWORD_CHARS."""
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)

1577

1578

class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base Literal stores the upper-cased text for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal at loc, comparing upper-cased text.

        Returns (end location, the string as originally defined) on success;
        otherwise updates and raises this expression's myException.
        """
        matchEnd = loc + self.matchLen
        if instring[ loc:matchEnd ].upper() == self.match:
            return matchEnd, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

1599

1600

class CaselessKeyword(Keyword):
    """Keyword that is always constructed with caseless=True."""

    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, comparing upper-cased text and requiring
        that the character after the match is not an identifier character.

        Returns (end location, the match string) on success; otherwise
        updates and raises this expression's myException.
        """
        matchEnd = loc + self.matchLen
        if ( (instring[ loc:matchEnd ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in self.identChars) ):
            return matchEnd, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

1613

1614

class Word(Token):

1615

"""Token for matching words composed of allowed character sets.

1616

Defined with string containing all allowed initial characters,

1617

an optional string containing allowed body characters (if omitted,

1618

defaults to the initial character set), and an optional minimum,

1619

maximum, and/or exact length. The default value for min is 1 (a

1620

minimum value < 1 is not valid); the default values for max and exact

1621

are 0, meaning no maximum or exact length restriction.

1622

"""

1623

def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):

1624

super(Word,self).__init__()

1625

self.initCharsOrig = initChars

1626

self.initChars = _str2dict(initChars)

1627

if bodyChars :

1628

self.bodyCharsOrig = bodyChars

1629

self.bodyChars = _str2dict(bodyChars)

1630

else:

1631

self.bodyCharsOrig = initChars

1632

self.bodyChars = _str2dict(initChars)

1633

1634

self.maxSpecified = max > 0

1635

1636

if min < 1:

1637

raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

1638

1639

self.minLen = min

1640

1641

if max > 0:

1642

self.maxLen = max

1643

else:

1644

self.maxLen = _MAX_INT

1645

1646

if exact > 0:

1647

self.maxLen = exact

1648

self.minLen = exact

1649

1650

self.name = _ustr(self)

1651

self.errmsg = "Expected " + self.name

1652

#self.myException.msg = self.errmsg

1653

self.mayIndexError = False

1654

self.asKeyword = asKeyword

1655

1656

if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):

1657

if self.bodyCharsOrig == self.initCharsOrig:

1658

self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)

1659

elif len(self.bodyCharsOrig) == 1:

1660

self.reString = "%s[%s]*" % \

1661

(re.escape(self.initCharsOrig),

1662

_escapeRegexRangeChars(self.bodyCharsOrig),)

1663

else:

1664

self.reString = "[%s][%s]*" % \

1665

(_escapeRegexRangeChars(self.initCharsOrig),

1666

_escapeRegexRangeChars(self.bodyCharsOrig),)

1667

if self.asKeyword:

1668

self.reString = r"\b"+self.reString+r"\b"

1669

try:

1670

self.re = re.compile( self.reString )

1671

except:

1672

self.re = None

1673

1674

def parseImpl( self, instring, loc, doActions=True ):

1675

if self.re:

1676

result = self.re.match(instring,loc)

1677

if not result:

1678

exc = self.myException

1679

exc.loc = loc

1680

exc.pstr = instring

1681

raise exc

1682

1683

loc = result.end()

1684

return loc,result.group()

1685

1686

if not(instring[ loc ] in self.initChars):

1687

#~ raise ParseException( instring, loc, self.errmsg )

1688

exc = self.myException

1689

exc.loc = loc

1690

exc.pstr = instring

1691

raise exc

1692

start = loc

1693

loc += 1

1694

instrlen = len(instring)

1695

bodychars = self.bodyChars

1696

maxloc = start + self.maxLen

1697

maxloc = min( maxloc, instrlen )

1698

while loc < maxloc and instring[loc] in bodychars:

1699

loc += 1

1700

1701

throwException = False

1702

if loc - start < self.minLen:

1703

throwException = True

1704

if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:

1705

throwException = True

1706

if self.asKeyword:

1707

if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):

1708

throwException = True

1709

1710

if throwException:

1711

#~ raise ParseException( instring, loc, self.errmsg )

1712

exc = self.myException

1713

exc.loc = loc

1714

exc.pstr = instring

1715

raise exc

1716

1717

return loc, instring[start:loc]

1718

1719

def __str__( self ):
    """Return the base class's string form when it can produce one,
    otherwise a cached "W:(...)" summary of this word's character sets."""
    try:
        return super(Word,self).__str__()
    except:
        # the base class could not produce a string; build our own below
        pass

    if self.strRepr is not None:
        return self.strRepr

    def abbreviate(chars):
        # show at most four characters, marking truncation with "..."
        if len(chars) <= 4:
            return chars
        return chars[:4] + "..."

    initial = abbreviate(self.initCharsOrig)
    if self.initCharsOrig == self.bodyCharsOrig:
        self.strRepr = "W:(%s)" % initial
    else:
        self.strRepr = "W:(%s,%s)" % ( initial, abbreviate(self.bodyCharsOrig) )
    return self.strRepr

1740

1741

1742

class Regex(Token):
    """Token for matching strings that match a given regular expression.
       The expression is given as a string in the form understood by
       Python's built-in re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(pattern, flags)
        except sre_constants.error:
            # warn about the bad pattern, then let the error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the compiled pattern at loc.

        Returns (end of match, ParseResults); every named group of the
        match is also stored in the results under its group name.  On
        failure the reusable self.myException is updated and raised.
        """
        found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        for groupName, groupValue in found.groupdict().items():
            tokens[groupName] = groupValue
        return found.end(), tokens

    def __str__( self ):
        """Return the base class's string form when available, otherwise
        a cached "Re:(...)" rendering of the pattern."""
        try:
            return super(Regex,self).__str__()
        except:
            # the base class could not produce a string; fall through
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)
        return self.strRepr

1797

1798

1799

class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after issuing a SyntaxWarning) when either
           quote string is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # fragment excluding the escape character from the "ordinary
        # character" class; empty when no escape character is configured
        escClass = ''
        if escChar is not None:
            escClass = _escapeRegexRangeChars(escChar) or ''

        # opening quote, then a group whose first alternative is any
        # character that is neither the first end-quote character nor the
        # escape character (nor a line break, unless multiline)
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escClass )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escClass )
        if len(self.endQuoteChar) > 1:
            # a proper prefix of a multi-character end quote followed by a
            # character that breaks the sequence is still body text
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to strip the escape character again
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc.

        Returns (end of match, text).  With unquoteResults set, the quote
        delimiters are removed and escaped characters / escaped quotes are
        replaced by what they stand for.  On failure the reusable
        self.myException is updated and raised.
        """
        # cheap first-character test before running the regular expression
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape sequence in an
                    # ordinary string literal
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base class's string form when available, otherwise
        a cached description naming the opening and closing quotes."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # the base class could not produce a string; fall through
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr

1913

1914

1915

class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and the length limits.

        Raises ValueError when min is less than 1.  A positive exact
        overrides both min and max.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        #self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of characters that are not in notChars.

        Returns (new location, matched text); otherwise updates and raises
        the reusable self.myException.
        """
        excluded = self.notChars
        if instring[loc] not in excluded:
            start = loc
            stop = min( start+self.maxLen, len(instring) )
            loc = start + 1
            while loc < stop and instring[loc] not in excluded:
                loc += 1
            if loc - start >= self.minLen:
                return loc, instring[start:loc]

        # either the first character is excluded or the run was too short
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        """Return the base class's string form when available, otherwise
        a cached "!W:(...)" summary showing up to four excluded characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            # the base class could not produce a string; fall through
            pass

        if self.strRepr is not None:
            return self.strRepr
        if len(self.notChars) > 4:
            self.strRepr = "!W:(%s...)" % self.notChars[:4]
        else:
            self.strRepr = "!W:(%s)" % self.notChars
        return self.strRepr

1985

1986

class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # printable tags used to build the element's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Configure which whitespace characters to match and how many.

        The characters in ws are removed from this element's own set of
        skippable whitespace.  A positive exact overrides min and max.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        #~ self.leaveWhitespace()
        tags = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(tags)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg

        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters.

        Returns (new location, matched text); otherwise updates and raises
        the reusable self.myException.
        """
        if instring[loc] in self.matchWhite:
            start = loc
            stop = min( start + self.maxLen, len(instring) )
            loc = start + 1
            while loc < stop and instring[loc] in self.matchWhite:
                loc += 1
            if loc - start >= self.minLen:
                return loc, instring[start:loc]

        # first character is not matchable, or the run was too short
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

2042

2043

2044

class _PositionToken(Token):
    """Common base for the position-testing tokens below; it names the
    element after its class and sets the two flags shown."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__

2050

2051

class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target
        column is reached, a non-space character is met, or input ends."""
        if col(loc,instring) == self.col:
            return loc

        total = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < total and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (location of the target column, text skipped to get there).

        Raises ParseException when loc is already past the target column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]

2073

2074

class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newline is significant here, so it must not be skipped as whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"
        #self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        """Return loc advanced by one when the inherited pre-parse stops
        on a newline, otherwise loc unchanged."""
        preloc = super(LineStart,self).preParse(instring,loc)
        # bounds check: the inherited pre-parse can stop at the very end
        # of the input (or the input can be empty), where indexing would
        # raise IndexError
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) when loc is 0, equals the position
        preParse yields from the start of the string, or directly follows
        a newline; otherwise update and raise the reusable self.myException."""
        if not( loc==0 or
            (loc == self.preParse( instring, 0 )) or
            (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
            #~ raise ParseException( instring, loc, "Expected start of line" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []

2098

2099

class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline is what this element matches, so do not skip it as whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc+1, "\\n") on a newline, (loc+1, []) exactly at the
        end of the input; in every other case update and raise the
        reusable self.myException."""
        end = len(instring)
        if loc < end:
            if instring[loc] == "\n":
                return loc+1, "\n"
        elif loc == end:
            return loc+1, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

2124

2125

class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) at position 0, or at the position that
        preParse reaches from position 0 (i.e. only whitespace and
        ignorables precede loc); otherwise update and raise the reusable
        self.myException."""
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []

2142

2143

class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc+1, []) exactly at the end of the input and
        (loc, []) when loc is already past it; before the end, update and
        raise the reusable self.myException."""
        end = len(instring)
        if loc < end:
            #~ raise ParseException( instring, loc, "Expected end of text" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == end:
            return loc+1, []
        # loc > end: the only remaining case, so no further branch is needed
        return loc, []

2166

2167

class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with (loc, []) at position 0, or when the previous
        character is not a word character and the current one is;
        otherwise update and raise the reusable self.myException."""
        if loc != 0 and (instring[loc-1] in self.wordChars or
                         instring[loc] not in self.wordChars):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []

2188

2189

class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with (loc, []) at or past the end of the input, or when
        the previous character is a word character and the current one is
        not; otherwise update and raise the reusable self.myException."""
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # loc == 0 is tested explicitly: no word can have ended before
            # the first character, and instring[loc-1] would otherwise wrap
            # around to the last character of the string
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                #~ raise ParseException( instring, loc, "Expected end of word" )
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []

2213

2214

2215

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Store exprs as a list: a list is kept as-is, a string becomes a
        single Literal, any other iterable is copied into a list, and a
        non-iterable object is wrapped in a one-element list."""
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            members = exprs
        elif isinstance( exprs, basestring ):
            members = [ Literal( exprs ) ]
        else:
            try:
                members = list( exprs )
            except TypeError:
                members = [ exprs ]
        self.exprs = members
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions, drop the cached string
        form, and return self."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original expressions are left untouched
        copies = []
        for e in self.exprs:
            copies.append( e.copy() )
        self.exprs = copies
        for c in copies:
            c.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable on this expression and pass the
        newest entry of self.ignoreExprs on to every contained expression.
        A Suppress that is already registered is not added again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base class's string form when available, otherwise
        a cached "ClassName:(exprs)" rendering."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            # the base class could not produce a string; fall through
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline every contained expression, then flatten a nested
        expression of the same class into this one.  Returns self."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            def absorbable(candidate):
                # same class, and carrying no parse action, name or debug flag
                return ( isinstance( candidate, self.__class__ ) and
                         not(candidate.parseAction) and
                         candidate.resultsName is None and
                         not candidate.debug )

            first = self.exprs[0]
            if absorbable(first):
                self.exprs = first.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError  |= first.mayIndexError

            # re-read the tail: the list may just have been rebuilt
            last = self.exprs[-1]
            if absorbable(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError  |= last.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate unchanged to the base class implementation."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression with this element appended
        to the trace, then run the recursion check from an empty list."""
        # the default list is never mutated: a new list is built here
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )

2310

2311

class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once parseImpl meets it, failures of the
        following expressions are raised as ParseSyntaxException."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, self) starts the lookup *after*
            # Empty, so Empty.__init__ itself is not run here - confirm
            # that this is intended.
            super(Empty,self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build the sequence; it may return empty only if every member
        may, and it takes its whitespace handling from the first member."""
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # use the normalised self.exprs, not the raw argument: exprs may be
        # a string or a non-indexable iterable
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each member in order and return (location, accumulated
        tokens).  After an _ErrorStop marker, ParseBaseException and
        IndexError from later members are re-raised as ParseSyntaxException."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Implement "+=": append other (a string becomes a Literal)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on the members, stopping after the
        first one that cannot return empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the name if one is set, otherwise a cached "{a b c}"
        rendering of the members."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

2377

2378

2379

class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any member may."""
        super(Or,self).__init__(exprs, savelist)
        self.mayReturnEmpty = False
        for e in self.exprs:
            if e.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative with tryParse and re-parse with the one
        that reached the furthest location.  When none matches, raise the
        failure recorded at the furthest location, or a generic
        ParseException when no failure was recorded."""
        bestFailure = None
        bestFailureLoc = -1
        longestEnd = -1
        for e in self.exprs:
            try:
                endLoc = e.tryParse( instring, loc )
            except ParseException as err:
                if err.loc > bestFailureLoc:
                    bestFailure = err
                    bestFailureLoc = err.loc
            except IndexError:
                # running off the input counts as a failure at its end
                if len(instring) > bestFailureLoc:
                    bestFailure = ParseException(instring,len(instring),e.errmsg,self)
                    bestFailureLoc = len(instring)
            else:
                if endLoc > longestEnd:
                    longestEnd = endLoc
                    winner = e

        if longestEnd >= 0:
            return winner._parse( instring, loc, doActions )
        if bestFailure is not None:
            raise bestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other ):
        """Implement "^=": append other (a string becomes a Literal)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the name if one is set, otherwise a cached "{a ^ b}"
        rendering of the alternatives."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )

2438

2439

2440

class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any member may,
        or if it was given no expressions at all."""
        super(MatchFirst,self).__init__(exprs, savelist)
        if not exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses.  When
        none does, raise the failure recorded at the furthest location,
        or a generic ParseException when no failure was recorded."""
        bestFailure = None
        bestFailureLoc = -1
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > bestFailureLoc:
                    bestFailure = err
                    bestFailureLoc = err.loc
            except IndexError:
                # running off the input counts as a failure at its end
                if len(instring) > bestFailureLoc:
                    bestFailure = ParseException(instring,len(instring),e.errmsg,self)
                    bestFailureLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if bestFailure is not None:
            raise bestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Implement "|=": append other (a string becomes a Literal)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the name if one is set, otherwise a cached "{a | b}"
        rendering of the alternatives."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )

2497

2498

2499

class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Build the group; it may return empty only if every member may.
        Sorting the members into groups is deferred to the first parse."""
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the members match, then parse them in
        that order and merge the results.

        Raises ParseException naming the required members that never
        matched.  Returns (location, merged ParseResults).
        """
        if self.initExprGroups:
            # one-time classification of the members by wrapper type
            self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        # trial phase: sweep over the candidates with tryParse until a
        # complete sweep produces no match at all
        while True:
            candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            misses = 0
            for e in candidates:
                try:
                    probeLoc = e.tryParse( instring, probeLoc )
                except ParseException:
                    misses += 1
                else:
                    matchOrder.append(e)
                    if e in pendingRequired:
                        pendingRequired.remove(e)
                    elif e in pendingOptional:
                        pendingOptional.remove(e)
            if misses == len(candidates):
                break

        if pendingRequired:
            missing = ", ".join( [ _ustr(e) for e in pendingRequired ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOptional)

        # real phase: parse again in the discovered order
        collected = []
        for e in matchOrder:
            loc, tokens = e._parse(instring,loc,doActions)
            collected.append(tokens)

        finalResults = ParseResults([])
        for r in collected:
            # for keys already present, combine old and new values and
            # store the combination after r has been added
            combined = {}
            for k in r.keys():
                if k in finalResults.keys():
                    both = ParseResults(finalResults[k])
                    both += ParseResults(r[k])
                    combined[k] = both
            finalResults += ParseResults(r)
            for k,v in combined.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the name if one is set, otherwise a cached "{a & b}"
        rendering of the members."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every member."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )

2583

2584

2585

class ParseElementEnhance(ParserElement):

2586

"""Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

2587

def __init__( self, expr, savelist=False ):

2588

super(ParseElementEnhance,self).__init__(savelist)

2589

if isinstance( expr, basestring ):

2590

expr = Literal(expr)

2591

self.expr = expr

2592

self.strRepr = None

2593

if expr is not None:

2594

self.mayIndexError = expr.mayIndexError

2595

self.mayReturnEmpty = expr.mayReturnEmpty

2596

self.setWhitespaceChars( expr.whiteChars )

2597

self.skipWhitespace = expr.skipWhitespace

2598

self.saveAsList = expr.saveAsList

2599

self.callPreparse = expr.callPreparse

2600

self.ignoreExprs.extend(expr.ignoreExprs)

2601

2602

def parseImpl( self, instring, loc, doActions=True ):

2603

if self.expr is not None:

2604

return self.expr._parse( instring, loc, doActions, callPreParse=False )

2605

else:

2606

raise ParseException("",loc,self.errmsg,self)

2607

2608

def leaveWhitespace( self ):

2609

self.skipWhitespace = False

2610

self.expr = self.expr.copy()

2611

if self.expr is not None:

2612

self.expr.leaveWhitespace()

2613

return self

2614

2615

def ignore( self, other ):

2616

if isinstance( other, Suppress ):

2617

if other not in self.ignoreExprs:

2618

super( ParseElementEnhance, self).ignore( other )

2619

if self.expr is not None:

2620

self.expr.ignore( self.ignoreExprs[-1] )

2621

else:

2622

super( ParseElementEnhance, self).ignore( other )

2623

if self.expr is not None:

2624

self.expr.ignore( self.ignoreExprs[-1] )

2625

return self

2626

2627

def streamline( self ):

2628

super(ParseElementEnhance,self).streamline()

2629

if self.expr is not None:

2630

self.expr.streamline()

2631

return self

2632

2633

def checkRecursion( self, parseElementList ):

2634

if self in parseElementList:

2635

raise RecursiveGrammarException( parseElementList+[self] )

2636

subRecCheckList = parseElementList[:] + [ self ]

2637

if self.expr is not None:

2638

self.expr.checkRecursion( subRecCheckList )

2639

2640

def validate(self, validateTrace=[]):
    """Validate the contained expression, then run the recursion check.

    validateTrace is the list of elements visited so far; it is copied,
    never mutated, so the shared default list stays empty.
    """
    trace = validateTrace[:] + [self]
    inner = self.expr
    if inner is not None:
        inner.validate(trace)
    self.checkRecursion([])

2645

2646

def __str__(self):
    """Return the base-class string form, else a generated description.

    The base-class __str__ is tried first.  If it fails, a string of the
    form "ClassName:(contained expression)" is built once, cached in
    self.strRepr and returned.  If there is no contained expression and
    nothing is cached, self.strRepr (None) is returned unchanged.
    """
    try:
        return super(ParseElementEnhance, self).__str__()
    except Exception:
        # Fall back to the generated form.  Only ordinary errors are
        # swallowed: a bare "except:" here would also hide
        # KeyboardInterrupt and SystemExit.
        pass

    if self.strRepr is None and self.expr is not None:
        self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
    return self.strRepr

2655

2656

2657

class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.

    FollowedBy checks that the expression matches at the current position
    but does *not* advance the parsing position, and it always returns a
    null token list.
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the lookahead at loc; return loc unchanged with no tokens."""
        lookahead = self.expr
        # any exception raised by tryParse propagates to the caller
        lookahead.tryParse(instring, loc)
        return loc, []

2669

2670

2671

class NotAny(ParseElementEnhance):
    """Negative lookahead for the given parse expression.

    NotAny verifies that the expression does *not* match at the current
    position.  It does *not* advance the parsing position, does *not* skip
    leading whitespace, and always returns a null token list.  May be
    constructed using the '~' operator.
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # Set the flag directly instead of calling leaveWhitespace(), which
        # would propagate the setting into the contained expression.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (with no tokens) only if the contained expression fails."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the unwanted expression is absent: lookahead succeeds
            return loc, []
        # the unwanted expression matched: re-use the stored exception object
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{%s}" % _ustr(self.expr)
        return self.strRepr

2706

2707

2708

class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression repeatedly until it fails.

        A ParseException or IndexError from the contained expression ends
        the repetition; the position and tokens accumulated up to that
        point are returned (an empty list if nothing matched at all).
        """
        collected = []
        try:
            loc, collected = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnored = bool(self.ignoreExprs)
            while True:
                start = loc
                if skipIgnored:
                    start = self._skipIgnorables(instring, loc)
                loc, more = self.expr._parse(instring, start, doActions)
                # keep results that carry either tokens or named values
                if more or more.keys():
                    collected += more
        except (ParseException, IndexError):
            pass

        return loc, collected

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[%s]..." % _ustr(self.expr)
        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and force saveAsList on the result."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named

2745

2746

2747

class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression once, then as often as possible.

        The first match is mandatory, so its exception propagates.  A
        ParseException or IndexError on any later attempt simply ends the
        repetition.
        """
        # mandatory first occurrence (deliberately outside the try block)
        loc, collected = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            skipIgnored = bool(self.ignoreExprs)
            while True:
                start = loc
                if skipIgnored:
                    start = self._skipIgnorables(instring, loc)
                loc, more = self.expr._parse(instring, start, doActions)
                # keep results that carry either tokens or named values
                if more or more.keys():
                    collected += more
        except (ParseException, IndexError):
            pass

        return loc, collected

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{%s}..." % _ustr(self.expr)
        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and force saveAsList on the result."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named

2780

2781

class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""
    def __str__(self):
        return ""

    def __bool__(self):
        return False

    # same truth-value hook under its Python 2 name
    __nonzero__ = __bool__

# Sentinel instance used as the default for Optional's "default" argument.
_optionalNotMatched = _NullToken()

2789

class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    A default return value can also be specified; it is returned as the
    single token when the optional expression is not found.
    """
    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression if possible.

        On ParseException or IndexError the position is left unchanged and
        the tokens are: an empty list when no default was given, otherwise
        the default value (also stored under the contained expression's
        results name when it has one).
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[%s]" % _ustr(self.expr)
        return self.strRepr

2821

2822

2823

class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    Constructor arguments:
     - other - the target expression to skip to
     - include - if true, the matched expression is also parsed (the
       skipped text and matched expression are returned as a 2-element list)
     - ignore - expression (typically quoted strings and comments) that
       might contain false matches of the target; matches of it are
       stepped over while scanning
     - failOn - expression (or string, converted to a Literal) that makes
       the skip fail if it matches at a scanned position
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward one character at a time until the target matches.

        Returns the end location and a one-element list holding the
        skipped text (or, with include set and a non-empty match, a
        ParseResults of the skipped text plus the matched tokens).
        Raises ParseException if failOn matches at a scanned position, or
        the stored exception if the end of the string is reached.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # flag tells the outer handler to re-raise rather
                        # than advance to the next character
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # step over consecutive matches of the ignore expression
                    # (a stray debug print to stdout used to live here)
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe only: no parse actions are run for this test match
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

2888

2889

class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later.

    Used for recursive grammars, such as algebraic infix notation.  Once
    the real expression is known, it is assigned to the Forward variable
    with the '<<' operator.

    Note: mind operator precedence when assigning to a Forward.  '|' binds
    less tightly than '<<', so::
        fwdExpr << a | b | c
    is actually evaluated as::
        (fwdExpr << a) | b | c
    which leaves b and c out as parseable alternatives.  Group the
    inserted expression explicitly instead::
        fwdExpr << (a | b | c)
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Install other as the contained expression and adopt its settings.

        A plain string is converted to a Literal.  Always returns None.
        """
        target = other
        if isinstance(target, basestring):
            target = Literal(target)
        self.expr = target
        self.strRepr = None
        self.mayIndexError = target.mayIndexError
        self.mayReturnEmpty = target.mayReturnEmpty
        self.setWhitespaceChars(target.whiteChars)
        self.skipWhitespace = target.skipWhitespace
        self.saveAsList = target.saveAsList
        self.ignoreExprs.extend(target.ignoreExprs)
        return None

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the contained expression once; returns self."""
        if self.streamlined:
            return self
        # mark first, so a recursive grammar does not loop back through here
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        """Validate the contained expression unless self is already on the trace."""
        alreadyVisited = self in validateTrace
        if not alreadyVisited:
            trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        # Temporarily switch class so that a nested str() of this same
        # object yields "..." instead of recursing without end.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            retString = "None"
            if self.expr is not None:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy this element.

        With an expression assigned, the base-class copy is returned.
        Otherwise a new Forward is returned whose contained expression is
        this (still undefined) Forward.
        """
        if self.expr is not None:
            return super(Forward, self).copy()
        placeholder = Forward()
        placeholder << self
        return placeholder

2960

2961

class _ForwardNoRecurse(Forward):
    """Forward variant whose string form is always an ellipsis."""
    def __str__(self):
        ellipsis = "..."
        return ellipsis

2964

2965

class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__(self, expr, savelist=False):
        # the savelist argument is accepted but not passed on to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False

2970

2971

class Upcase(TokenConverter):
    """Converter to upper case all matching tokens (deprecated)."""
    def __init__(self, *args):
        super(Upcase, self).__init__(*args)
        message = "Upcase class is deprecated, use upcaseTokens parse action instead"
        warnings.warn(message, DeprecationWarning, stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a new list with string.upper applied to every token."""
        return [string.upper(tok) for tok in tokenlist]

2980

2981

2982

class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.

    By default the matching patterns must also be contiguous in the input
    string; pass 'adjacent=False' to the constructor to relax this.
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        self.adjacent = adjacent
        self.joinString = joinString
        # Turn off whitespace skipping inside the contained expressions ...
        if adjacent:
            self.leaveWhitespace()
        # ... but re-enable it on the Combine itself.
        self.skipWhitespace = True

    def ignore(self, other):
        """Register an ignore expression; returns self.

        In adjacent mode ParserElement.ignore is called directly, otherwise
        the inherited ignore() is used.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the token list with one joined string token.

        Works on a copy of tokenlist whose list items are cleared first.
        The result is wrapped in a list when this element has a results
        name and the combined results contain named values.
        """
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined

3012

3013

class Group(TokenConverter):
    """Converter to return the matched tokens as a list.

    Useful for returning tokens of ZeroOrMore and OneOrMore expressions.
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper."""
        grouped = [tokenlist]
        return grouped

3021

3022

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.

    Each element can also be referenced using the first token in the
    expression as its key.  Useful for tabular report scraping when the
    first column can be used as an item key.
    """
    def __init__(self, exprs):
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a named entry to tokenlist for every non-empty token group.

        The key is the group's first token (integer keys are converted to
        stripped strings).  The value is "" for a one-item group, the
        second item for a two-item group whose second item is not a
        ParseResults, and otherwise the remainder of the group (unwrapped
        when it is a single item without named values).
        """
        for position, tok in enumerate(tokenlist):
            size = len(tok)
            if size == 0:
                continue
            key = tok[0]
            if isinstance(key, int):
                key = _ustr(key).strip()

            if size == 1:
                value = ""
            elif size == 2 and not isinstance(tok[1], ParseResults):
                value = tok[1]
            else:
                remainder = tok.copy()
                del remainder[0]
                keepWhole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.keys())
                if keepWhole:
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        if self.resultsName:
            return [tokenlist]
        return tokenlist

3054

3055

3056

class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched by returning an empty list."""
        nothing = []
        return nothing

    def suppress(self):
        """Already suppressing: return this same object."""
        return self

3063

3064

3065

class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.called = False
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)

    def __call__(self, s, l, t):
        """Run the wrapped action on first use; raise ParseException afterwards.

        The "called" flag is only set after the wrapped action returns, so
        an action that raises may be attempted again.
        """
        if self.called:
            raise ParseException(s, l, "")
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to be called once more."""
        self.called = False

3078

3079

def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an ">>entering" line to sys.stderr before
    calling the (argument-normalized) parse action and a "<<leaving" line
    afterwards, showing either the return value or the exception.  An
    exception raised by the parse action is re-raised unchanged.
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        # __name__ is the same attribute as func_name on Python 2 functions
        # and is the only spelling that exists on Python 3.
        thisFunc = f.__name__
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # a leading extra argument is the instance of a bound method
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            # sys.exc_info() avoids the version-specific
            # "except X, e" / "except X as e" syntax
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

3100

3101

#

3102

# global helpers

3103

#

3104

def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.

    By default, the list elements and delimiters can have intervening
    whitespace and comments; passing 'combine=True' overrides this.  With
    combine set, the matching tokens are returned as a single token
    string with the delimiters included; otherwise they are returned as a
    list of tokens with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = "%s [%s %s]..." % (exprText, _ustr(delim), exprText)
    if combine:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    else:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    return listExpr.setName(dlName)

3117

3118

def countedArray(expr):
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as a list of the expr tokens; the
    leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # Re-define the Forward on the fly, based on the count just parsed.
        n = int(t[0])
        if n:
            body = Group(And([expr] * n))
        else:
            body = Group(empty)
        arrayExpr << body
        return []

    countField = Word(nums).setName("arrayLen")
    countField = countField.setParseAction(countFieldParseAction, callDuringTry=True)
    return (countField + arrayExpr)

3131

3132

def _flatten(L):
    """Return the items of an arbitrarily nested list as one flat list.

    A value whose type is not exactly list is returned wrapped in a
    one-element list.  An empty list is returned as-is (same object).

    Recursion only follows nesting, not list length: the previous
    head/tail recursion on L[1:] hit the interpreter's recursion limit on
    long token lists and copied the tail at every step.
    """
    if type(L) is not list: return [L]
    if L == []: return L
    flat = []
    for item in L:
        flat.extend(_flatten(item))
    return flat

3136

3137

def matchPreviousLiteral(expr):
    """Helper to define an expression that repeats the tokens matched earlier.

    The returned expression is indirectly defined from the tokens matched
    by a previous expression, i.e. it looks for a 'repeat' of it.  For
    example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches a previous
    literal, it will also match the leading "1:1" in "1:10".  If this is
    not desired, use matchPreviousExpr.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # Re-point the Forward at literal(s) built from the tokens just matched.
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # nested results are flattened into a sequence of Literals
            pieces = _flatten(t.asList())
            rep << And([Literal(piece) for piece in pieces])

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep

3162

3163

def matchPreviousExpr(expr):
    """Helper to define an expression that must re-match earlier tokens.

    The returned expression is indirectly defined from the tokens matched
    by a previous expression, i.e. it looks for a 'repeat' of it.  For
    example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches by expressions,
    it will *not* match the leading "1:1" in "1:10"; the expressions are
    evaluated first and then compared, so "1" is compared with "10".
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s, l, t):
        # remember what the first expression produced ...
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # ... and reject the repeat unless it produces the same tokens
            actual = _flatten(t.asList())
            if actual != expected:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep

3188

3189

def _escapeRegexRangeChars(s):
    """Escape s for use inside a regular-expression character class.

    The characters backslash, '^', '-' and ']' are prefixed with _bslash,
    and newline and tab characters are replaced by the two-character
    sequences \\n and \\t.  The result is passed through _ustr.
    """
    # the backslash must be handled first so added escapes are not doubled
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(s)

3196

3197

def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument of any other type produces a SyntaxWarning and is treated
       as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # Without this the code below failed with a NameError right after
        # issuing the warning.
        symbols = []

    # Drop duplicates and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix (longest-first testing).
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no change was needed for symbols[i]; move on
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are not swallowed
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )

3255

3256

def dictOf(key, value):
    """Helper to easily and clearly define a dictionary from key and value patterns.

    Takes care of defining the Dict, ZeroOrMore, and Group tokens in the
    proper order.  The key pattern can include delimiting markers or
    punctuation, as long as they are suppressed, thereby leaving the
    significant key text.  The value pattern can include named results,
    so that the Dict results can include named token fields.
    """
    entry = Group(key + value)
    entries = ZeroOrMore(entry)
    return Dict(entries)

3265

3266

def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.

    Useful to restore the parsed fields of an HTML start tag into the raw
    tag text itself, or to revert separate tokens with intervening
    whitespace back to the original matching input text.  Simpler to use
    than the parse action keepOriginalText, and does not require the
    inspect module to chase up the call stack.  By default, returns a
    string containing the original parsed text.

    If the optional asString argument is passed as False, then the return
    value is a ParseResults containing any results names that were
    originally matched, and a single token containing the original matched
    text from the input string.  So if the expression passed to
    originalTextFor contains expressions with defined results names, you
    must set asString to False if you want to preserve those results name
    values.
    """
    def _recordLoc(s, loc, t):
        return loc

    # Empty markers before and after expr record where the match begins and ends.
    locMarker = Empty().setParseAction(_recordLoc)
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")

    def _sliceText(s, l, t):
        return s[t._original_start:t._original_end]

    def _replaceTokens(s, l, t):
        # keep the named results, swap the token list for the raw text
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        extractText = _sliceText
    else:
        extractText = _replaceTokens
    matchExpr.setParseAction(extractText)
    return matchExpr

3292

3293

# convenience constants for positional expressions

3294

empty = Empty().setName("empty")

3295

lineStart = LineStart().setName("lineStart")

3296

lineEnd = LineEnd().setName("lineEnd")

3297

stringStart = StringStart().setName("stringStart")

3298

stringEnd = StringEnd().setName("stringEnd")

3299

3300

_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])

3301

_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])

3302

_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))

3303

_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))

3304

_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)

3305

_charRange = Group(_singleChar + Suppress("-") + _singleChar)

3306

_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

3307

3308

_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)

3309

3310

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Best effort by design: malformed input yields "".  Narrowed from a
        # bare except so KeyboardInterrupt/SystemExit still propagate.
        return ""

3330

3331

def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.

       Returns a parse action that raises ParseException unless the column
       computed by col() for the match location equals n.
    """
    def verifyCol(strg, locn, toks):
        actual = col(locn, strg)
        if actual == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)
    return verifyCol

3339

3340

def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.

    Especially useful when used with transformString().  The returned
    parse action accepts any arguments and always returns a new
    one-element list holding replStr.
    """
    def _replFunc(*args):
        replacement = [replStr]
        return replacement
    return _replFunc

3347

3348

def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed quoted strings.

    To use, add this parse action to quoted string using::
        quotedString.setParseAction( removeQuotes )

    Returns the first token without its first and last characters.
    """
    quoted = t[0]
    return quoted[1:-1]

3354

3355

def upcaseTokens(s, l, t):
    """Helper parse action to convert tokens to upper case.

    Every token is first converted with _ustr, then upper-cased.
    """
    return [_ustr(tok).upper() for tok in t]

3358

3359

def downcaseTokens(s, l, t):
    """Helper parse action to convert tokens to lower case.

    Every token is first converted with _ustr, then lower-cased.
    """
    return [_ustr(tok).lower() for tok in t]

3362

3363

def keepOriginalText(s, startLoc, t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       The token list t is emptied in place, refilled with the slice of s
       from startLoc to the end location reported by getTokensEndLoc(),
       and returned.
    """
    # getTokensEndLoc() inspects the call stack, so it has to be called
    # directly from this function.
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t

3373

3374

def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks the call stack, starting two frames above this function, for
       a frame of a function named "_parseNoCache" and returns the value
       of its local variable "loc".  Raises ParseFatalException if no
       such frame is found.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for record in fstack[2:]:
            if record[3] != "_parseNoCache":
                continue
            return record[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the captured frame records
        del fstack

3389

3390

def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name

    tagStr may be a string (turned into a Keyword, caseless unless xml is
    true) or an existing expression with a name.  Returns the tuple
    (openTag, closeTag).
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    def _isSlash(s, l, t):
        return t[0] == '/'

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values are double-quoted and always present
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attrPair = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: either quote style or a bare word, the value is optional,
        # and attribute names are lower-cased
        printablesLessRAbrack = "".join([c for c in printables if c not in ">"])
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack)
        attrPair = Group(tagAttrName.setParseAction(downcaseTokens) +
                         Optional(Suppress("=") + tagAttrValue))

    # optional "/" before ">", reported under the results name "empty"
    selfClosing = Optional("/", default=[False]).setResultsName("empty").setParseAction(_isSlash)
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(attrPair)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names: "start"/"end" + tag name in title case, ':' separators removed
    camelName = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + camelName).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end" + camelName).setName("</%s>" % tagStr)

    return openTag, closeTag

3417

3418

def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name

    Returns the (openTag, closeTag) pair built by _makeTags with xml off.
    """
    tagPair = _makeTags(tagStr, xml=False)
    return tagPair

3421

3422

def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name

    Returns the (openTag, closeTag) pair built by _makeTags with xml on.
    """
    tagPair = _makeTags(tagStr, xml=True)
    return tagPair

3425

3426

def withAttribute(*args, **attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.
    """
    # positional name/value pairs take precedence over keyword arguments
    source = args or attrDict.items()
    attrs = [(name, value) for name, value in source]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                if tokens[attrName] != attrValue:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, tokens[attrName], attrValue))
    return pa

# unique marker meaning "attribute must be present, any value accepted"
withAttribute.ANY_VALUE = object()

3456

3457

# Associativity markers used by operatorPrecedence; each is a distinct
# sentinel object, so callers should pass opAssoc.LEFT / opAssoc.RIGHT
# themselves rather than any look-alike value.
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()

3460

3461

def operatorPrecedence( baseExpr, opList ):
    """Helper method for building a grammar of expressions whose operators
       form a precedence hierarchy.  Operators may be unary, binary or
       ternary, and left- or right-associative.  A parse action can be
       attached to each operator level.

       Parameters:
        - baseExpr - expression for the most basic operand of the nested
          expression grammar
        - opList - list of tuples, one per operator precedence level; each
          level is defined in terms of the level listed before it, the first
          one in terms of baseExpr.  Each tuple has the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator; it may
             also be a string, which will be converted to a Literal;
             if numTerms is 3, opExpr is a tuple of two expressions, for the
             two operators separating the 3 terms; if numTerms is 2, opExpr
             may be None, in which case adjacent operands with nothing
             between them are matched
           - numTerms is the number of terms for this operator (must
             be 1, 2, or 3)
           - rightLeftAssoc indicates whether the operator is right or left
             associative, using the pyparsing-defined constants
             opAssoc.RIGHT and opAssoc.LEFT
           - parseAction is the parse action to attach to expressions
             matching this operator level (this tuple member may be omitted)

       A complete expression enclosed in parentheses is accepted wherever a
       base operand is.

       Raises ValueError for a ternary level whose opExpr is not a pair, for
       a numTerms other than 1, 2 or 3, or for an unrecognized associativity.
    """
    ret = Forward()
    # innermost operand: the base expression, or a whole expression in parens
    prevLevel = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for levelDef in opList:
        # pad with None so the optional parse action slot always unpacks
        opExpr,arity,rightLeftAssoc,action = (levelDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        curLevel = Forward()
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                lookahead = FollowedBy(prevLevel + opExpr)
                opMatch = lookahead + Group( prevLevel + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    lookahead = FollowedBy(prevLevel + opExpr + prevLevel)
                    opMatch = lookahead + Group( prevLevel + OneOrMore( opExpr + prevLevel ) )
                else:
                    lookahead = FollowedBy(prevLevel+prevLevel)
                    opMatch = lookahead + Group( prevLevel + OneOrMore(prevLevel) )
            elif arity == 3:
                lookahead = FollowedBy(prevLevel + opExpr1 + prevLevel + opExpr2 + prevLevel)
                opMatch = lookahead + Group( prevLevel + opExpr1 + prevLevel + opExpr2 + prevLevel )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = FollowedBy(opExpr.expr + curLevel)
                opMatch = lookahead + Group( opExpr + curLevel )
            elif arity == 2:
                if opExpr is not None:
                    lookahead = FollowedBy(prevLevel + opExpr + curLevel)
                    opMatch = lookahead + Group( prevLevel + OneOrMore( opExpr + curLevel ) )
                else:
                    lookahead = FollowedBy(prevLevel + curLevel)
                    opMatch = lookahead + Group( prevLevel + OneOrMore( curLevel ) )
            elif arity == 3:
                lookahead = FollowedBy(prevLevel + opExpr1 + curLevel + opExpr2 + curLevel)
                opMatch = lookahead + Group( prevLevel + opExpr1 + curLevel + opExpr2 + curLevel )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if action:
            opMatch.setParseAction( action )
        # a level matches its own operator form, or falls through to the level below
        curLevel << ( opMatch | prevLevel )
        prevLevel = curLevel
    ret << prevLevel
    return ret

3531

3532

# Quoted-string expressions.  Inside the quotes each pattern accepts any
# character other than the quote, newline, carriage return or backslash,
# a doubled quote character, a \x hex escape, or a backslash followed by
# any single character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
    ).setName("string enclosed in single quotes")
# either of the two forms above, as a single regular expression
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")
# a quoted string immediately preceded by a literal 'u', combined into one token
unicodeString = Combine(_L('u') + quotedString.copy())

3536

3537

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" by default).

       Parameters:
        - opener - opening delimiter of a nested list (default="("); can also
          be a pyparsing expression
        - closer - closing delimiter of a nested list (default=")"); can also
          be a pyparsing expression
        - content - expression for the items inside the nested lists
          (default=None)
        - ignoreExpr - expression for text in which opening and closing
          delimiters must not be treated as nesting (default=quotedString)

       When no content expression is given, opener and closer must both be
       strings, and everything between the delimiters is captured as a list
       of separate whitespace-delimited values.

       Use ignoreExpr for constructs that may legitimately contain the
       delimiters, such as quotedString or a comment expression; combine
       several with an Or or MatchFirst.  Pass None if nothing is to be
       ignored.

       Raises ValueError if opener equals closer, or if content is omitted
       while opener or closer is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        trim = lambda t:t[0].strip()
        whitespace = ParserElement.DEFAULT_WHITE_CHARS
        if len(opener) == 1 and len(closer)==1:
            # one-character delimiters can simply be excluded as characters
            excluded = opener+closer+whitespace
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                CharsNotIn(excluded,exact=1))
                            ).setParseAction(trim)
            else:
                # here the strip action belongs to the CharsNotIn term only
                content = empty+CharsNotIn(excluded).setParseAction(trim)
        else:
            # longer delimiters are rejected by negative lookahead before each character
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                ~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whitespace,exact=1))
                            ).setParseAction(trim)
            else:
                content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whitespace,exact=1))
                            ).setParseAction(trim)
    ret = Forward()
    # each item is ignorable text (if any), a nested list, or plain content
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret

3588

3589

def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       the block statements of Python source code.

       Parameters:
        - blockStatementExpr - expression for the statement that is repeated
          within the indented block
        - indentStack - list created by the caller to hold the stack of
          indentation columns (multiple statementWithIndentedBlock
          expressions within a single grammar should share a common
          indentStack)
        - indent - boolean indicating whether the block must be indented
          beyond the current level; set to False for a block of left-most
          statements (default=True)

       A valid block must contain at least one blockStatement.

       Note that blockStatementExpr is modified in place: it is told to
       ignore a backslash followed by a line end.
    """
    def checkPeerIndent(s,l,t):
        # nothing to check at end of input
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        if column != expected:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level: open a new indentation level
        indentStack.append( column )

    def checkUnindent(s,l,t):
        # nothing to check at end of input
        if l >= len(s):
            return
        column = col(l,s)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # no INDENT/UNDENT bracketing for a block at the current level
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # let statements continue across lines ending in a backslash
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

3640

3641

# 8-bit character sets, expanded by srange from the listed code ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# tag expressions built by makeHTMLTags from a Word of letters followed by
# letters, digits, "_" or ":"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))

# "&name;" for the five entity names below; the name is stored under the
# results name "entity"
commonHTMLEntity = Combine(
    _L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";"
    ).streamline()
_htmlEntityMap = {
    "gt"   : ">",
    "lt"   : "<",
    "amp"  : "&",
    "nbsp" : " ",
    "quot" : '"',
    }
# parse action: gives the replacement character for a known entity name, else None
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

3648

3649

# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# /* ... */ comment; may span lines, ends at the first "*/"
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... --> comment; [\s\S] lets the body span lines, and the non-greedy
# repeat stops at the first "-->".  (The pattern had been reduced to an empty
# string, which matches nothing useful.)
htmlComment = Regex(r"<!--[\s\S]*?-->")

# everything up to the end of the current line, leading whitespace included
restOfLine = Regex(r".*").leaveWhitespace()

# // comment; a backslash-newline continues the comment onto the next line
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")

# either a /* ... */ comment or a // comment, as a single expression
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same expression object as cppStyleComment, under a second name
javaStyleComment = cppStyleComment

# "#" through the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

3659

# every printable character except the comma
_noncomma = "".join( [ ch for ch in printables if ch != "," ] )

# one list item: runs of non-comma characters, which may be separated by
# blanks or tabs as long as the whitespace is not followed by a comma or
# the end of the line
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
        )
    ).streamline().setName("commaItem")

# comma-delimited items, each a quoted string or a plain item; an empty
# position yields ""
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="")
    ).setName("commaSeparatedList")

3664

3665

3666

if __name__ == "__main__":

3667

3668

def test( teststring ):

3669

try:

3670

tokens = simpleSQL.parseString( teststring )

3671

tokenlist = tokens.asList()

3672

print (teststring + "->" + str(tokenlist))

3673

print ("tokens = " + str(tokens))

3674

print ("tokens.columns = " + str(tokens.columns))

3675

print ("tokens.tables = " + str(tokens.tables))

3676

print (tokens.asXML("SQL",True))

3677

except ParseBaseException,err:

3678

print (teststring + "->")

3679

print (err.line)

3680

print (" "*(err.column-1) + "^")

3681

print (err)

3682

print()

3683

3684

selectToken = CaselessLiteral( "select" )

3685

fromToken = CaselessLiteral( "from" )

3686

3687

ident = Word( alphas, alphanums + "_$" )

3688

columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )

3689

columnNameList = Group( delimitedList( columnName ) )#.setName("columns")

3690

tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )

3691

tableNameList = Group( delimitedList( tableName ) )#.setName("tables")

3692

simpleSQL = ( selectToken + \

3693

( '*' | columnNameList ).setResultsName( "columns" ) + \

3694

fromToken + \

3695

tableNameList.setResultsName( "tables" ) )

3696

3697

test( "SELECT * from XYZZY, ABC" )

3698

test( "select * from SYS.XYZZY" )

3699

test( "Select A from Sys.dual" )

3700

test( "Select AA,BB,CC from Sys.dual" )

3701

test( "Select A, B, C from Sys.dual" )

3702

test( "Select A, B, C from Sys.dual" )

3703

test( "Xelect A, B, C from Sys.dual" )

3704

test( "Select A, B, C frox Sys.dual" )

3705

test( "Select" )

3706

test( "Select ^^^ frox Sys.dual" )

3707

test( "Select A, B, C from Sys.dual, Table2 " )

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages