upstream/ipython Commit - r21613:7ab78318

1

# encoding: utf-8

1

# encoding: utf-8

2

"""

2

"""

3

Utilities for working with strings and text.

3

Utilities for working with strings and text.

4

5

Inheritance diagram:

5

Inheritance diagram:

6

7

.. inheritance-diagram:: IPython.utils.text

7

.. inheritance-diagram:: IPython.utils.text

8

:parts: 3

8

:parts: 3

9

"""

9

"""

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import os

12

import os

13

import re

13

import re

14

import sys

14

import sys

15

import textwrap

15

import textwrap

16

from string import Formatter

16

from string import Formatter

17

18

from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest

18

from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest

19

from IPython.utils import py3compat

19

from IPython.utils import py3compat

20

21

# datetime.strftime date format for ipython

21

# datetime.strftime date format for ipython

22

# win32 gets the zero-padded day directive ("%d"); every other platform
# uses "%-d".
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"

26

27

class LSString(str):
    """A ``str`` subclass that exposes several alternative views of itself.

    Instances behave like ordinary strings and additionally provide:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as whitespace-separated string.
        .p (or .paths): list of path objects (requires path.py package)

    Views that need a transformation are built on first access and then
    stored on the instance, so repeated access returns the same object.

    This is handy when talking to a shell, which usually expects its
    command options separated by whitespace.
    """

    def get_list(self):
        """Return the lines of the string (split on ``'\\n'``), cached."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split('\n')
        return lines

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with every newline replaced by a space, cached."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace('\n', ' ')
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself (it is already newline-separated)."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``path`` objects for the lines that name existing paths.

        The ``path`` package is imported on every call, before the cache is
        consulted, so a missing package always surfaces as ImportError.
        """
        from path import path
        try:
            found = self.__paths
        except AttributeError:
            found = self.__paths = [
                path(entry) for entry in self.split('\n')
                if os.path.exists(entry)
            ]
        return found

    p = paths = property(get_paths)

75

76

# FIXME: We need to reimplement type specific displayhook and then add this

76

# FIXME: We need to reimplement type specific displayhook and then add this

77

# back as a custom printer. This should also be moved outside utils into the

77

# back as a custom printer. This should also be moved outside utils into the

78

# core.

78

# core.

79

80

# def print_lsstring(arg):

80

# def print_lsstring(arg):

81

# """ Prettier (non-repr-like) and more informative printer for LSString """

81

# """ Prettier (non-repr-like) and more informative printer for LSString """

82

# print "LSString (.p, .n, .l, .s available). Value:"

82

# print "LSString (.p, .n, .l, .s available). Value:"

83

# print arg

83

# print arg

84

#

84

#

85

#

85

#

86

# print_lsstring = result_display.when_type(LSString)(print_lsstring)

86

# print_lsstring = result_display.when_type(LSString)(print_lsstring)

87

88

89

class SList(list):
    """List derivative with a special access attributes.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of path objects (requires path.py package)

    Any values which require transformations are computed only once and
    cached on the instance (later mutation of the list does not refresh
    an already-computed view).
    """

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``path`` objects for the items that name existing paths.

        Requires the ``path`` package; the result is cached.
        """
        from path import path
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched case-insensitively; a callable is
        used as the predicate as-is. If prune is true, return all items
        NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field. Items that do not have that field are
        matched against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Select the text the predicate is applied to for one item.
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, py3compat.string_types):
            pred = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern
        if not prune:
            return SList([el for el in self if pred(match_target(el))])
        else:
            return SList([el for el in self if not pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored: a missing field is skipped, and a line
        with none of the requested fields contributes nothing.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Unlike ``list.sort`` this does not sort in place: it returns a new
        SList and leaves the receiver untouched.

        Parameters
        ----------
        field : int, optional
            Index of the whitespace-separated field to sort on. When None,
            whole lines are compared. Lines lacking the field sort first.
        nums : bool (default: False)
            Sort numerically: the digits found in the key (the field, or
            the whole line) are concatenated and read as an integer; a key
            without digits counts as 0.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        def key_text(line):
            # Text the ordering is based on for one line.
            if field is None:
                return line
            picked = SList([line]).fields(field)
            # fields() yields at most one entry for a one-line list; use the
            # string itself so that the digit scan below walks characters
            # rather than whole field strings.
            return picked[0] if picked else ""

        #decorate, sort, undecorate
        dsu = []
        for line in self:
            key = key_text(line)
            if nums:
                numstr = "".join([ch for ch in key if ch.isdigit()])
                try:
                    key = int(numstr)
                except ValueError:
                    key = 0
            # The line itself is the tie-breaker for equal keys.
            dsu.append((key, line))

        dsu.sort()
        return SList([t[1] for t in dsu])

234

235

236

# FIXME: We need to reimplement type specific displayhook and then add this

236

# FIXME: We need to reimplement type specific displayhook and then add this

237

# back as a custom printer. This should also be moved outside utils into the

237

# back as a custom printer. This should also be moved outside utils into the

238

# core.

238

# core.

239

240

# def print_slist(arg):

240

# def print_slist(arg):

241

# """ Prettier (non-repr-like) and more informative printer for SList """

241

# """ Prettier (non-repr-like) and more informative printer for SList """

242

# print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"

242

# print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"

243

# if hasattr(arg, 'hideonce') and arg.hideonce:

243

# if hasattr(arg, 'hideonce') and arg.hideonce:

244

# arg.hideonce = False

244

# arg.hideonce = False

245

# return

245

# return

246

#

246

#

247

# nlprint(arg) # This was a nested list printer, now removed.

247

# nlprint(arg) # This was a nested list printer, now removed.

248

#

248

#

249

# print_slist = result_display.when_type(SList)(print_slist)

249

# print_slist = result_display.when_type(SList)(print_slist)

250

251

252

def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
        `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' also matches right after a final '\n', which
    # leaves a dangling indent at the very end; drop it again.  The check is
    # made against '\n' (not os.linesep) because that is the character '^'
    # keys on, and it is skipped for an empty indent, where the slice
    # outstr[:-0] would wipe out the whole string.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    return outstr

289

290

291

def list_strings(arg):
    """Wrap a lone string in a list; pass anything else through unchanged.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    if not isinstance(arg, py3compat.string_types):
        # Not a string: returned as-is (the same object, not a copy).
        return arg
    return [arg]

311

312

313

def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    With empty `txt` the result is `mark` repeated and cut to `width`
    characters. Otherwise `txt` is surrounded by one space and an equal
    number of `mark` repetitions on each side.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    # Repetitions of `mark` per side; never negative when txt is too wide.
    repeats = max((width-len(txt)-2)//len(mark)//2, 0)
    side = mark*repeats
    return '%s %s %s' % (side,txt,side)

336

337

338

# One or more whitespace characters anchored at the start of the string.
ini_spaces_re = re.compile(r'^(\s+)')


def num_ini_spaces(strng):
    """Return the number of initial spaces in a string

    The count is the length of the leading run matched by ``\\s+``, so any
    whitespace character counts; 0 is returned when there is none.
    """
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0

348

349

350

def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes: currently a backslash that
    sits at the end of a line (the paragraph-continue marker)."""
    # '$' is evaluated per line because of MULTILINE.
    trailing_backslash = re.compile(r'\\$',re.MULTILINE)
    return trailing_backslash.sub('',strng)

358

359

360

def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    # A leading blank line means the first line is not special.
    if text.startswith('\n'):
        return textwrap.dedent(text)

    head, newline, tail = text.partition('\n')
    if not newline:
        # Single-line text: nothing to set aside.
        return textwrap.dedent(text)

    # Keep the first line verbatim and dedent only what follows it.
    return '\n'.join([head, textwrap.dedent(tail)])

385

386

387

def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    separator_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indented_re = re.compile(r'\n\s+', re.MULTILINE)

    cleaned = dedent(text).strip()
    # The separator pattern has a capture group, so split() interleaves the
    # captured whitespace with the paragraphs; keep the even entries only.
    chunks = separator_re.split(cleaned)[::2]

    wrapped = []
    for chunk in chunks:
        if indented_re.search(chunk) is not None:
            # Indentation that survived dedent is taken to be deliberate
            # formatting, so such a paragraph is left exactly as written.
            wrapped.append(chunk)
        else:
            wrapped.append(textwrap.fill(chunk, ncols))
    return wrapped

411

412

413

def long_substr(data):
    """Return the longest common substring in a list of strings.

    A one-element list yields that element; an empty list, or a list whose
    first string is empty, yields ''. Among equally long candidates the one
    occurring earliest in the first string is returned.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 1:
        return data[0]

    best = ''
    if len(data) > 1 and len(data[0]) > 0:
        reference = data[0]
        total = len(reference)
        # Try every slice of the first string, by start position and length.
        for start in range(total):
            for size in range(1, total - start + 1):
                candidate = reference[start:start + size]
                if size > len(best) and all(candidate in item for item in data):
                    best = candidate
    return best

427

428

429

def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    quote_re = re.compile(r'^(\s*>[ >]*)')
    lines = text.splitlines()

    found = [quote_re.match(ln) for ln in lines]
    if not all(found):
        # At least one line carries no quote marker: leave text untouched.
        return text

    # Only the length of the shared part of the quote prefixes is used.
    shared = long_substr(list(set(m.group(1) for m in found)))
    if not shared:
        return text

    cut = len(shared)
    return '\n'.join([ln[cut:] for ln in lines])

475

476

def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only sequences of the form ESC ``[`` digits/semicolons ``m`` are
    removed.

    Parameters
    ----------
    source : str
        Source to remove the ansi from
    """
    ansi_re = re.compile(r'\033\[(\d|;)+?m')
    return ansi_re.sub('', source)

486

487

488

class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate the field text `name` with `kwargs` as its namespace.

        Returns the ``(value, name)`` pair. Positional `args` are not
        consulted. The field text is passed to ``eval``.
        """
        return eval(name, kwargs), name

512

513

#XXX: As of Python 3.4, the format string parsing no longer splits on a colon

513

#XXX: As of Python 3.4, the format string parsing no longer splits on a colon

514

# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and

514

# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and

515

# above, it should be possible to remove FullEvalFormatter.

515

# above, it should be possible to remove FullEvalFormatter.

516

517

@skip_doctest_py3
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Any time a format key is not found in the kwargs,
    it will be tried as an expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: u'2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: u'[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: u'6'
    """
    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')

        pieces = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # literal text is emitted unchanged
            if literal_text:
                pieces.append(literal_text)

            # nothing more to do when the chunk carries no field
            if field_name is None:
                continue

            if format_spec:
                # What the parser split off as a format spec is glued back
                # on, so that a ':' can be used for slicing in the expression.
                field_name = ':'.join([field_name, format_spec])

            # evaluate the field text in the kwargs namespace, then apply any
            # requested conversion
            value = self.convert_field(eval(field_name, kwargs), conversion)

            # always formatted with an empty format spec
            pieces.append(self.format_field(value, ''))

        return u''.join(py3compat.cast_unicode(s) for s in pieces)

574

575

576

@skip_doctest_py3

576

@skip_doctest_py3

577

class DollarFormatter(FullEvalFormatter):

577

class DollarFormatter(FullEvalFormatter):

578

"""Formatter allowing Itpl style $foo replacement, for names and attribute

578

"""Formatter allowing Itpl style $foo replacement, for names and attribute

579

access only. Standard {foo} replacement also works, and allows full

579

access only. Standard {foo} replacement also works, and allows full

580

evaluation of its arguments.

580

evaluation of its arguments.

581

582

Examples

582

Examples

583

--------

583

--------

584

::

584

::

585

586

In [1]: f = DollarFormatter()

586

In [1]: f = DollarFormatter()

587

In [2]: f.format('{n//4}', n=8)

587

In [2]: f.format('{n//4}', n=8)

588

Out[2]: u'2'

588

Out[2]: u'2'

589

590

In [3]: f.format('23 * 76 is $result', result=23*76)

590

In [3]: f.format('23 * 76 is $result', result=23*76)

591

Out[3]: u'23 * 76 is 1748'

591

Out[3]: u'23 * 76 is 1748'

592

593

In [4]: f.format('$a or {b}', a=1, b=2)

593

In [4]: f.format('$a or {b}', a=1, b=2)

594

Out[4]: u'1 or 2'

594

Out[4]: u'1 or 2'

595

"""

595

"""

596

_dollar_pattern = re.compile("(.*?)\$(\$?[\w\.]+)")

596

_dollar_pattern = re.compile("(.*?)\$(\$?[\w\.]+)")

597

def parse(self, fmt_string):

597

def parse(self, fmt_string):

598

for literal_txt, field_name, format_spec, conversion \

598

for literal_txt, field_name, format_spec, conversion \

599

in Formatter.parse(self, fmt_string):

599

in Formatter.parse(self, fmt_string):

600

601

# Find $foo patterns in the literal text.

601

# Find $foo patterns in the literal text.

602

continue_from = 0

602

continue_from = 0

603

txt = ""

603

txt = ""

604

for m in self._dollar_pattern.finditer(literal_txt):

604

for m in self._dollar_pattern.finditer(literal_txt):

605

new_txt, new_field = m.group(1,2)

605

new_txt, new_field = m.group(1,2)

606

# $$foo --> $foo

606

# $$foo --> $foo

607

if new_field.startswith("$"):

607

if new_field.startswith("$"):

608

txt += new_txt + new_field

608

txt += new_txt + new_field

609

else:

609

else:

610

yield (txt + new_txt, new_field, "", None)

610

yield (txt + new_txt, new_field, "", None)

611

txt = ""

611

txt = ""

612

continue_from = m.end()

612

continue_from = m.end()

613

614

# Re-yield the {foo} style pattern

614

# Re-yield the {foo} style pattern

615

yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)

615

yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)

616

617

#-----------------------------------------------------------------------------

617

#-----------------------------------------------------------------------------

618

# Utils to columnize a list of string

618

# Utils to columnize a list of string

619

#-----------------------------------------------------------------------------

619

#-----------------------------------------------------------------------------

620

621

def _chunks(l, n):

621

def _col_chunks(l, nrows, row_first=False):

622

"""Yield successive n-sized chunks from l."""

622

"""Yield successive nrows-sized column chunks from l."""

623

for i in py3compat.xrange(0, len(l), n):

623

if row_first:

624

yield l[i:i+n]

624

ncols = (len(l) // nrows) + (len(l) % nrows > 0)

625

for i in py3compat.xrange(ncols):

626

yield [l[j] for j in py3compat.xrange(i, len(l), nrows)]

627

else:

628

for i in py3compat.xrange(0, len(l), nrows):

629

yield l[i:(i + nrows)]

625

630

626

631

627

def _find_optimal(rlist , separator_size=2 , displaywidth=80):

632

def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):

628

"""Calculate optimal info to columnize a list of string"""

633

"""Calculate optimal info to columnize a list of string"""

629

for nrow in range(1, len(rlist)+1) :

634

for nrow in range(1, len(rlist) + 1):

630

chk = list(map(max,_chunks(rlist, nrow)))

635

col_widths = list(map(max, _col_chunks(rlist, nrow, row_first)))

631

sumlength = sum(chk)

636

sumlength = sum(col_widths)

632

ncols = len(chk)

637

ncols = len(col_widths)

633

if sumlength+separator_size*(ncols-1) <= displaywidth :

638

if sumlength + separator_size * (ncols - 1) <= displaywidth:

634

break;

639

break

635

return {'columns~~_numbers~~' : ncols,

640

return {'num_columns': ncols,

636

'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,

641

'optimal_separator_width': (displaywidth - sumlength) / (ncols - 1) if (ncols - 1) else 0,

637

'~~rows_number~~s' : nrow,

642

'num_rows': nrow,

638

'columns_width' : chk

643

'column_widths': col_widths

639

}

644

}

640

645

641

646

642

def _get_or_default(mylist, i, default=None):

647

def _get_or_default(mylist, i, default=None):

643

"""return list item number, or default if don't exist"""

648

"""return list item number, or default if don't exist"""

644

if i >= len(mylist):

649

if i >= len(mylist):

645

return default

650

return default

646

else :

651

else :

647

return mylist[i]

652

return mylist[i]

648

653

649

654

650

def compute_item_matrix(items, empty=None, *args, **kwargs) :

655

def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :

651

"""Returns a nested list, and info to columnize items

656

"""Returns a nested list, and info to columnize items

652

657

653

Parameters

658

Parameters

654

----------

659

----------

655

660

656

items

661

items

657

list of strings to columize

662

list of strings to columize

663

row_first : (default False)

664

Whether to to compute columns for a row-first matrix instead ofr

665

column-first (default).

658

empty : (default None)

666

empty : (default None)

659

default value to fill list if needed

667

default value to fill list if needed

660

separator_size : int (default=2)

668

separator_size : int (default=2)

661

How much caracters will be used as a separation between each columns.

669

How much caracters will be used as a separation between each columns.

662

displaywidth : int (default=80)

670

displaywidth : int (default=80)

663

The width of the area onto wich the columns should enter

671

The width of the area onto wich the columns should enter

664

672

665

Returns

673

Returns

666

-------

674

-------

667

675

668

strings_matrix

676

strings_matrix

669

677

670

nested list of string, the outer most list contains as many list as

678

nested list of string, the outer most list contains as many list as

671

rows, the innermost lists have each as many element as colums. If the

679

rows, the innermost lists have each as many element as colums. If the

672

total number of elements in `items` does not equal the product of

680

total number of elements in `items` does not equal the product of

673

rows*columns, the last element of some lists are filled with `None`.

681

rows*columns, the last element of some lists are filled with `None`.

674

682

675

dict_info

683

dict_info

676

some info to make columnize easier:

684

some info to make columnize easier:

677

685

678

columns~~_numbers~~

686

num_columns

679

number of columns

687

number of columns

680

~~rows_number~~s

688

num_rows

681

number of rows

689

number of rows

682

columns_width

690

column_widths

683

list of with of each columns

691

list of with of each columns

684

optimal_separator_width

692

optimal_separator_width

685

best separator width between columns

693

best separator width between columns

686

694

687

Examples

695

Examples

688

--------

696

--------

689

::

697

::

690

698

691

In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']

699

In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']

692

...: compute_item_matrix(l,displaywidth=12)

700

...: compute_item_matrix(l, displaywidth=12)

693

Out[1]:

701

Out[1]:

694

([['aaa', 'f', 'k'],

702

([['aaa', 'f', 'k'],

695

['b', 'g', 'l'],

703

['b', 'g', 'l'],

696

['cc', 'h', None],

704

['cc', 'h', None],

697

['d', 'i', None],

705

['d', 'i', None],

698

['eeeee', 'j', None]],

706

['eeeee', 'j', None]],

699

{'column~~s_number~~s': 3,

707

{'num_columns': 3,

700

'columns_width': [5, 1, 1],

708

'column_widths': [5, 1, 1],

701

'optimal_separator_width': 2,

709

'optimal_separator_width': 2,

702

'~~rows_number~~s': 5})

710

'num_rows': 5})

703

"""

711

"""

704

info = _find_optimal(list(map(len, items)), *args, **kwargs)

712

info = _find_optimal(list(map(len, items)), row_first, *args, **kwargs)

705

nrow, ncol = info['~~rows_number~~s'], info['columns~~_numbers~~']

713

nrow, ncol = info['num_rows'], info['num_columns']

706

return ([[ _get_or_default(items, c*nrow+i, default=empty) for c in range(ncol) ] for i in range(nrow) ], info)

714

if row_first:

715

return ([[_get_or_default(items, c * nrow + r, default=empty) for r in range(nrow)] for c in range(ncol)], info)

716

else:

717

return ([[_get_or_default(items, c * nrow + r, default=empty) for c in range(ncol)] for r in range(nrow)], info)

707

718

708

719

709

def columnize(items, separator=' ', displaywidth=80):

720

def columnize(items, row_first=False, separator=' ', displaywidth=80):

710

""" Transform a list of strings into a single string with columns.

721

""" Transform a list of strings into a single string with columns.

711

722

712

Parameters

723

Parameters

713

----------

724

----------

714

items : sequence of strings

725

items : sequence of strings

715

The strings to process.

726

The strings to process.

716

727

728

row_first : (default False)

729

Whether to to compute columns for a row-first matrix instead ofr

730

column-first (default).

731

717

separator : str, optional [default is two spaces]

732

separator : str, optional [default is two spaces]

718

The string that separates columns.

733

The string that separates columns.

719

734

720

displaywidth : int, optional [default is 80]

735

displaywidth : int, optional [default is 80]

721

Width of the display in number of characters.

736

Width of the display in number of characters.

722

737

723

Returns

738

Returns

724

-------

739

-------

725

The formatted string.

740

The formatted string.

726

"""

741

"""

727

if not items :

742

if not items:

728

return '\n'

743

return '\n'

729

matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)

744

matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)

730

fmatrix = [filter(None, x) for x in matrix]

745

fmatrix = [filter(None, x) for x in matrix]

731

sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['columns_width'])])

746

sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['column_widths'])])

732

return '\n'.join(map(sjoin, fmatrix))+'\n'

747

return '\n'.join(map(sjoin, fmatrix))+'\n'

733

748

734

749

735

def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):

750

def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):

736

"""

751

"""

737

Return a string with a natural enumeration of items

752

Return a string with a natural enumeration of items

738

753

739

>>> get_text_list(['a', 'b', 'c', 'd'])

754

>>> get_text_list(['a', 'b', 'c', 'd'])

740

'a, b, c and d'

755

'a, b, c and d'

741

>>> get_text_list(['a', 'b', 'c'], ' or ')

756

>>> get_text_list(['a', 'b', 'c'], ' or ')

742

'a, b or c'

757

'a, b or c'

743

>>> get_text_list(['a', 'b', 'c'], ', ')

758

>>> get_text_list(['a', 'b', 'c'], ', ')

744

'a, b, c'

759

'a, b, c'

745

>>> get_text_list(['a', 'b'], ' or ')

760

>>> get_text_list(['a', 'b'], ' or ')

746

'a or b'

761

'a or b'

747

>>> get_text_list(['a'])

762

>>> get_text_list(['a'])

748

'a'

763

'a'

749

>>> get_text_list([])

764

>>> get_text_list([])

750

''

765

''

751

>>> get_text_list(['a', 'b'], wrap_item_with="`")

766

>>> get_text_list(['a', 'b'], wrap_item_with="`")

752

'`a` and `b`'

767

'`a` and `b`'

753

>>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")

768

>>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")

754

'a + b + c = d'

769

'a + b + c = d'

755

"""

770

"""

756

if len(list_) == 0:

771

if len(list_) == 0:

757

return ''

772

return ''

758

if wrap_item_with:

773

if wrap_item_with:

759

list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with) for

774

list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with) for

760

item in list_]

775

item in list_]

761

if len(list_) == 1:

776

if len(list_) == 1:

762

return list_[0]

777

return list_[0]

763

return '%s%s%s' % (

778

return '%s%s%s' % (

764

sep.join(i for i in list_[:-1]),

779

sep.join(i for i in list_[:-1]),

765

last_sep, list_[-1])

780

last_sep, list_[-1])

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # encoding: utf-8
             """
             Utilities for working with strings and text.
             Inheritance diagram:
             .. inheritance-diagram:: IPython.utils.text
                :parts: 3
             """
             from __future__ import absolute_import
             import os
             import re
             import sys
             import textwrap
             from string import Formatter
             from IPython.testing.skipdoctest import skip_doctest_py3, skip_doctest
             from IPython.utils import py3compat
             # datetime.strftime date format for ipython: win32 gets the "%d" day
             # directive, every other platform gets "%-d".
             date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
             class LSString(str):
                 """A ``str`` subclass exposing alternate views of its value.

                 Instances behave as ordinary strings and additionally provide:

                     .l (or .list) : the value split on newlines, as a list.
                     .n (or .nlstr): the value itself, unchanged.
                     .s (or .spstr): the value with newlines replaced by spaces.
                     .p (or .paths): path objects for the lines naming existing paths
                                     (requires the path.py package).

                 Views that need a transformation are built on first access and cached
                 on the instance.

                 Handy when talking to the shell, which mostly wants
                 whitespace-separated arguments.
                 """

                 def get_list(self):
                     """Return the value split on newlines (cached after first call)."""
                     try:
                         cached = self.__list
                     except AttributeError:
                         cached = self.__list = self.split('\n')
                     return cached

                 l = list = property(get_list)

                 def get_spstr(self):
                     """Return the value with newlines turned into spaces (cached)."""
                     try:
                         cached = self.__spstr
                     except AttributeError:
                         cached = self.__spstr = self.replace('\n', ' ')
                     return cached

                 s = spstr = property(get_spstr)

                 def get_nlstr(self):
                     """Return the string itself."""
                     return self

                 n = nlstr = property(get_nlstr)

                 def get_paths(self):
                     """Return path objects for the lines that exist on disk (cached)."""
                     # Imported here so path.py is only required when paths are used.
                     from path import path
                     try:
                         cached = self.__paths
                     except AttributeError:
                         cached = self.__paths = [
                             path(entry) for entry in self.split('\n')
                             if os.path.exists(entry)
                         ]
                     return cached

                 p = paths = property(get_paths)
             # FIXME: We need to reimplement type specific displayhook and then add this
             # back as a custom printer. This should also be moved outside utils into the
             # core.
             # def print_lsstring(arg):
             #     """ Prettier (non-repr-like) and more informative printer for LSString """
             #     print "LSString (.p, .n, .l, .s available). Value:"
             #     print arg
             #
             #
             # print_lsstring = result_display.when_type(LSString)(print_lsstring)
             class SList(list):
                 """A ``list`` subclass exposing alternate views of its items.

                 Instances behave as ordinary lists and additionally provide:

                 * .l (or .list) : the list itself.
                 * .n (or .nlstr): the items joined with newlines.
                 * .s (or .spstr): the items joined with spaces.
                 * .p (or .paths): path objects for the items naming existing paths
                   (requires the path.py package).

                 Views that need a transformation are built on first access and cached
                 on the instance.
                 """

                 def get_list(self):
                     """Return the list itself."""
                     return self

                 l = list = property(get_list)

                 def get_spstr(self):
                     """Return the items joined with single spaces (cached)."""
                     try:
                         cached = self.__spstr
                     except AttributeError:
                         cached = self.__spstr = ' '.join(self)
                     return cached

                 s = spstr = property(get_spstr)

                 def get_nlstr(self):
                     """Return the items joined with newlines (cached)."""
                     try:
                         cached = self.__nlstr
                     except AttributeError:
                         cached = self.__nlstr = '\n'.join(self)
                     return cached

                 n = nlstr = property(get_nlstr)

                 def get_paths(self):
                     """Return path objects for the items that exist on disk (cached)."""
                     # Imported here so path.py is only required when paths are used.
                     from path import path
                     try:
                         cached = self.__paths
                     except AttributeError:
                         cached = self.__paths = [
                             path(entry) for entry in self if os.path.exists(entry)
                         ]
                     return cached

                 p = paths = property(get_paths)

                 def grep(self, pattern, prune=False, field=None):
                     """Select the items matching 'pattern' (a regex string or a callable).

                     String patterns are searched case-insensitively. With a true
                     `prune`, the items that do NOT match are returned instead.

                     When `field` is given, only that whitespace-separated field of
                     each item is tested; items lacking the field are tested against
                     the empty string.

                     Examples::

                         a.grep( lambda x: x.startswith('C') )
                         a.grep('Cha.*log', prune=1)
                         a.grep('chm', field=-1)
                     """
                     def match_target(item):
                         # Whole item unless a specific field was requested.
                         if field is None:
                             return item
                         tokens = item.split()
                         try:
                             return tokens[field]
                         except IndexError:
                             return ""

                     if isinstance(pattern, py3compat.string_types):
                         def pred(candidate):
                             return re.search(pattern, candidate, re.IGNORECASE)
                     else:
                         pred = pattern

                     if prune:
                         kept = [el for el in self if not pred(match_target(el))]
                     else:
                         kept = [el for el in self if pred(match_target(el))]
                     return SList(kept)

                 def fields(self, *fields):
                     """Pick whitespace-separated fields out of every item.

                     Gives quick awk-like access to a list of strings.

                     Example data (in var a, created by 'a = !ls -l')::

                         -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
                         drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

                     * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
                     * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
                       (the selected fields are joined with a space).
                     * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

                     Out-of-range indices are skipped, and an item that yields no
                     field at all is left out of the result.

                     Called with no arguments, this returns a plain list holding the
                     ``split()`` of every item.
                     """
                     if not fields:
                         return [el.split() for el in self]

                     picked = SList()
                     for item in self:
                         tokens = item.split()
                         chosen = []
                         for index in fields:
                             try:
                                 chosen.append(tokens[index])
                             except IndexError:
                                 pass
                         if chosen:
                             picked.append(" ".join(chosen))
                     return picked

                 def sort(self, field=None, nums=False):
                     """Return a new SList sorted by the given field (see fields()).

                     Example::

                         a.sort(1, nums = True)

                     sorts a by its second field in numerical order (so that 21 > 3).
                     With `nums`, a key that contains no digits counts as 0.
                     """
                     # decorate, sort, undecorate
                     if field is None:
                         decorated = [[line, line] for line in self]
                     else:
                         decorated = [[SList([line]).fields(field), line]
                                      for line in self]

                     if nums:
                         for pair in decorated:
                             digits = "".join(ch for ch in pair[0] if ch.isdigit())
                             try:
                                 pair[0] = int(digits)
                             except ValueError:
                                 pair[0] = 0

                     decorated.sort()
                     return SList([pair[1] for pair in decorated])
             # FIXME: We need to reimplement type specific displayhook and then add this
             # back as a custom printer. This should also be moved outside utils into the
             # core.
             # def print_slist(arg):
             #     """ Prettier (non-repr-like) and more informative printer for SList """
             #     print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
             #     if hasattr(arg,  'hideonce') and arg.hideonce:
             #         arg.hideonce = False
             #         return
             #
             #     nlprint(arg)   # This was a nested list printer, now removed.
             #
             # print_slist = result_display.when_type(SList)(print_slist)
             def indent(instr, nspaces=4, ntabs=0, flatten=False):
                 """Indent a string a given number of spaces or tabstops.

                 indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

                 Parameters
                 ----------
                 instr : basestring
                     The string to be indented.
                 nspaces : int (default: 4)
                     The number of spaces to be indented.
                 ntabs : int (default: 0)
                     The number of tabs to be indented.
                 flatten : bool (default: False)
                     Whether to scrub existing indentation.  If True, all lines will be
                     aligned to the same indentation.  If False, existing indentation will
                     be strictly increased.

                 Returns
                 -------
                 str|unicode : string indented by ntabs and nspaces, or None when
                     `instr` is None.
                 """
                 if instr is None:
                     return
                 ind = '\t' * ntabs + ' ' * nspaces
                 if flatten:
                     # Swallow whatever leading whitespace each line already has.
                     pat = re.compile(r'^\s*', re.MULTILINE)
                 else:
                     pat = re.compile(r'^', re.MULTILINE)
                 outstr = re.sub(pat, ind, instr)
                 # The substitution also indents the empty "line" after a trailing
                 # newline; drop that dangling indent.  The `ind` check matters: with
                 # an empty indent, outstr[:-0] would be '' and discard the whole text.
                 if ind and outstr.endswith(os.linesep + ind):
                     return outstr[:-len(ind)]
                 return outstr
             def list_strings(arg):
                 """Return a list of strings for either a single string or a list.

                 A string is wrapped in a one-element list; anything else is handed
                 back unchanged.

                 Examples
                 --------
                 ::

                     In [7]: list_strings('A single string')
                     Out[7]: ['A single string']

                     In [8]: list_strings(['A single string in a list'])
                     Out[8]: ['A single string in a list']

                     In [9]: list_strings(['A','list','of','strings'])
                     Out[9]: ['A', 'list', 'of', 'strings']
                 """
                 return [arg] if isinstance(arg, py3compat.string_types) else arg
             def marquee(txt='', width=78, mark='*'):
                 """Center `txt` between two runs of `mark`, about `width` wide overall.

                 With an empty `txt` the result is just `mark` repeated and cut to
                 `width` characters.

                 Examples
                 --------
                 ::

                     In [16]: marquee('A test',40)
                     Out[16]: '**************** A test ****************'

                     In [17]: marquee('A test',40,'-')
                     Out[17]: '---------------- A test ----------------'

                     In [18]: marquee('A test',40,' ')
                     Out[18]: '                 A test                 '
                 """
                 if not txt:
                     return (mark * width)[:width]
                 # Two columns go to the spaces around the text; the remainder is
                 # split evenly between both sides, in whole copies of `mark`.
                 room = width - len(txt) - 2
                 repeats = max(room // len(mark) // 2, 0)
                 side = mark * repeats
                 return '%s %s %s' % (side, txt, side)
             # Matches the run of whitespace at the very start of a string.
             ini_spaces_re = re.compile(r'^(\s+)')
             def num_ini_spaces(strng):
                 """Return how many whitespace characters `strng` starts with."""
                 leading = ini_spaces_re.match(strng)
                 return leading.end() if leading else 0
             def format_screen(strng):
                 """Prepare a string for printing on screen.

                 Strips some latex-type format codes: currently the backslash that
                 ends a line (the "paragraph continue" marker).
                 """
                 return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
             def dedent(text):
                 """Like textwrap.dedent, but tolerant of an unindented first line.

                 This means it will still dedent strings like:

                 '''foo
                 is a bar
                 '''

                 For use in wrap_paragraphs.
                 """
                 first, newline, rest = text.partition('\n')
                 # Nothing special to do when there is only one line, or when the
                 # text opens with a blank line (so the first line is not ignored).
                 if not newline or not first:
                     return textwrap.dedent(text)
                 # Keep the first line verbatim and dedent only what follows it.
                 return first + '\n' + textwrap.dedent(rest)
             def wrap_paragraphs(text, ncols=80):
                 """Wrap each paragraph of `text` to at most `ncols` columns.

                 Works like textwrap.wrap, except that the text may hold several
                 paragraphs separated by empty lines.

                 Returns
                 -------
                 list of complete paragraphs, wrapped to fill `ncols` columns.
                 """
                 paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
                 indent_re = re.compile(r'\n\s+', re.MULTILINE)
                 body = dedent(text).strip()
                 # split() also returns the captured separators; keep every other
                 # entry, i.e. the paragraphs themselves.
                 paragraphs = paragraph_re.split(body)[::2]
                 # Indentation that survives the dedent is presumed to be meaningful
                 # formatting, so only paragraphs that are flush get re-filled.
                 return [
                     para if indent_re.search(para) else textwrap.fill(para, ncols)
                     for para in paragraphs
                 ]
             def long_substr(data):
                 """Find the longest substring shared by every string in `data`.

                 A single-element list yields that element; an empty list, or a list
                 whose first string is empty, yields ''.

                 Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
                 """
                 if len(data) == 1:
                     return data[0]
                 best = ''
                 if len(data) > 1 and len(data[0]) > 0:
                     reference = data[0]
                     # Try every slice of the first string; keep one only when it is
                     # strictly longer than the current best and occurs everywhere.
                     for start in range(len(reference)):
                         for length in range(len(reference) - start + 1):
                             candidate = reference[start:start + length]
                             if length > len(best) and all(candidate in s for s in data):
                                 best = candidate
                 return best
             def strip_email_quotes(text):
                 """Remove leading email quote markers ('>') from `text`.

                 Strips the combination of leading '>' and whitespace that is common
                 to *all* lines of the input.

                 Parameters
                 ----------
                 text : str

                 Examples
                 --------

                 Simple uses::

                     In [2]: strip_email_quotes('> > text')
                     Out[2]: 'text'

                     In [3]: strip_email_quotes('> > text\\n> > more')
                     Out[3]: 'text\\nmore'

                 Only the part of the quoting shared by every line is removed::

                     In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
                     Out[4]: '> text\\n> more\\nmore...'

                 and if any line carries no quote mark ('>') at all, the text comes
                 back untouched::

                     In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
                     Out[5]: '> > text\\n> > more\\nlast different'
                 """
                 lines = text.splitlines()
                 prefixes = set()
                 for line in lines:
                     quoted = re.match(r'^(\s*>[ >]*)', line)
                     if quoted is None:
                         # One unquoted line is enough to leave everything alone.
                         return text
                     prefixes.add(quoted.group(1))

                 common = long_substr(list(prefixes))
                 if not common:
                     return text
                 width = len(common)
                 return '\n'.join([ln[width:] for ln in lines])
             def strip_ansi(source):
                 """
                 Return `source` with its ansi escape codes removed.

                 Parameters
                 ----------
                 source : str
                     Source to remove the ansi from
                 """
                 ansi_escape = re.compile(r'\033\[(\d|;)+?m')
                 return ansi_escape.sub('', source)
             class EvalFormatter(Formatter):
                 """A String Formatter whose fields are evaluated as simple expressions.

                 A ``:`` is still read as introducing a format spec (as in standard
                 string formatting), so slicing has to be spelled with an explicit
                 ``slice(...)``.

                 Intended for templating, such as the parallel batch script
                 templates, where simple arithmetic on arguments is useful.

                 Examples
                 --------
                 ::

                     In [1]: f = EvalFormatter()
                     In [2]: f.format('{n//4}', n=8)
                     Out[2]: '2'

                     In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
                     Out[3]: 'll'
                 """

                 def get_field(self, name, args, kwargs):
                     """Evaluate the field text `name` with `kwargs` as its namespace.

                     Returns the value together with `name` as the key that was used;
                     positional `args` are not consulted.
                     """
                     # NOTE(review): eval() runs whatever the template contains, so
                     # templates must come from a trusted source.
                     return eval(name, kwargs), name
             #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
             # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
             # above, it should be possible to remove FullEvalFormatter.
             @skip_doctest_py3
             class FullEvalFormatter(Formatter):
                 """A String Formatter that allows evaluation of simple expressions.

                 Every replacement field is evaluated as a Python expression, using the
                 keyword arguments passed to ``format`` as its namespace.

                 Note that this version allows slicing using [1:2], so you cannot specify
                 a format string. Use :class:`EvalFormatter` to permit format strings.

                 Examples
                 --------
                 ::

                     In [1]: f = FullEvalFormatter()
                     In [2]: f.format('{n//4}', n=8)
                     Out[2]: u'2'
                     In [3]: f.format('{list(range(5))[2:4]}')
                     Out[3]: u'[2, 3]'
                     In [4]: f.format('{3*2}')
                     Out[4]: u'6'
                 """
                 # Adapted from Formatter._vformat, with eval() of the field and the
                 # format_spec folded back into the expression to allow slicing.
                 # The public vformat() is overridden rather than the private _vformat():
                 # the signature and return value of _vformat changed in Python 3.5
                 # (it now returns a (result, auto_arg_index) tuple), whereas vformat()
                 # is stable across versions.
                 def vformat(self, format_string, args, kwargs):
                     """Format `format_string`, evaluating each field in `kwargs`.

                     Parameters
                     ----------
                     format_string : str
                         Template containing ``{expression}`` fields.
                     args : sequence
                         Positional arguments; accepted for API compatibility, unused.
                     kwargs : dict
                         Namespace in which every field expression is evaluated.

                     Returns
                     -------
                     unicode
                         The formatted text.
                     """
                     result = []
                     for literal_text, field_name, format_spec, conversion in \
                             self.parse(format_string):
                         # output the literal text
                         if literal_text:
                             result.append(literal_text)
                         # if there's a field, output it
                         if field_name is not None:
                             if format_spec:
                                 # The parser split the field on ':'; glue it back so
                                 # that slices such as x[1:2] reach eval() intact.
                                 field_name = ':'.join([field_name, format_spec])
                             # NOTE: eval() executes arbitrary code; only use with
                             # trusted templates.
                             obj = eval(field_name, kwargs)
                             # do any conversion on the resulting object
                             obj = self.convert_field(obj, conversion)
                             # format with an empty spec, since the spec was consumed above
                             result.append(self.format_field(obj, ''))
                     return u''.join(py3compat.cast_unicode(s) for s in result)
             @skip_doctest_py3
             class DollarFormatter(FullEvalFormatter):
                 """Formatter allowing Itpl style $foo replacement, for names and attribute
                 access only. Standard {foo} replacement also works, and allows full
                 evaluation of its arguments.

                 ``$$foo`` is an escape and produces the literal text ``$foo``.

                 Examples
                 --------
                 ::

                     In [1]: f = DollarFormatter()
                     In [2]: f.format('{n//4}', n=8)
                     Out[2]: u'2'
                     In [3]: f.format('23 * 76 is $result', result=23*76)
                     Out[3]: u'23 * 76 is 1748'
                     In [4]: f.format('$a or {b}', a=1, b=2)
                     Out[4]: u'1 or 2'
                 """
                 # Raw string: '\$', '\w' and '\.' are not valid string escapes.
                 # DOTALL: the leading literal group must be able to span newlines,
                 # otherwise text preceding a $name on a later line is never part of a
                 # match and gets dropped from the output.
                 _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)", re.DOTALL)
                 def parse(self, fmt_string):
                     """Yield ``(literal, field, format_spec, conversion)`` tuples.

                     Wraps ``Formatter.parse`` and additionally turns every ``$name``
                     found in the literal text into its own replacement field.
                     """
                     for literal_txt, field_name, format_spec, conversion \
                                 in Formatter.parse(self, fmt_string):
                         # Find $foo patterns in the literal text.
                         continue_from = 0
                         txt = ""
                         for m in self._dollar_pattern.finditer(literal_txt):
                             new_txt, new_field = m.group(1,2)
                             # $$foo --> $foo
                             if new_field.startswith("$"):
                                 txt += new_txt + new_field
                             else:
                                 yield (txt + new_txt, new_field, "", None)
                                 txt = ""
                             continue_from = m.end()
                         # Re-yield the {foo} style pattern
                         yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
             #-----------------------------------------------------------------------------
             # Utilities to columnize a list of strings
             #-----------------------------------------------------------------------------
def _col_chunks(l, nrows, row_first=False):
    """Yield the columns obtained when laying out `l` in at most `nrows` rows.

    Parameters
    ----------
    l : list
        Items (or item widths) to split into columns.
    nrows : int
        Maximum number of rows of the layout; must be >= 1.
    row_first : bool (default False)
        If False, items fill one column after the other, so each column is a
        contiguous slice of `l`. If True, items fill one row after the other,
        so each column holds every ``ncols``-th item.
    """
    if row_first:
        # Number of columns needed so that everything fits in `nrows` rows.
        ncols = (len(l) // nrows) + (len(l) % nrows > 0)
        for i in range(ncols):
            # Row-major layout: item k sits in column k % ncols.
            yield [l[j] for j in range(i, len(l), ncols)]
    else:
        for i in range(0, len(l), nrows):
            yield l[i:(i + nrows)]


def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
    """Calculate optimal info to columnize a list of strings.

    Parameters
    ----------
    rlist : list of int
        Width of each item.
    row_first : bool (default False)
        Whether the items are laid out row by row instead of column by column.
    separator_size : int (default 2)
        Number of characters between two columns.
    displaywidth : int (default 80)
        Width of the area the columns must fit in.

    Returns
    -------
    dict
        With keys ``num_columns``, ``num_rows``, ``column_widths`` and
        ``optimal_separator_width``. The layout returned is the one with the
        fewest rows that fits in `displaywidth`; if none fits, a single
        column is used.
    """
    if not rlist:
        # Nothing to lay out; the loop below would leave its variables unbound.
        return {'num_columns': 0,
                'optimal_separator_width': 0,
                'num_rows': 0,
                'column_widths': [],
                }
    for nrow in range(1, len(rlist) + 1):
        col_widths = list(map(max, _col_chunks(rlist, nrow, row_first)))
        sumlength = sum(col_widths)
        ncols = len(col_widths)
        if sumlength + separator_size * (ncols - 1) <= displaywidth:
            break
    return {'num_columns': ncols,
            # Floor division keeps the width a whole number of characters on
            # both Python 2 and Python 3.
            'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
            'num_rows': nrow,
            'column_widths': col_widths,
            }


def _get_or_default(mylist, i, default=None):
    """Return ``mylist[i]``, or `default` if `i` is past the end of the list."""
    if i >= len(mylist):
        return default
    return mylist[i]


def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs):
    """Returns a nested list, and info to columnize items

    Parameters
    ----------
    items
        list of strings to columnize
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between each column.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Returns
    -------
    strings_matrix
        nested list of strings, the outermost list contains as many lists as
        rows, the innermost lists each have as many elements as columns. If the
        total number of elements in `items` does not equal the product of
        rows*columns, the trailing cells are filled with `empty`.
    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        num_rows
          number of rows
        column_widths
          list of width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
           ...: compute_item_matrix(l, displaywidth=12)
        Out[1]:
            ([['aaa', 'f', 'k'],
            ['b', 'g', 'l'],
            ['cc', 'h', None],
            ['d', 'i', None],
            ['eeeee', 'j', None]],
            {'num_columns': 3,
            'column_widths': [5, 1, 1],
            'optimal_separator_width': 2,
            'num_rows': 5})
    """
    # Extra positional/keyword arguments (separator_size, displaywidth) are
    # forwarded untouched to _find_optimal.
    info = _find_optimal(list(map(len, items)), row_first, *args, **kwargs)
    nrow, ncol = info['num_rows'], info['num_columns']
    if row_first:
        # Row-major: the item at (row r, column c) is items[r * ncol + c].
        matrix = [[_get_or_default(items, r * ncol + c, default=empty) for c in range(ncol)] for r in range(nrow)]
    else:
        # Column-major: the item at (row r, column c) is items[c * nrow + r].
        matrix = [[_get_or_default(items, c * nrow + r, default=empty) for c in range(ncol)] for r in range(nrow)]
    return (matrix, info)
def columnize(items, row_first=False, separator='  ', displaywidth=80):
    """Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    separator : str, optional [default is two spaces]
        The string that separates columns.
    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    Returns
    -------
    The formatted string: one line per row, each terminated by a newline.
    """
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
    widths = info['column_widths']
    rendered_rows = []
    for row in matrix:
        # Drop the padding cells (and any other falsy cell) before pairing
        # the remaining cells with the column widths.
        cells = [cell.ljust(width, ' ') for cell, width in zip(filter(None, row), widths)]
        rendered_rows.append(separator.join(cells))
    return '\n'.join(rendered_rows) + '\n'
             def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
                 """
                 Join the items of `list_` into a natural-language enumeration.

                 All items but the last are joined with `sep`; the last one is attached
                 with `last_sep`. If `wrap_item_with` is non-empty, it is put on both
                 sides of every item.

                 >>> get_text_list(['a', 'b', 'c', 'd'])
                 'a, b, c and d'
                 >>> get_text_list(['a', 'b', 'c'], ' or ')
                 'a, b or c'
                 >>> get_text_list(['a', 'b', 'c'], ', ')
                 'a, b, c'
                 >>> get_text_list(['a', 'b'], ' or ')
                 'a or b'
                 >>> get_text_list(['a'])
                 'a'
                 >>> get_text_list([])
                 ''
                 >>> get_text_list(['a', 'b'], wrap_item_with="`")
                 '`a` and `b`'
                 >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
                 'a + b + c = d'
                 """
                 count = len(list_)
                 if count == 0:
                     return ''
                 if wrap_item_with:
                     wrapped = []
                     for item in list_:
                         wrapped.append('%s%s%s' % (wrap_item_with, item, wrap_item_with))
                     list_ = wrapped
                 if count == 1:
                     return list_[0]
                 leading = sep.join(list_[:-1])
                 return '%s%s%s' % (leading, last_sep, list_[-1])