upstream/ipython Commit - r22976:636eea87

1

# encoding: utf-8

1

# encoding: utf-8

2

"""

2

"""

3

Utilities for working with strings and text.

3

Utilities for working with strings and text.

4

5

Inheritance diagram:

5

Inheritance diagram:

6

7

.. inheritance-diagram:: IPython.utils.text

7

.. inheritance-diagram:: IPython.utils.text

8

:parts: 3

8

:parts: 3

9

"""

9

"""

10

11

import os

11

import os

12

import re

12

import re

13

import sys

13

import sys

14

import textwrap

14

import textwrap

15

from string import Formatter

15

from string import Formatter

16

try:

16

try:

17

from pathlib import Path

17

from pathlib import Path

18

except ImportError:

18

except ImportError:

19

# Python 2 backport

19

# Python 2 backport

20

from pathlib2 import Path

20

from pathlib2 import Path

21

22

from IPython.testing.skipdoctest import skip_doctest

23

from IPython.utils import py3compat

22

from IPython.utils import py3compat

24

23

25

# datetime.strftime date format for ipython

24

# datetime.strftime date format for ipython

26

if sys.platform == 'win32':

25

if sys.platform == 'win32':

27

date_format = "%B %d, %Y"

26

date_format = "%B %d, %Y"

28

else:

27

else:

29

date_format = "%B %-d, %Y"

28

date_format = "%B %-d, %Y"

30

29

31

class LSString(str):
    """String derivative with special access attributes.

    These are normal strings, but with the special attributes:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value with every newline replaced by a space.
        .p (or .paths): list of ``Path`` objects, one for each line that
                        names an existing filesystem entry.

    Any values which require transformations are computed only once and
    cached on the instance.

    Such strings are very useful to efficiently interact with the shell, which
    typically only understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the string split on ``'\\n'`` (computed once, then cached)."""
        try:
            return self.__list
        except AttributeError:
            self.__list = self.split('\n')
            return self.__list

    # Inside the class body ``list`` is just an attribute name; method bodies
    # still resolve the builtin ``list`` from the global scope.
    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines replaced by spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = self.replace('\n', ' ')
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself (it is already newline-separated)."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the lines that exist on disk (cached).

        Lines for which ``os.path.exists`` is false are silently dropped.
        """
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [Path(p) for p in self.split('\n')
                            if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

78

77

79

# FIXME: We need to reimplement type specific displayhook and then add this

78

# FIXME: We need to reimplement type specific displayhook and then add this

80

# back as a custom printer. This should also be moved outside utils into the

79

# back as a custom printer. This should also be moved outside utils into the

81

# core.

80

# core.

82

81

83

# def print_lsstring(arg):

82

# def print_lsstring(arg):

84

# """ Prettier (non-repr-like) and more informative printer for LSString """

83

# """ Prettier (non-repr-like) and more informative printer for LSString """

85

# print "LSString (.p, .n, .l, .s available). Value:"

84

# print "LSString (.p, .n, .l, .s available). Value:"

86

# print arg

85

# print arg

87

#

86

#

88

#

87

#

89

# print_lsstring = result_display.when_type(LSString)(print_lsstring)

88

# print_lsstring = result_display.when_type(LSString)(print_lsstring)

90

89

91

90

92

class SList(list):
    """List derivative with special access attributes.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of ``Path`` objects, one for each element that
      names an existing filesystem entry.

    Any values which require transformations are computed only once and
    cached.  The cache is never invalidated, so ``.n``, ``.s`` and ``.p``
    keep returning the first computed value even if the list is mutated
    afterwards."""

    def get_list(self):
        """Return the list itself."""
        return self

    # Inside the class body ``list`` is just an attribute name; method bodies
    # still resolve the builtin ``list`` from the global scope.
    l = list = property(get_list)

    def get_spstr(self):
        """Return the elements joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the elements joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the elements that exist on disk (cached).

        Elements for which ``os.path.exists`` is false are silently dropped.
        """
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [Path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune=False, field=None):
        """Return all strings matching 'pattern' (a regex or callable).

        A string pattern is searched with ``re.search`` and is
        case-insensitive; a callable is used as the predicate as-is.
        If prune is true, return all items NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field.  A line that has no such field is
        matched against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Select the text the predicate is applied to for one element.
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, py3compat.string_types):
            pred = lambda x: re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern
        if not prune:
            return SList([el for el in self if pred(match_target(el))])
        else:
            return SList([el for el in self if not pred(match_target(el))])

    def fields(self, *fields):
        """Collect whitespace-separated fields from string list.

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored; a line that yields no field at all is
        left out of the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists (not an SList).
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self, field=None, nums=False):
        """Return a new SList sorted by the specified field (see fields()).

        Unlike ``list.sort`` this does not sort in place; the receiver is
        left untouched and the sorted copy is returned.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        # decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]
        if nums:
            for entry in dsu:
                # With a field the key is a list of field strings, so only
                # all-digit fields contribute; without one it is the line
                # itself and its digit characters are concatenated.
                numstr = "".join([ch for ch in entry[0] if ch.isdigit()])
                try:
                    n = int(numstr)
                except ValueError:
                    n = 0
                entry[0] = n

        dsu.sort()
        return SList([t[1] for t in dsu])

236

235

237

236

238

# FIXME: We need to reimplement type specific displayhook and then add this

237

# FIXME: We need to reimplement type specific displayhook and then add this

239

# back as a custom printer. This should also be moved outside utils into the

238

# back as a custom printer. This should also be moved outside utils into the

240

# core.

239

# core.

241

240

242

# def print_slist(arg):

241

# def print_slist(arg):

243

# """ Prettier (non-repr-like) and more informative printer for SList """

242

# """ Prettier (non-repr-like) and more informative printer for SList """

244

# print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"

243

# print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"

245

# if hasattr(arg, 'hideonce') and arg.hideonce:

244

# if hasattr(arg, 'hideonce') and arg.hideonce:

246

# arg.hideonce = False

245

# arg.hideonce = False

247

# return

246

# return

248

#

247

#

249

# nlprint(arg) # This was a nested list printer, now removed.

248

# nlprint(arg) # This was a nested list printer, now removed.

250

#

249

#

251

# print_slist = result_display.when_type(SList)(print_slist)

250

# print_slist = result_display.when_type(SList)(print_slist)

252

251

253

252

254

def indent(instr, nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces, or None when
        `instr` is None.

    """
    if instr is None:
        return
    ind = '\t' * ntabs + ' ' * nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' also matches right after a trailing '\n', which
    # leaves a dangling indent at the very end; drop it.  '^' only ever
    # matches after '\n' (never after a bare '\r'), so test for '\n' rather
    # than os.linesep.  The ``ind`` guard matters: with an empty indent
    # ``outstr[:-0]`` would be the empty string, not the whole text.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    return outstr

291

290

292

291

293

def list_strings(arg):
    """Always return a list of strings, given a string or list of strings
    as input.

    A single string is wrapped in a one-element list.  Anything else is
    returned unchanged (no copy is made and its contents are not checked).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """

    if isinstance(arg, py3compat.string_types):
        return [arg]
    return arg

313

312

314

313

315

def marquee(txt='', width=78, mark='*'):
    """Return the input string centered in a 'marquee'.

    Parameters
    ----------
    txt : str (default: '')
        Text to place in the middle.  When empty, a line made only of
        `mark` characters, cut to `width`, is returned.
    width : int (default: 78)
        Target total width.  Text longer than ``width - 2`` is not
        truncated; it is returned with a single space on each side.
    mark : str (default: '*')
        String repeated on both sides of the text.  An empty mark produces
        no decoration.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    if mark:
        nmark = (width - len(txt) - 2) // len(mark) // 2
        if nmark < 0:
            nmark = 0
    else:
        # An empty mark cannot fill anything; avoid dividing by len('') == 0.
        nmark = 0
    marks = mark * nmark
    return '%s %s %s' % (marks, txt, marks)

338

337

339

338

340

# Leading run of whitespace at the very start of a string.
ini_spaces_re = re.compile(r'^(\s+)')


def num_ini_spaces(strng):
    """Return the number of initial whitespace characters in a string.

    The pattern is ``\\s+``, so tabs and other whitespace count as well,
    each as one character.  Returns 0 when the string does not start with
    whitespace.
    """

    ini_spaces = ini_spaces_re.match(strng)
    if ini_spaces:
        return ini_spaces.end()
    return 0

350

349

351

350

352

def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes: currently only the
    paragraph-continuation backslash found at the end of a line.  The
    newline that follows it is kept."""
    # Paragraph continue
    par_re = re.compile(r'\\$', re.MULTILINE)
    strng = par_re.sub('', strng)
    return strng

360

359

361

360

362

def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """

    if text.startswith('\n'):
        # text starts with blank line, don't ignore the first line
        return textwrap.dedent(text)

    # split first line
    splits = text.split('\n', 1)
    if len(splits) == 1:
        # only one line
        return textwrap.dedent(text)

    first, rest = splits
    # dedent everything but the first line
    rest = textwrap.dedent(rest)
    return '\n'.join([first, rest])

387

386

388

387

389

def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    The text is first passed through `dedent` and stripped.  A paragraph
    that still contains an indented line afterwards is left exactly as it
    is instead of being re-filled.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    text = dedent(text).strip()
    # The pattern has one capture group, so split() alternates paragraph
    # text with the captured separator; keep only the paragraphs.
    paragraphs = paragraph_re.split(text)[::2]
    out_ps = []
    indent_re = re.compile(r'\n\s+', re.MULTILINE)
    for p in paragraphs:
        # presume indentation that survives dedent is meaningful formatting,
        # so don't fill unless text is flush.
        if indent_re.search(p) is None:
            # wrap paragraph
            p = textwrap.fill(p, ncols)
        out_ps.append(p)
    return out_ps

413

412

414

413

415

def long_substr(data):
    """Return the longest common substring in a list of strings.

    Candidates are taken from ``data[0]``; among equally long candidates the
    one that starts first in ``data[0]`` is returned.  An empty list gives
    ``''`` and a one-element list gives that element.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    substr = ''
    if len(data) > 1 and len(data[0]) > 0:
        first = data[0]
        for i in range(len(first)):
            for j in range(len(first) - i + 1):
                # Only a strictly longer candidate can replace the current
                # best, which is what keeps the earliest match on ties.
                if j > len(substr) and all(first[i:i+j] in x for x in data):
                    substr = first[i:i+j]
    elif len(data) == 1:
        substr = data[0]
    return substr

429

428

430

429

431

def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str : `text` with the shared quote prefix removed from every line.  When
        something is stripped the lines are re-joined with ``'\\n'``;
        otherwise `text` is returned unchanged.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    matches = set()
    for line in lines:
        prefix = re.match(r'^(\s*>[ >]*)', line)
        if prefix:
            matches.add(prefix.group(1))
        else:
            # One unquoted line means nothing is stripped anywhere.
            break
    else:
        # Every line is quoted.  Use the character-wise common *prefix* of
        # the quote runs: a longest common substring may sit in the middle
        # of some of them and would strip text the lines do not share.
        prefix = os.path.commonprefix(list(matches))
        if prefix:
            strip = len(prefix)
            text = '\n'.join([ln[strip:] for ln in lines])
    return text

477

476

478

def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only sequences of the form ``ESC [ <digits and ';'> m`` are removed;
    the parameter part must contain at least one character.

    Parameters
    ----------
    source : str
        Source to remove the ansi from
    """
    return re.sub(r'\033\[(\d|;)+?m', '', source)

488

487

489

488

490

class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate the field `name` as an expression over `kwargs`.

        Returns the ``(value, name)`` pair that ``Formatter`` expects.
        Positional `args` are not consulted.
        """
        # WARNING: eval() executes arbitrary code, so format strings must
        # come from a trusted source.  kwargs serves as the globals
        # namespace of the evaluation.
        v = eval(name, kwargs)
        return v, name

514

513

515

#XXX: As of Python 3.4, the format string parsing no longer splits on a colon

514

#XXX: As of Python 3.4, the format string parsing no longer splits on a colon

516

# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and

515

# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and

517

# above, it should be possible to remove FullEvalFormatter.

516

# above, it should be possible to remove FullEvalFormatter.

518

517

519

class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Any time a format key is not found in the kwargs,
    it will be tried as an expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """
    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def vformat(self, format_string, args, kwargs):
        """Render `format_string`, evaluating every field as an expression.

        Positional `args` are not consulted; fields are evaluated with
        `kwargs` as their namespace.  Returns the pieces joined into a
        unicode string.
        """
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing:
                    # the parser split the field at the first ':', so glue
                    # the two halves back into one expression.
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                # WARNING: eval() executes arbitrary code, so format strings
                # must come from a trusted source.
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result
                # (the format spec was consumed above, so none is applied)
                result.append(self.format_field(obj, ''))

        return u''.join(py3compat.cast_unicode(s) for s in result)

574

572

575

573

576

class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign escapes the replacement: ``$$foo`` is emitted as
    the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """
    # Group 1: literal text before the '$'; group 2: the name (word
    # characters and dots), optionally prefixed by a second '$' (escape).
    # Raw string so the backslashes are not read as string escapes.
    _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)")

    def parse(self, fmt_string):
        """Yield ``(literal, field, format_spec, conversion)`` tuples.

        Wraps ``Formatter.parse``: every literal chunk it produces is
        scanned for ``$name`` patterns, each of which is yielded as its
        own field (empty format spec, no conversion) before the original
        ``{...}`` field is re-yielded.
        """
        for literal_txt, field_name, format_spec, conversion \
                in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern.finditer(literal_txt):
                new_txt, new_field = m.group(1, 2)
                # $$foo --> $foo
                if new_field.startswith("$"):
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)

616

613

617

#-----------------------------------------------------------------------------

614

#-----------------------------------------------------------------------------

618

# Utils to columnize a list of string

615

# Utils to columnize a list of string

619

#-----------------------------------------------------------------------------

616

#-----------------------------------------------------------------------------

620

617

621

def _col_chunks(l, max_rows, row_first=False):

618

def _col_chunks(l, max_rows, row_first=False):

622

"""Yield successive max_rows-sized column chunks from l."""

619

"""Yield successive max_rows-sized column chunks from l."""

623

if row_first:

620

if row_first:

624

ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)

621

ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)

625

for i in py3compat.xrange(ncols):

622

for i in py3compat.xrange(ncols):

626

yield [l[j] for j in py3compat.xrange(i, len(l), ncols)]

623

yield [l[j] for j in py3compat.xrange(i, len(l), ncols)]

627

else:

624

else:

628

for i in py3compat.xrange(0, len(l), max_rows):

625

for i in py3compat.xrange(0, len(l), max_rows):

629

yield l[i:(i + max_rows)]

626

yield l[i:(i + max_rows)]

630

627

631

628

632

def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):

629

def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):

633

"""Calculate optimal info to columnize a list of string"""

630

"""Calculate optimal info to columnize a list of string"""

634

for max_rows in range(1, len(rlist) + 1):

631

for max_rows in range(1, len(rlist) + 1):

635

col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))

632

col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))

636

sumlength = sum(col_widths)

633

sumlength = sum(col_widths)

637

ncols = len(col_widths)

634

ncols = len(col_widths)

638

if sumlength + separator_size * (ncols - 1) <= displaywidth:

635

if sumlength + separator_size * (ncols - 1) <= displaywidth:

639

break

636

break

640

return {'num_columns': ncols,

637

return {'num_columns': ncols,

641

'optimal_separator_width': (displaywidth - sumlength) / (ncols - 1) if (ncols - 1) else 0,

638

'optimal_separator_width': (displaywidth - sumlength) / (ncols - 1) if (ncols - 1) else 0,

642

'max_rows': max_rows,

639

'max_rows': max_rows,

643

'column_widths': col_widths

640

'column_widths': col_widths

644

}

641

}

645

642

646

643

647

def _get_or_default(mylist, i, default=None):

644

def _get_or_default(mylist, i, default=None):

648

"""return list item number, or default if don't exist"""

645

"""return list item number, or default if don't exist"""

649

if i >= len(mylist):

646

if i >= len(mylist):

650

return default

647

return default

651

else :

648

else :

652

return mylist[i]

649

return mylist[i]

653

650

654

651

655

def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs):
    """Returns a nested list, and info to columnize items

    Parameters
    ----------

    items
        list of strings to columnize
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between columns.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Returns
    -------

    strings_matrix

        nested list of strings; the outermost list contains as many lists as
        rows, the innermost lists each have as many elements as columns. If
        the total number of elements in `items` does not equal the product of
        rows*columns, the last elements of some lists are filled with `empty`
        (`None` by default).

    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        max_rows
          maximum number of rows (final number may be less)
        column_widths
          list of the width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
           ...: compute_item_matrix(l, displaywidth=12)
        Out[1]:
            ([['aaa', 'f', 'k'],
              ['b', 'g', 'l'],
              ['cc', 'h', None],
              ['d', 'i', None],
              ['eeeee', 'j', None]],
             {'num_columns': 3,
              'column_widths': [5, 1, 1],
              'optimal_separator_width': 2,
              'max_rows': 5})
    """
    # Extra positional/keyword arguments (separator_size, displaywidth) are
    # forwarded unchanged to _find_optimal.
    info = _find_optimal(list(map(len, items)), row_first, *args, **kwargs)
    nrow, ncol = info['max_rows'], info['num_columns']
    if row_first:
        # consecutive items run along a row
        return ([[_get_or_default(items, r * ncol + c, default=empty) for c in range(ncol)] for r in range(nrow)], info)
    else:
        # consecutive items run down a column
        return ([[_get_or_default(items, c * nrow + r, default=empty) for c in range(ncol)] for r in range(nrow)], info)

718

715

719

716

720

def columnize(items, row_first=False, separator='  ', displaywidth=80, spread=False):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    spread : bool, optional [default is False]
        When true, the separator is right-padded with spaces up to the
        ``optimal_separator_width`` reported by ``compute_item_matrix``.

    Returns
    -------
    The formatted string.  It always ends with a newline; an empty
    ``items`` yields just ``'\\n'``.
    """
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))
    # drop the filler cells (and any other falsy entries) from each row
    fmatrix = [filter(None, x) for x in matrix]
    # pad every cell to the width of its column, then join the row
    sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['column_widths'])])
    return '\n'.join(map(sjoin, fmatrix))+'\n'

750

747

751

748

752

def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with ``sep``; the last one is
    attached with ``last_sep``.  When ``wrap_item_with`` is non-empty it
    is placed before and after every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if len(list_) == 0:
        return ''
    if wrap_item_with:
        list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with) for
                 item in list_]
    if len(list_) == 1:
        return list_[0]
    return '%s%s%s' % (
        sep.join(list_[:-1]),
        last_sep, list_[-1])

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # encoding: utf-8
             """
             Utilities for working with strings and text.
             Inheritance diagram:
             .. inheritance-diagram:: IPython.utils.text
                :parts: 3
             """
             import os
             import re
             import sys
             import textwrap
             from string import Formatter
             try:
                 from pathlib import Path
             except ImportError:
                 # Python 2 backport
                 from pathlib2 import Path
-            from IPython.testing.skipdoctest import skip_doctest
             from IPython.utils import py3compat
             # strftime pattern used by ipython for dates: Windows gets the "%d" day
             # directive, every other platform the "%-d" one.
             date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
             class LSString(str):
                 """String subclass that exposes alternate views of its own value.

                 Instances behave like normal strings and additionally provide:

                     .l (or .list) : value as list (split on newlines).
                     .n (or .nlstr): original value (the string itself).
                     .s (or .spstr): value with every newline replaced by a space.
                     .p (or .paths): list of Path objects, one per line that names
                                     an existing filesystem path.

                 Any values which require transformations are computed only once and
                 cached.

                 Such strings are very useful to efficiently interact with the shell, which
                 typically only understands whitespace-separated options for commands."""

                 def get_list(self):
                     """Return the lines of the string, computing them on first use."""
                     try:
                         lines = self.__list
                     except AttributeError:
                         lines = self.__list = self.split('\n')
                     return lines

                 l = list = property(get_list)

                 def get_spstr(self):
                     """Return the string with newlines turned into spaces (cached)."""
                     try:
                         flat = self.__spstr
                     except AttributeError:
                         flat = self.__spstr = self.replace('\n',' ')
                     return flat

                 s = spstr = property(get_spstr)

                 def get_nlstr(self):
                     """Return the string itself."""
                     return self

                 n = nlstr = property(get_nlstr)

                 def get_paths(self):
                     """Return Path objects for the lines that exist on disk (cached)."""
                     try:
                         found = self.__paths
                     except AttributeError:
                         found = []
                         for candidate in self.split('\n'):
                             if os.path.exists(candidate):
                                 found.append(Path(candidate))
                         self.__paths = found
                     return found

                 p = paths = property(get_paths)
             # FIXME: We need to reimplement type specific displayhook and then add this
             # back as a custom printer. This should also be moved outside utils into the
             # core.
             # def print_lsstring(arg):
             #     """ Prettier (non-repr-like) and more informative printer for LSString """
             #     print "LSString (.p, .n, .l, .s available). Value:"
             #     print arg
             #
             #
             # print_lsstring = result_display.when_type(LSString)(print_lsstring)
             class SList(list):
                 """List derivative with a special access attributes.

                 These are normal lists, but with the special attributes:

                 * .l (or .list) : value as list (the list itself).
                 * .n (or .nlstr): value as a string, joined on newlines.
                 * .s (or .spstr): value as a string, joined on spaces.
                 * .p (or .paths): list of Path objects for the items that name
                   existing filesystem paths.

                 Any values which require transformations are computed only once and
                 cached. The cached values are never refreshed, so they keep
                 reflecting the list contents at the time of first access."""
                 # .l / .list: the list itself, no copy is made.
                 def get_list(self):
                     return self
                 l = list = property(get_list)
                 # .s / .spstr: items joined with a single space, cached on first use.
                 def get_spstr(self):
                     try:
                         return self.__spstr
                     except AttributeError:
                         self.__spstr = ' '.join(self)
                         return self.__spstr
                 s = spstr = property(get_spstr)
                 # .n / .nlstr: items joined with newlines, cached on first use.
                 def get_nlstr(self):
                     try:
                         return self.__nlstr
                     except AttributeError:
                         self.__nlstr = '\n'.join(self)
                         return self.__nlstr
                 n = nlstr = property(get_nlstr)
                 # .p / .paths: Path objects for the items that exist on disk, cached.
                 def get_paths(self):
                     try:
                         return self.__paths
                     except AttributeError:
                         self.__paths = [Path(p) for p in self if os.path.exists(p)]
                         return self.__paths
                 p = paths = property(get_paths)
                 def grep(self, pattern, prune = False, field = None):
                     """ Return all strings matching 'pattern' (a regex or callable)

                     A string pattern is searched case-insensitively with
                     ``re.search``; any other pattern is called as a predicate on
                     each target. If prune is true, return all items NOT matching
                     the pattern.

                     If field is specified, the match must occur in the specified
                     whitespace-separated field. Items that do not have that field
                     are matched against the empty string.

                     Returns a new SList.

                     Examples::

                         a.grep( lambda x: x.startswith('C') )
                         a.grep('Cha.*log', prune=1)
                         a.grep('chm', field=-1)
                     """
                     # Pick the text the predicate is applied to: the whole item, or
                     # one whitespace-separated field of it.
                     def match_target(s):
                         if field is None:
                             return s
                         parts = s.split()
                         try:
                             tgt = parts[field]
                             return tgt
                         except IndexError:
                             return ""
                     if isinstance(pattern, py3compat.string_types):
                         pred = lambda x : re.search(pattern, x, re.IGNORECASE)
                     else:
                         pred = pattern
                     if not prune:
                         return SList([el for el in self if pred(match_target(el))])
                     else:
                         return SList([el for el in self if not pred(match_target(el))])
                 def fields(self, *fields):
                     """ Collect whitespace-separated fields from string list

                     Allows quick awk-like usage of string lists.

                     Example data (in var a, created by 'a = !ls -l')::

                         -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
                         drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

                     * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
                     * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
                       (note the joining by space).
                     * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

                     IndexErrors are ignored; a line for which none of the requested
                     fields exists contributes nothing to the result.

                     Without args, fields() just split()'s the strings and returns a
                     plain list of lists; otherwise an SList is returned.
                     """
                     if len(fields) == 0:
                         return [el.split() for el in self]
                     res = SList()
                     for el in [f.split() for f in self]:
                         lineparts = []
                         for fd in fields:
                             try:
                                 lineparts.append(el[fd])
                             except IndexError:
                                 pass
                         if lineparts:
                             res.append(" ".join(lineparts))
                     return res
                 def sort(self,field= None,  nums = False):
                     """ sort by specified fields (see fields())

                     Unlike ``list.sort`` this does not sort in place: it returns a
                     new, sorted SList.

                     Example::

                         a.sort(1, nums = True)

                     Sorts a by second field, in numerical order (so that 21 > 3)
                     """
                     #decorate, sort, undecorate
                     if field is not None:
                         # key is the SList returned by fields() for this single line
                         dsu = [[SList([line]).fields(field),  line] for line in self]
                     else:
                         dsu = [[line,  line] for line in self]
                     if nums:
                         for i in range(len(dsu)):
                             # Iterating a string key yields characters; iterating an
                             # SList key yields whole field strings, which are kept
                             # only when they consist entirely of digits.
                             numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()])
                             try:
                                 n = int(numstr)
                             except ValueError:
                                 # no digits found: sort as 0
                                 n = 0
                             dsu[i][0] = n
                     # [key, line] pairs compare by key first, then by the line itself
                     dsu.sort()
                     return SList([t[1] for t in dsu])
             # FIXME: We need to reimplement type specific displayhook and then add this
             # back as a custom printer. This should also be moved outside utils into the
             # core.
             # def print_slist(arg):
             #     """ Prettier (non-repr-like) and more informative printer for SList """
             #     print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
             #     if hasattr(arg,  'hideonce') and arg.hideonce:
             #         arg.hideonce = False
             #         return
             #
             #     nlprint(arg)   # This was a nested list printer, now removed.
             #
             # print_slist = result_display.when_type(SList)(print_slist)
             def indent(instr,nspaces=4, ntabs=0, flatten=False):
                 """Indent a string a given number of spaces or tabstops.

                 indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

                 Parameters
                 ----------
                 instr : basestring
                     The string to be indented.
                 nspaces : int (default: 4)
                     The number of spaces to be indented.
                 ntabs : int (default: 0)
                     The number of tabs to be indented.
                 flatten : bool (default: False)
                     Whether to scrub existing indentation.  If True, all lines will be
                     aligned to the same indentation.  If False, existing indentation will
                     be strictly increased.

                 Returns
                 -------
                 str|unicode : string indented by ntabs and nspaces, or None when
                 `instr` is None.
                 """
                 if instr is None:
                     return
                 ind = '\t'*ntabs+' '*nspaces
                 if flatten:
                     pat = re.compile(r'^\s*', re.MULTILINE)
                 else:
                     pat = re.compile(r'^', re.MULTILINE)
                 outstr = re.sub(pat, ind, instr)
                 # '^' also matches after a trailing newline, which leaves a dangling
                 # indent at the very end; strip it.  With an empty indent there is
                 # nothing to strip, and slicing with [:-0] would wrongly return ''.
                 if ind and outstr.endswith(os.linesep+ind):
                     return outstr[:-len(ind)]
                 else:
                     return outstr
             def list_strings(arg):
                 """Return `arg` as a list of strings.

                 A single string is wrapped in a one-element list; any other input
                 is returned unchanged.

                 Examples
                 --------
                 ::

                     In [7]: list_strings('A single string')
                     Out[7]: ['A single string']

                     In [8]: list_strings(['A single string in a list'])
                     Out[8]: ['A single string in a list']

                     In [9]: list_strings(['A','list','of','strings'])
                     Out[9]: ['A', 'list', 'of', 'strings']
                 """
                 if isinstance(arg, py3compat.string_types):
                     return [arg]
                 return arg
             def marquee(txt='',width=78,mark='*'):
                 """Return the input string centered in a 'marquee'.

                 The text is surrounded by one space and an equal run of `mark` on
                 each side. With empty text the result is `mark` repeated and cut to
                 `width` characters.

                 Examples
                 --------
                 ::

                     In [16]: marquee('A test',40)
                     Out[16]: '**************** A test ****************'

                     In [17]: marquee('A test',40,'-')
                     Out[17]: '---------------- A test ----------------'

                     In [18]: marquee('A test',40,' ')
                     Out[18]: '                 A test                 '
                 """
                 if not txt:
                     filler = mark*width
                     return filler[:width]
                 # room left after the text and its two padding spaces, counted in
                 # marks and halved for each side; never negative
                 per_side = max((width-len(txt)-2)//len(mark)//2, 0)
                 border = mark*per_side
                 return '%s %s %s' % (border,txt,border)
             # Leading run of whitespace at the start of a string.
             ini_spaces_re = re.compile(r'^(\s+)')

             def num_ini_spaces(strng):
                 """Return the length of the leading whitespace run in `strng` (0 if none)."""
                 leading = ini_spaces_re.match(strng)
                 return leading.end() if leading else 0
             def format_screen(strng):
                 """Format a string for screen printing.

                 Removes a backslash that sits at the end of a line (a latex-style
                 paragraph-continue marker)."""
                 # Paragraph continue
                 return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
             def dedent(text):
                 """Equivalent of textwrap.dedent that ignores unindented first line.

                 This means it will still dedent strings like:
                 '''foo
                 is a bar
                 '''

                 For use in wrap_paragraphs.
                 """
                 head, newline, tail = text.partition('\n')
                 # Text that opens with a blank line, or that has no second line at
                 # all, is handed to textwrap.dedent as a whole.
                 if not head or not newline:
                     return textwrap.dedent(text)
                 # Otherwise leave the first line alone and dedent only the rest.
                 return head + '\n' + textwrap.dedent(tail)
             def wrap_paragraphs(text, ncols=80):
                 """Wrap multiple paragraphs to fit a specified width.

                 This is equivalent to textwrap.wrap, but with support for multiple
                 paragraphs, as separated by empty lines.

                 Returns
                 -------
                 list of complete paragraphs, wrapped to fill `ncols` columns.
                 """
                 paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
                 indent_re = re.compile(r'\n\s+', re.MULTILINE)
                 stripped = dedent(text).strip()
                 # split() also returns the captured separators, so every other
                 # entry is whitespace and is skipped
                 pieces = paragraph_re.split(stripped)[::2]
                 # presume indentation that survives dedent is meaningful formatting,
                 # so only fill paragraphs whose continuation lines are flush
                 return [
                     textwrap.fill(piece, ncols) if indent_re.search(piece) is None else piece
                     for piece in pieces
                 ]
             def long_substr(data):
                 """Return the longest common substring in a list of strings.

                 A single-element list returns that element; an empty list, or a
                 list whose first string is empty, returns ''.

                 Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
                 """
                 if len(data) == 1:
                     return data[0]
                 best = ''
                 if len(data) > 1 and len(data[0]) > 0:
                     first = data[0]
                     # try every slice of the first string, keeping a candidate only
                     # when it is strictly longer than the best one found so far
                     for start in range(len(first)):
                         for length in range(len(first)-start+1):
                             candidate = first[start:start+length]
                             if length > len(best) and all(candidate in other for other in data):
                                 best = candidate
                 return best
             def strip_email_quotes(text):
                 """Strip leading email quotation characters ('>').

                 Removes any combination of leading '>' interspersed with whitespace that
                 appears *identically* in all lines of the input text.

                 Parameters
                 ----------
                 text : str

                 Examples
                 --------

                 Simple uses::

                     In [2]: strip_email_quotes('> > text')
                     Out[2]: 'text'

                     In [3]: strip_email_quotes('> > text\\n> > more')
                     Out[3]: 'text\\nmore'

                 Note how only the common prefix that appears in all lines is stripped::

                     In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
                     Out[4]: '> text\\n> more\\nmore...'

                 So if any line has no quote marks ('>') , then none are stripped from any
                 of them ::

                     In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
                     Out[5]: '> > text\\n> > more\\nlast different'
                 """
                 lines = text.splitlines()
                 quote_runs = set()
                 for line in lines:
                     found = re.match(r'^(\s*>[ >]*)', line)
                     if found is None:
                         # one unquoted line is enough to leave the text untouched
                         return text
                     quote_runs.add(found.group(1))
                 shared = long_substr(list(quote_runs))
                 if not shared:
                     return text
                 cut = len(shared)
                 return '\n'.join([ln[cut:] for ln in lines])
             def strip_ansi(source):
                 """
                 Return `source` with ANSI escape codes removed.

                 Parameters
                 ----------
                 source : str
                     Text that may contain ``ESC[<digits and semicolons>m`` sequences.
                 """
                 ansi_code = re.compile(r'\033\[(\d|;)+?m')
                 return ansi_code.sub('', source)
             class EvalFormatter(Formatter):
                 """A String Formatter that evaluates simple expressions in its fields.

                 A ``:`` inside a field still introduces a format spec (as in standard
                 string formatting), so slicing has to be written with an explicit
                 ``slice(...)`` object.

                 This is to be used in templating cases, such as the parallel batch
                 script templates, where simple arithmetic on arguments is useful.

                 Examples
                 --------
                 ::

                     In [1]: f = EvalFormatter()
                     In [2]: f.format('{n//4}', n=8)
                     Out[2]: '2'

                     In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
                     Out[3]: 'll'
                 """

                 def get_field(self, name, args, kwargs):
                     # The whole field name is evaluated as an expression, with the
                     # keyword arguments acting as its namespace.
                     return eval(name, kwargs), name
             #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
             # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
             # above, it should be possible to remove FullEvalFormatter.
-            @skip_doctest
             class FullEvalFormatter(Formatter):
                 """A String Formatter that evaluates every field as an expression.

                 Each replacement field is evaluated with the keyword arguments as its
                 namespace.

                 Note that this version allows slicing using [1:2], so you cannot specify
                 a format string. Use :class:`EvalFormatter` to permit format strings.

                 Examples
                 --------
                 ::

                     In [1]: f = FullEvalFormatter()
                     In [2]: f.format('{n//4}', n=8)
                     Out[2]: '2'

                     In [3]: f.format('{list(range(5))[2:4]}')
                     Out[3]: '[2, 3]'

                     In [4]: f.format('{3*2}')
                     Out[4]: '6'
                 """

                 # Adapted from Formatter._vformat: fields are eval'ed, and whatever the
                 # parser took for a format spec is put back into the expression so that
                 # slices keep working.
                 def vformat(self, format_string, args, kwargs):
                     pieces = []
                     for literal, expr, spec, conv in self.parse(format_string):
                         # Literal text goes to the output unchanged.
                         if literal:
                             pieces.append(literal)
                         # No field after this literal: nothing more to do.
                         if expr is None:
                             continue
                         if spec:
                             # The parser split the field on ':'; re-join it so that
                             # the colon stays part of the expression.
                             expr = ':'.join([expr, spec])
                         # Evaluate the field, apply any conversion, then format the
                         # result with an empty format spec.
                         value = self.convert_field(eval(expr, kwargs), conv)
                         pieces.append(self.format_field(value, ''))
                     return u''.join(py3compat.cast_unicode(s) for s in pieces)
-            @skip_doctest
             class DollarFormatter(FullEvalFormatter):
                 """Formatter allowing Itpl style $foo replacement, for names and attribute
                 access only. Standard {foo} replacement also works, and allows full
                 evaluation of its arguments.

                 A doubled dollar sign (``$$foo``) produces the literal text ``$foo``.

                 Examples
                 --------
                 ::

                     In [1]: f = DollarFormatter()
                     In [2]: f.format('{n//4}', n=8)
                     Out[2]: '2'

                     In [3]: f.format('23 * 76 is $result', result=23*76)
                     Out[3]: '23 * 76 is 1748'

                     In [4]: f.format('$a or {b}', a=1, b=2)
                     Out[4]: '1 or 2'
                 """
                 # Raw string: the pattern holds regex escapes that are not valid string
                 # escapes.  DOTALL lets the leading lazy group span newlines, so every
                 # match starts where the previous one ended and no literal text on
                 # earlier lines is skipped by the search.
                 _dollar_pattern = re.compile(r"(.*?)\$(\$?[\w\.]+)", re.DOTALL)

                 def parse(self, fmt_string):
                     """Yield ``(literal_text, field_name, format_spec, conversion)`` tuples.

                     Wraps ``Formatter.parse`` and additionally splits each literal chunk
                     on ``$name`` patterns, yielding one extra tuple per ``$name`` found.
                     """
                     for literal_txt, field_name, format_spec, conversion \
                                 in Formatter.parse(self, fmt_string):
                         # Find $foo patterns in the literal text.
                         continue_from = 0
                         txt = ""
                         for m in self._dollar_pattern.finditer(literal_txt):
                             new_txt, new_field = m.group(1, 2)
                             # $$foo --> $foo
                             if new_field.startswith("$"):
                                 txt += new_txt + new_field
                             else:
                                 yield (txt + new_txt, new_field, "", None)
                                 txt = ""
                             continue_from = m.end()
                         # Re-yield the {foo} style pattern
                         yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
             #-----------------------------------------------------------------------------
             # Utilities to columnize a list of strings
             #-----------------------------------------------------------------------------
             def _col_chunks(l, max_rows, row_first=False):
                 """Yield the columns of `l`, each holding at most `max_rows` items.

                 With `row_first` false, columns are consecutive slices of `l`; with
                 `row_first` true, column ``i`` takes every ``ncols``-th item starting at
                 index ``i``.
                 """
                 if not row_first:
                     for start in py3compat.xrange(0, len(l), max_rows):
                         yield l[start:(start + max_rows)]
                 else:
                     # Number of columns needed: ceil(len(l) / max_rows).
                     ncols = len(l) // max_rows
                     if len(l) % max_rows > 0:
                         ncols += 1
                     for col in py3compat.xrange(ncols):
                         yield [l[idx] for idx in py3compat.xrange(col, len(l), ncols)]
             def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
                 """Calculate optimal info to columnize a list of string"""
                 for max_rows in range(1, len(rlist) + 1):
                     col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
                     sumlength = sum(col_widths)
                     ncols = len(col_widths)
                     if sumlength + separator_size * (ncols - 1) <= displaywidth:
                         break
                 return {'num_columns': ncols,
                         'optimal_separator_width': (displaywidth - sumlength) / (ncols - 1) if (ncols - 1) else 0,
                         'max_rows': max_rows,
                         'column_widths': col_widths
                         }
             def _get_or_default(mylist, i, default=None):
                 """return list item number, or default if don't exist"""
                 if i >= len(mylist):
                     return default
                 else :
                     return mylist[i]
             def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :
                 """Returns a nested list, and info to columnize items

                 Parameters
                 ----------
                 items
                     list of strings to columnize
                 row_first : (default False)
                     Whether to compute columns for a row-first matrix instead of
                     column-first (default).
                 empty : (default None)
                     default value to fill list if needed
                 separator_size : int (default=2)
                     How many characters will be used as a separation between each column.
                 displaywidth : int (default=80)
                     The width of the area into which the columns should fit

                 Returns
                 -------
                 strings_matrix
                     nested list of strings, the outermost list contains as many lists as
                     rows, the innermost lists each have as many elements as columns. If
                     the total number of elements in `items` does not equal the product of
                     rows*columns, the missing cells are filled with `empty`.
                 dict_info
                     some info to make columnize easier:

                     num_columns
                       number of columns
                     max_rows
                       maximum number of rows (final number may be less)
                     column_widths
                       list of widths of each column
                     optimal_separator_width
                       best separator width between columns

                 Examples
                 --------
                 ::

                     In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
                        ...: compute_item_matrix(l, displaywidth=12)
                     Out[1]:
                         ([['aaa', 'f', 'k'],
                         ['b', 'g', 'l'],
                         ['cc', 'h', None],
                         ['d', 'i', None],
                         ['eeeee', 'j', None]],
                         {'num_columns': 3,
                         'column_widths': [5, 1, 1],
                         'optimal_separator_width': 2,
                         'max_rows': 5})
                 """
                 widths = [len(item) for item in items]
                 info = _find_optimal(widths, row_first, *args, **kwargs)
                 nrow = info['max_rows']
                 ncol = info['num_columns']

                 # Position in `items` of the cell at (row, col) for the chosen layout.
                 if row_first:
                     def flat_index(row, col):
                         return row * ncol + col
                 else:
                     def flat_index(row, col):
                         return col * nrow + row

                 matrix = []
                 for row in range(nrow):
                     matrix.append([_get_or_default(items, flat_index(row, col), default=empty)
                                    for col in range(ncol)])
                 return (matrix, info)
             def columnize(items, row_first=False, separator='  ', displaywidth=80, spread=False):
                 """ Transform a list of strings into a single string with columns.

                 Parameters
                 ----------
                 items : sequence of strings
                     The strings to process.
                 row_first : (default False)
                     Whether to compute columns for a row-first matrix instead of
                     column-first (default).
                 separator : str, optional [default is two spaces]
                     The string that separates columns.
                 displaywidth : int, optional [default is 80]
                     Width of the display in number of characters.
                 spread : bool, optional [default is False]
                     If true, `separator` is padded with spaces up to the
                     ``optimal_separator_width`` reported by `compute_item_matrix`.

                 Returns
                 -------
                 The formatted string.
                 """
                 if not items:
                     return '\n'
                 matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
                 if spread:
                     separator = separator.ljust(int(info['optimal_separator_width']))
                 widths = info['column_widths']
                 rendered = []
                 for row in matrix:
                     # Drop only the ``None`` padding that fills incomplete rows.  An
                     # empty string is a real item and must keep its cell, otherwise
                     # the items after it shift into the wrong columns.
                     cells = [cell for cell in row if cell is not None]
                     rendered.append(separator.join([cell.ljust(w, ' ') for cell, w in zip(cells, widths)]))
                 return '\n'.join(rendered)+'\n'
             def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
                 """
                 Join items into a natural-language enumeration.

                 All items but the last are joined with `sep`; the last one is attached
                 with `last_sep`.  When `wrap_item_with` is non-empty, it is put on both
                 sides of every item first.

                 >>> get_text_list(['a', 'b', 'c', 'd'])
                 'a, b, c and d'
                 >>> get_text_list(['a', 'b', 'c'], ' or ')
                 'a, b or c'
                 >>> get_text_list(['a', 'b', 'c'], ', ')
                 'a, b, c'
                 >>> get_text_list(['a', 'b'], ' or ')
                 'a or b'
                 >>> get_text_list(['a'])
                 'a'
                 >>> get_text_list([])
                 ''
                 >>> get_text_list(['a', 'b'], wrap_item_with="`")
                 '`a` and `b`'
                 >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
                 'a + b + c = d'
                 """
                 entries = list_
                 if len(entries) == 0:
                     return ''
                 if wrap_item_with:
                     entries = ['%s%s%s' % (wrap_item_with, entry, wrap_item_with)
                                for entry in entries]
                 if len(entries) == 1:
                     return entries[0]
                 leading = sep.join(entries[:-1])
                 return '%s%s%s' % (leading, last_sep, entries[-1])