verify: use some intermediate variables instead of a multi-liner...
Author: marmoute | changeset r48146:fde1df74 | branch: default
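The change itself is small: when _checkentry() finds an unknown second parent, the error message used to be passed to self._err() as a call spread over five lines; it is now built in an intermediate msg variable first, matching the pattern already used for the first parent a few lines above. A minimal before/after sketch of the hunk below:

    # before: one reporting call spread over five lines
    self._err(
        lr,
        _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
        f,
    )

    # after: an intermediate variable keeps the reporting call on one line
    msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
    self._err(lr, msg, f)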
@@ -1,622 +1,619 @@
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import short
13 from .node import short
14 from .utils import stringutil
14 from .utils import stringutil
15
15
16 from . import (
16 from . import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
23 VERIFY_DEFAULT = 0
23 VERIFY_DEFAULT = 0
24 VERIFY_FULL = 1
24 VERIFY_FULL = 1
25
25
26
26
27 def verify(repo, level=None):
27 def verify(repo, level=None):
28 with repo.lock():
28 with repo.lock():
29 v = verifier(repo, level)
29 v = verifier(repo, level)
30 return v.verify()
30 return v.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
41 class verifier(object):
41 class verifier(object):
42 def __init__(self, repo, level=None):
42 def __init__(self, repo, level=None):
43 self.repo = repo.unfiltered()
43 self.repo = repo.unfiltered()
44 self.ui = repo.ui
44 self.ui = repo.ui
45 self.match = repo.narrowmatch()
45 self.match = repo.narrowmatch()
46 if level is None:
46 if level is None:
47 level = VERIFY_DEFAULT
47 level = VERIFY_DEFAULT
48 self._level = level
48 self._level = level
49 self.badrevs = set()
49 self.badrevs = set()
50 self.errors = 0
50 self.errors = 0
51 self.warnings = 0
51 self.warnings = 0
52 self.havecl = len(repo.changelog) > 0
52 self.havecl = len(repo.changelog) > 0
53 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
53 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
54 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
54 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
55 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
55 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
56 self.refersmf = False
56 self.refersmf = False
57 self.fncachewarned = False
57 self.fncachewarned = False
58 # developer config: verify.skipflags
58 # developer config: verify.skipflags
59 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
59 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
60 self.warnorphanstorefiles = True
60 self.warnorphanstorefiles = True
61
61
62 def _warn(self, msg):
62 def _warn(self, msg):
63 """record a "warning" level issue"""
63 """record a "warning" level issue"""
64 self.ui.warn(msg + b"\n")
64 self.ui.warn(msg + b"\n")
65 self.warnings += 1
65 self.warnings += 1
66
66
67 def _err(self, linkrev, msg, filename=None):
67 def _err(self, linkrev, msg, filename=None):
68 """record a "error" level issue"""
68 """record a "error" level issue"""
69 if linkrev is not None:
69 if linkrev is not None:
70 self.badrevs.add(linkrev)
70 self.badrevs.add(linkrev)
71 linkrev = b"%d" % linkrev
71 linkrev = b"%d" % linkrev
72 else:
72 else:
73 linkrev = b'?'
73 linkrev = b'?'
74 msg = b"%s: %s" % (linkrev, msg)
74 msg = b"%s: %s" % (linkrev, msg)
75 if filename:
75 if filename:
76 msg = b"%s@%s" % (filename, msg)
76 msg = b"%s@%s" % (filename, msg)
77 self.ui.warn(b" " + msg + b"\n")
77 self.ui.warn(b" " + msg + b"\n")
78 self.errors += 1
78 self.errors += 1
79
79
80 def _exc(self, linkrev, msg, inst, filename=None):
80 def _exc(self, linkrev, msg, inst, filename=None):
81 """record exception raised during the verify process"""
81 """record exception raised during the verify process"""
82 fmsg = stringutil.forcebytestr(inst)
82 fmsg = stringutil.forcebytestr(inst)
83 if not fmsg:
83 if not fmsg:
84 fmsg = pycompat.byterepr(inst)
84 fmsg = pycompat.byterepr(inst)
85 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
85 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
86
86
87 def _checkrevlog(self, obj, name, linkrev):
87 def _checkrevlog(self, obj, name, linkrev):
88 """verify high level property of a revlog
88 """verify high level property of a revlog
89
89
90 - revlog is present,
90 - revlog is present,
91 - revlog is non-empty,
91 - revlog is non-empty,
92 - sizes (index and data) are correct,
92 - sizes (index and data) are correct,
93 - revlog's format version is correct.
93 - revlog's format version is correct.
94 """
94 """
95 if not len(obj) and (self.havecl or self.havemf):
95 if not len(obj) and (self.havecl or self.havemf):
96 self._err(linkrev, _(b"empty or missing %s") % name)
96 self._err(linkrev, _(b"empty or missing %s") % name)
97 return
97 return
98
98
99 d = obj.checksize()
99 d = obj.checksize()
100 if d[0]:
100 if d[0]:
101 self._err(None, _(b"data length off by %d bytes") % d[0], name)
101 self._err(None, _(b"data length off by %d bytes") % d[0], name)
102 if d[1]:
102 if d[1]:
103 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
103 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
104
104
105 if obj._format_version != revlog.REVLOGV0:
105 if obj._format_version != revlog.REVLOGV0:
106 if not self.revlogv1:
106 if not self.revlogv1:
107 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
107 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
108 elif self.revlogv1:
108 elif self.revlogv1:
109 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
109 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
110
110
111 def _checkentry(self, obj, i, node, seen, linkrevs, f):
111 def _checkentry(self, obj, i, node, seen, linkrevs, f):
112 """verify a single revlog entry
112 """verify a single revlog entry
113
113
114 arguments are:
114 arguments are:
115 - obj: the source revlog
115 - obj: the source revlog
116 - i: the revision number
116 - i: the revision number
117 - node: the revision node id
117 - node: the revision node id
118 - seen: nodes previously seen for this revlog
118 - seen: nodes previously seen for this revlog
119 - linkrevs: [changelog-revisions] introducing "node"
119 - linkrevs: [changelog-revisions] introducing "node"
120 - f: string label ("changelog", "manifest", or filename)
120 - f: string label ("changelog", "manifest", or filename)
121
121
122 Performs the following checks:
122 Performs the following checks:
123 - linkrev points to an existing changelog revision,
123 - linkrev points to an existing changelog revision,
124 - linkrev points to a changelog revision that introduces this revision,
124 - linkrev points to a changelog revision that introduces this revision,
125 - linkrev points to the lowest of these changesets,
125 - linkrev points to the lowest of these changesets,
126 - both parents exist in the revlog,
126 - both parents exist in the revlog,
127 - the revision is not duplicated.
127 - the revision is not duplicated.
128
128
129 Return the linkrev of the revision (or None for changelog's revisions).
129 Return the linkrev of the revision (or None for changelog's revisions).
130 """
130 """
131 lr = obj.linkrev(obj.rev(node))
131 lr = obj.linkrev(obj.rev(node))
132 if lr < 0 or (self.havecl and lr not in linkrevs):
132 if lr < 0 or (self.havecl and lr not in linkrevs):
133 if lr < 0 or lr >= len(self.repo.changelog):
133 if lr < 0 or lr >= len(self.repo.changelog):
134 msg = _(b"rev %d points to nonexistent changeset %d")
134 msg = _(b"rev %d points to nonexistent changeset %d")
135 else:
135 else:
136 msg = _(b"rev %d points to unexpected changeset %d")
136 msg = _(b"rev %d points to unexpected changeset %d")
137 self._err(None, msg % (i, lr), f)
137 self._err(None, msg % (i, lr), f)
138 if linkrevs:
138 if linkrevs:
139 if f and len(linkrevs) > 1:
139 if f and len(linkrevs) > 1:
140 try:
140 try:
141 # attempt to filter down to real linkrevs
141 # attempt to filter down to real linkrevs
142 linkrevs = []
142 linkrevs = []
143 for lr in linkrevs:
143 for lr in linkrevs:
144 if self.lrugetctx(lr)[f].filenode() == node:
144 if self.lrugetctx(lr)[f].filenode() == node:
145 linkrevs.append(lr)
145 linkrevs.append(lr)
146 except Exception:
146 except Exception:
147 pass
147 pass
148 msg = _(b" (expected %s)")
148 msg = _(b" (expected %s)")
149 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
149 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
150 self._warn(msg)
150 self._warn(msg)
151 lr = None # can't be trusted
151 lr = None # can't be trusted
152
152
153 try:
153 try:
154 p1, p2 = obj.parents(node)
154 p1, p2 = obj.parents(node)
155 if p1 not in seen and p1 != self.repo.nullid:
155 if p1 not in seen and p1 != self.repo.nullid:
156 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
156 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
157 self._err(lr, msg, f)
157 self._err(lr, msg, f)
158 if p2 not in seen and p2 != self.repo.nullid:
158 if p2 not in seen and p2 != self.repo.nullid:
159 - self._err(
160 -     lr,
161 -     _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
162 -     f,
163 - )
159 + msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
160 + self._err(lr, msg, f)
164 except Exception as inst:
161 except Exception as inst:
165 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
162 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
166
163
167 if node in seen:
164 if node in seen:
168 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
165 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
169 seen[node] = i
166 seen[node] = i
170 return lr
167 return lr
171
168
172 def verify(self):
169 def verify(self):
173 """verify the content of the Mercurial repository
170 """verify the content of the Mercurial repository
174
171
175 This method run all verifications, displaying issues as they are found.
172 This method run all verifications, displaying issues as they are found.
176
173
177 return 1 if any error have been encountered, 0 otherwise."""
174 return 1 if any error have been encountered, 0 otherwise."""
178 # initial validation and generic report
175 # initial validation and generic report
179 repo = self.repo
176 repo = self.repo
180 ui = repo.ui
177 ui = repo.ui
181 if not repo.url().startswith(b'file:'):
178 if not repo.url().startswith(b'file:'):
182 raise error.Abort(_(b"cannot verify bundle or remote repos"))
179 raise error.Abort(_(b"cannot verify bundle or remote repos"))
183
180
184 if os.path.exists(repo.sjoin(b"journal")):
181 if os.path.exists(repo.sjoin(b"journal")):
185 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
182 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
186
183
187 if ui.verbose or not self.revlogv1:
184 if ui.verbose or not self.revlogv1:
188 ui.status(
185 ui.status(
189 _(b"repository uses revlog format %d\n")
186 _(b"repository uses revlog format %d\n")
190 % (self.revlogv1 and 1 or 0)
187 % (self.revlogv1 and 1 or 0)
191 )
188 )
192
189
193 # data verification
190 # data verification
194 mflinkrevs, filelinkrevs = self._verifychangelog()
191 mflinkrevs, filelinkrevs = self._verifychangelog()
195 filenodes = self._verifymanifest(mflinkrevs)
192 filenodes = self._verifymanifest(mflinkrevs)
196 del mflinkrevs
193 del mflinkrevs
197 self._crosscheckfiles(filelinkrevs, filenodes)
194 self._crosscheckfiles(filelinkrevs, filenodes)
198 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
195 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
199
196
200 # final report
197 # final report
201 ui.status(
198 ui.status(
202 _(b"checked %d changesets with %d changes to %d files\n")
199 _(b"checked %d changesets with %d changes to %d files\n")
203 % (len(repo.changelog), filerevisions, totalfiles)
200 % (len(repo.changelog), filerevisions, totalfiles)
204 )
201 )
205 if self.warnings:
202 if self.warnings:
206 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
203 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
207 if self.fncachewarned:
204 if self.fncachewarned:
208 ui.warn(
205 ui.warn(
209 _(
206 _(
210 b'hint: run "hg debugrebuildfncache" to recover from '
207 b'hint: run "hg debugrebuildfncache" to recover from '
211 b'corrupt fncache\n'
208 b'corrupt fncache\n'
212 )
209 )
213 )
210 )
214 if self.errors:
211 if self.errors:
215 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
212 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
216 if self.badrevs:
213 if self.badrevs:
217 ui.warn(
214 ui.warn(
218 _(b"(first damaged changeset appears to be %d)\n")
215 _(b"(first damaged changeset appears to be %d)\n")
219 % min(self.badrevs)
216 % min(self.badrevs)
220 )
217 )
221 return 1
218 return 1
222 return 0
219 return 0
223
220
224 def _verifychangelog(self):
221 def _verifychangelog(self):
225 """verify the changelog of a repository
222 """verify the changelog of a repository
226
223
227 The following checks are performed:
224 The following checks are performed:
228 - all of `_checkrevlog` checks,
225 - all of `_checkrevlog` checks,
229 - all of `_checkentry` checks (for each revisions),
226 - all of `_checkentry` checks (for each revisions),
230 - each revision can be read.
227 - each revision can be read.
231
228
232 The function returns some of the data observed in the changesets as a
229 The function returns some of the data observed in the changesets as a
233 (mflinkrevs, filelinkrevs) tuples:
230 (mflinkrevs, filelinkrevs) tuples:
234 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
231 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
235 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
232 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
236
233
237 If a matcher was specified, filelinkrevs will only contains matched
234 If a matcher was specified, filelinkrevs will only contains matched
238 files.
235 files.
239 """
236 """
240 ui = self.ui
237 ui = self.ui
241 repo = self.repo
238 repo = self.repo
242 match = self.match
239 match = self.match
243 cl = repo.changelog
240 cl = repo.changelog
244
241
245 ui.status(_(b"checking changesets\n"))
242 ui.status(_(b"checking changesets\n"))
246 mflinkrevs = {}
243 mflinkrevs = {}
247 filelinkrevs = {}
244 filelinkrevs = {}
248 seen = {}
245 seen = {}
249 self._checkrevlog(cl, b"changelog", 0)
246 self._checkrevlog(cl, b"changelog", 0)
250 progress = ui.makeprogress(
247 progress = ui.makeprogress(
251 _(b'checking'), unit=_(b'changesets'), total=len(repo)
248 _(b'checking'), unit=_(b'changesets'), total=len(repo)
252 )
249 )
253 for i in repo:
250 for i in repo:
254 progress.update(i)
251 progress.update(i)
255 n = cl.node(i)
252 n = cl.node(i)
256 self._checkentry(cl, i, n, seen, [i], b"changelog")
253 self._checkentry(cl, i, n, seen, [i], b"changelog")
257
254
258 try:
255 try:
259 changes = cl.read(n)
256 changes = cl.read(n)
260 if changes[0] != self.repo.nullid:
257 if changes[0] != self.repo.nullid:
261 mflinkrevs.setdefault(changes[0], []).append(i)
258 mflinkrevs.setdefault(changes[0], []).append(i)
262 self.refersmf = True
259 self.refersmf = True
263 for f in changes[3]:
260 for f in changes[3]:
264 if match(f):
261 if match(f):
265 filelinkrevs.setdefault(_normpath(f), []).append(i)
262 filelinkrevs.setdefault(_normpath(f), []).append(i)
266 except Exception as inst:
263 except Exception as inst:
267 self.refersmf = True
264 self.refersmf = True
268 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
265 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
269 progress.complete()
266 progress.complete()
270 return mflinkrevs, filelinkrevs
267 return mflinkrevs, filelinkrevs
271
268
272 def _verifymanifest(
269 def _verifymanifest(
273 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
270 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
274 ):
271 ):
275 """verify the manifestlog content
272 """verify the manifestlog content
276
273
277 Inputs:
274 Inputs:
278 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
275 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
279 - dir: a subdirectory to check (for tree manifest repo)
276 - dir: a subdirectory to check (for tree manifest repo)
280 - storefiles: set of currently "orphan" files.
277 - storefiles: set of currently "orphan" files.
281 - subdirprogress: a progress object
278 - subdirprogress: a progress object
282
279
283 This function checks:
280 This function checks:
284 * all of `_checkrevlog` checks (for all manifest related revlogs)
281 * all of `_checkrevlog` checks (for all manifest related revlogs)
285 * all of `_checkentry` checks (for all manifest related revisions)
282 * all of `_checkentry` checks (for all manifest related revisions)
286 * nodes for subdirectory exists in the sub-directory manifest
283 * nodes for subdirectory exists in the sub-directory manifest
287 * each manifest entries have a file path
284 * each manifest entries have a file path
288 * each manifest node refered in mflinkrevs exist in the manifest log
285 * each manifest node refered in mflinkrevs exist in the manifest log
289
286
290 If tree manifest is in use and a matchers is specified, only the
287 If tree manifest is in use and a matchers is specified, only the
291 sub-directories matching it will be verified.
288 sub-directories matching it will be verified.
292
289
293 return a two level mapping:
290 return a two level mapping:
294 {"path" -> { filenode -> changelog-revision}}
291 {"path" -> { filenode -> changelog-revision}}
295
292
296 This mapping primarily contains entries for every files in the
293 This mapping primarily contains entries for every files in the
297 repository. In addition, when tree-manifest is used, it also contains
294 repository. In addition, when tree-manifest is used, it also contains
298 sub-directory entries.
295 sub-directory entries.
299
296
300 If a matcher is provided, only matching paths will be included.
297 If a matcher is provided, only matching paths will be included.
301 """
298 """
302 repo = self.repo
299 repo = self.repo
303 ui = self.ui
300 ui = self.ui
304 match = self.match
301 match = self.match
305 mfl = self.repo.manifestlog
302 mfl = self.repo.manifestlog
306 mf = mfl.getstorage(dir)
303 mf = mfl.getstorage(dir)
307
304
308 if not dir:
305 if not dir:
309 self.ui.status(_(b"checking manifests\n"))
306 self.ui.status(_(b"checking manifests\n"))
310
307
311 filenodes = {}
308 filenodes = {}
312 subdirnodes = {}
309 subdirnodes = {}
313 seen = {}
310 seen = {}
314 label = b"manifest"
311 label = b"manifest"
315 if dir:
312 if dir:
316 label = dir
313 label = dir
317 revlogfiles = mf.files()
314 revlogfiles = mf.files()
318 storefiles.difference_update(revlogfiles)
315 storefiles.difference_update(revlogfiles)
319 if subdirprogress: # should be true since we're in a subdirectory
316 if subdirprogress: # should be true since we're in a subdirectory
320 subdirprogress.increment()
317 subdirprogress.increment()
321 if self.refersmf:
318 if self.refersmf:
322 # Do not check manifest if there are only changelog entries with
319 # Do not check manifest if there are only changelog entries with
323 # null manifests.
320 # null manifests.
324 self._checkrevlog(mf._revlog, label, 0)
321 self._checkrevlog(mf._revlog, label, 0)
325 progress = ui.makeprogress(
322 progress = ui.makeprogress(
326 _(b'checking'), unit=_(b'manifests'), total=len(mf)
323 _(b'checking'), unit=_(b'manifests'), total=len(mf)
327 )
324 )
328 for i in mf:
325 for i in mf:
329 if not dir:
326 if not dir:
330 progress.update(i)
327 progress.update(i)
331 n = mf.node(i)
328 n = mf.node(i)
332 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
329 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
333 if n in mflinkrevs:
330 if n in mflinkrevs:
334 del mflinkrevs[n]
331 del mflinkrevs[n]
335 elif dir:
332 elif dir:
336 self._err(
333 self._err(
337 lr,
334 lr,
338 _(b"%s not in parent-directory manifest") % short(n),
335 _(b"%s not in parent-directory manifest") % short(n),
339 label,
336 label,
340 )
337 )
341 else:
338 else:
342 self._err(lr, _(b"%s not in changesets") % short(n), label)
339 self._err(lr, _(b"%s not in changesets") % short(n), label)
343
340
344 try:
341 try:
345 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
342 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
346 for f, fn, fl in mfdelta.iterentries():
343 for f, fn, fl in mfdelta.iterentries():
347 if not f:
344 if not f:
348 self._err(lr, _(b"entry without name in manifest"))
345 self._err(lr, _(b"entry without name in manifest"))
349 elif f == b"/dev/null": # ignore this in very old repos
346 elif f == b"/dev/null": # ignore this in very old repos
350 continue
347 continue
351 fullpath = dir + _normpath(f)
348 fullpath = dir + _normpath(f)
352 if fl == b't':
349 if fl == b't':
353 if not match.visitdir(fullpath):
350 if not match.visitdir(fullpath):
354 continue
351 continue
355 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
352 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
356 fn, []
353 fn, []
357 ).append(lr)
354 ).append(lr)
358 else:
355 else:
359 if not match(fullpath):
356 if not match(fullpath):
360 continue
357 continue
361 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
358 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
362 except Exception as inst:
359 except Exception as inst:
363 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
360 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
364 if self._level >= VERIFY_FULL:
361 if self._level >= VERIFY_FULL:
365 try:
362 try:
366 # Various issues can affect manifest. So we read each full
363 # Various issues can affect manifest. So we read each full
367 # text from storage. This triggers the checks from the core
364 # text from storage. This triggers the checks from the core
368 # code (eg: hash verification, filename are ordered, etc.)
365 # code (eg: hash verification, filename are ordered, etc.)
369 mfdelta = mfl.get(dir, n).read()
366 mfdelta = mfl.get(dir, n).read()
370 except Exception as inst:
367 except Exception as inst:
371 self._exc(
368 self._exc(
372 lr,
369 lr,
373 _(b"reading full manifest %s") % short(n),
370 _(b"reading full manifest %s") % short(n),
374 inst,
371 inst,
375 label,
372 label,
376 )
373 )
377
374
378 if not dir:
375 if not dir:
379 progress.complete()
376 progress.complete()
380
377
381 if self.havemf:
378 if self.havemf:
382 # since we delete entry in `mflinkrevs` during iteration, any
379 # since we delete entry in `mflinkrevs` during iteration, any
383 # remaining entries are "missing". We need to issue errors for them.
380 # remaining entries are "missing". We need to issue errors for them.
384 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
381 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
385 for c, m in sorted(changesetpairs):
382 for c, m in sorted(changesetpairs):
386 if dir:
383 if dir:
387 self._err(
384 self._err(
388 c,
385 c,
389 _(
386 _(
390 b"parent-directory manifest refers to unknown"
387 b"parent-directory manifest refers to unknown"
391 b" revision %s"
388 b" revision %s"
392 )
389 )
393 % short(m),
390 % short(m),
394 label,
391 label,
395 )
392 )
396 else:
393 else:
397 self._err(
394 self._err(
398 c,
395 c,
399 _(b"changeset refers to unknown revision %s")
396 _(b"changeset refers to unknown revision %s")
400 % short(m),
397 % short(m),
401 label,
398 label,
402 )
399 )
403
400
404 if not dir and subdirnodes:
401 if not dir and subdirnodes:
405 self.ui.status(_(b"checking directory manifests\n"))
402 self.ui.status(_(b"checking directory manifests\n"))
406 storefiles = set()
403 storefiles = set()
407 subdirs = set()
404 subdirs = set()
408 revlogv1 = self.revlogv1
405 revlogv1 = self.revlogv1
409 for t, f, f2, size in repo.store.datafiles():
406 for t, f, f2, size in repo.store.datafiles():
410 if not f:
407 if not f:
411 self._err(None, _(b"cannot decode filename '%s'") % f2)
408 self._err(None, _(b"cannot decode filename '%s'") % f2)
412 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
409 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
413 storefiles.add(_normpath(f))
410 storefiles.add(_normpath(f))
414 subdirs.add(os.path.dirname(f))
411 subdirs.add(os.path.dirname(f))
415 subdirprogress = ui.makeprogress(
412 subdirprogress = ui.makeprogress(
416 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
413 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
417 )
414 )
418
415
419 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
416 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
420 subdirfilenodes = self._verifymanifest(
417 subdirfilenodes = self._verifymanifest(
421 linkrevs, subdir, storefiles, subdirprogress
418 linkrevs, subdir, storefiles, subdirprogress
422 )
419 )
423 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
420 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
424 filenodes.setdefault(f, {}).update(onefilenodes)
421 filenodes.setdefault(f, {}).update(onefilenodes)
425
422
426 if not dir and subdirnodes:
423 if not dir and subdirnodes:
427 assert subdirprogress is not None # help pytype
424 assert subdirprogress is not None # help pytype
428 subdirprogress.complete()
425 subdirprogress.complete()
429 if self.warnorphanstorefiles:
426 if self.warnorphanstorefiles:
430 for f in sorted(storefiles):
427 for f in sorted(storefiles):
431 self._warn(_(b"warning: orphan data file '%s'") % f)
428 self._warn(_(b"warning: orphan data file '%s'") % f)
432
429
433 return filenodes
430 return filenodes
434
431
435 def _crosscheckfiles(self, filelinkrevs, filenodes):
432 def _crosscheckfiles(self, filelinkrevs, filenodes):
436 repo = self.repo
433 repo = self.repo
437 ui = self.ui
434 ui = self.ui
438 ui.status(_(b"crosschecking files in changesets and manifests\n"))
435 ui.status(_(b"crosschecking files in changesets and manifests\n"))
439
436
440 total = len(filelinkrevs) + len(filenodes)
437 total = len(filelinkrevs) + len(filenodes)
441 progress = ui.makeprogress(
438 progress = ui.makeprogress(
442 _(b'crosschecking'), unit=_(b'files'), total=total
439 _(b'crosschecking'), unit=_(b'files'), total=total
443 )
440 )
444 if self.havemf:
441 if self.havemf:
445 for f in sorted(filelinkrevs):
442 for f in sorted(filelinkrevs):
446 progress.increment()
443 progress.increment()
447 if f not in filenodes:
444 if f not in filenodes:
448 lr = filelinkrevs[f][0]
445 lr = filelinkrevs[f][0]
449 self._err(lr, _(b"in changeset but not in manifest"), f)
446 self._err(lr, _(b"in changeset but not in manifest"), f)
450
447
451 if self.havecl:
448 if self.havecl:
452 for f in sorted(filenodes):
449 for f in sorted(filenodes):
453 progress.increment()
450 progress.increment()
454 if f not in filelinkrevs:
451 if f not in filelinkrevs:
455 try:
452 try:
456 fl = repo.file(f)
453 fl = repo.file(f)
457 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
454 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
458 except Exception:
455 except Exception:
459 lr = None
456 lr = None
460 self._err(lr, _(b"in manifest but not in changeset"), f)
457 self._err(lr, _(b"in manifest but not in changeset"), f)
461
458
462 progress.complete()
459 progress.complete()
463
460
464 def _verifyfiles(self, filenodes, filelinkrevs):
461 def _verifyfiles(self, filenodes, filelinkrevs):
465 repo = self.repo
462 repo = self.repo
466 ui = self.ui
463 ui = self.ui
467 lrugetctx = self.lrugetctx
464 lrugetctx = self.lrugetctx
468 revlogv1 = self.revlogv1
465 revlogv1 = self.revlogv1
469 havemf = self.havemf
466 havemf = self.havemf
470 ui.status(_(b"checking files\n"))
467 ui.status(_(b"checking files\n"))
471
468
472 storefiles = set()
469 storefiles = set()
473 for rl_type, f, f2, size in repo.store.datafiles():
470 for rl_type, f, f2, size in repo.store.datafiles():
474 if not f:
471 if not f:
475 self._err(None, _(b"cannot decode filename '%s'") % f2)
472 self._err(None, _(b"cannot decode filename '%s'") % f2)
476 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
473 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
477 storefiles.add(_normpath(f))
474 storefiles.add(_normpath(f))
478
475
479 state = {
476 state = {
480 # TODO this assumes revlog storage for changelog.
477 # TODO this assumes revlog storage for changelog.
481 b'expectedversion': self.repo.changelog._format_version,
478 b'expectedversion': self.repo.changelog._format_version,
482 b'skipflags': self.skipflags,
479 b'skipflags': self.skipflags,
483 # experimental config: censor.policy
480 # experimental config: censor.policy
484 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
481 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
485 }
482 }
486
483
487 files = sorted(set(filenodes) | set(filelinkrevs))
484 files = sorted(set(filenodes) | set(filelinkrevs))
488 revisions = 0
485 revisions = 0
489 progress = ui.makeprogress(
486 progress = ui.makeprogress(
490 _(b'checking'), unit=_(b'files'), total=len(files)
487 _(b'checking'), unit=_(b'files'), total=len(files)
491 )
488 )
492 for i, f in enumerate(files):
489 for i, f in enumerate(files):
493 progress.update(i, item=f)
490 progress.update(i, item=f)
494 try:
491 try:
495 linkrevs = filelinkrevs[f]
492 linkrevs = filelinkrevs[f]
496 except KeyError:
493 except KeyError:
497 # in manifest but not in changelog
494 # in manifest but not in changelog
498 linkrevs = []
495 linkrevs = []
499
496
500 if linkrevs:
497 if linkrevs:
501 lr = linkrevs[0]
498 lr = linkrevs[0]
502 else:
499 else:
503 lr = None
500 lr = None
504
501
505 try:
502 try:
506 fl = repo.file(f)
503 fl = repo.file(f)
507 except error.StorageError as e:
504 except error.StorageError as e:
508 self._err(lr, _(b"broken revlog! (%s)") % e, f)
505 self._err(lr, _(b"broken revlog! (%s)") % e, f)
509 continue
506 continue
510
507
511 for ff in fl.files():
508 for ff in fl.files():
512 try:
509 try:
513 storefiles.remove(ff)
510 storefiles.remove(ff)
514 except KeyError:
511 except KeyError:
515 if self.warnorphanstorefiles:
512 if self.warnorphanstorefiles:
516 self._warn(
513 self._warn(
517 _(b" warning: revlog '%s' not in fncache!") % ff
514 _(b" warning: revlog '%s' not in fncache!") % ff
518 )
515 )
519 self.fncachewarned = True
516 self.fncachewarned = True
520
517
521 if not len(fl) and (self.havecl or self.havemf):
518 if not len(fl) and (self.havecl or self.havemf):
522 self._err(lr, _(b"empty or missing %s") % f)
519 self._err(lr, _(b"empty or missing %s") % f)
523 else:
520 else:
524 # Guard against implementations not setting this.
521 # Guard against implementations not setting this.
525 state[b'skipread'] = set()
522 state[b'skipread'] = set()
526 state[b'safe_renamed'] = set()
523 state[b'safe_renamed'] = set()
527
524
528 for problem in fl.verifyintegrity(state):
525 for problem in fl.verifyintegrity(state):
529 if problem.node is not None:
526 if problem.node is not None:
530 linkrev = fl.linkrev(fl.rev(problem.node))
527 linkrev = fl.linkrev(fl.rev(problem.node))
531 else:
528 else:
532 linkrev = None
529 linkrev = None
533
530
534 if problem.warning:
531 if problem.warning:
535 self._warn(problem.warning)
532 self._warn(problem.warning)
536 elif problem.error:
533 elif problem.error:
537 self._err(
534 self._err(
538 linkrev if linkrev is not None else lr,
535 linkrev if linkrev is not None else lr,
539 problem.error,
536 problem.error,
540 f,
537 f,
541 )
538 )
542 else:
539 else:
543 raise error.ProgrammingError(
540 raise error.ProgrammingError(
544 b'problem instance does not set warning or error '
541 b'problem instance does not set warning or error '
545 b'attribute: %s' % problem.msg
542 b'attribute: %s' % problem.msg
546 )
543 )
547
544
548 seen = {}
545 seen = {}
549 for i in fl:
546 for i in fl:
550 revisions += 1
547 revisions += 1
551 n = fl.node(i)
548 n = fl.node(i)
552 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
549 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
553 if f in filenodes:
550 if f in filenodes:
554 if havemf and n not in filenodes[f]:
551 if havemf and n not in filenodes[f]:
555 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
552 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
556 else:
553 else:
557 del filenodes[f][n]
554 del filenodes[f][n]
558
555
559 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
556 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
560 continue
557 continue
561
558
562 # check renames
559 # check renames
563 try:
560 try:
564 # This requires resolving fulltext (at least on revlogs,
561 # This requires resolving fulltext (at least on revlogs,
565 # though not with LFS revisions). We may want
562 # though not with LFS revisions). We may want
566 # ``verifyintegrity()`` to pass a set of nodes with
563 # ``verifyintegrity()`` to pass a set of nodes with
567 # rename metadata as an optimization.
564 # rename metadata as an optimization.
568 rp = fl.renamed(n)
565 rp = fl.renamed(n)
569 if rp:
566 if rp:
570 if lr is not None and ui.verbose:
567 if lr is not None and ui.verbose:
571 ctx = lrugetctx(lr)
568 ctx = lrugetctx(lr)
572 if not any(rp[0] in pctx for pctx in ctx.parents()):
569 if not any(rp[0] in pctx for pctx in ctx.parents()):
573 self._warn(
570 self._warn(
574 _(
571 _(
575 b"warning: copy source of '%s' not"
572 b"warning: copy source of '%s' not"
576 b" in parents of %s"
573 b" in parents of %s"
577 )
574 )
578 % (f, ctx)
575 % (f, ctx)
579 )
576 )
580 fl2 = repo.file(rp[0])
577 fl2 = repo.file(rp[0])
581 if not len(fl2):
578 if not len(fl2):
582 self._err(
579 self._err(
583 lr,
580 lr,
584 _(
581 _(
585 b"empty or missing copy source revlog "
582 b"empty or missing copy source revlog "
586 b"%s:%s"
583 b"%s:%s"
587 )
584 )
588 % (rp[0], short(rp[1])),
585 % (rp[0], short(rp[1])),
589 f,
586 f,
590 )
587 )
591 elif rp[1] == self.repo.nullid:
588 elif rp[1] == self.repo.nullid:
592 ui.note(
589 ui.note(
593 _(
590 _(
594 b"warning: %s@%s: copy source"
591 b"warning: %s@%s: copy source"
595 b" revision is nullid %s:%s\n"
592 b" revision is nullid %s:%s\n"
596 )
593 )
597 % (f, lr, rp[0], short(rp[1]))
594 % (f, lr, rp[0], short(rp[1]))
598 )
595 )
599 else:
596 else:
600 fl2.rev(rp[1])
597 fl2.rev(rp[1])
601 except Exception as inst:
598 except Exception as inst:
602 self._exc(
599 self._exc(
603 lr, _(b"checking rename of %s") % short(n), inst, f
600 lr, _(b"checking rename of %s") % short(n), inst, f
604 )
601 )
605
602
606 # cross-check
603 # cross-check
607 if f in filenodes:
604 if f in filenodes:
608 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
605 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
609 for lr, node in sorted(fns):
606 for lr, node in sorted(fns):
610 self._err(
607 self._err(
611 lr,
608 lr,
612 _(b"manifest refers to unknown revision %s")
609 _(b"manifest refers to unknown revision %s")
613 % short(node),
610 % short(node),
614 f,
611 f,
615 )
612 )
616 progress.complete()
613 progress.complete()
617
614
618 if self.warnorphanstorefiles:
615 if self.warnorphanstorefiles:
619 for f in sorted(storefiles):
616 for f in sorted(storefiles):
620 self._warn(_(b"warning: orphan data file '%s'") % f)
617 self._warn(_(b"warning: orphan data file '%s'") % f)
621
618
622 return len(files), revisions
619 return len(files), revisions
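
For context, a minimal usage sketch (not part of this changeset) of how the module-level verify() helper defined at the top of the file is driven programmatically; the repository path and ui setup below are illustrative assumptions, and the same checks run behind the hg verify command:

    from mercurial import hg, ui as uimod, verify

    # open a local repository (path is illustrative)
    u = uimod.ui.load()
    repo = hg.repository(u, b'/path/to/repo')

    # run the integrity checks; returns 1 if errors were found, 0 otherwise
    rc = verify.verify(repo)

    # the more expensive checks (e.g. reading every full manifest text)
    # are enabled by passing level=verify.VERIFY_FULL
    rc = verify.verify(repo, level=verify.VERIFY_FULL)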