typing: add an assertion to verify.py to appease pytype...
Matt Harbison
r47548:51378966 stable
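The change adds a single `assert` so that pytype can narrow `subdirprogress` from an Optional value to a concrete progress object before `.complete()` is called on it in the second `if not dir and subdirnodes:` block. A minimal, self-contained sketch of that narrowing pattern follows; the `Progress` class and `finish()` helper are illustrative stand-ins, not Mercurial APIs:

from typing import Optional


class Progress(object):
    """Illustrative stand-in for a ui progress object."""

    def complete(self):
        # type: () -> None
        pass


def finish(progress):
    # type: (Optional[Progress]) -> None
    # Without the assert, a checker such as pytype reports that
    # `progress` may be None at the call below, even though the
    # surrounding logic guarantees it was assigned on this path.
    assert progress is not None  # help pytype
    progress.complete()


finish(Progress())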
@@ -1,631 +1,632 @@
 # verify.py - repository integrity checking for Mercurial
 #
 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 import os

 from .i18n import _
 from .node import (
     nullid,
     short,
 )
 from .utils import (
     stringutil,
 )

 from . import (
     error,
     pycompat,
     revlog,
     util,
 )

 VERIFY_DEFAULT = 0
 VERIFY_FULL = 1


 def verify(repo, level=None):
     with repo.lock():
         v = verifier(repo, level)
         return v.verify()


 def _normpath(f):
     # under hg < 2.4, convert didn't sanitize paths properly, so a
     # converted repo may contain repeated slashes
     while b'//' in f:
         f = f.replace(b'//', b'/')
     return f


 class verifier(object):
     def __init__(self, repo, level=None):
         self.repo = repo.unfiltered()
         self.ui = repo.ui
         self.match = repo.narrowmatch()
         if level is None:
             level = VERIFY_DEFAULT
         self._level = level
         self.badrevs = set()
         self.errors = 0
         self.warnings = 0
         self.havecl = len(repo.changelog) > 0
         self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
         self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
         self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
         self.refersmf = False
         self.fncachewarned = False
         # developer config: verify.skipflags
         self.skipflags = repo.ui.configint(b'verify', b'skipflags')
         self.warnorphanstorefiles = True

     def _warn(self, msg):
         """record a "warning" level issue"""
         self.ui.warn(msg + b"\n")
         self.warnings += 1

     def _err(self, linkrev, msg, filename=None):
         """record a "error" level issue"""
         if linkrev is not None:
             self.badrevs.add(linkrev)
             linkrev = b"%d" % linkrev
         else:
             linkrev = b'?'
         msg = b"%s: %s" % (linkrev, msg)
         if filename:
             msg = b"%s@%s" % (filename, msg)
         self.ui.warn(b" " + msg + b"\n")
         self.errors += 1

     def _exc(self, linkrev, msg, inst, filename=None):
         """record exception raised during the verify process"""
         fmsg = stringutil.forcebytestr(inst)
         if not fmsg:
             fmsg = pycompat.byterepr(inst)
         self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

     def _checkrevlog(self, obj, name, linkrev):
         """verify high level property of a revlog

         - revlog is present,
         - revlog is non-empty,
         - sizes (index and data) are correct,
         - revlog's format version is correct.
         """
         if not len(obj) and (self.havecl or self.havemf):
             self._err(linkrev, _(b"empty or missing %s") % name)
             return

         d = obj.checksize()
         if d[0]:
             self._err(None, _(b"data length off by %d bytes") % d[0], name)
         if d[1]:
             self._err(None, _(b"index contains %d extra bytes") % d[1], name)

         if obj.version != revlog.REVLOGV0:
             if not self.revlogv1:
                 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
         elif self.revlogv1:
             self._warn(_(b"warning: `%s' uses revlog format 0") % name)

     def _checkentry(self, obj, i, node, seen, linkrevs, f):
         """verify a single revlog entry

         arguments are:
         - obj: the source revlog
         - i: the revision number
         - node: the revision node id
         - seen: nodes previously seen for this revlog
         - linkrevs: [changelog-revisions] introducing "node"
         - f: string label ("changelog", "manifest", or filename)

         Performs the following checks:
         - linkrev points to an existing changelog revision,
         - linkrev points to a changelog revision that introduces this revision,
         - linkrev points to the lowest of these changesets,
         - both parents exist in the revlog,
         - the revision is not duplicated.

         Return the linkrev of the revision (or None for changelog's revisions).
         """
         lr = obj.linkrev(obj.rev(node))
         if lr < 0 or (self.havecl and lr not in linkrevs):
             if lr < 0 or lr >= len(self.repo.changelog):
                 msg = _(b"rev %d points to nonexistent changeset %d")
             else:
                 msg = _(b"rev %d points to unexpected changeset %d")
             self._err(None, msg % (i, lr), f)
             if linkrevs:
                 if f and len(linkrevs) > 1:
                     try:
                         # attempt to filter down to real linkrevs
                         linkrevs = [
                             l
                             for l in linkrevs
                             if self.lrugetctx(l)[f].filenode() == node
                         ]
                     except Exception:
                         pass
                 self._warn(
                     _(b" (expected %s)")
                     % b" ".join(map(pycompat.bytestr, linkrevs))
                 )
             lr = None  # can't be trusted

         try:
             p1, p2 = obj.parents(node)
             if p1 not in seen and p1 != nullid:
                 self._err(
                     lr,
                     _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
                     f,
                 )
             if p2 not in seen and p2 != nullid:
                 self._err(
                     lr,
                     _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
                     f,
                 )
         except Exception as inst:
             self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

         if node in seen:
             self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
         seen[node] = i
         return lr

     def verify(self):
         """verify the content of the Mercurial repository

         This method run all verifications, displaying issues as they are found.

         return 1 if any error have been encountered, 0 otherwise."""
         # initial validation and generic report
         repo = self.repo
         ui = repo.ui
         if not repo.url().startswith(b'file:'):
             raise error.Abort(_(b"cannot verify bundle or remote repos"))

         if os.path.exists(repo.sjoin(b"journal")):
             ui.warn(_(b"abandoned transaction found - run hg recover\n"))

         if ui.verbose or not self.revlogv1:
             ui.status(
                 _(b"repository uses revlog format %d\n")
                 % (self.revlogv1 and 1 or 0)
             )

         # data verification
         mflinkrevs, filelinkrevs = self._verifychangelog()
         filenodes = self._verifymanifest(mflinkrevs)
         del mflinkrevs
         self._crosscheckfiles(filelinkrevs, filenodes)
         totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

         # final report
         ui.status(
             _(b"checked %d changesets with %d changes to %d files\n")
             % (len(repo.changelog), filerevisions, totalfiles)
         )
         if self.warnings:
             ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
         if self.fncachewarned:
             ui.warn(
                 _(
                     b'hint: run "hg debugrebuildfncache" to recover from '
                     b'corrupt fncache\n'
                 )
             )
         if self.errors:
             ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
             if self.badrevs:
                 ui.warn(
                     _(b"(first damaged changeset appears to be %d)\n")
                     % min(self.badrevs)
                 )
             return 1
         return 0

     def _verifychangelog(self):
         """verify the changelog of a repository

         The following checks are performed:
         - all of `_checkrevlog` checks,
         - all of `_checkentry` checks (for each revisions),
         - each revision can be read.

         The function returns some of the data observed in the changesets as a
         (mflinkrevs, filelinkrevs) tuples:
         - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
         - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

         If a matcher was specified, filelinkrevs will only contains matched
         files.
         """
         ui = self.ui
         repo = self.repo
         match = self.match
         cl = repo.changelog

         ui.status(_(b"checking changesets\n"))
         mflinkrevs = {}
         filelinkrevs = {}
         seen = {}
         self._checkrevlog(cl, b"changelog", 0)
         progress = ui.makeprogress(
             _(b'checking'), unit=_(b'changesets'), total=len(repo)
         )
         for i in repo:
             progress.update(i)
             n = cl.node(i)
             self._checkentry(cl, i, n, seen, [i], b"changelog")

             try:
                 changes = cl.read(n)
                 if changes[0] != nullid:
                     mflinkrevs.setdefault(changes[0], []).append(i)
                     self.refersmf = True
                 for f in changes[3]:
                     if match(f):
                         filelinkrevs.setdefault(_normpath(f), []).append(i)
             except Exception as inst:
                 self.refersmf = True
                 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
         progress.complete()
         return mflinkrevs, filelinkrevs

     def _verifymanifest(
         self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
     ):
         """verify the manifestlog content

         Inputs:
         - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
         - dir: a subdirectory to check (for tree manifest repo)
         - storefiles: set of currently "orphan" files.
         - subdirprogress: a progress object

         This function checks:
         * all of `_checkrevlog` checks (for all manifest related revlogs)
         * all of `_checkentry` checks (for all manifest related revisions)
         * nodes for subdirectory exists in the sub-directory manifest
         * each manifest entries have a file path
         * each manifest node refered in mflinkrevs exist in the manifest log

         If tree manifest is in use and a matchers is specified, only the
         sub-directories matching it will be verified.

         return a two level mapping:
             {"path" -> { filenode -> changelog-revision}}

         This mapping primarily contains entries for every files in the
         repository. In addition, when tree-manifest is used, it also contains
         sub-directory entries.

         If a matcher is provided, only matching paths will be included.
         """
         repo = self.repo
         ui = self.ui
         match = self.match
         mfl = self.repo.manifestlog
         mf = mfl.getstorage(dir)

         if not dir:
             self.ui.status(_(b"checking manifests\n"))

         filenodes = {}
         subdirnodes = {}
         seen = {}
         label = b"manifest"
         if dir:
             label = dir
             revlogfiles = mf.files()
             storefiles.difference_update(revlogfiles)
             if subdirprogress:  # should be true since we're in a subdirectory
                 subdirprogress.increment()
         if self.refersmf:
             # Do not check manifest if there are only changelog entries with
             # null manifests.
             self._checkrevlog(mf, label, 0)
         progress = ui.makeprogress(
             _(b'checking'), unit=_(b'manifests'), total=len(mf)
         )
         for i in mf:
             if not dir:
                 progress.update(i)
             n = mf.node(i)
             lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
             if n in mflinkrevs:
                 del mflinkrevs[n]
             elif dir:
                 self._err(
                     lr,
                     _(b"%s not in parent-directory manifest") % short(n),
                     label,
                 )
             else:
                 self._err(lr, _(b"%s not in changesets") % short(n), label)

             try:
                 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                 for f, fn, fl in mfdelta.iterentries():
                     if not f:
                         self._err(lr, _(b"entry without name in manifest"))
                     elif f == b"/dev/null":  # ignore this in very old repos
                         continue
                     fullpath = dir + _normpath(f)
                     if fl == b't':
                         if not match.visitdir(fullpath):
                             continue
                         subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                             fn, []
                         ).append(lr)
                     else:
                         if not match(fullpath):
                             continue
                         filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
             except Exception as inst:
                 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
             if self._level >= VERIFY_FULL:
                 try:
                     # Various issues can affect manifest. So we read each full
                     # text from storage. This triggers the checks from the core
                     # code (eg: hash verification, filename are ordered, etc.)
                     mfdelta = mfl.get(dir, n).read()
                 except Exception as inst:
                     self._exc(
                         lr,
                         _(b"reading full manifest %s") % short(n),
                         inst,
                         label,
                     )

         if not dir:
             progress.complete()

         if self.havemf:
             # since we delete entry in `mflinkrevs` during iteration, any
             # remaining entries are "missing". We need to issue errors for them.
             changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
             for c, m in sorted(changesetpairs):
                 if dir:
                     self._err(
                         c,
                         _(
                             b"parent-directory manifest refers to unknown"
                             b" revision %s"
                         )
                         % short(m),
                         label,
                     )
                 else:
                     self._err(
                         c,
                         _(b"changeset refers to unknown revision %s")
                         % short(m),
                         label,
                     )

         if not dir and subdirnodes:
             self.ui.status(_(b"checking directory manifests\n"))
             storefiles = set()
             subdirs = set()
             revlogv1 = self.revlogv1
             for f, f2, size in repo.store.datafiles():
                 if not f:
                     self._err(None, _(b"cannot decode filename '%s'") % f2)
                 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                     storefiles.add(_normpath(f))
                     subdirs.add(os.path.dirname(f))
             subdirprogress = ui.makeprogress(
                 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
             )

         for subdir, linkrevs in pycompat.iteritems(subdirnodes):
             subdirfilenodes = self._verifymanifest(
                 linkrevs, subdir, storefiles, subdirprogress
             )
             for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                 filenodes.setdefault(f, {}).update(onefilenodes)

         if not dir and subdirnodes:
+            assert subdirprogress is not None  # help pytype
             subdirprogress.complete()
             if self.warnorphanstorefiles:
                 for f in sorted(storefiles):
                     self._warn(_(b"warning: orphan data file '%s'") % f)

         return filenodes

     def _crosscheckfiles(self, filelinkrevs, filenodes):
         repo = self.repo
         ui = self.ui
         ui.status(_(b"crosschecking files in changesets and manifests\n"))

         total = len(filelinkrevs) + len(filenodes)
         progress = ui.makeprogress(
             _(b'crosschecking'), unit=_(b'files'), total=total
         )
         if self.havemf:
             for f in sorted(filelinkrevs):
                 progress.increment()
                 if f not in filenodes:
                     lr = filelinkrevs[f][0]
                     self._err(lr, _(b"in changeset but not in manifest"), f)

         if self.havecl:
             for f in sorted(filenodes):
                 progress.increment()
                 if f not in filelinkrevs:
                     try:
                         fl = repo.file(f)
                         lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                     except Exception:
                         lr = None
                     self._err(lr, _(b"in manifest but not in changeset"), f)

         progress.complete()

     def _verifyfiles(self, filenodes, filelinkrevs):
         repo = self.repo
         ui = self.ui
         lrugetctx = self.lrugetctx
         revlogv1 = self.revlogv1
         havemf = self.havemf
         ui.status(_(b"checking files\n"))

         storefiles = set()
         for f, f2, size in repo.store.datafiles():
             if not f:
                 self._err(None, _(b"cannot decode filename '%s'") % f2)
             elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                 storefiles.add(_normpath(f))

         state = {
             # TODO this assumes revlog storage for changelog.
             b'expectedversion': self.repo.changelog.version & 0xFFFF,
             b'skipflags': self.skipflags,
             # experimental config: censor.policy
             b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
         }

         files = sorted(set(filenodes) | set(filelinkrevs))
         revisions = 0
         progress = ui.makeprogress(
             _(b'checking'), unit=_(b'files'), total=len(files)
         )
         for i, f in enumerate(files):
             progress.update(i, item=f)
             try:
                 linkrevs = filelinkrevs[f]
             except KeyError:
                 # in manifest but not in changelog
                 linkrevs = []

             if linkrevs:
                 lr = linkrevs[0]
             else:
                 lr = None

             try:
                 fl = repo.file(f)
             except error.StorageError as e:
                 self._err(lr, _(b"broken revlog! (%s)") % e, f)
                 continue

             for ff in fl.files():
                 try:
                     storefiles.remove(ff)
                 except KeyError:
                     if self.warnorphanstorefiles:
                         self._warn(
                             _(b" warning: revlog '%s' not in fncache!") % ff
                         )
                         self.fncachewarned = True

             if not len(fl) and (self.havecl or self.havemf):
                 self._err(lr, _(b"empty or missing %s") % f)
             else:
                 # Guard against implementations not setting this.
                 state[b'skipread'] = set()
                 state[b'safe_renamed'] = set()

                 for problem in fl.verifyintegrity(state):
                     if problem.node is not None:
                         linkrev = fl.linkrev(fl.rev(problem.node))
                     else:
                         linkrev = None

                     if problem.warning:
                         self._warn(problem.warning)
                     elif problem.error:
                         self._err(
                             linkrev if linkrev is not None else lr,
                             problem.error,
                             f,
                         )
                     else:
                         raise error.ProgrammingError(
                             b'problem instance does not set warning or error '
                             b'attribute: %s' % problem.msg
                         )

             seen = {}
             for i in fl:
                 revisions += 1
                 n = fl.node(i)
                 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                 if f in filenodes:
                     if havemf and n not in filenodes[f]:
                         self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                     else:
                         del filenodes[f][n]

                 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                     continue

                 # check renames
                 try:
                     # This requires resolving fulltext (at least on revlogs,
                     # though not with LFS revisions). We may want
                     # ``verifyintegrity()`` to pass a set of nodes with
                     # rename metadata as an optimization.
                     rp = fl.renamed(n)
                     if rp:
                         if lr is not None and ui.verbose:
                             ctx = lrugetctx(lr)
                             if not any(rp[0] in pctx for pctx in ctx.parents()):
                                 self._warn(
                                     _(
                                         b"warning: copy source of '%s' not"
                                         b" in parents of %s"
                                     )
                                     % (f, ctx)
                                 )
                         fl2 = repo.file(rp[0])
                         if not len(fl2):
                             self._err(
                                 lr,
                                 _(
                                     b"empty or missing copy source revlog "
                                     b"%s:%s"
                                 )
                                 % (rp[0], short(rp[1])),
                                 f,
                             )
                         elif rp[1] == nullid:
                             ui.note(
                                 _(
                                     b"warning: %s@%s: copy source"
                                     b" revision is nullid %s:%s\n"
                                 )
                                 % (f, lr, rp[0], short(rp[1]))
                             )
                         else:
                             fl2.rev(rp[1])
                 except Exception as inst:
                     self._exc(
                         lr, _(b"checking rename of %s") % short(n), inst, f
                     )

             # cross-check
             if f in filenodes:
                 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                 for lr, node in sorted(fns):
                     self._err(
                         lr,
                         _(b"manifest refers to unknown revision %s")
                         % short(node),
                         f,
                     )
         progress.complete()

         if self.warnorphanstorefiles:
             for f in sorted(storefiles):
                 self._warn(_(b"warning: orphan data file '%s'") % f)

         return len(files), revisions
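For reference, the module-level `verify(repo, level=None)` shown at the top of the diff is the routine that backs `hg verify`: it takes the repository lock, runs every check, and returns 1 if integrity errors were found, 0 otherwise. A hedged sketch of driving it programmatically; the repository path is a placeholder and this assumes the standard `mercurial.hg.repository()` and `mercurial.ui.ui.load()` entry points:

from mercurial import hg, verify
from mercurial import ui as uimod

# Load the default ui configuration and open a local repository.
# The path below is a placeholder.
u = uimod.ui.load()
repo = hg.repository(u, b'/path/to/repo')

# Runs the same checks as `hg verify`; returns 0 when no integrity
# errors were encountered, 1 otherwise.
rc = verify.verify(repo)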