verify: align a comment line...
marmoute - r48142:5ed2aaab default
@@ -1,627 +1,627 @@
# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os

from .i18n import _
from .node import short
from .utils import stringutil

from . import (
    error,
    pycompat,
    revlog,
    util,
)

VERIFY_DEFAULT = 0
VERIFY_FULL = 1


def verify(repo, level=None):
    with repo.lock():
        v = verifier(repo, level)
        return v.verify()
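
# A minimal usage sketch (illustrative only, not part of this file): callers
# such as the `hg verify` command end up driving the function above.  It
# assumes a `repo` object obtained through the standard `mercurial.hg` /
# `mercurial.ui` APIs:
#
#     from mercurial import hg, ui as uimod, verify as verifymod
#
#     repo = hg.repository(uimod.ui.load(), b'/path/to/repo')
#     rc = verifymod.verify(repo)  # 0 if clean, 1 if integrity errors were found
#     rc = verifymod.verify(repo, level=verifymod.VERIFY_FULL)  # slower, deeper checks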


def _normpath(f):
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f
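
# For example (illustrative): _normpath(b'foo//bar///baz') -> b'foo/bar/baz',
# while already-clean paths are returned unchanged.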


class verifier(object):
    def __init__(self, repo, level=None):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record an "error" level issue"""
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record an exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
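
    # Illustrative output shapes for the helpers above (assuming
    # filename=b'foo.txt' and linkrev=5):
    #
    #   _warn(b"warning: ...")        ->  "warning: ..."        (self.warnings += 1)
    #   _err(5, b"msg", b'foo.txt')   ->  " foo.txt@5: msg"     (self.errors += 1)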

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level properties of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj: the source revlog
        - i: the revision number
        - node: the revision node id
        - seen: nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f: string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [
                            l
                            for l in linkrevs
                            if self.lrugetctx(l)[f].filenode() == node
                        ]
                    except Exception:
                        pass
                self._warn(
                    _(b" (expected %s)")
                    % b" ".join(map(pycompat.bytestr, linkrevs))
                )
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                self._err(
                    lr,
                    _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
                    f,
                )
            if p2 not in seen and p2 != self.repo.nullid:
                self._err(
                    lr,
                    _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
                    f,
                )
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method runs all verifications, displaying issues as they are found.

        Return 1 if any errors have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (self.revlogv1 and 1 or 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(
                _(
                    b'hint: run "hg debugrebuildfncache" to recover from '
                    b'corrupt fncache\n'
                )
            )
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(
                    _(b"(first damaged changeset appears to be %d)\n")
                    % min(self.badrevs)
                )
            return 1
        return 0
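
    # Shape of the final report printed above (illustrative; the numbers are
    # made up, only the format strings come from this method):
    #
    #   checked 1024 changesets with 2311 changes to 187 files
    #   2 warnings encountered!
    #   3 integrity errors encountered!
    #   (first damaged changeset appears to be 42)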

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revision),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuple:
        - mflinkrevs: a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contain matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs
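
    # Illustrative shapes of the two mappings returned above (nodes and
    # revision numbers are hypothetical):
    #
    #   mflinkrevs   = {<manifest-node>: [0, 7]}      # manifest node -> introducing revs
    #   filelinkrevs = {b'foo/bar.txt': [0, 3, 7]}    # file path -> changelog revs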

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
        - dir: a subdirectory to check (for tree manifest repo)
        - storefiles: set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectories exist in the sub-directory manifest
        * each manifest entry has a file path
        * each manifest node referred to in mflinkrevs exists in the manifest log

        If tree manifest is in use and a matcher is specified, only the
        sub-directories matching it will be verified.

        return a two-level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every file in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(
                    lr,
                    _(b"%s not in parent-directory manifest") % short(n),
                    label,
                )
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                            fn, []
                        ).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filenames are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    self._exc(
                        lr,
                        _(b"reading full manifest %s") % short(n),
                        inst,
                        label,
                    )

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entries in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(
                        c,
                        _(
                            b"parent-directory manifest refers to unknown"
                            b" revision %s"
                        )
                        % short(m),
                        label,
                    )
                else:
                    self._err(
                        c,
                        _(b"changeset refers to unknown revision %s")
                        % short(m),
                        label,
                    )

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes
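
    # Illustrative shape of the mapping returned above (values hypothetical):
    #
    #   {b'foo/bar.txt': {<filenode>: <changelog-rev>}, ...}
    #
    # With tree manifests, entries gathered from sub-directory manifests are
    # merged into the same mapping by the recursive calls above.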

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(
                            _(b" warning: revlog '%s' not in fncache!") % ff
                        )
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lr, rp[0], short(rp[1]))
                            )
                        else:
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions