verify: keep the revlog open for reading while verifying it...
marmoute
r51908:812cd3df default
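The change wraps the changelog verification loop in "with cl.reading():" so the changelog revlog stays open for the whole verification pass instead of being reopened for every revision read. Below is a minimal, self-contained sketch of that pattern only; ToyRevlog, read_rev and verify_all are hypothetical stand-ins for illustration, not Mercurial's actual revlog API (the real context manager is the reading() method visible in the diff).

# Illustrative sketch (assumed names, not Mercurial code): keep one file
# handle open across many reads instead of reopening it per revision.
import contextlib


class ToyRevlog:
    def __init__(self, path):
        self._path = path
        self._fh = None  # populated while a reading() context is active

    @contextlib.contextmanager
    def reading(self):
        """Keep the data file open for the duration of the block."""
        self._fh = open(self._path, 'rb')
        try:
            yield self
        finally:
            self._fh.close()
            self._fh = None

    def read_rev(self, offset, length):
        if self._fh is None:
            # slow path: open and close the file for every single read,
            # which is what a naive verification loop ends up doing
            with open(self._path, 'rb') as fh:
                fh.seek(offset)
                return fh.read(length)
        # fast path: reuse the handle opened by reading()
        self._fh.seek(offset)
        return self._fh.read(length)


def verify_all(rl, entries):
    # same shape as the new _verifychangelog() loop: one reading()
    # context around the whole iteration
    with rl.reading():
        return [rl.read_rev(off, ln) for off, ln in entries]

Compared with calling read_rev() outside any reading() block, this avoids one open()/close() pair per revision, which is the kind of per-revision cost the commit message refers to.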
@@ -1,628 +1,629 @@
 # verify.py - repository integrity checking for Mercurial
 #
 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.


 import os

 from .i18n import _
 from .node import short
 from .utils import stringutil

 from . import (
     error,
     pycompat,
     requirements,
     revlog,
     transaction,
     util,
 )

 VERIFY_DEFAULT = 0
 VERIFY_FULL = 1


 def verify(repo, level=None):
     with repo.lock():
         v = verifier(repo, level)
         return v.verify()


 def _normpath(f):
     # under hg < 2.4, convert didn't sanitize paths properly, so a
     # converted repo may contain repeated slashes
     while b'//' in f:
         f = f.replace(b'//', b'/')
     return f


 HINT_FNCACHE = _(
     b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
 )

 WARN_PARENT_DIR_UNKNOWN_REV = _(
     b"parent-directory manifest refers to unknown revision %s"
 )

 WARN_UNKNOWN_COPY_SOURCE = _(
     b"warning: copy source of '%s' not in parents of %s"
 )

 WARN_NULLID_COPY_SOURCE = _(
     b"warning: %s@%s: copy source revision is nullid %s:%s\n"
 )


 class verifier:
     def __init__(self, repo, level=None):
         self.repo = repo.unfiltered()
         self.ui = repo.ui
         self.match = repo.narrowmatch()
         if level is None:
             level = VERIFY_DEFAULT
         self._level = level
         self.badrevs = set()
         self.errors = 0
         self.warnings = 0
         self.havecl = len(repo.changelog) > 0
         self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
         self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
         self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
         self.refersmf = False
         self.fncachewarned = False
         # developer config: verify.skipflags
         self.skipflags = repo.ui.configint(b'verify', b'skipflags')
         self.warnorphanstorefiles = True

     def _warn(self, msg):
         """record a "warning" level issue"""
         self.ui.warn(msg + b"\n")
         self.warnings += 1

     def _err(self, linkrev, msg, filename=None):
         """record an "error" level issue"""
         if linkrev is not None:
             self.badrevs.add(linkrev)
             linkrev = b"%d" % linkrev
         else:
             linkrev = b'?'
         msg = b"%s: %s" % (linkrev, msg)
         if filename:
             msg = b"%s@%s" % (filename, msg)
         self.ui.warn(b" " + msg + b"\n")
         self.errors += 1

     def _exc(self, linkrev, msg, inst, filename=None):
         """record an exception raised during the verify process"""
         fmsg = stringutil.forcebytestr(inst)
         if not fmsg:
             fmsg = pycompat.byterepr(inst)
         self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

     def _checkrevlog(self, obj, name, linkrev):
         """verify high level properties of a revlog

         - revlog is present,
         - revlog is non-empty,
         - sizes (index and data) are correct,
         - revlog's format version is correct.
         """
         if not len(obj) and (self.havecl or self.havemf):
             self._err(linkrev, _(b"empty or missing %s") % name)
             return

         d = obj.checksize()
         if d[0]:
             self._err(None, _(b"data length off by %d bytes") % d[0], name)
         if d[1]:
             self._err(None, _(b"index contains %d extra bytes") % d[1], name)

         if obj._format_version != revlog.REVLOGV0:
             if not self.revlogv1:
                 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
         elif self.revlogv1:
             self._warn(_(b"warning: `%s' uses revlog format 0") % name)

     def _checkentry(self, obj, i, node, seen, linkrevs, f):
         """verify a single revlog entry

         arguments are:
         - obj: the source revlog
         - i: the revision number
         - node: the revision node id
         - seen: nodes previously seen for this revlog
         - linkrevs: [changelog-revisions] introducing "node"
         - f: string label ("changelog", "manifest", or filename)

         Performs the following checks:
         - linkrev points to an existing changelog revision,
         - linkrev points to a changelog revision that introduces this revision,
         - linkrev points to the lowest of these changesets,
         - both parents exist in the revlog,
         - the revision is not duplicated.

         Return the linkrev of the revision (or None for changelog's revisions).
         """
         lr = obj.linkrev(obj.rev(node))
         if lr < 0 or (self.havecl and lr not in linkrevs):
             if lr < 0 or lr >= len(self.repo.changelog):
                 msg = _(b"rev %d points to nonexistent changeset %d")
             else:
                 msg = _(b"rev %d points to unexpected changeset %d")
             self._err(None, msg % (i, lr), f)
             if linkrevs:
                 if f and len(linkrevs) > 1:
                     try:
                         # attempt to filter down to real linkrevs
                         linkrevs = [
                             lr
                             for lr in linkrevs
                             if self.lrugetctx(lr)[f].filenode() == node
                         ]
                     except Exception:
                         pass
                 msg = _(b" (expected %s)")
                 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                 self._warn(msg)
             lr = None  # can't be trusted

         try:
             p1, p2 = obj.parents(node)
             if p1 not in seen and p1 != self.repo.nullid:
                 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                 self._err(lr, msg, f)
             if p2 not in seen and p2 != self.repo.nullid:
                 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                 self._err(lr, msg, f)
         except Exception as inst:
             self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

         if node in seen:
             self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
         seen[node] = i
         return lr

     def verify(self):
         """verify the content of the Mercurial repository

         This method runs all verifications, displaying issues as they are found.

         Return 1 if any errors have been encountered, 0 otherwise."""
         # initial validation and generic report
         repo = self.repo
         ui = repo.ui
         if not repo.url().startswith(b'file:'):
             raise error.Abort(_(b"cannot verify bundle or remote repos"))

         if transaction.has_abandoned_transaction(repo):
             ui.warn(_(b"abandoned transaction found - run hg recover\n"))

         if ui.verbose or not self.revlogv1:
             ui.status(
                 _(b"repository uses revlog format %d\n")
                 % (self.revlogv1 and 1 or 0)
             )

         # data verification
         mflinkrevs, filelinkrevs = self._verifychangelog()
         filenodes = self._verifymanifest(mflinkrevs)
         del mflinkrevs
         self._crosscheckfiles(filelinkrevs, filenodes)
         totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

         if self.errors:
             ui.warn(_(b"not checking dirstate because of previous errors\n"))
             dirstate_errors = 0
         else:
             dirstate_errors = self._verify_dirstate()

         # final report
         ui.status(
             _(b"checked %d changesets with %d changes to %d files\n")
             % (len(repo.changelog), filerevisions, totalfiles)
         )
         if self.warnings:
             ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
         if self.fncachewarned:
             ui.warn(HINT_FNCACHE)
         if self.errors:
             ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
             if self.badrevs:
                 msg = _(b"(first damaged changeset appears to be %d)\n")
                 msg %= min(self.badrevs)
                 ui.warn(msg)
             if dirstate_errors:
                 ui.warn(
                     _(b"dirstate inconsistent with current parent's manifest\n")
                 )
                 ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
             return 1
         return 0

     def _verifychangelog(self):
         """verify the changelog of a repository

         The following checks are performed:
         - all of `_checkrevlog` checks,
         - all of `_checkentry` checks (for each revision),
         - each revision can be read.

         The function returns some of the data observed in the changesets as a
         (mflinkrevs, filelinkrevs) tuple:
         - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
         - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

         If a matcher was specified, filelinkrevs will only contain matched
         files.
         """
         ui = self.ui
         repo = self.repo
         match = self.match
         cl = repo.changelog

         ui.status(_(b"checking changesets\n"))
         mflinkrevs = {}
         filelinkrevs = {}
         seen = {}
         self._checkrevlog(cl, b"changelog", 0)
         progress = ui.makeprogress(
             _(b'checking'), unit=_(b'changesets'), total=len(repo)
         )
-        for i in repo:
-            progress.update(i)
-            n = cl.node(i)
-            self._checkentry(cl, i, n, seen, [i], b"changelog")
-
-            try:
-                changes = cl.read(n)
-                if changes[0] != self.repo.nullid:
-                    mflinkrevs.setdefault(changes[0], []).append(i)
-                    self.refersmf = True
-                for f in changes[3]:
-                    if match(f):
-                        filelinkrevs.setdefault(_normpath(f), []).append(i)
-            except Exception as inst:
-                self.refersmf = True
-                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
+        with cl.reading():
+            for i in repo:
+                progress.update(i)
+                n = cl.node(i)
+                self._checkentry(cl, i, n, seen, [i], b"changelog")
+
+                try:
+                    changes = cl.read(n)
+                    if changes[0] != self.repo.nullid:
+                        mflinkrevs.setdefault(changes[0], []).append(i)
+                        self.refersmf = True
+                    for f in changes[3]:
+                        if match(f):
+                            filelinkrevs.setdefault(_normpath(f), []).append(i)
+                except Exception as inst:
+                    self.refersmf = True
+                    self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
         progress.complete()
         return mflinkrevs, filelinkrevs

     def _verifymanifest(
         self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
     ):
         """verify the manifestlog content

         Inputs:
         - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
         - dir: a subdirectory to check (for tree manifest repo)
         - storefiles: set of currently "orphan" files.
         - subdirprogress: a progress object

         This function checks:
         * all of `_checkrevlog` checks (for all manifest related revlogs)
         * all of `_checkentry` checks (for all manifest related revisions)
         * nodes for subdirectories exist in the sub-directory manifest
         * each manifest entry has a file path
         * each manifest node referred to in mflinkrevs exists in the manifest log

         If tree manifest is in use and a matcher is specified, only the
         sub-directories matching it will be verified.

         return a two level mapping:
         {"path" -> { filenode -> changelog-revision}}

         This mapping primarily contains entries for every file in the
         repository. In addition, when tree-manifest is used, it also contains
         sub-directory entries.

         If a matcher is provided, only matching paths will be included.
         """
         repo = self.repo
         ui = self.ui
         match = self.match
         mfl = self.repo.manifestlog
         mf = mfl.getstorage(dir)

         if not dir:
             self.ui.status(_(b"checking manifests\n"))

         filenodes = {}
         subdirnodes = {}
         seen = {}
         label = b"manifest"
         if dir:
             label = dir
             revlogfiles = mf.files()
             storefiles.difference_update(revlogfiles)
             if subdirprogress:  # should be true since we're in a subdirectory
                 subdirprogress.increment()
         if self.refersmf:
             # Do not check manifest if there are only changelog entries with
             # null manifests.
             self._checkrevlog(mf._revlog, label, 0)
         progress = ui.makeprogress(
             _(b'checking'), unit=_(b'manifests'), total=len(mf)
         )
         for i in mf:
             if not dir:
                 progress.update(i)
             n = mf.node(i)
             lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
             if n in mflinkrevs:
                 del mflinkrevs[n]
             elif dir:
                 msg = _(b"%s not in parent-directory manifest") % short(n)
                 self._err(lr, msg, label)
             else:
                 self._err(lr, _(b"%s not in changesets") % short(n), label)

             try:
                 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                 for f, fn, fl in mfdelta.iterentries():
                     if not f:
                         self._err(lr, _(b"entry without name in manifest"))
                     elif f == b"/dev/null":  # ignore this in very old repos
                         continue
                     fullpath = dir + _normpath(f)
                     if fl == b't':
                         if not match.visitdir(fullpath):
                             continue
                         sdn = subdirnodes.setdefault(fullpath + b'/', {})
                         sdn.setdefault(fn, []).append(lr)
                     else:
                         if not match(fullpath):
                             continue
                         filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
             except Exception as inst:
                 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
             if self._level >= VERIFY_FULL:
                 try:
                     # Various issues can affect manifests. So we read each full
                     # text from storage. This triggers the checks from the core
                     # code (eg: hash verification, filenames are ordered, etc.)
                     mfdelta = mfl.get(dir, n).read()
                 except Exception as inst:
                     msg = _(b"reading full manifest %s") % short(n)
                     self._exc(lr, msg, inst, label)

         if not dir:
             progress.complete()

         if self.havemf:
             # since we delete entries in `mflinkrevs` during iteration, any
             # remaining entries are "missing". We need to issue errors for them.
             changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
             for c, m in sorted(changesetpairs):
                 if dir:
                     self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
                 else:
                     msg = _(b"changeset refers to unknown revision %s")
                     msg %= short(m)
                     self._err(c, msg, label)

         if not dir and subdirnodes:
             self.ui.status(_(b"checking directory manifests\n"))
             storefiles = set()
             subdirs = set()
             revlogv1 = self.revlogv1
             undecodable = []
             for entry in repo.store.data_entries(undecodable=undecodable):
                 for file_ in entry.files():
                     f = file_.unencoded_path
                     size = file_.file_size(repo.store.vfs)
                     if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                         storefiles.add(_normpath(f))
                         subdirs.add(os.path.dirname(f))
             for f in undecodable:
                 self._err(None, _(b"cannot decode filename '%s'") % f)
             subdirprogress = ui.makeprogress(
                 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
             )

         for subdir, linkrevs in subdirnodes.items():
             subdirfilenodes = self._verifymanifest(
                 linkrevs, subdir, storefiles, subdirprogress
             )
             for f, onefilenodes in subdirfilenodes.items():
                 filenodes.setdefault(f, {}).update(onefilenodes)

         if not dir and subdirnodes:
             assert subdirprogress is not None  # help pytype
             subdirprogress.complete()
             if self.warnorphanstorefiles:
                 for f in sorted(storefiles):
                     self._warn(_(b"warning: orphan data file '%s'") % f)

         return filenodes

     def _crosscheckfiles(self, filelinkrevs, filenodes):
         repo = self.repo
         ui = self.ui
         ui.status(_(b"crosschecking files in changesets and manifests\n"))

         total = len(filelinkrevs) + len(filenodes)
         progress = ui.makeprogress(
             _(b'crosschecking'), unit=_(b'files'), total=total
         )
         if self.havemf:
             for f in sorted(filelinkrevs):
                 progress.increment()
                 if f not in filenodes:
                     lr = filelinkrevs[f][0]
                     self._err(lr, _(b"in changeset but not in manifest"), f)

         if self.havecl:
             for f in sorted(filenodes):
                 progress.increment()
                 if f not in filelinkrevs:
                     try:
                         fl = repo.file(f)
                         lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                     except Exception:
                         lr = None
                     self._err(lr, _(b"in manifest but not in changeset"), f)

         progress.complete()

     def _verifyfiles(self, filenodes, filelinkrevs):
         repo = self.repo
         ui = self.ui
         lrugetctx = self.lrugetctx
         revlogv1 = self.revlogv1
         havemf = self.havemf
         ui.status(_(b"checking files\n"))

         storefiles = set()
         undecodable = []
         for entry in repo.store.data_entries(undecodable=undecodable):
             for file_ in entry.files():
                 size = file_.file_size(repo.store.vfs)
                 f = file_.unencoded_path
                 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
                     storefiles.add(_normpath(f))
         for f in undecodable:
             self._err(None, _(b"cannot decode filename '%s'") % f)

         state = {
             # TODO this assumes revlog storage for changelog.
             b'expectedversion': self.repo.changelog._format_version,
             b'skipflags': self.skipflags,
             # experimental config: censor.policy
             b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
         }

         files = sorted(set(filenodes) | set(filelinkrevs))
         revisions = 0
         progress = ui.makeprogress(
             _(b'checking'), unit=_(b'files'), total=len(files)
         )
         for i, f in enumerate(files):
             progress.update(i, item=f)
             try:
                 linkrevs = filelinkrevs[f]
             except KeyError:
                 # in manifest but not in changelog
                 linkrevs = []

             if linkrevs:
                 lr = linkrevs[0]
             else:
                 lr = None

             try:
                 fl = repo.file(f)
             except error.StorageError as e:
                 self._err(lr, _(b"broken revlog! (%s)") % e, f)
                 continue

             for ff in fl.files():
                 try:
                     storefiles.remove(ff)
                 except KeyError:
                     if self.warnorphanstorefiles:
                         msg = _(b" warning: revlog '%s' not in fncache!")
                         self._warn(msg % ff)
                         self.fncachewarned = True

             if not len(fl) and (self.havecl or self.havemf):
                 self._err(lr, _(b"empty or missing %s") % f)
             else:
                 # Guard against implementations not setting this.
                 state[b'skipread'] = set()
                 state[b'safe_renamed'] = set()

                 for problem in fl.verifyintegrity(state):
                     if problem.node is not None:
                         linkrev = fl.linkrev(fl.rev(problem.node))
                     else:
                         linkrev = None

                     if problem.warning:
                         self._warn(problem.warning)
                     elif problem.error:
                         linkrev_msg = linkrev if linkrev is not None else lr
                         self._err(linkrev_msg, problem.error, f)
                     else:
                         raise error.ProgrammingError(
                             b'problem instance does not set warning or error '
                             b'attribute: %s' % problem.msg
                         )

             seen = {}
             for i in fl:
                 revisions += 1
                 n = fl.node(i)
                 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                 if f in filenodes:
                     if havemf and n not in filenodes[f]:
                         self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                     else:
                         del filenodes[f][n]

                 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                     continue

                 # check renames
                 try:
                     # This requires resolving fulltext (at least on revlogs,
                     # though not with LFS revisions). We may want
                     # ``verifyintegrity()`` to pass a set of nodes with
                     # rename metadata as an optimization.
                     rp = fl.renamed(n)
                     if rp:
                         if lr is not None and ui.verbose:
                             ctx = lrugetctx(lr)
                             if not any(rp[0] in pctx for pctx in ctx.parents()):
                                 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
                         fl2 = repo.file(rp[0])
                         if not len(fl2):
                             m = _(b"empty or missing copy source revlog %s:%s")
                             self._err(lr, m % (rp[0], short(rp[1])), f)
                         elif rp[1] == self.repo.nullid:
                             msg = WARN_NULLID_COPY_SOURCE
                             msg %= (f, lr, rp[0], short(rp[1]))
                             ui.note(msg)
                         else:
                             fl2.rev(rp[1])
                 except Exception as inst:
                     self._exc(
                         lr, _(b"checking rename of %s") % short(n), inst, f
                     )

             # cross-check
             if f in filenodes:
                 fns = [(v, k) for k, v in filenodes[f].items()]
                 for lr, node in sorted(fns):
                     msg = _(b"manifest refers to unknown revision %s")
                     self._err(lr, msg % short(node), f)
         progress.complete()

         if self.warnorphanstorefiles:
             for f in sorted(storefiles):
                 self._warn(_(b"warning: orphan data file '%s'") % f)

         return len(files), revisions

     def _verify_dirstate(self):
         """Check that the dirstate is consistent with the parent's manifest"""
         repo = self.repo
         ui = self.ui
         ui.status(_(b"checking dirstate\n"))

         parent1, parent2 = repo.dirstate.parents()
         m1 = repo[parent1].manifest()
         m2 = repo[parent2].manifest()
         dirstate_errors = 0

         is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
         narrow_matcher = repo.narrowmatch() if is_narrow else None

         for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
             ui.error(err)
             dirstate_errors += 1

         if dirstate_errors:
             self.errors += dirstate_errors
         return dirstate_errors