upstream/mercurial-mirror Commit - r48265:5045ba2a

1

# censor code related to censoring revision

1

# censor code related to censoring revision

2

# coding: utf8

2

# coding: utf8

3

#

3

#

4

5

6

#

6

#

7

# This software may be used and distributed according to the terms of the

7

# This software may be used and distributed according to the terms of the

8

# GNU General Public License version 2 or any later version.

8

# GNU General Public License version 2 or any later version.

9

10

import contextlib

10

import contextlib

11

import os

11

import os

12

13

from ..node import (

13

from ..node import (

14

nullrev,

14

nullrev,

15

)

15

)

16

from .constants import (

16

from .constants import (

17

COMP_MODE_PLAIN,

17

COMP_MODE_PLAIN,

18

ENTRY_DATA_COMPRESSED_LENGTH,

18

ENTRY_DATA_COMPRESSED_LENGTH,

19

ENTRY_DATA_COMPRESSION_MODE,

19

ENTRY_DATA_COMPRESSION_MODE,

20

ENTRY_DATA_OFFSET,

20

ENTRY_DATA_OFFSET,

21

ENTRY_DATA_UNCOMPRESSED_LENGTH,

21

ENTRY_DATA_UNCOMPRESSED_LENGTH,

22

ENTRY_DELTA_BASE,

22

ENTRY_DELTA_BASE,

23

ENTRY_LINK_REV,

23

ENTRY_LINK_REV,

24

ENTRY_NODE_ID,

24

ENTRY_NODE_ID,

25

ENTRY_PARENT_1,

25

ENTRY_PARENT_1,

26

ENTRY_PARENT_2,

26

ENTRY_PARENT_2,

27

ENTRY_SIDEDATA_COMPRESSED_LENGTH,

27

ENTRY_SIDEDATA_COMPRESSED_LENGTH,

28

ENTRY_SIDEDATA_COMPRESSION_MODE,

28

ENTRY_SIDEDATA_COMPRESSION_MODE,

29

ENTRY_SIDEDATA_OFFSET,

29

ENTRY_SIDEDATA_OFFSET,

30

REVLOGV0,

30

REVLOGV0,

31

REVLOGV1,

31

REVLOGV1,

32

)

32

)

33

from ..i18n import _

33

from ..i18n import _

34

35

from .. import (

35

from .. import (

36

error,

36

error,

37

pycompat,

37

pycompat,

38

revlogutils,

38

revlogutils,

39

util,

39

util,

40

)

40

)

41

from ..utils import (

41

from ..utils import (

42

storageutil,

42

storageutil,

43

)

43

)

44

from . import (

44

from . import (

45

constants,

45

constants,

46

deltas,

46

deltas,

47

)

47

)

48

49

50

def v1_censor(rl, tr, censornode, tombstone=b''):
    """Censor the revision `censornode` in a "version 1" revlog.

    The revlog is not edited in place. Every revision of `rl` is re-added to
    a temporary revlog (same opener/target/radix, postfix `tmpcensored`),
    with the content of the censored revision replaced by a tombstone packed
    with the `censored` metadata key. The original index file (and data file
    when the revlog is not inline) is then registered as a backup on `tr`
    and overwritten by renaming the temporary files onto it. Finally the
    caches of `rl` are cleared and its index is reloaded.

    Raises `error.Abort` if the tombstone ends up stored as a delta, or if a
    previously censored revision is stored as a delta.
    """
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # imported locally to avoid an import cycle
    from .. import revlog

    censorrev = rl.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting a revlog in place is hard, so the strategy is: build a new
    # revlog, copy all revisions into it, then swap the files.
    #
    # This is a bit dangerous: state could easily end up mismatched.
    newrl = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    # the temporary revlog must use the same on-disk format as the source
    newrl._format_version = rl._format_version
    newrl._format_flags = rl._format_flags
    newrl._generaldelta = rl._generaldelta
    newrl._parse_index = rl._parse_index

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev != censorrev:
            # regular revision: copy its raw content over unchanged
            if not rl.iscensored(rev):
                rawtext = rl.rawdata(rev)
            elif rl.deltaparent(rev) == nullrev:
                # already censored revision: take the stored chunk as is
                rawtext = rl._chunk(rev)
            else:
                msg = _(
                    b'cannot censor due to censored '
                    b'revision having delta stored'
                )
                raise error.Abort(msg)

            newrl.addrawrevision(
                rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
            )
            continue

        # the revision being censored: store the tombstone in its place
        newrl.addrawrevision(
            tombstone,
            tr,
            rl.linkrev(censorrev),
            p1,
            p2,
            censornode,
            constants.REVIDX_ISCENSORED,
        )

        if newrl.deltaparent(rev) != nullrev:
            msg = _(b'censored revision stored as delta; cannot censor')
            hint = _(
                b'censoring of revlogs is not fully implemented;'
                b' please report this bug'
            )
            raise error.Abort(msg, hint=hint)

    # back up the original files before they get overwritten
    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    # move the rewritten files over the original ones
    rl.opener.rename(newrl._indexfile, rl._indexfile)
    if not rl._inline:
        rl.opener.rename(newrl._datafile, rl._datafile)

    # drop everything cached from the old files and reload
    rl.clearcaches()
    rl._loadindex()

126

127

128

def v2_censor(revlog, tr, censornode, tombstone=b''):
    """Censor the revision `censornode` in a "version 2" revlog.

    The node is resolved to its revision number and the actual work is
    delegated to `_rewrite_v2` with a one-element set of revisions.
    """
    # only formats other than v0 and v1 are handled here
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    target_rev = revlog.rev(censornode)
    _rewrite_v2(revlog, tr, {target_rev}, tombstone)

135

# We need to recompute new delta for any revision that used the censored

135

136

# revision as delta base. As the cumulative size of the new delta may be

136

137

# large, we store them in a temporary file until they are stored in their

137

def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
    """Rewrite a revlog so that the revisions in `censor_revs` are censored.

    General principle

    New revlog files (index/data/sidedata) are created and the existing
    content is copied into them, minus the censored data.

    Any revision that used a censored revision as its delta base needs a
    freshly computed delta. As the cumulative size of these new deltas may
    be large, they are kept in a temporary file until they are written to
    their final destination.

    Everything stored before the first censored revision can be copied
    blindly. The rest is copied revision by revision and the associated
    index entries are adjusted along the way.

    `tombstone` is packed with the `censored` metadata key before being
    written in place of each censored revision. The docket is written once
    all revisions have been processed.
    """
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    # keep a handle on the current index and docket: the revlog index is
    # reloaded by `_setup_new_files` below
    old_index = revlog.index
    docket = revlog._docket

    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # everything strictly before this revision is copied verbatim
    first_excl_rev = min(censor_revs)

    first_excl_entry = revlog.index[first_excl_rev]
    index_cutoff = revlog.index.entry_size * first_excl_rev
    # the offset field carries the flags in its low 16 bits (masked with
    # 0xFFFF elsewhere in this file); shift them out to get the byte offset
    data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        # rev -> (new_base, data_start, data_end, compression_mode)
        rewritten_entries = _precompute_rewritten_delta(
            revlog,
            old_index,
            censor_revs,
            tmp_storage,
        )

        all_files = _setup_new_files(
            revlog,
            index_cutoff,
            data_cutoff,
            sidedata_cutoff,
        )

        # The old index file does not need to be opened: its content is
        # already available in a usable form through `old_index`.
        with all_files() as open_files:
            (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            ) = open_files

            # write the censored revision(s) and every revision after them
            for rev in range(first_excl_rev, len(old_index)):
                if rev not in censor_revs:
                    _rewrite_simple(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        rewritten_entries,
                        tmp_storage,
                    )
                else:
                    _rewrite_censor(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        tombstone,
                    )
    docket.write(transaction=None, stripping=True)

208

217

209

218

210

def _precompute_rewritten_delta(
    revlog,
    old_index,
    excluded_revs,
    tmp_storage,
):
    """Compute replacement deltas for revisions based on an excluded revision.

    Walks every revision from the lowest excluded one up to the end of
    `old_index`. A revision whose delta base is in `excluded_revs` gets a
    new delta, computed with the excluded revisions forbidden as bases, and
    the delta payload is appended to `tmp_storage`.

    Return a mapping: {rev -> (new_base, data_start, data_end,
    compression_mode)} where `data_start`/`data_end` are offsets into
    `tmp_storage`.
    """
    delta_computer = deltas.deltacomputer(revlog)
    rewritten_entries = {}
    lowest_excluded = min(excluded_revs)
    with revlog._segmentfile._open_read() as dfh:
        for rev in range(lowest_excluded, len(old_index)):
            # Excluded revisions are rewritten separately by the caller, so
            # no delta has to be recomputed for them.
            if rev in excluded_revs:
                continue
            entry = old_index[rev]
            # Revisions whose delta base survives keep their stored delta.
            if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                continue

            # From here on: the revision uses an excluded revision as the
            # base of its delta, so a new delta is needed.
            if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                # empty revision: it can simply delta against nullrev
                rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                continue

            text = revlog.rawdata(rev, _df=dfh)
            info = revlogutils.revisioninfo(
                node=entry[ENTRY_NODE_ID],
                p1=revlog.node(entry[ENTRY_PARENT_1]),
                p2=revlog.node(entry[ENTRY_PARENT_2]),
                btext=[text],
                textlen=len(text),
                cachedelta=None,
                flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
            )
            delta = delta_computer.finddeltainfo(
                info, dfh, excluded_bases=excluded_revs, target_rev=rev
            )
            default_comp = revlog._docket.default_compression_header
            comp_mode, delta = deltas.delta_compression(default_comp, delta)
            # relying on `tell` is a bit lazy, but speed is not the goal here
            start = tmp_storage.tell()
            tmp_storage.write(delta.data[1])
            end = tmp_storage.tell()
            rewritten_entries[rev] = (delta.base, start, end, comp_mode)
    return rewritten_entries

261

270

262

271

263

def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """Create the new index/data/sidedata files and return a way to open them.

    Each new file is seeded with the first `*_cutoff` bytes of the matching
    old file. The docket end markers are set to the cutoffs, the revlog
    caches are cleared and its index is reloaded from the docket.

    Return a context manager (to be called without argument) that opens all
    the relevant files and yields them as a tuple:
    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    The old index file is not part of it because the caller accesses its
    content through its own `old_index` object. The new files are yielded
    positioned at their end, which is asserted to match the cutoffs.
    """
    docket = revlog._docket
    join = revlog.opener.join

    # paths of the files currently referenced by the docket
    old_index_filepath = join(docket.index_filepath())
    old_data_filepath = join(docket.data_filepath())
    old_sidedata_filepath = join(docket.sidedata_filepath())

    # paths of the replacement files
    new_index_filepath = join(docket.new_index_file())
    new_data_filepath = join(docket.new_data_file())
    new_sidedata_filepath = join(docket.new_sidedata_file())

    # seed each new file with the part of the old one that is kept verbatim
    util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
    util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
    util.copyfile(
        old_sidedata_filepath,
        new_sidedata_filepath,
        nb_bytes=sidedata_cutoff,
    )
    revlog.opener.register_file(docket.index_filepath())
    revlog.opener.register_file(docket.data_filepath())
    revlog.opener.register_file(docket.sidedata_filepath())

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    revlog._loadindex(docket=docket)

    @contextlib.contextmanager
    def all_files_opener():
        # The opening is tucked away in this helper, with one `with` per
        # file, to keep check-code, black and the various python versions
        # happy at the same time.
        with open(old_data_filepath, 'rb') as old_data_file:
            with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
                with open(new_index_filepath, 'r+b') as new_index_file:
                    with open(new_data_filepath, 'r+b') as new_data_file:
                        with open(
                            new_sidedata_filepath, 'r+b'
                        ) as new_sidedata_file:
                            # position every new file at its end, ready
                            # for appending, and check the seeded size
                            new_index_file.seek(0, os.SEEK_END)
                            assert new_index_file.tell() == index_cutoff
                            new_data_file.seek(0, os.SEEK_END)
                            assert new_data_file.tell() == data_cutoff
                            new_sidedata_file.seek(0, os.SEEK_END)
                            assert new_sidedata_file.tell() == sidedata_cutoff
                            yield (
                                old_data_file,
                                old_sidedata_file,
                                new_index_file,
                                new_data_file,
                                new_sidedata_file,
                            )

    return all_files_opener

335

344

336

345

337

def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """Append a regular (non-censored) revision to the new revlog files.

    The revision data comes either from the old data file or, when `rev` has
    an entry in `rewritten_entries`, from the precomputed delta stored in
    `tmp_storage`. Sidedata, if any, is copied from the old sidedata file.
    A new index entry pointing at the new offsets is appended to the revlog
    index and written to the new index file, then the docket end markers
    are updated from the current position of the three new files.
    """
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]
    # the offset field packs the flags in its low 16 bits
    offset_and_flags = entry[ENTRY_DATA_OFFSET]
    flags = offset_and_flags & 0xFFFF
    old_data_offset = offset_and_flags >> 16

    if rev in rewritten_entries:
        # a replacement delta was computed: fetch it from temporary storage
        (
            data_delta_base,
            tmp_start,
            tmp_end,
            d_comp_mode,
        ) = rewritten_entries[rev]
        new_data_size = tmp_end - tmp_start
        tmp_storage.seek(tmp_start)
        new_data = tmp_storage.read(new_data_size)
    else:
        # the stored data is still valid: copy it from the old data file
        old_data_file.seek(old_data_offset)
        new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
        new_data = old_data_file.read(new_data_size)
        data_delta_base = entry[ENTRY_DELTA_BASE]
        d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]

    # Grouping contiguous read/write operations might be faster. However,
    # censoring is not an operation focused on stellar performance, so that
    # optimisation has not been written.
    new_data_offset = new_data_file.tell()
    new_data_file.write(new_data)

    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    new_sidedata_offset = new_sidedata_file.tell()
    if sidedata_size > 0:
        old_sidedata_file.seek(entry[ENTRY_SIDEDATA_OFFSET])
        new_sidedata_file.write(old_sidedata_file.read(sidedata_size))

    data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
    sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
    # a revision can only delta against itself or an earlier revision
    assert data_delta_base <= rev, (data_delta_base, rev)

    new_entry = revlogutils.entry(
        flags=flags,
        data_offset=new_data_offset,
        data_compressed_length=new_data_size,
        data_uncompressed_length=data_uncompressed_length,
        data_delta_base=data_delta_base,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=new_sidedata_offset,
        sidedata_compressed_length=sidedata_size,
        data_compression_mode=d_comp_mode,
        sidedata_compression_mode=sd_com_mode,
    )
    revlog.index.append(new_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    # record how far each new file has been written
    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
    docket.sidedata_end = new_sidedata_file.tell()

415

424

416

425

417

def _rewrite_censor(

426

def _rewrite_censor(

418

revlog,

427

revlog,

419

old_index,

428

old_index,

420

all_files,

429

all_files,

421

rev,

430

rev,

422

tombstone,

431

tombstone,

423

):

432

):

424

"""rewrite and append a censored revision"""

433

"""rewrite and append a censored revision"""

425

(

434

(

426

old_data_file,

435

old_data_file,

427

old_sidedata_file,

436

old_sidedata_file,

428

new_index_file,

437

new_index_file,

429

new_data_file,

438

new_data_file,

430

new_sidedata_file,

439

new_sidedata_file,

431

) = all_files

440

) = all_files

432

entry = old_index[rev]

441

entry = old_index[rev]

433

442

434

# XXX consider trying the default compression too

443

# XXX consider trying the default compression too

435

new_data_size = len(tombstone)

444

new_data_size = len(tombstone)

436

new_data_offset = new_data_file.tell()

445

new_data_offset = new_data_file.tell()

437

new_data_file.write(tombstone)

446

new_data_file.write(tombstone)

438

447

439

# we are not adding any sidedata as they might leak info about the censored version

448

# we are not adding any sidedata as they might leak info about the censored version

440

449

441

link_rev = entry[ENTRY_LINK_REV]

450

link_rev = entry[ENTRY_LINK_REV]

442

451

443

p1 = entry[ENTRY_PARENT_1]

452

p1 = entry[ENTRY_PARENT_1]

444

p2 = entry[ENTRY_PARENT_2]

453

p2 = entry[ENTRY_PARENT_2]

445

454

446

new_entry = revlogutils.entry(

455

new_entry = revlogutils.entry(

447

flags=constants.REVIDX_ISCENSORED,

456

flags=constants.REVIDX_ISCENSORED,

448

data_offset=new_data_offset,

457

data_offset=new_data_offset,

449

data_compressed_length=new_data_size,

458

data_compressed_length=new_data_size,

450

data_uncompressed_length=new_data_size,

459

data_uncompressed_length=new_data_size,

451

data_delta_base=rev,

460

data_delta_base=rev,

452

link_rev=link_rev,

461

link_rev=link_rev,

453

parent_rev_1=p1,

462

parent_rev_1=p1,

454

parent_rev_2=p2,

463

parent_rev_2=p2,

455

node_id=entry[ENTRY_NODE_ID],

464

node_id=entry[ENTRY_NODE_ID],

456

sidedata_offset=0,

465

sidedata_offset=0,

457

sidedata_compressed_length=0,

466

sidedata_compressed_length=0,

458

data_compression_mode=COMP_MODE_PLAIN,

467

data_compression_mode=COMP_MODE_PLAIN,

459

sidedata_compression_mode=COMP_MODE_PLAIN,

468

sidedata_compression_mode=COMP_MODE_PLAIN,

460

)

469

)

461

revlog.index.append(new_entry)

470

revlog.index.append(new_entry)

462

entry_bin = revlog.index.entry_binary(rev)

471

entry_bin = revlog.index.entry_binary(rev)

463

new_index_file.write(entry_bin)

472

new_index_file.write(entry_bin)

464

revlog._docket.index_end = new_index_file.tell()

473

revlog._docket.index_end = new_index_file.tell()

465

revlog._docket.data_end = new_data_file.tell()

474

revlog._docket.data_end = new_data_file.tell()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # censor code related to censoring revision
             # coding: utf8
             #
             # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
             # Copyright 2015 Google, Inc <martinvonz@google.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import contextlib
             import os
             from ..node import (
                 nullrev,
             )
             from .constants import (
                 COMP_MODE_PLAIN,
                 ENTRY_DATA_COMPRESSED_LENGTH,
                 ENTRY_DATA_COMPRESSION_MODE,
                 ENTRY_DATA_OFFSET,
                 ENTRY_DATA_UNCOMPRESSED_LENGTH,
                 ENTRY_DELTA_BASE,
                 ENTRY_LINK_REV,
                 ENTRY_NODE_ID,
                 ENTRY_PARENT_1,
                 ENTRY_PARENT_2,
                 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
                 ENTRY_SIDEDATA_COMPRESSION_MODE,
                 ENTRY_SIDEDATA_OFFSET,
                 REVLOGV0,
                 REVLOGV1,
             )
             from ..i18n import _
             from .. import (
                 error,
                 pycompat,
                 revlogutils,
                 util,
             )
             from ..utils import (
                 storageutil,
             )
             from . import (
                 constants,
                 deltas,
             )
             def v1_censor(rl, tr, censornode, tombstone=b''):
                 """censor `censornode` in a "version 1" revlog

                 Rewriting the revlog in place is hard, so every revision of `rl` is
                 copied into a temporary revlog (postfix `tmpcensored`) in which the
                 censored revision is replaced by a tombstone. The temporary files
                 are then renamed over the original ones and `rl` is reloaded.

                 This is a bit dangerous. We could easily have a mismatch of state.

                 Raises `error.Abort` when the tombstone ends up stored as a delta in
                 the new revlog, or when an already censored revision of `rl` is
                 stored as a delta.
                 """
                 assert rl._format_version == constants.REVLOGV1, rl._format_version

                 # avoid cycle
                 from .. import revlog

                 target_rev = rl.rev(censornode)
                 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

                 scratch = revlog.revlog(
                     rl.opener,
                     target=rl.target,
                     radix=rl.radix,
                     postfix=b'tmpcensored',
                     censorable=True,
                 )
                 # the temporary revlog must use the very same format as the source
                 scratch._format_version = rl._format_version
                 scratch._format_flags = rl._format_flags
                 scratch._generaldelta = rl._generaldelta
                 scratch._parse_index = rl._parse_index

                 for rev in rl.revs():
                     node = rl.node(rev)
                     p1, p2 = rl.parents(node)

                     if rev != target_rev:
                         # regular revision: copy its raw content over
                         if not rl.iscensored(rev):
                             rawtext = rl.rawdata(rev)
                         elif rl.deltaparent(rev) == nullrev:
                             # previously censored revision: reuse the stored chunk
                             rawtext = rl._chunk(rev)
                         else:
                             msg = _(
                                 b'cannot censor due to censored '
                                 b'revision having delta stored'
                             )
                             raise error.Abort(msg)
                         scratch.addrawrevision(
                             rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
                         )
                         continue

                     # the revision being censored: store the tombstone instead
                     scratch.addrawrevision(
                         tombstone,
                         tr,
                         rl.linkrev(target_rev),
                         p1,
                         p2,
                         censornode,
                         constants.REVIDX_ISCENSORED,
                     )
                     if scratch.deltaparent(rev) == nullrev:
                         continue
                     msg = _(b'censored revision stored as delta; cannot censor')
                     hint = _(
                         b'censoring of revlogs is not fully implemented;'
                         b' please report this bug'
                     )
                     raise error.Abort(msg, hint=hint)

                 # back up the current files, then move the rewritten ones in place
                 tr.addbackup(rl._indexfile, location=b'store')
                 if not rl._inline:
                     tr.addbackup(rl._datafile, location=b'store')

                 rl.opener.rename(scratch._indexfile, rl._indexfile)
                 if not rl._inline:
                     rl.opener.rename(scratch._datafile, rl._datafile)

                 rl.clearcaches()
                 rl._loadindex()
             def v2_censor(revlog, tr, censornode, tombstone=b''):
                 """censors a revision in a "version 2" revlog

                 This only resolves `censornode` to its revision number and delegates
                 the actual rewriting to `_rewrite_v2`.
                 """
                 assert revlog._format_version != REVLOGV0, revlog._format_version
                 assert revlog._format_version != REVLOGV1, revlog._format_version

                 censor_revs = {revlog.rev(censornode)}
                 _rewrite_v2(revlog, tr, censor_revs, tombstone)


             def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
                 """rewrite a revlog to censor some of its content

                 General principle

                 We create new revlog files (index/data/sidedata) to copy the content of
                 the existing data without the censored data.

                 We need to recompute new delta for any revision that used the censored
                 revision as delta base. As the cumulative size of the new delta may be
                 large, we store them in a temporary file until they are stored in their
                 final destination.

                 All data before the censored data can be blindly copied. The rest needs
                 to be copied as we go and the associated index entry needs adjustment.

                 `censor_revs` is the (non-empty) set of revision numbers to censor and
                 `tombstone` the message stored in place of their content. `tr` is
                 accepted but not used by this function.
                 """
                 assert revlog._format_version != REVLOGV0, revlog._format_version
                 assert revlog._format_version != REVLOGV1, revlog._format_version

                 old_index = revlog.index
                 docket = revlog._docket

                 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

                 # everything stored before the first censored revision is kept as is
                 first_excl_rev = min(censor_revs)

                 first_excl_entry = revlog.index[first_excl_rev]
                 index_cutoff = revlog.index.entry_size * first_excl_rev
                 # the offset field also carries the flags in its lower 16 bits
                 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
                 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

                 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
                     # rev → (new_base, data_start, data_end, compression_mode)
                     rewritten_entries = _precompute_rewritten_delta(
                         revlog,
                         old_index,
                         censor_revs,
                         tmp_storage,
                     )

                     all_files = _setup_new_files(
                         revlog,
                         index_cutoff,
                         data_cutoff,
                         sidedata_cutoff,
                     )

                     # we don't need to open the old index file since its content
                     # already exists in a usable form in `old_index`.
                     with all_files() as open_files:
                         # write the censored revision(s) and all subsequent revisions
                         for rev in range(first_excl_rev, len(old_index)):
                             if rev in censor_revs:
                                 _rewrite_censor(
                                     revlog,
                                     old_index,
                                     open_files,
                                     rev,
                                     tombstone,
                                 )
                             else:
                                 _rewrite_simple(
                                     revlog,
                                     old_index,
                                     open_files,
                                     rev,
                                     rewritten_entries,
                                     tmp_storage,
                                 )

                 docket.write(transaction=None, stripping=True)
             def _precompute_rewritten_delta(
                 revlog,
                 old_index,
                 excluded_revs,
                 tmp_storage,
             ):
                 """Compute new deltas for revisions whose delta base is a revision
                 that will not survive as is.

                 The new delta payloads are written to `tmp_storage`; only their
                 position in that file is recorded in the returned mapping.

                 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
                 """
                 delta_computer = deltas.deltacomputer(revlog)
                 recomputed = {}
                 start_rev = min(excluded_revs)
                 with revlog._segmentfile._open_read() as dfh:
                     for rev in range(start_rev, len(old_index)):
                         if rev in excluded_revs:
                             # no delta is recomputed for the excluded revisions
                             # themselves.
                             continue

                         entry = old_index[rev]
                         if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                             # the existing delta does not rely on an excluded revision
                             continue

                         # This revision uses an excluded revision as the base of its
                         # delta. We need a new delta.
                         if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                             # this revision is empty, we can delta against nullrev
                             recomputed[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                             continue

                         text = revlog.rawdata(rev, _df=dfh)
                         info = revlogutils.revisioninfo(
                             node=entry[ENTRY_NODE_ID],
                             p1=revlog.node(entry[ENTRY_PARENT_1]),
                             p2=revlog.node(entry[ENTRY_PARENT_2]),
                             btext=[text],
                             textlen=len(text),
                             cachedelta=None,
                             flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
                         )
                         delta = delta_computer.finddeltainfo(
                             info, dfh, excluded_bases=excluded_revs, target_rev=rev
                         )
                         default_comp = revlog._docket.default_compression_header
                         comp_mode, delta = deltas.delta_compression(default_comp, delta)

                         # using `tell` is a bit lazy, but we are not here for speed
                         chunk_start = tmp_storage.tell()
                         tmp_storage.write(delta.data[1])
                         chunk_end = tmp_storage.tell()
                         recomputed[rev] = (delta.base, chunk_start, chunk_end, comp_mode)
                 return recomputed
             def _setup_new_files(
                 revlog,
                 index_cutoff,
                 data_cutoff,
                 sidedata_cutoff,
             ):
                 """
                 prepare the new revlog files and return a context manager to open
                 all the relevant files:

                 - old_data_file,
                 - old_sidedata_file,
                 - new_index_file,
                 - new_data_file,
                 - new_sidedata_file,

                 The old_index_file is not here because it is accessed through the
                 `old_index` object of the caller function.

                 Before returning, this function creates the new index/data/sidedata
                 files from the first `index_cutoff`/`data_cutoff`/`sidedata_cutoff`
                 bytes of the old ones, updates the end markers of the docket to
                 these cutoffs and reloads the index of `revlog`.

                 The returned callable takes no argument; used in a `with` statement
                 it yields the five file objects in the order listed above, with the
                 three new files positioned at their end.
                 """
                 docket = revlog._docket
                 # the paths of the current files must be computed before asking the
                 # docket for new ones
                 old_index_filepath = revlog.opener.join(docket.index_filepath())
                 old_data_filepath = revlog.opener.join(docket.data_filepath())
                 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
                 # NOTE(review): `new_*_file()` presumably makes the docket point to
                 # freshly named files — confirm against the docket implementation
                 new_index_filepath = revlog.opener.join(docket.new_index_file())
                 new_data_filepath = revlog.opener.join(docket.new_data_file())
                 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
                 # seed the new files with the part of the old content that is kept
                 # unchanged (the size is asserted again when the files are opened)
                 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
                 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
                 util.copyfile(
                     old_sidedata_filepath,
                     new_sidedata_filepath,
                     nb_bytes=sidedata_cutoff,
                 )
                 # NOTE(review): at this point `*_filepath()` presumably returns the
                 # new file names, so these are the files being registered — confirm
                 revlog.opener.register_file(docket.index_filepath())
                 revlog.opener.register_file(docket.data_filepath())
                 revlog.opener.register_file(docket.sidedata_filepath())
                 docket.index_end = index_cutoff
                 docket.data_end = data_cutoff
                 docket.sidedata_end = sidedata_cutoff
                 # reload the revlog internal information
                 revlog.clearcaches()
                 revlog._loadindex(docket=docket)
                 @contextlib.contextmanager
                 def all_files_opener():
                     # hide opening in an helper function to please check-code, black
                     # and various python version at the same time
                     with open(old_data_filepath, 'rb') as old_data_file:
                         with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
                             with open(new_index_filepath, 'r+b') as new_index_file:
                                 with open(new_data_filepath, 'r+b') as new_data_file:
                                     with open(
                                         new_sidedata_filepath, 'r+b'
                                     ) as new_sidedata_file:
                                         # position each new file at its end, ready
                                         # for appending, and check it has exactly
                                         # the expected size
                                         new_index_file.seek(0, os.SEEK_END)
                                         assert new_index_file.tell() == index_cutoff
                                         new_data_file.seek(0, os.SEEK_END)
                                         assert new_data_file.tell() == data_cutoff
                                         new_sidedata_file.seek(0, os.SEEK_END)
                                         assert new_sidedata_file.tell() == sidedata_cutoff
                                         yield (
                                             old_data_file,
                                             old_sidedata_file,
                                             new_index_file,
                                             new_data_file,
                                             new_sidedata_file,
                                         )
                 return all_files_opener
             def _rewrite_simple(
                 revlog,
                 old_index,
                 all_files,
                 rev,
                 rewritten_entries,
                 tmp_storage,
             ):
                 """append a normal revision to the index after the rewritten one(s)

                 The data of `rev` comes from `tmp_storage` when a new delta was
                 precomputed for it (it is listed in `rewritten_entries`), and from
                 the old data file otherwise. Its sidedata, if any, is copied from
                 the old sidedata file. The docket end markers are updated to the
                 new position of each file.
                 """
                 (
                     old_data_file,
                     old_sidedata_file,
                     new_index_file,
                     new_data_file,
                     new_sidedata_file,
                 ) = all_files
                 entry = old_index[rev]
                 # flags live in the lower 16 bits, the data offset in the upper ones
                 offset_and_flags = entry[ENTRY_DATA_OFFSET]
                 flags = offset_and_flags & 0xFFFF
                 old_data_offset = offset_and_flags >> 16

                 if rev in rewritten_entries:
                     # a new delta was precomputed, fetch it from the temporary file
                     delta_base, chunk_start, chunk_end, data_comp_mode = rewritten_entries[
                         rev
                     ]
                     data_size = chunk_end - chunk_start
                     tmp_storage.seek(chunk_start)
                     payload = tmp_storage.read(data_size)
                 else:
                     # the stored chunk can be reused unchanged
                     old_data_file.seek(old_data_offset)
                     data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
                     payload = old_data_file.read(data_size)
                     delta_base = entry[ENTRY_DELTA_BASE]
                     data_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]

                 # It might be faster to group continuous read/write operation,
                 # however, this is censor, an operation that is not focussed
                 # around stellar performance. So I have not written this
                 # optimisation yet.
                 new_data_offset = new_data_file.tell()
                 new_data_file.write(payload)

                 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
                 new_sidedata_offset = new_sidedata_file.tell()
                 if sidedata_size > 0:
                     old_sidedata_file.seek(entry[ENTRY_SIDEDATA_OFFSET])
                     new_sidedata_file.write(old_sidedata_file.read(sidedata_size))

                 assert delta_base <= rev, (delta_base, rev)

                 revlog.index.append(
                     revlogutils.entry(
                         flags=flags,
                         data_offset=new_data_offset,
                         data_compressed_length=data_size,
                         data_uncompressed_length=entry[ENTRY_DATA_UNCOMPRESSED_LENGTH],
                         data_delta_base=delta_base,
                         link_rev=entry[ENTRY_LINK_REV],
                         parent_rev_1=entry[ENTRY_PARENT_1],
                         parent_rev_2=entry[ENTRY_PARENT_2],
                         node_id=entry[ENTRY_NODE_ID],
                         sidedata_offset=new_sidedata_offset,
                         sidedata_compressed_length=sidedata_size,
                         data_compression_mode=data_comp_mode,
                         sidedata_compression_mode=entry[ENTRY_SIDEDATA_COMPRESSION_MODE],
                     )
                 )
                 new_index_file.write(revlog.index.entry_binary(rev))

                 docket = revlog._docket
                 docket.index_end = new_index_file.tell()
                 docket.data_end = new_data_file.tell()
                 docket.sidedata_end = new_sidedata_file.tell()
             def _rewrite_censor(
                 revlog,
                 old_index,
                 all_files,
                 rev,
                 tombstone,
             ):
                 """rewrite and append a censored revision

                 The tombstone is written uncompressed to the new data file and a
                 new index entry flagged as censored is appended for `rev`, keeping
                 the link revision, parents and node id of the old entry. The index
                 and data end markers of the docket are updated accordingly.
                 """
                 (
                     _old_data_file,
                     _old_sidedata_file,
                     new_index_file,
                     new_data_file,
                     _new_sidedata_file,
                 ) = all_files
                 old_entry = old_index[rev]

                 # XXX consider trying the default compression too
                 tombstone_size = len(tombstone)
                 tombstone_offset = new_data_file.tell()
                 new_data_file.write(tombstone)

                 # we are not adding any sidedata as they might leak info about the
                 # censored version
                 censored_entry = revlogutils.entry(
                     flags=constants.REVIDX_ISCENSORED,
                     data_offset=tombstone_offset,
                     data_compressed_length=tombstone_size,
                     data_uncompressed_length=tombstone_size,
                     # the tombstone is stored in full: the entry is its own base
                     data_delta_base=rev,
                     link_rev=old_entry[ENTRY_LINK_REV],
                     parent_rev_1=old_entry[ENTRY_PARENT_1],
                     parent_rev_2=old_entry[ENTRY_PARENT_2],
                     node_id=old_entry[ENTRY_NODE_ID],
                     sidedata_offset=0,
                     sidedata_compressed_length=0,
                     data_compression_mode=COMP_MODE_PLAIN,
                     sidedata_compression_mode=COMP_MODE_PLAIN,
                 )
                 revlog.index.append(censored_entry)
                 new_index_file.write(revlog.index.entry_binary(rev))

                 docket = revlog._docket
                 docket.index_end = new_index_file.tell()
                 docket.data_end = new_data_file.tell()