changing-files: add clean computation of changed files for roots...
marmoute
r46258:f6811e5b default
@@ -1,650 +1,665 @@
# metadata.py -- code related to various metadata computation and access.
#
# Copyright 2019 Google, Inc <martinvonz@google.com>
# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function

import multiprocessing
import struct

from . import (
    error,
    node,
    pycompat,
    util,
)

from .revlogutils import (
    flagutil as sidedataflag,
    sidedata as sidedatamod,
)


class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into 5 sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched: files otherwise affected by the changeset

    and copy information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the
        changeset's parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor.)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified file that needed merging because it was modified on both
        sides.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        for f in filenames:
            self.mark_merged(f)

    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing
        "new" content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively"
           removed by this changeset.

        b) If a file exists in only one parent and in none of the common
           ancestors, then the file was newly added in one of the merged
           branches and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
           ancestors using the same filenode, then the file was unchanged on
           one side and deleted on the other side. The merge "passively"
           propagated that deletion, but didn't "actively" remove the file.
           In this case the file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
           ancestors using a different filenode, then the file was changed on
           one side and removed on the other side. The merge process
           "actively" decided to drop the new change and delete the file.
           Unlike in the previous case, (c), the file is included in the
           `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
        (a)  | both              | *             || yes
        (b)  | one               | none          || yes
        (c)  | one               | same filenode || no
        (d)  | one               | new filenode  || yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exist

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the
        file still exists, its removal was "reverted" and the file is
        "salvaged".
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)


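# --- editor's illustration (not part of the changeset) ----------------------
# A minimal sketch of how the ChangingFiles API above fits together; the
# file names are hypothetical, and the assertions rely only on behaviour
# defined in this class (every mark_* also marks the file as touched, and
# the properties are frozen snapshots).
def _demo_changingfiles():
    mc = ChangingFiles()
    mc.mark_added(b'new.txt')
    mc.mark_removed(b'gone.txt')
    mc.mark_copied_from_p1(b'src.txt', b'new.txt')
    assert mc.added == {b'new.txt'}
    assert mc.removed == {b'gone.txt'}
    # every actively added/removed/merged file is also "touched"
    assert mc.touched == {b'new.txt', b'gone.txt'}
    assert mc.copied_from_p1 == {b'new.txt': b'src.txt'}
# -----------------------------------------------------------------------------

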
def compute_all_files_changes(ctx):
    """compute the files changed by a revision"""
+    p1 = ctx.p1()
+    p2 = ctx.p2()
+    if p1.rev() == node.nullrev and p2.rev() == node.nullrev:
+        return _process_root(ctx)
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    filesmerged = computechangesetfilesmerged(ctx)
    files = ChangingFiles()
    files.update_touched(ctx.files())
    files.update_added(filesadded)
    files.update_removed(filesremoved)
    files.update_merged(filesmerged)
    files.update_copies_from_p1(filescopies[0])
    files.update_copies_from_p2(filescopies[1])
    return files


+def _process_root(ctx):
+    """compute the appropriate changed files for a changeset with no parents
+    """
+    # Simple, there was nothing before it, so everything is added.
+    md = ChangingFiles()
+    manifest = ctx.manifest()
+    for filename in manifest:
+        md.mark_added(filename)
+    return md
+
+
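# --- editor's illustration (not part of the changeset) ----------------------
# What the new root handling above guarantees: for a parentless changeset,
# every file in the manifest ends up in `added` (and therefore in `touched`)
# while every other set stays empty. The ctx stand-in is hypothetical, since
# building a real repository here would be out of scope.
def _demo_process_root():
    class _rootctx(object):  # minimal stand-in: only what _process_root uses
        def manifest(self):
            return [b'a.txt', b'b.txt']

    md = _process_root(_rootctx())
    assert md.added == md.touched == {b'a.txt', b'b.txt'}
    assert not (md.removed | md.merged | md.salvaged)
# -----------------------------------------------------------------------------

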
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added


def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this function
    determines whether the absence is due to a deletion from a parent, or
    whether the merge commit itself deletes the file. We decide this by
    doing a simplified three way merge of the manifest entry for the file.
    There are two ways we decide the merge itself didn't delete a file:
    - neither parent (nor the merge) contains the file
    - exactly one parent contains the file, and that parent has the same
      filelog entry as the merge ancestor (or all of them if there are two).
      In other words, that parent left the file unchanged while the other
      one deleted it.
    One way to think about this is that deleting a file is similar to
    emptying it, so the list of changed files should be similar either way.
    The computation described above is not done directly in _filecommit
    when creating the list of changed files, however it does something very
    similar by comparing filelog nodes.
    """

    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            return True

    return deletionfromparent


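# --- editor's illustration (not part of the changeset) ----------------------
# The decision rule of deletionfromparent() restated over plain dicts that
# map filename to filenode, covering case (c) of the `removed` docstring
# above: the file is unchanged on one side and deleted on the other, so the
# deletion is only "passively" propagated. The manifests are hypothetical.
def _demo_deletion_from_parent():
    m1 = {b'kept': b'node1'}           # p1 still has the file, unchanged
    m2 = {}                            # p2 deleted it
    ancestors = [{b'kept': b'node1'}]  # same filenode in the ancestor
    f = b'kept'
    if f in m1:
        passive = f not in m2 and all(
            f in ma and ma[f] == m1[f] for ma in ancestors
        )
    elif f in m2:
        passive = all(f in ma and ma[f] == m2[f] for ma in ancestors)
    else:
        passive = True
    assert passive  # i.e. the file is filtered out of the `removed` set
# -----------------------------------------------------------------------------

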
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    if removed:
        rf = get_removal_filter(ctx)
        removed = [r for r in removed if not rf(r)]
    return removed


def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset
    """
    merged = []
    if len(ctx.parents()) < 2:
        return merged
    for f in ctx.files():
        if f in ctx:
            fctx = ctx[f]
            parents = fctx._filelog.parents(fctx._filenode)
            if parents[1] != node.nullid:
                merged.append(f)
    return merged


def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies,
    p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None


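# --- editor's illustration (not part of the changeset) ----------------------
# Round-trip of the index-based copy encoding above, on hypothetical file
# names: each entry stores the index of the destination in the file list,
# NUL-separated from the source name.
def _demo_copies_roundtrip():
    files = [b'a.txt', b'b.txt']
    copies = {b'b.txt': b'a.txt'}  # b.txt was copied from a.txt
    data = encodecopies(files, copies)
    assert data == b'1\x00a.txt'
    assert decodecopies(files, data) == copies
# -----------------------------------------------------------------------------

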
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None


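# --- editor's illustration (not part of the changeset) ----------------------
# Round-trip of the index encoding above: a subset of files is stored as
# newline-separated indices into the full file list; decoding returns the
# subset in file-list order.
def _demo_fileindices_roundtrip():
    files = [b'a', b'b', b'c']
    data = encodefileindices(files, [b'c', b'a'])
    assert data == b'0\n2'
    assert decodefileindices(files, data) == [b'a', b'c']
# -----------------------------------------------------------------------------

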
# see mercurial/helptext/internals/revlogs.txt for details about the format

ACTION_MASK = int("111" "00", 2)
# note: an untouched file used as a copy source will appear as `000` for
# this mask.
ADDED_FLAG = int("001" "00", 2)
MERGED_FLAG = int("010" "00", 2)
REMOVED_FLAG = int("011" "00", 2)
# `100` is reserved for future use
TOUCHED_FLAG = int("101" "00", 2)

COPIED_MASK = int("11", 2)
COPIED_FROM_P1_FLAG = int("10", 2)
COPIED_FROM_P2_FLAG = int("11", 2)

# structure is <flag><filename-end><copy-source>
INDEX_HEADER = struct.Struct(">L")
INDEX_ENTRY = struct.Struct(">bLL")


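# --- editor's illustration (not part of the changeset) ----------------------
# How the two bit fields above combine in the single flag byte of an index
# entry: bits 2-4 carry the action, bits 0-1 the copy source, and each can
# be read back independently through its mask.
def _demo_flag_layout():
    flag = ADDED_FLAG | COPIED_FROM_P2_FLAG  # 0b00111
    assert flag & ACTION_MASK == ADDED_FLAG
    assert flag & COPIED_MASK == COPIED_FROM_P2_FLAG
# -----------------------------------------------------------------------------

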
def encode_files_sidedata(files):
    all_files = set(files.touched - files.salvaged)
    all_files.update(files.copied_from_p1.values())
    all_files.update(files.copied_from_p2.values())
    all_files = sorted(all_files)
    file_idx = {f: i for (i, f) in enumerate(all_files)}
    file_idx[None] = 0

    chunks = [INDEX_HEADER.pack(len(all_files))]

    filename_length = 0
    for f in all_files:
        filename_size = len(f)
        filename_length += filename_size
        flag = 0
        if f in files.added:
            flag |= ADDED_FLAG
        elif f in files.merged:
            flag |= MERGED_FLAG
        elif f in files.removed:
            flag |= REMOVED_FLAG
        elif f in files.touched:
            flag |= TOUCHED_FLAG

        copy = None
        if f in files.copied_from_p1:
            flag |= COPIED_FROM_P1_FLAG
            copy = files.copied_from_p1.get(f)
        elif f in files.copied_from_p2:
            copy = files.copied_from_p2.get(f)
            flag |= COPIED_FROM_P2_FLAG
        copy_idx = file_idx[copy]
        chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
    chunks.extend(all_files)
    return {sidedatamod.SD_FILES: b''.join(chunks)}


def decode_files_sidedata(sidedata):
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)

    if raw is None:
        return md

    copies = []
    all_files = []

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    offset = INDEX_HEADER.size
    file_offset_base = offset + (INDEX_ENTRY.size * total_files)
    file_offset_last = file_offset_base

    assert len(raw) >= file_offset_base

    for idx in range(total_files):
        flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
        file_end += file_offset_base
        filename = raw[file_offset_last:file_end]
        filesize = file_end - file_offset_last
        assert len(filename) == filesize
        offset += INDEX_ENTRY.size
        file_offset_last = file_end
        all_files.append(filename)
        if flag & ACTION_MASK == ADDED_FLAG:
            md.mark_added(filename)
        elif flag & ACTION_MASK == MERGED_FLAG:
            md.mark_merged(filename)
        elif flag & ACTION_MASK == REMOVED_FLAG:
            md.mark_removed(filename)
        elif flag & ACTION_MASK == TOUCHED_FLAG:
            md.mark_touched(filename)

        copied = None
        if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
            copied = md.mark_copied_from_p1
        elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
            copied = md.mark_copied_from_p2

        if copied is not None:
            copies.append((copied, filename, copy_idx))

    for copied, filename, copy_idx in copies:
        copied(all_files[copy_idx], filename)

    return md


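# --- editor's illustration (not part of the changeset) ----------------------
# Round-trip through the sidedata encoding above, using a hypothetical
# changeset that adds new.txt (copied from old.txt in p1) and removes
# old.txt; ChangingFiles.__eq__ makes the check direct.
def _demo_sidedata_roundtrip():
    files = ChangingFiles(
        added={b'new.txt'},
        removed={b'old.txt'},
        p1_copies={b'new.txt': b'old.txt'},
    )
    assert decode_files_sidedata(encode_files_sidedata(files)) == files
# -----------------------------------------------------------------------------

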
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    files = compute_all_files_changes(ctx)
    return encode_files_sidedata(files)


def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)


def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by workers precomputing sidedata

    It reads an input queue containing revision numbers.
    It writes to an output queue containing (rev, <sidedata-map>) pairs.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. Each worker needs to acquire one token before fetching a task.
    They will be released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


BUFF_PER_WORKER = 50


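# --- editor's illustration (not part of the changeset) ----------------------
# The back-pressure protocol used by _sidedata_worker above, shrunk to a
# single worker and a trivial payload; threads stand in for processes so the
# sketch stays self-contained.
def _demo_token_protocol():
    import queue
    import threading

    tokens = threading.BoundedSemaphore(2)  # at most 2 unconsumed results
    revs, results = queue.Queue(), queue.Queue()

    def worker():
        tokens.acquire()
        rev = revs.get()
        while rev is not None:
            results.put((rev, rev * 2))  # stand-in for _getsidedata()
            tokens.acquire()
            rev = revs.get()
        tokens.release()  # processing of `None` is completed

    for r in (0, 1, 2, None):  # tasks first, then the stop signal
        revs.put(r)
    t = threading.Thread(target=worker)
    t.start()
    for _ in range(3):
        rev, data = results.get()
        tokens.release()  # the consumer frees one token per result
    t.join()
# -----------------------------------------------------------------------------

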
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need it"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand; revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the
    # overhead of using a single queue is not relevant. In practice, most
    # computations are fast but some are very expensive and dominate all the
    # other smaller costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedata map for 42 and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Was the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion


def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes the sidedata in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion