salvaged: explicitly skip salvaged file while encoding...
marmoute
r46234:3bfa7c7f default draft
@@ -1,644 +1,644 @@
# metadata.py -- code related to various metadata computation and access.
#
# Copyright 2019 Google, Inc <martinvonz@google.com>
# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function

import multiprocessing
import struct

from . import (
    error,
    node,
    pycompat,
    util,
)

from .revlogutils import (
    flagutil as sidedataflag,
    sidedata as sidedatamod,
)


class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into 5 sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched: files affected by the changeset

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """
    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the
        changeset's parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)
    def mark_added(self, filename):
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified file which had modifications on both sides that needed
        merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        for f in filenames:
            self.mark_merged(f)
    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing
        "new" content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively"
        removed by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file *is* included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) |       both        |       *       ||   yes
         (b) |        one        |     none      ||   yes
         (c) |        one        | same filenode ||   no
         (d) |        one        | new filenode  ||   yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)
    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exist.

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the file
        still exists, its removal was "reverted" and the file is "salvaged".
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)

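# A minimal usage sketch (illustrative file names, not part of the module):
# the mark_added/mark_merged/mark_removed/mark_salvaged helpers also record
# the file as touched, so `touched` covers those four sets.
#
#   >>> cf = ChangingFiles()
#   >>> cf.mark_added(b'new.txt')
#   >>> cf.mark_salvaged(b'kept.txt')
#   >>> sorted(cf.touched)
#   [b'kept.txt', b'new.txt']
#   >>> cf.added
#   frozenset({b'new.txt'})
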
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset"""
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added

def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contains the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there are two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            return True

    return deletionfromparent

def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset"""
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    if removed:
        rf = get_removal_filter(ctx)
        removed = [r for r in removed if not rf(r)]
    return removed


def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset"""
    merged = []
    if len(ctx.parents()) < 2:
        return merged
    for f in ctx.files():
        if f in ctx:
            fctx = ctx[f]
            parents = fctx._filelog.parents(fctx._filenode)
            if parents[1] != node.nullid:
                merged.append(f)
    return merged

def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies

def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None

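# Round-trip sketch for the copy encoding above (illustrative values): each
# entry is "<index-into-files>\0<source-name>", joined with newlines.
#
#   >>> files = [b'a', b'b', b'c']
#   >>> data = encodecopies(files, {b'b': b'a'})
#   >>> data
#   b'1\x00a'
#   >>> decodecopies(files, data)
#   {b'b': b'a'}
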
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None

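# Same idea for file-index subsets (illustrative values): indices into the
# `files` list are stored as newline-separated decimal strings.
#
#   >>> files = [b'a', b'b', b'c']
#   >>> encodefileindices(files, [b'b', b'c'])
#   b'1\n2'
#   >>> decodefileindices(files, b'1\n2')
#   [b'b', b'c']
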
# see mercurial/helptext/internals/revlogs.txt for details about the format

ACTION_MASK = int("111" "00", 2)
# note: an untouched file used as a copy source will read as `000` for this
# mask.
ADDED_FLAG = int("001" "00", 2)
MERGED_FLAG = int("010" "00", 2)
REMOVED_FLAG = int("011" "00", 2)
# `100` is reserved for future use
TOUCHED_FLAG = int("101" "00", 2)

COPIED_MASK = int("11", 2)
COPIED_FROM_P1_FLAG = int("10", 2)
COPIED_FROM_P2_FLAG = int("11", 2)

# structure is <flag><filename-end><copy-source>
INDEX_HEADER = struct.Struct(">L")
INDEX_ENTRY = struct.Struct(">bLL")

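# Bit-layout sketch (illustrative values): the action code lives in bits 2-4
# and the copy origin in bits 0-1, so a single flag byte combines both.
#
#   >>> flag = ADDED_FLAG | COPIED_FROM_P1_FLAG  # 0b00110
#   >>> flag & ACTION_MASK == ADDED_FLAG
#   True
#   >>> flag & COPIED_MASK == COPIED_FROM_P1_FLAG
#   True
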
def encode_files_sidedata(files):
-    all_files = set(files.touched)
+    all_files = set(files.touched - files.salvaged)
    all_files.update(files.copied_from_p1.values())
    all_files.update(files.copied_from_p2.values())
    all_files = sorted(all_files)
    file_idx = {f: i for (i, f) in enumerate(all_files)}
    file_idx[None] = 0

    chunks = [INDEX_HEADER.pack(len(all_files))]

    filename_length = 0
    for f in all_files:
        filename_size = len(f)
        filename_length += filename_size
        flag = 0
        if f in files.added:
            flag |= ADDED_FLAG
        elif f in files.merged:
            flag |= MERGED_FLAG
        elif f in files.removed:
            flag |= REMOVED_FLAG
        elif f in files.touched:
            flag |= TOUCHED_FLAG

        copy = None
        if f in files.copied_from_p1:
            flag |= COPIED_FROM_P1_FLAG
            copy = files.copied_from_p1.get(f)
        elif f in files.copied_from_p2:
            copy = files.copied_from_p2.get(f)
            flag |= COPIED_FROM_P2_FLAG
        copy_idx = file_idx[copy]
        chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
    chunks.extend(all_files)
    return {sidedatamod.SD_FILES: b''.join(chunks)}

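# Effect of the change above (illustrative values): a file marked salvaged is
# in `touched` but is now excluded from the encoded index entirely.
#
#   >>> md = ChangingFiles()
#   >>> md.mark_added(b'a')
#   >>> md.mark_salvaged(b'b')
#   >>> raw = encode_files_sidedata(md)[sidedatamod.SD_FILES]
#   >>> INDEX_HEADER.unpack_from(raw, 0)[0]  # only b'a' was encoded
#   1
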
def decode_files_sidedata(sidedata):
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)

    if raw is None:
        return md

    copies = []
    all_files = []

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    offset = INDEX_HEADER.size
    file_offset_base = offset + (INDEX_ENTRY.size * total_files)
    file_offset_last = file_offset_base

    assert len(raw) >= file_offset_base

    for idx in range(total_files):
        flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
        file_end += file_offset_base
        filename = raw[file_offset_last:file_end]
        filesize = file_end - file_offset_last
        assert len(filename) == filesize
        offset += INDEX_ENTRY.size
        file_offset_last = file_end
        all_files.append(filename)
        if flag & ACTION_MASK == ADDED_FLAG:
            md.mark_added(filename)
        elif flag & ACTION_MASK == MERGED_FLAG:
            md.mark_merged(filename)
        elif flag & ACTION_MASK == REMOVED_FLAG:
            md.mark_removed(filename)
        elif flag & ACTION_MASK == TOUCHED_FLAG:
            md.mark_touched(filename)

        copied = None
        if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
            copied = md.mark_copied_from_p1
        elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
            copied = md.mark_copied_from_p2

        if copied is not None:
            copies.append((copied, filename, copy_idx))

    for copied, filename, copy_idx in copies:
        copied(all_files[copy_idx], filename)

    return md

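# Decode round-trip sketch (illustrative values): actions and copies survive,
# while salvaged files are absent since this revision skips them at encode
# time.
#
#   >>> md = ChangingFiles()
#   >>> md.mark_added(b'a')
#   >>> md.mark_copied_from_p1(b'p1-src', b'a')
#   >>> md2 = decode_files_sidedata(encode_files_sidedata(md))
#   >>> sorted(md2.added), md2.copied_from_p1
#   ([b'a'], {b'a': b'p1-src'})
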
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    filesmerged = computechangesetfilesmerged(ctx)
    files = ChangingFiles()
    files.update_touched(ctx.files())
    files.update_added(filesadded)
    files.update_removed(filesremoved)
    files.update_merged(filesmerged)
    files.update_copies_from_p1(filescopies[0])
    files.update_copies_from_p2(filescopies[1])
    return encode_files_sidedata(files)


def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)

def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by a worker to precompute sidedata

    It reads an input queue containing revision numbers.
    It writes to an output queue containing (rev, <sidedata-map>) pairs.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. A worker needs to acquire one token before fetching a task.
    Tokens will be released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


BUFF_PER_WORKER = 50

def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
            # Is the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion

def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion

630
630
631 def getsidedataremover(srcrepo, destrepo):
631 def getsidedataremover(srcrepo, destrepo):
632 def sidedatacompanion(revlog, rev):
632 def sidedatacompanion(revlog, rev):
633 f = ()
633 f = ()
634 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
634 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
635 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
635 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
636 f = (
636 f = (
637 sidedatamod.SD_P1COPIES,
637 sidedatamod.SD_P1COPIES,
638 sidedatamod.SD_P2COPIES,
638 sidedatamod.SD_P2COPIES,
639 sidedatamod.SD_FILESADDED,
639 sidedatamod.SD_FILESADDED,
640 sidedatamod.SD_FILESREMOVED,
640 sidedatamod.SD_FILESREMOVED,
641 )
641 )
642 return False, f, {}
642 return False, f, {}
643
643
644 return sidedatacompanion
644 return sidedatacompanion