##// END OF EJS Templates
i18n: fix coding tag unsupported by xgettext...
Dan Villiom Podlaski Christiansen -
r46776:18c17d63 5.6 stable
parent child Browse files
Show More
@@ -1,936 +1,936 b''
1 # coding: utf8
1 # coding: utf-8
2 # metadata.py -- code related to various metadata computation and access.
2 # metadata.py -- code related to various metadata computation and access.
3 #
3 #
4 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2019 Google, Inc <martinvonz@google.com>
5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9 from __future__ import absolute_import, print_function
9 from __future__ import absolute_import, print_function
10
10
11 import multiprocessing
11 import multiprocessing
12 import struct
12 import struct
13
13
14 from . import (
14 from . import (
15 error,
15 error,
16 node,
16 node,
17 pycompat,
17 pycompat,
18 util,
18 util,
19 )
19 )
20
20
21 from .revlogutils import (
21 from .revlogutils import (
22 flagutil as sidedataflag,
22 flagutil as sidedataflag,
23 sidedata as sidedatamod,
23 sidedata as sidedatamod,
24 )
24 )
25
25
26
26
class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into 3 sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched: files affected by the merge

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        # Copy every input into a private container so that callers'
        # iterables are never mutated and a None argument means "empty".
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        # Anything added, merged or removed is by definition also touched.
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        # Compare through the public (frozen) views so cached values and
        # pending mutations are both accounted for.
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @property
    def has_copies_info(self):
        # True when any information beyond the plain "touched" set is
        # recorded, i.e. when sidedata storage would carry extra value.
        return bool(
            self.removed
            or self.merged
            or self.salvaged
            or self.copied_from_p1
            or self.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        # util.propertycache stores the computed frozenset in the instance
        # __dict__; drop the stale cache before mutating the backing set.
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        """mark every name in *filenames* as added"""
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified files which had modification on both sides that needed merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        # Invalidate the propertycache entry before mutating (see mark_added).
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        """mark every name in *filenames* as merged"""
        for f in filenames:
            self.mark_merged(f)

    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) | both              |     *         ||   yes
         (b) | one               |     none      ||   yes
         (c) | one               | same filenode ||   no
         (d) | one               |  new filenode ||   yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        # Invalidate the propertycache entry before mutating (see mark_added).
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        """mark every name in *filenames* as removed"""
        for f in filenames:
            self.mark_removed(f)

    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exists.

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the file
        still exists, its removal was "reverted" and the file is "salvaged"
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        # Invalidate the propertycache entry before mutating (see mark_added).
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        """mark every name in *filenames* as salvaged"""
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        # Invalidate the propertycache entry before mutating (see mark_added).
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        """mark every name in *filenames* as touched"""
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        # Return a copy so callers cannot mutate our internal mapping.
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        # Invalidate the propertycache entry before mutating (see mark_added).
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        """record a {dest: source} mapping of copies from the first parent"""
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        # Return a copy so callers cannot mutate our internal mapping.
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        # Invalidate the propertycache entry before mutating (see mark_added).
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        """record a {dest: source} mapping of copies from the second parent"""
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
236
236
237
237
def compute_all_files_changes(ctx):
    """compute the files changed by a revision

    Dispatch to the appropriate helper based on how many real (non-null)
    parents the changeset has, returning a ChangingFiles instance.
    """
    p1 = ctx.p1()
    p2 = ctx.p2()
    if p1.rev() == node.nullrev and p2.rev() == node.nullrev:
        # root changeset: no parent at all
        return _process_root(ctx)
    elif p1.rev() != node.nullrev and p2.rev() == node.nullrev:
        # the common case: a single real parent
        return _process_linear(p1, ctx)
    elif p1.rev() == node.nullrev and p2.rev() != node.nullrev:
        # In the wild, one can encounter changeset where p1 is null but p2 is not
        return _process_linear(p1, ctx, parent=2)
    elif p1.rev() == p2.rev():
        # In the wild, one can encounter such "non-merge"
        return _process_linear(p1, ctx)
    else:
        return _process_merge(p1, p2, ctx)
254
254
255
255
def _process_root(ctx):
    """compute the appropriate changed files for a changeset with no parents
    """
    # Simple, there was nothing before it, so everything is added.
    md = ChangingFiles()
    manifest = ctx.manifest()
    for filename in manifest:
        md.mark_added(filename)
    return md
265
265
266
266
def _process_linear(parent_ctx, children_ctx, parent=1):
    """compute the appropriate changed files for a changeset with a single parent

    *parent* selects which copy mapping (p1 or p2) receives any detected
    renames; it must be 1 or 2.
    """
    md = ChangingFiles()
    parent_manifest = parent_ctx.manifest()
    children_manifest = children_ctx.manifest()

    copies_candidate = []

    # manifest.diff yields {filename: ((old_node, old_flag), (new_node, new_flag))}
    for filename, d in parent_manifest.diff(children_manifest).items():
        if d[1][0] is None:
            # no filenode for the "new" value, file is absent
            md.mark_removed(filename)
        else:
            copies_candidate.append(filename)
            if d[0][0] is None:
                # no filenode for the "old" value, file was absent
                md.mark_added(filename)
            else:
                # filenode for both "old" and "new"
                md.mark_touched(filename)

    if parent == 1:
        copied = md.mark_copied_from_p1
    elif parent == 2:
        copied = md.mark_copied_from_p2
    else:
        assert False, "bad parent value %d" % parent

    # only files present in the child can be a copy destination
    for filename in copies_candidate:
        copy_info = children_ctx[filename].renamed()
        if copy_info:
            source, srcnode = copy_info
            copied(source, filename)

    return md
303
303
304
304
305 def _process_merge(p1_ctx, p2_ctx, ctx):
305 def _process_merge(p1_ctx, p2_ctx, ctx):
306 """compute the appropriate changed files for a changeset with two parents
306 """compute the appropriate changed files for a changeset with two parents
307
307
308 This is a more advance case. The information we need to record is summarise
308 This is a more advance case. The information we need to record is summarise
309 in the following table:
309 in the following table:
310
310
311 β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
311 β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
312 β”‚ diff β•² diff β”‚ ΓΈ β”‚ (Some, None) β”‚ (None, Some) β”‚ (Some, Some) β”‚
312 β”‚ diff β•² diff β”‚ ΓΈ β”‚ (Some, None) β”‚ (None, Some) β”‚ (Some, Some) β”‚
313 β”‚ p2 β•² p1 β”‚ β”‚ β”‚ β”‚ β”‚
313 β”‚ p2 β•² p1 β”‚ β”‚ β”‚ β”‚ β”‚
314 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
314 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
315 β”‚ β”‚ β”‚πŸ„± No Changes β”‚πŸ„³ No Changes β”‚ β”‚
315 β”‚ β”‚ β”‚πŸ„± No Changes β”‚πŸ„³ No Changes β”‚ β”‚
316 β”‚ ΓΈ β”‚πŸ„° No Changes β”‚ OR β”‚ OR β”‚πŸ„΅ No Changes β”‚
316 β”‚ ΓΈ β”‚πŸ„° No Changes β”‚ OR β”‚ OR β”‚πŸ„΅ No Changes β”‚
317 β”‚ β”‚ β”‚πŸ„² Deleted[1] β”‚πŸ„΄ Salvaged[2]β”‚ [3] β”‚
317 β”‚ β”‚ β”‚πŸ„² Deleted[1] β”‚πŸ„΄ Salvaged[2]β”‚ [3] β”‚
318 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
318 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
319 β”‚ β”‚πŸ„Ά No Changes β”‚ β”‚ β”‚ β”‚
319 β”‚ β”‚πŸ„Ά No Changes β”‚ β”‚ β”‚ β”‚
320 β”‚ (Some, None) β”‚ OR β”‚πŸ„» Deleted β”‚ ΓΈ β”‚ ΓΈ β”‚
320 β”‚ (Some, None) β”‚ OR β”‚πŸ„» Deleted β”‚ ΓΈ β”‚ ΓΈ β”‚
321 β”‚ β”‚πŸ„· Deleted[1] β”‚ β”‚ β”‚ β”‚
321 β”‚ β”‚πŸ„· Deleted[1] β”‚ β”‚ β”‚ β”‚
322 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
322 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
323 β”‚ β”‚πŸ„Έ No Changes β”‚ β”‚ β”‚ β”‚
323 β”‚ β”‚πŸ„Έ No Changes β”‚ β”‚ β”‚ β”‚
324 β”‚ (None, Some) β”‚ OR β”‚ ΓΈ β”‚πŸ„Ό Added β”‚πŸ„½ Merged β”‚
324 β”‚ (None, Some) β”‚ OR β”‚ ΓΈ β”‚πŸ„Ό Added β”‚πŸ„½ Merged β”‚
325 β”‚ β”‚πŸ„Ή Salvaged[2]β”‚ β”‚ (copied?) β”‚ (copied?) β”‚
325 β”‚ β”‚πŸ„Ή Salvaged[2]β”‚ β”‚ (copied?) β”‚ (copied?) β”‚
326 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
326 β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
327 β”‚ β”‚ β”‚ β”‚ β”‚ β”‚
327 β”‚ β”‚ β”‚ β”‚ β”‚ β”‚
328 β”‚ (Some, Some) β”‚πŸ„Ί No Changes β”‚ ΓΈ β”‚πŸ„Ύ Merged β”‚πŸ„Ώ Merged β”‚
328 β”‚ (Some, Some) β”‚πŸ„Ί No Changes β”‚ ΓΈ β”‚πŸ„Ύ Merged β”‚πŸ„Ώ Merged β”‚
329 β”‚ β”‚ [3] β”‚ β”‚ (copied?) β”‚ (copied?) β”‚
329 β”‚ β”‚ [3] β”‚ β”‚ (copied?) β”‚ (copied?) β”‚
330 β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
330 β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
331
331
332 Special case [1]:
332 Special case [1]:
333
333
334 The situation is:
334 The situation is:
335 - parent-A: file exists,
335 - parent-A: file exists,
336 - parent-B: no file,
336 - parent-B: no file,
337 - working-copy: no file.
337 - working-copy: no file.
338
338
339 Detecting a "deletion" will depend on the presence of actual change on
339 Detecting a "deletion" will depend on the presence of actual change on
340 the "parent-A" branch:
340 the "parent-A" branch:
341
341
342 Subcase πŸ„± or πŸ„Ά : if the state of the file in "parent-A" is unchanged
342 Subcase πŸ„± or πŸ„Ά : if the state of the file in "parent-A" is unchanged
343 compared to the merge ancestors, then parent-A branch left the file
343 compared to the merge ancestors, then parent-A branch left the file
344 untouched while parent-B deleted it. We simply apply the change from
344 untouched while parent-B deleted it. We simply apply the change from
345 "parent-B" branch the file was automatically dropped.
345 "parent-B" branch the file was automatically dropped.
346 The result is:
346 The result is:
347 - file is not recorded as touched by the merge.
347 - file is not recorded as touched by the merge.
348
348
349 Subcase πŸ„² or πŸ„· : otherwise, the change from parent-A branch were explicitly dropped and
349 Subcase πŸ„² or πŸ„· : otherwise, the change from parent-A branch were explicitly dropped and
350 the file was "deleted again". From a user perspective, the message
350 the file was "deleted again". From a user perspective, the message
351 about "locally changed" while "remotely deleted" (or the other way
351 about "locally changed" while "remotely deleted" (or the other way
352 around) was issued and the user chose to deleted the file.
352 around) was issued and the user chose to deleted the file.
353 The result:
353 The result:
354 - file is recorded as touched by the merge.
354 - file is recorded as touched by the merge.
355
355
356
356
357 Special case [2]:
357 Special case [2]:
358
358
359 The situation is:
359 The situation is:
360 - parent-A: no file,
360 - parent-A: no file,
361 - parent-B: file,
361 - parent-B: file,
362 - working-copy: file (same content as parent-B).
362 - working-copy: file (same content as parent-B).
363
363
364 There are three subcases depending on the ancestors contents:
364 There are three subcases depending on the ancestors contents:
365
365
366 - A) the file is missing in all ancestors,
366 - A) the file is missing in all ancestors,
367 - B) at least one ancestor has the file with filenode β‰  from parent-B,
367 - B) at least one ancestor has the file with filenode β‰  from parent-B,
368 - C) all ancestors use the same filenode as parent-B,
368 - C) all ancestors use the same filenode as parent-B,
369
369
370 Subcase (A) is the simpler, nothing happend on parent-A side while
370 Subcase (A) is the simpler, nothing happend on parent-A side while
371 parent-B added it.
371 parent-B added it.
372
372
373 The result:
373 The result:
374 - the file is not marked as touched by the merge.
374 - the file is not marked as touched by the merge.
375
375
376 Subcase (B) is the counter part of "Special case [1]", the file was
376 Subcase (B) is the counter part of "Special case [1]", the file was
377 modified on parent-B side, while parent-A side deleted it. However this
377 modified on parent-B side, while parent-A side deleted it. However this
378 time, the conflict was solved by keeping the file (and its
378 time, the conflict was solved by keeping the file (and its
379 modification). We consider the file as "salvaged".
379 modification). We consider the file as "salvaged".
380
380
381 The result:
381 The result:
382 - the file is marked as "salvaged" by the merge.
382 - the file is marked as "salvaged" by the merge.
383
383
384 Subcase (C) is subtle variation of the case above. In this case, the
384 Subcase (C) is subtle variation of the case above. In this case, the
385 file in unchanged on the parent-B side and actively removed on the
385 file in unchanged on the parent-B side and actively removed on the
386 parent-A side. So the merge machinery correctly decide it should be
386 parent-A side. So the merge machinery correctly decide it should be
387 removed. However, the file was explicitly restored to its parent-B
387 removed. However, the file was explicitly restored to its parent-B
388 content before the merge was commited. The file is be marked
388 content before the merge was commited. The file is be marked
389 as salvaged too. From the merge result perspective, this is similar to
389 as salvaged too. From the merge result perspective, this is similar to
390 Subcase (B), however from the merge resolution perspective they differ
390 Subcase (B), however from the merge resolution perspective they differ
391 since in (C), there was some conflict not obvious solution to the
391 since in (C), there was some conflict not obvious solution to the
392 merge (That got reversed)
392 merge (That got reversed)
393
393
394 Special case [3]:
394 Special case [3]:
395
395
396 The situation is:
396 The situation is:
397 - parent-A: file,
397 - parent-A: file,
398 - parent-B: file (different filenode as parent-A),
398 - parent-B: file (different filenode as parent-A),
399 - working-copy: file (same filenode as parent-B).
399 - working-copy: file (same filenode as parent-B).
400
400
401 This case is in theory much simple, for this to happens, this mean the
401 This case is in theory much simple, for this to happens, this mean the
402 filenode in parent-A is purely replacing the one in parent-B (either a
402 filenode in parent-A is purely replacing the one in parent-B (either a
403 descendant, or a full new file history, see changeset). So the merge
403 descendant, or a full new file history, see changeset). So the merge
404 introduce no changes, and the file is not affected by the merge...
404 introduce no changes, and the file is not affected by the merge...
405
405
406 However, in the wild it is possible to find commit with the above is not
406 However, in the wild it is possible to find commit with the above is not
407 True. For example repository have some commit where the *new* node is an
407 True. For example repository have some commit where the *new* node is an
408 ancestor of the node in parent-A, or where parent-A and parent-B are two
408 ancestor of the node in parent-A, or where parent-A and parent-B are two
409 branches of the same file history, yet not merge-filenode were created
409 branches of the same file history, yet not merge-filenode were created
410 (while the "merge" should have led to a "modification").
410 (while the "merge" should have led to a "modification").
411
411
412 Detecting such cases (and not recording the file as modified) would be a
412 Detecting such cases (and not recording the file as modified) would be a
413 nice bonus. However do not any of this yet.
413 nice bonus. However do not any of this yet.
414 """
414 """
415
415
416 md = ChangingFiles()
416 md = ChangingFiles()
417
417
418 m = ctx.manifest()
418 m = ctx.manifest()
419 p1m = p1_ctx.manifest()
419 p1m = p1_ctx.manifest()
420 p2m = p2_ctx.manifest()
420 p2m = p2_ctx.manifest()
421 diff_p1 = p1m.diff(m)
421 diff_p1 = p1m.diff(m)
422 diff_p2 = p2m.diff(m)
422 diff_p2 = p2m.diff(m)
423
423
424 cahs = ctx.repo().changelog.commonancestorsheads(
424 cahs = ctx.repo().changelog.commonancestorsheads(
425 p1_ctx.node(), p2_ctx.node()
425 p1_ctx.node(), p2_ctx.node()
426 )
426 )
427 if not cahs:
427 if not cahs:
428 cahs = [node.nullrev]
428 cahs = [node.nullrev]
429 mas = [ctx.repo()[r].manifest() for r in cahs]
429 mas = [ctx.repo()[r].manifest() for r in cahs]
430
430
431 copy_candidates = []
431 copy_candidates = []
432
432
433 # Dealing with case πŸ„° happens automatically. Since there are no entry in
433 # Dealing with case πŸ„° happens automatically. Since there are no entry in
434 # d1 nor d2, we won't iterate on it ever.
434 # d1 nor d2, we won't iterate on it ever.
435
435
436 # Iteration over d1 content will deal with all cases, but the one in the
436 # Iteration over d1 content will deal with all cases, but the one in the
437 # first column of the table.
437 # first column of the table.
438 for filename, d1 in diff_p1.items():
438 for filename, d1 in diff_p1.items():
439
439
440 d2 = diff_p2.pop(filename, None)
440 d2 = diff_p2.pop(filename, None)
441
441
442 if d2 is None:
442 if d2 is None:
443 # this deal with the first line of the table.
443 # this deal with the first line of the table.
444 _process_other_unchanged(md, mas, filename, d1)
444 _process_other_unchanged(md, mas, filename, d1)
445 else:
445 else:
446
446
447 if d1[0][0] is None and d2[0][0] is None:
447 if d1[0][0] is None and d2[0][0] is None:
448 # case πŸ„Ό β€” both deleted the file.
448 # case πŸ„Ό β€” both deleted the file.
449 md.mark_added(filename)
449 md.mark_added(filename)
450 copy_candidates.append(filename)
450 copy_candidates.append(filename)
451 elif d1[1][0] is None and d2[1][0] is None:
451 elif d1[1][0] is None and d2[1][0] is None:
452 # case πŸ„» β€” both deleted the file.
452 # case πŸ„» β€” both deleted the file.
453 md.mark_removed(filename)
453 md.mark_removed(filename)
454 elif d1[1][0] is not None and d2[1][0] is not None:
454 elif d1[1][0] is not None and d2[1][0] is not None:
455 # case πŸ„½ πŸ„Ύ πŸ„Ώ
455 # case πŸ„½ πŸ„Ύ πŸ„Ώ
456 md.mark_merged(filename)
456 md.mark_merged(filename)
457 copy_candidates.append(filename)
457 copy_candidates.append(filename)
458 else:
458 else:
459 # Impossible case, the post-merge file status cannot be None on
459 # Impossible case, the post-merge file status cannot be None on
460 # one side and Something on the other side.
460 # one side and Something on the other side.
461 assert False, "unreachable"
461 assert False, "unreachable"
462
462
463 # Iteration over remaining d2 content deal with the first column of the
463 # Iteration over remaining d2 content deal with the first column of the
464 # table.
464 # table.
465 for filename, d2 in diff_p2.items():
465 for filename, d2 in diff_p2.items():
466 _process_other_unchanged(md, mas, filename, d2)
466 _process_other_unchanged(md, mas, filename, d2)
467
467
468 for filename in copy_candidates:
468 for filename in copy_candidates:
469 copy_info = ctx[filename].renamed()
469 copy_info = ctx[filename].renamed()
470 if copy_info:
470 if copy_info:
471 source, srcnode = copy_info
471 source, srcnode = copy_info
472 if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
472 if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
473 md.mark_copied_from_p1(source, filename)
473 md.mark_copied_from_p1(source, filename)
474 elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
474 elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
475 md.mark_copied_from_p2(source, filename)
475 md.mark_copied_from_p2(source, filename)
476 return md
476 return md
477
477
478
478
479 def _find(manifest, filename):
479 def _find(manifest, filename):
480 """return the associate filenode or None"""
480 """return the associate filenode or None"""
481 if filename not in manifest:
481 if filename not in manifest:
482 return None
482 return None
483 return manifest.find(filename)[0]
483 return manifest.find(filename)[0]
484
484
485
485
def _process_other_unchanged(md, mas, filename, diff):
    """Record the status of ``filename``, changed relative to one parent only.

    ``md`` is the ChangingFiles object being filled, ``mas`` the manifests of
    the merge's common-ancestor heads, and ``diff`` the manifest diff entry
    for ``filename`` against the parent that differs
    (``((old_node, old_flag), (new_node, new_flag))``).
    """
    source_node = diff[0][0]
    target_node = diff[1][0]

    if source_node is not None and target_node is None:
        # file existed on that side and is gone after the merge
        if any(_find(ma, filename) != source_node for ma in mas):
            # case (C) or (H): the merge actively removed the file
            md.mark_removed(filename)
        # else, we have case (B) or (G): no change need to be recorded
    elif source_node is None and target_node is not None:
        # file did not exist on that side but survives the merge
        if any(_find(ma, filename) is not None for ma in mas):
            # case (E) or (J): the merge resurrected ("salvaged") the file
            md.mark_salvaged(filename)
        # else, we have case (D) or (I): simple merge without intervention
    elif source_node is not None and target_node is not None:
        # case (F) or (K): simple merge without intervention
        #
        # In buggy case where source_node is not an ancestor of target_node,
        # there should have been a new filenode created, recording this as
        # "modified". We do not deal with them yet.
        pass
    else:
        # An impossible case: the diff algorithm should not return an entry
        # if the file is missing on both sides.
        assert False, "unreachable"
511
511
512
512
def _missing_from_all_ancestors(mas, filename):
    """Return True if ``filename`` is absent from every manifest in ``mas``."""
    for ancestor_manifest in mas:
        if _find(ancestor_manifest, filename) is not None:
            return False
    return True
515
515
516
516
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when no parent of the changeset contains it.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if all(f not in p for p in parents)]
525
525
526
526
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contain the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    # `x`, when given, pre-supplies (parent1-ctx, parent2-ctx and their
    # manifests), sparing the lookups below.
    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    # Manifests of the merge-ancestor heads, computed lazily and cached so
    # the (potentially expensive) ancestor computation runs at most once.
    @util.cachefunc
    def mas():
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            # no common ancestor: fall back to the null revision
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        # True when the absence of `f` comes from a parent rather than from
        # the merge commit itself (see the docstring above for the rules).
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            # in neither parent: the merge did not delete it
            return True

    return deletionfromparent
580
580
581
581
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    Files touched by the changeset but absent from its manifest are
    candidates; candidates whose absence merely comes from a parent (merge
    mechanics) are filtered back out.
    """
    candidates = [f for f in ctx.files() if f not in ctx]
    if not candidates:
        return candidates
    from_parent = get_removal_filter(ctx)
    return [f for f in candidates if not from_parent(f)]
593
593
594
594
def computechangesetfilesmerged(ctx):
    """Return the list of files merged in a changeset.

    A file is "merged" when its filelog entry for this changeset has a
    non-null second parent; this can only happen on merge commits, so
    non-merge changesets short-circuit to an empty list.
    """
    merged = []
    if len(ctx.parents()) < 2:
        return merged
    for path in ctx.files():
        if path not in ctx:
            continue
        fctx = ctx[path]
        filelog_parents = fctx._filelog.parents(fctx._filenode)
        if filelog_parents[1] != node.nullid:
            merged.append(path)
    return merged
608
608
609
609
def computechangesetcopies(ctx):
    """Return the copies data for a changeset.

    The result is a pair of dictionaries ``(p1copies, p2copies)``, each of
    the form ``{newname: oldname}``, attributing every recorded copy to the
    parent whose filenode matches the copy source.
    """
    copies_p1 = {}
    copies_p2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    in_narrow = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrow spec and files absent post-commit
        if not in_narrow(dst) or dst not in ctx:
            continue
        rename_info = ctx[dst].renamed()
        if not rename_info:
            continue
        src, srcnode = rename_info
        if src in parent1 and parent1[src].filenode() == srcnode:
            copies_p1[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            copies_p2[dst] = src
    return copies_p1, copies_p2
634
634
635
635
def encodecopies(files, copies):
    """Encode ``copies`` as newline-separated ``<index>\\0<source>`` entries.

    Indices refer to positions in ``files``; raises ProgrammingError when a
    copy target is missing from that list.
    """
    entries = [
        b'%d\0%s' % (index, copies[dst])
        for index, dst in enumerate(files)
        if dst in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
646
646
647
647
def decodecopies(files, data):
    """Decode a copies blob produced by ``encodecopies``.

    Returns a ``{dst: src}`` dict, or None when ``data`` does not parse
    (e.g. another extension reused the same key with a different syntax).
    """
    try:
        copies = {}
        if not data:
            return copies
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
663
663
664
664
def encodefileindices(files, subset):
    """Encode the indices (into ``files``) of the members of ``subset``."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % index for index, f in enumerate(files) if f in wanted
    )
672
672
673
673
def decodefileindices(files, data):
    """Decode a newline-separated index list back into the file names.

    Returns None when ``data`` does not parse or an index is out of range
    (e.g. another extension reused the same key with a different syntax).
    """
    try:
        subset = []
        if not data:
            return subset
        for chunk in data.split(b'\n'):
            index = int(chunk)
            if not (0 <= index < len(files)):
                return None
            subset.append(files[index])
        return subset
    except (ValueError, IndexError):
        return None
689
689
690
690
# see mercurial/helptext/internals/revlogs.txt for details about the format

# Bits 2-4 of an index-entry flag encode the action performed on the file.
ACTION_MASK = int("111" "00", 2)
# note: untouched file used as copy source will as `000` for this mask.
ADDED_FLAG = int("001" "00", 2)
MERGED_FLAG = int("010" "00", 2)
REMOVED_FLAG = int("011" "00", 2)
SALVAGED_FLAG = int("100" "00", 2)
TOUCHED_FLAG = int("101" "00", 2)

# Bits 0-1 encode which parent the file was copied from (00 = not copied).
COPIED_MASK = int("11", 2)
COPIED_FROM_P1_FLAG = int("10", 2)
COPIED_FROM_P2_FLAG = int("11", 2)

# structure is <flag><filename-end><copy-source>
INDEX_HEADER = struct.Struct(">L")
INDEX_ENTRY = struct.Struct(">bLL")
708
708
709
709
def encode_files_sidedata(files):
    """Encode a ChangingFiles object as the SD_FILES sidedata blob.

    Layout: a count header, then one fixed-size INDEX_ENTRY per file
    ``<flag><filename-end><copy-source-index>``, then every filename
    concatenated (sorted order). Returns a one-entry sidedata map.
    """
    # every name involved: touched files plus copy sources, which may not be
    # touched themselves
    all_files = set(files.touched)
    all_files.update(files.copied_from_p1.values())
    all_files.update(files.copied_from_p2.values())
    all_files = sorted(all_files)
    file_idx = {f: i for (i, f) in enumerate(all_files)}
    # entries without a copy source point at index 0; the zeroed COPIED_MASK
    # bits mark that index as unused
    file_idx[None] = 0

    chunks = [INDEX_HEADER.pack(len(all_files))]

    # running end-offset of each filename within the concatenated section
    filename_length = 0
    for f in all_files:
        filename_size = len(f)
        filename_length += filename_size
        flag = 0
        if f in files.added:
            flag |= ADDED_FLAG
        elif f in files.merged:
            flag |= MERGED_FLAG
        elif f in files.removed:
            flag |= REMOVED_FLAG
        elif f in files.salvaged:
            flag |= SALVAGED_FLAG
        elif f in files.touched:
            flag |= TOUCHED_FLAG

        copy = None
        if f in files.copied_from_p1:
            flag |= COPIED_FROM_P1_FLAG
            copy = files.copied_from_p1.get(f)
        elif f in files.copied_from_p2:
            copy = files.copied_from_p2.get(f)
            flag |= COPIED_FROM_P2_FLAG
        copy_idx = file_idx[copy]
        chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
    chunks.extend(all_files)
    return {sidedatamod.SD_FILES: b''.join(chunks)}
747
747
748
748
def decode_files_sidedata(sidedata):
    """Decode an SD_FILES sidedata blob back into a ChangingFiles object.

    Returns an empty ChangingFiles when the sidedata map has no SD_FILES
    entry. See ``encode_files_sidedata`` for the binary layout.
    """
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)

    if raw is None:
        return md

    # copy records are collected first and replayed after the loop, once
    # every filename (including copy sources) is known
    copies = []
    all_files = []

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    offset = INDEX_HEADER.size
    # filenames start right after the fixed-size index entries
    file_offset_base = offset + (INDEX_ENTRY.size * total_files)
    file_offset_last = file_offset_base

    assert len(raw) >= file_offset_base

    for idx in range(total_files):
        flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
        # entries store end offsets relative to the filename section
        file_end += file_offset_base
        filename = raw[file_offset_last:file_end]
        filesize = file_end - file_offset_last
        assert len(filename) == filesize
        offset += INDEX_ENTRY.size
        file_offset_last = file_end
        all_files.append(filename)
        if flag & ACTION_MASK == ADDED_FLAG:
            md.mark_added(filename)
        elif flag & ACTION_MASK == MERGED_FLAG:
            md.mark_merged(filename)
        elif flag & ACTION_MASK == REMOVED_FLAG:
            md.mark_removed(filename)
        elif flag & ACTION_MASK == SALVAGED_FLAG:
            md.mark_salvaged(filename)
        elif flag & ACTION_MASK == TOUCHED_FLAG:
            md.mark_touched(filename)

        copied = None
        if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
            copied = md.mark_copied_from_p1
        elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
            copied = md.mark_copied_from_p2

        if copied is not None:
            copies.append((copied, filename, copy_idx))

    # second pass: resolve copy-source indices now that all_files is complete
    for copied, filename, copy_idx in copies:
        copied(all_files[copy_idx], filename)

    return md
801
801
802
802
def _getsidedata(srcrepo, rev):
    """Compute the files sidedata for revision ``rev`` of ``srcrepo``.

    Returns a ``(sidedata-map, has_copies_info)`` pair.
    """
    changes = compute_all_files_changes(srcrepo[rev])
    return encode_files_sidedata(changes), changes.has_copies_info
807
807
808
808
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion, parallel when supported and enabled.

    Falls back to the single-threaded version on Windows or when the
    ``experimental.worker.repository-upgrade`` config is off.
    """
    use_workers = srcrepo.ui.configbool(
        b'experimental', b'worker.repository-upgrade'
    )
    if use_workers and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
815
815
816
816
def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function run by each worker process precomputing sidedata.

    It reads revision numbers from an input queue and writes
    ``(rev, <sidedata-map>)`` pairs to an output queue.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. The workers need to acquire one token before fetching a task.
    They will be released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()
838
838
839
839
# Per-worker buffering factor: the token semaphore in
# _get_worker_sidedata_adder is sized nbworkers * BUFF_PER_WORKER, bounding
# how many precomputed results may sit unconsumed.
BUFF_PER_WORKER = 50
841
841
842
842
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation.

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them.
    """
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    # bound the number of unconsumed precomputed results
    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it make
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not revelant. In practice, most computation
    # are fast but some are very expensive and dominate all the other smaller
    # cost.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionnary to store results for revision higher than we one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, when shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        # default payload for non-changelog revlogs: no sidedata, no copies
        data = {}, False
        if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            #
            # BUG FIX: the shelved payload and the queue payloads must be
            # bound to `data` (previously they went into `sidedata`, so the
            # shelved/later-fetched results were silently dropped and the
            # stale or default `data` was returned instead).
            data = staging.pop(rev, None)
            if data is None:
                # look at the queued result until we find the one we are
                # looking for (shelve the other ones)
                r, data = sidedataq.get()
                while r != rev:
                    staging[r] = data
                    r, data = sidedataq.get()
            tokens.release()
        sidedata, has_copies_info = data
        new_flag = 0
        if has_copies_info:
            new_flag = sidedataflag.REVIDX_HASCOPIESINFO
        return False, (), sidedata, new_flag, 0

    return sidedata_companion
903
903
904
904
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple, single-threaded version of the sidedata computation.

    Sidedata for each revision is computed on demand, in the caller's
    thread.
    """

    def sidedatacompanion(revlog, rev):
        # only changelogs (recognized by their `filteredrevs` attribute)
        # carry the files sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            sidedata, has_copies_info = _getsidedata(srcrepo, rev)
        else:
            sidedata, has_copies_info = {}, False
        new_flag = sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0
        return False, (), sidedata, new_flag, 0

    return sidedatacompanion
921
921
922
922
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy-tracing sidedata."""

    def sidedatacompanion(revlog, rev):
        to_drop = ()
        is_changelog = util.safehasattr(revlog, 'filteredrevs')
        if is_changelog and revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
            # drop every copy-related sidedata category
            to_drop = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, to_drop, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now