##// END OF EJS Templates
sidedata: register copies sidedata computer regardless of the revlog version...
Raphaël Gomès -
r47840:b409cdc6 default
parent child Browse files
Show More
@@ -1,964 +1,964 b''
1 # coding: utf-8
1 # coding: utf-8
2 # metadata.py -- code related to various metadata computation and access.
2 # metadata.py -- code related to various metadata computation and access.
3 #
3 #
4 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2019 Google, Inc <martinvonz@google.com>
5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9 from __future__ import absolute_import, print_function
9 from __future__ import absolute_import, print_function
10
10
11 import multiprocessing
11 import multiprocessing
12 import struct
12 import struct
13
13
14 from .node import nullrev
14 from .node import nullrev
15 from . import (
15 from . import (
16 error,
16 error,
17 pycompat,
17 pycompat,
18 requirements as requirementsmod,
18 requirements as requirementsmod,
19 util,
19 util,
20 )
20 )
21
21
22 from .revlogutils import (
22 from .revlogutils import (
23 constants as revlogconst,
23 constants as revlogconst,
24 flagutil as sidedataflag,
24 flagutil as sidedataflag,
25 sidedata as sidedatamod,
25 sidedata as sidedatamod,
26 )
26 )
27
27
28
28
class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into five sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched: files affected by the merge

    and copies information is held by two mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        # each category defaults to an empty set/dict when not provided
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        # by definition, any added/merged/removed file is also "touched"
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        # compare through the public (frozen) views rather than the
        # private mutable containers
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    @property
    def has_copies_info(self):
        """True when any information beyond the plain "touched" set exists"""
        return bool(
            self.removed
            or self.merged
            or self.salvaged
            or self.copied_from_p1
            or self.copied_from_p2
        )

    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        # invalidate the cached frozenset (util.propertycache stores it in
        # the instance __dict__) before mutating the underlying set
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        """mark multiple files as added"""
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified files which had modification on both sides that needed merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        # invalidate the propertycache-stored frozenset before mutating
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        """mark multiple files as merged"""
        for f in filenames:
            self.mark_merged(f)

    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file is included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) | both              |     *         ||  yes
         (b) | one               |     none      ||  yes
         (c) | one               | same filenode ||  no
         (d) | one               |  new filenode ||  yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        # invalidate the propertycache-stored frozenset before mutating
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        """mark multiple files as removed"""
        for f in filenames:
            self.mark_removed(f)

    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exists.

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the file
        still exists, its removal was "reverted" and the file is "salvaged"
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        # invalidate the propertycache-stored frozenset before mutating
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        """mark multiple files as salvaged"""
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        # invalidate the propertycache-stored frozenset before mutating
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        """mark multiple files as touched"""
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        # return a copy so callers cannot mutate internal state
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        # invalidate the propertycache-stored copy before mutating
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        """record multiple {dest: source} p1-copy entries"""
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        # return a copy so callers cannot mutate internal state
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        # invalidate the propertycache-stored copy before mutating
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        """record multiple {dest: source} p2-copy entries"""
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
238
238
239
239
def compute_all_files_changes(ctx):
    """Compute the files changed by a revision.

    Dispatches to the root / linear / merge handler depending on which
    parents of ``ctx`` are null.
    """
    parent_one = ctx.p1()
    parent_two = ctx.p2()
    p1_is_null = parent_one.rev() == nullrev
    p2_is_null = parent_two.rev() == nullrev

    if p1_is_null and p2_is_null:
        # no parents at all: a root changeset
        return _process_root(ctx)
    if p2_is_null:
        # the common case: a single (first) parent
        return _process_linear(parent_one, ctx)
    if p1_is_null:
        # In the wild, one can encounter changesets where p1 is null but p2 is not
        return _process_linear(parent_one, ctx, parent=2)
    if parent_one.rev() == parent_two.rev():
        # In the wild, one can encounter such "non-merge"
        return _process_linear(parent_one, ctx)
    return _process_merge(parent_one, parent_two, ctx)
256
256
257
257
def _process_root(ctx):
    """Compute the appropriate changed files for a changeset with no parents."""
    # Nothing existed before this changeset, so every file in its manifest
    # counts as added.
    md = ChangingFiles()
    md.update_added(ctx.manifest())
    return md
266
266
267
267
def _process_linear(parent_ctx, children_ctx, parent=1):
    """Compute the appropriate changed files for a changeset with a single parent.

    ``parent`` selects which copy-source mapping (p1 or p2) rename
    information is recorded into.
    """
    md = ChangingFiles()
    manifest_diff = parent_ctx.manifest().diff(children_ctx.manifest())

    copy_candidates = []

    for fname, delta in manifest_diff.items():
        if delta[1][0] is None:
            # no filenode on the "new" side: the file is gone in the child
            md.mark_removed(fname)
            continue
        copy_candidates.append(fname)
        if delta[0][0] is None:
            # no filenode on the "old" side: the file was absent in the parent
            md.mark_added(fname)
        else:
            # a filenode on both sides: the file content changed
            md.mark_touched(fname)

    # pick which parent the copy information gets attributed to
    if parent == 1:
        record_copy = md.mark_copied_from_p1
    elif parent == 2:
        record_copy = md.mark_copied_from_p2
    else:
        assert False, "bad parent value %d" % parent

    for fname in copy_candidates:
        rename_info = children_ctx[fname].renamed()
        if rename_info:
            # only the source name matters here; the source node is unused
            record_copy(rename_info[0], fname)

    return md
303
303
304
304
def _process_merge(p1_ctx, p2_ctx, ctx):
    """compute the appropriate changed files for a changeset with two parents

    This is a more advanced case. The information we need to record is
    summarised in the following table:

    ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
    │ diff ╲  diff │       ø      │ (Some, None) │ (None, Some) │ (Some, Some) │
    │  p2     ╲ p1 │              │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │🄱  No Changes │🄳 No Changes  │              │
    │  ø           │🄰  No Changes │      OR      │     OR       │🄵  No Changes │
    │              │              │🄲  Deleted[1] │🄴 Salvaged[2] │       [3]    │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄶 No Changes  │              │              │              │
    │ (Some, None) │      OR      │🄻  Deleted    │       ø      │      ø       │
    │              │🄷 Deleted[1]  │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄸 No Changes  │              │              │   🄽 Touched  │
    │ (None, Some) │     OR       │      ø       │🄼  Added      │OR 🅀 Salvaged │
    │              │🄹 Salvaged[2] │              │   (copied?)  │   (copied?)  │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │              │   🄾 Touched  │   🄿 Merged   │
    │ (Some, Some) │🄺 No Changes  │      ø       │OR 🅁 Salvaged │OR 🅂 Touched  │
    │              │      [3]     │              │   (copied?)  │   (copied?)  │
    └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘

    Special case [1]:

      The situation is:
        - parent-A:     file exists,
        - parent-B:     no file,
        - working-copy: no file.

      Detecting a "deletion" will depend on the presence of actual change on
      the "parent-A" branch:

      Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged
      compared to the merge ancestors, then parent-A branch left the file
      untouched while parent-B deleted it. We simply apply the change from
      "parent-B" branch the file was automatically dropped.
      The result is:
          - file is not recorded as touched by the merge.

      Subcase 🄲 or 🄷 : otherwise, the change from parent-A branch were explicitly dropped and
      the file was "deleted again". From a user perspective, the message
      about "locally changed" while "remotely deleted" (or the other way
      around) was issued and the user chose to deleted the file.
      The result:
          - file is recorded as touched by the merge.


    Special case [2]:

      The situation is:
        - parent-A:     no file,
        - parent-B:     file,
        - working-copy: file (same content as parent-B).

      There are three subcases depending on the ancestors contents:

      - A) the file is missing in all ancestors,
      - B) at least one ancestor has the file with filenode ≠ from parent-B,
      - C) all ancestors use the same filenode as parent-B,

      Subcase (A) is the simplest, nothing happened on parent-A side while
      parent-B added it.

      The result:
          - the file is not marked as touched by the merge.

      Subcase (B) is the counterpart of "Special case [1]", the file was
      modified on parent-B side, while parent-A side deleted it. However this
      time, the conflict was solved by keeping the file (and its
      modification). We consider the file as "salvaged".

      The result:
          - the file is marked as "salvaged" by the merge.

      Subcase (C) is a subtle variation of the case above. In this case, the
      file is unchanged on the parent-B side and actively removed on the
      parent-A side. So the merge machinery correctly decided it should be
      removed. However, the file was explicitly restored to its parent-B
      content before the merge was committed. The file is marked
      as salvaged too. From the merge result perspective, this is similar to
      Subcase (B), however from the merge resolution perspective they differ
      since in (C), there was some conflict not obvious solution to the
      merge (That got reversed)

    Special case [3]:

      The situation is:
        - parent-A:     file,
        - parent-B:     file (different filenode as parent-A),
        - working-copy: file (same filenode as parent-B).

      This case is in theory much simpler, for this to happen, this means the
      filenode in parent-A is purely replacing the one in parent-B (either a
      descendant, or a full new file history, see changeset). So the merge
      introduces no changes, and the file is not affected by the merge...

      However, in the wild it is possible to find commits where the above is not
      true. For example repositories have some commits where the *new* node is an
      ancestor of the node in parent-A, or where parent-A and parent-B are two
      branches of the same file history, yet no merge-filenode was created
      (while the "merge" should have led to a "modification").

      Detecting such cases (and not recording the file as modified) would be a
      nice bonus. However we do not do any of this yet.
    """

    repo = ctx.repo()
    md = ChangingFiles()

    m = ctx.manifest()
    p1m = p1_ctx.manifest()
    p2m = p2_ctx.manifest()
    diff_p1 = p1m.diff(m)
    diff_p2 = p2m.diff(m)

    # common ancestors heads are used to decide whether a deletion or
    # addition was "active" or merely propagated from one side
    cahs = ctx.repo().changelog.commonancestorsheads(
        p1_ctx.node(), p2_ctx.node()
    )
    if not cahs:
        cahs = [nullrev]
    mas = [ctx.repo()[r].manifest() for r in cahs]

    copy_candidates = []

    # Dealing with case 🄰 happens automatically. Since there is no entry in
    # d1 nor d2, we won't iterate on it ever.

    # Iteration over d1 content will deal with all cases, but the one in the
    # first column of the table.
    for filename, d1 in diff_p1.items():

        # pop so the second loop only sees files untouched on the p1 side
        d2 = diff_p2.pop(filename, None)

        if d2 is None:
            # this deals with the first line of the table.
            _process_other_unchanged(md, mas, filename, d1)
        else:

            if d1[0][0] is None and d2[0][0] is None:
                # case 🄼 — the file is absent in both parents: it was added.
                md.mark_added(filename)
                copy_candidates.append(filename)
            elif d1[1][0] is None and d2[1][0] is None:
                # case 🄻 — both sides deleted the file.
                md.mark_removed(filename)
            elif d1[1][0] is not None and d2[1][0] is not None:
                if d1[0][0] is None or d2[0][0] is None:
                    if any(_find(ma, filename) is not None for ma in mas):
                        # case 🅀 or 🅁
                        md.mark_salvaged(filename)
                    else:
                        # case 🄽 🄾 : touched
                        md.mark_touched(filename)
                else:
                    fctx = repo.filectx(filename, fileid=d1[1][0])
                    if fctx.p2().rev() == nullrev:
                        # case 🅂
                        # lets assume we can trust the file history. If the
                        # filenode is not a merge, the file was not merged.
                        md.mark_touched(filename)
                    else:
                        # case 🄿
                        md.mark_merged(filename)
                        copy_candidates.append(filename)
            else:
                # Impossible case, the post-merge file status cannot be None on
                # one side and Something on the other side.
                assert False, "unreachable"

    # Iteration over remaining d2 content deal with the first column of the
    # table.
    for filename, d2 in diff_p2.items():
        _process_other_unchanged(md, mas, filename, d2)

    for filename in copy_candidates:
        copy_info = ctx[filename].renamed()
        if copy_info:
            source, srcnode = copy_info
            # attribute the copy to whichever parent actually holds the
            # source file with the matching filenode
            if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p1(source, filename)
            elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p2(source, filename)
    return md
493
493
494
494
495 def _find(manifest, filename):
495 def _find(manifest, filename):
496 """return the associate filenode or None"""
496 """return the associate filenode or None"""
497 if filename not in manifest:
497 if filename not in manifest:
498 return None
498 return None
499 return manifest.find(filename)[0]
499 return manifest.find(filename)[0]
500
500
501
501
def _process_other_unchanged(md, mas, filename, diff):
    """Record the change for a file modified on only one side of a merge.

    ``md`` is the ChangingFiles object gathering the results, ``mas`` the
    manifests of the merge ancestors, and ``diff`` the
    ``((old-node, old-flag), (new-node, new-flag))`` manifest-diff entry for
    ``filename`` on the side that changed it.
    """
    source_node = diff[0][0]
    target_node = diff[1][0]

    if source_node is not None and target_node is None:
        # the file was deleted on the changed side
        if any(_find(ma, filename) != source_node for ma in mas):
            # case 🄲 or 🄷
            md.mark_removed(filename)
        # else, we have case 🄱 or 🄶 : no change need to be recorded
    elif source_node is None and target_node is not None:
        # the file was (re-)added on the changed side
        if any(_find(ma, filename) is not None for ma in mas):
            # case 🄴 or 🄹
            md.mark_salvaged(filename)
        # else, we have case 🄳 or 🄸 : simple merge without intervention
    elif source_node is not None and target_node is not None:
        # case 🄵 or 🄺 : simple merge without intervention
        #
        # In buggy case where source_node is not an ancestors of target_node.
        # There should have a been a new filenode created, recording this as
        # "modified". We do not deal with them yet.
        pass
    else:
        # An impossible case, the diff algorithm should not return entry if the
        # file is missing on both side.
        assert False, "unreachable"
526 assert False, "unreachable"
527
527
528
528
def _missing_from_all_ancestors(mas, filename):
    """True when ``filename`` appears in none of the ancestor manifests."""
    for ancestor_manifest in mas:
        if _find(ancestor_manifest, filename) is not None:
            return False
    return True
531
531
532
532
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset"""
    # a file is "added" when no parent of the changeset contains it
    return [
        f
        for f in ctx.files()
        if not any(f in parent for parent in ctx.parents())
    ]
540
540
541
541
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contain the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there are two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        # the caller already computed parents and manifests
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        # manifests of the merge ancestors; lazy since many callers never
        # need them, and cached so they are computed at most once
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            # no common ancestor: fall back to the null revision
            cahs = [nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        # Return True when the absence of `f` comes from a parent (i.e. the
        # merge commit itself did not delete it), per the rules documented
        # in the enclosing docstring.
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            # neither parent has the file
            return True

    return deletionfromparent
595
595
596
596
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset"""
    # candidates: files listed as changed but absent from the changeset
    candidates = [f for f in ctx.files() if f not in ctx]
    if not candidates:
        return candidates
    # filter out absences inherited from a merge parent
    wrongly_removed = get_removal_filter(ctx)
    return [f for f in candidates if not wrongly_removed(f)]
607
607
608
608
def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset"""
    # only merge commits can have merged files
    if len(ctx.parents()) < 2:
        return []
    merged = []
    for f in ctx.files():
        if f not in ctx:
            continue
        fctx = ctx[f]
        file_parents = fctx._filelog.parents(fctx._filenode)
        # a filenode with a second parent is the result of a merge
        if file_parents[1] != ctx.repo().nullid:
            merged.append(f)
    return merged
621
621
622
622
def computechangesetcopies(ctx):
    """Return the copy information for a changeset.

    Produces a ``(p1copies, p2copies)`` pair of dictionaries, each in the
    form ``{newname: oldname}``, depending on which parent the copy source
    comes from.
    """
    p1copies = {}
    p2copies = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec or absent from the changeset
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copy_info = ctx[dst].renamed()
        if not copy_info:
            continue
        src, srcnode = copy_info
        # attribute the copy to whichever parent holds the matching filenode
        if src in parent1 and parent1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
647
647
648
648
def encodecopies(files, copies):
    """Serialize ``copies`` as newline-separated ``<index>\\0<source>`` items,
    where ``<index>`` is the position of the destination in ``files``.

    Raises ProgrammingError if a copy destination is missing from ``files``.
    """
    items = [
        b'%d\0%s' % (index, copies[dst])
        for index, dst in enumerate(files)
        if dst in copies
    ]
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
659
659
660
660
def decodecopies(files, data):
    """Parse data produced by ``encodecopies`` back into a ``{dst: src}`` dict.

    Returns None when the payload cannot be interpreted, e.g. when an
    extension stored unrelated data under the same extra key.
    """
    copies = {}
    if not data:
        return copies
    try:
        for item in data.split(b'\n'):
            strindex, src = item.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
676
676
677
677
def encodefileindices(files, subset):
    """Encode, one per line, the position in ``files`` of each file that is
    also part of ``subset``."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % index for index, f in enumerate(files) if f in wanted
    )
685
685
686
686
def decodefileindices(files, data):
    """Decode data produced by ``encodefileindices`` into a file list.

    Returns None for out-of-range indices or an unparsable payload (e.g. an
    extension reused the same extra key with a different format).
    """
    subset = []
    if not data:
        return subset
    try:
        for chunk in data.split(b'\n'):
            index = int(chunk)
            if not (0 <= index < len(files)):
                return None
            subset.append(files[index])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
702
702
703
703
# see mercurial/helptext/internals/revlogs.txt for details about the format

# Bits 2-4 of a per-file flag byte encode the action performed on the file.
ACTION_MASK = int("111" "00", 2)
# note: an untouched file used as a copy source will appear as `000` for this
# mask.
ADDED_FLAG = int("001" "00", 2)
MERGED_FLAG = int("010" "00", 2)
REMOVED_FLAG = int("011" "00", 2)
SALVAGED_FLAG = int("100" "00", 2)
TOUCHED_FLAG = int("101" "00", 2)

# The two lowest bits encode copy information; `00` means "not copied".
COPIED_MASK = int("11", 2)
COPIED_FROM_P1_FLAG = int("10", 2)
COPIED_FROM_P2_FLAG = int("11", 2)

# structure is <flag><filename-end><copy-source>
INDEX_HEADER = struct.Struct(">L")
INDEX_ENTRY = struct.Struct(">bLL")
721
721
722
722
def encode_files_sidedata(files):
    """Serialize a ChangingFiles instance into the SD_FILES sidedata blob.

    Layout: a big-endian u32 file count, one INDEX_ENTRY
    (<flag><filename-end><copy-source-index>) per file, then all filenames
    concatenated. See mercurial/helptext/internals/revlogs.txt.
    """
    # copy sources may not themselves be touched, so gather them too
    all_files = set(files.touched)
    all_files.update(files.copied_from_p1.values())
    all_files.update(files.copied_from_p2.values())
    all_files = sorted(all_files)
    file_idx = {f: i for (i, f) in enumerate(all_files)}
    # entries without a copy source store index 0; the unset COPIED_MASK
    # bits distinguish them from a real copy from all_files[0]
    file_idx[None] = 0

    chunks = [INDEX_HEADER.pack(len(all_files))]

    filename_length = 0
    for f in all_files:
        filename_size = len(f)
        # cumulative end offset of this filename in the names section
        filename_length += filename_size
        flag = 0
        if f in files.added:
            flag |= ADDED_FLAG
        elif f in files.merged:
            flag |= MERGED_FLAG
        elif f in files.removed:
            flag |= REMOVED_FLAG
        elif f in files.salvaged:
            flag |= SALVAGED_FLAG
        elif f in files.touched:
            flag |= TOUCHED_FLAG

        copy = None
        if f in files.copied_from_p1:
            flag |= COPIED_FROM_P1_FLAG
            copy = files.copied_from_p1.get(f)
        elif f in files.copied_from_p2:
            copy = files.copied_from_p2.get(f)
            flag |= COPIED_FROM_P2_FLAG
        copy_idx = file_idx[copy]
        chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
    # names section: all filenames back to back, delimited by the
    # per-entry end offsets recorded above
    chunks.extend(all_files)
    return {sidedatamod.SD_FILES: b''.join(chunks)}
760
760
761
761
def decode_files_sidedata(sidedata):
    """Rebuild a ChangingFiles instance from an SD_FILES sidedata blob.

    Inverse of ``encode_files_sidedata``; returns an empty ChangingFiles
    when no SD_FILES entry is present.
    """
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)

    if raw is None:
        return md

    copies = []
    all_files = []

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    offset = INDEX_HEADER.size
    # the concatenated filenames start right after the fixed-size entries
    file_offset_base = offset + (INDEX_ENTRY.size * total_files)
    file_offset_last = file_offset_base

    assert len(raw) >= file_offset_base

    for idx in range(total_files):
        flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
        # `file_end` is stored relative to the start of the names section
        file_end += file_offset_base
        filename = raw[file_offset_last:file_end]
        filesize = file_end - file_offset_last
        assert len(filename) == filesize
        offset += INDEX_ENTRY.size
        file_offset_last = file_end
        all_files.append(filename)
        if flag & ACTION_MASK == ADDED_FLAG:
            md.mark_added(filename)
        elif flag & ACTION_MASK == MERGED_FLAG:
            md.mark_merged(filename)
        elif flag & ACTION_MASK == REMOVED_FLAG:
            md.mark_removed(filename)
        elif flag & ACTION_MASK == SALVAGED_FLAG:
            md.mark_salvaged(filename)
        elif flag & ACTION_MASK == TOUCHED_FLAG:
            md.mark_touched(filename)

        copied = None
        if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
            copied = md.mark_copied_from_p1
        elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
            copied = md.mark_copied_from_p2

        # a copy source may appear later in the index, so resolve copies
        # only after all filenames are known
        if copied is not None:
            copies.append((copied, filename, copy_idx))

    for copied, filename, copy_idx in copies:
        copied(all_files[copy_idx], filename)

    return md
814
814
815
815
def _getsidedata(srcrepo, rev):
    """Compute the files sidedata for ``rev`` of ``srcrepo``.

    Returns a ``(sidedata_map, has_copies_info)`` pair.
    """
    changes = compute_all_files_changes(srcrepo[rev])
    return encode_files_sidedata(changes), changes.has_copies_info
820
820
821
821
def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
    """Sidedata computer recomputing copy information from scratch.

    ``existing_sidedata`` is ignored: the data is always rebuilt from the
    changeset itself.
    """
    sidedata, _has_copies_info = _getsidedata(repo, rev)
    return sidedata
824
824
825
825
def set_sidedata_spec_for_repo(repo):
    """Register the copies sidedata computer on repos that require it."""
    if requirementsmod.COPIESSDC_REQUIREMENT not in repo.requirements:
        return
    repo.register_wanted_sidedata(sidedatamod.SD_FILES)
    repo.register_sidedata_computer(
        revlogconst.KIND_CHANGELOG,
        sidedatamod.SD_FILES,
        (sidedatamod.SD_FILES,),
        copies_sidedata_computer,
    )
835
835
836
836
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that adds copies sidedata.

    The multiprocess implementation is used unless disabled by config or
    when running on Windows.
    """
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if use_w and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
843
843
844
844
845 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
845 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
846 """The function used by worker precomputing sidedata
846 """The function used by worker precomputing sidedata
847
847
848 It read an input queue containing revision numbers
848 It read an input queue containing revision numbers
849 It write in an output queue containing (rev, <sidedata-map>)
849 It write in an output queue containing (rev, <sidedata-map>)
850
850
851 The `None` input value is used as a stop signal.
851 The `None` input value is used as a stop signal.
852
852
853 The `tokens` semaphore is user to avoid having too many unprocessed
853 The `tokens` semaphore is user to avoid having too many unprocessed
854 entries. The workers needs to acquire one token before fetching a task.
854 entries. The workers needs to acquire one token before fetching a task.
855 They will be released by the consumer of the produced data.
855 They will be released by the consumer of the produced data.
856 """
856 """
857 tokens.acquire()
857 tokens.acquire()
858 rev = revs_queue.get()
858 rev = revs_queue.get()
859 while rev is not None:
859 while rev is not None:
860 data = _getsidedata(srcrepo, rev)
860 data = _getsidedata(srcrepo, rev)
861 sidedata_queue.put((rev, data))
861 sidedata_queue.put((rev, data))
862 tokens.acquire()
862 tokens.acquire()
863 rev = revs_queue.get()
863 rev = revs_queue.get()
864 # processing of `None` is completed, release the token.
864 # processing of `None` is completed, release the token.
865 tokens.release()
865 tokens.release()
866
866
867
867
# Per-worker sizing factor for the token semaphore that bounds the number of
# unprocessed sidedata entries (see _get_worker_sidedata_adder).
BUFF_PER_WORKER = 50
869
869
870
870
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them.

    Returns a ``sidedata_companion(revlog, rev)`` callable suitable for the
    revlog upgrade machinery.
    """
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    # bounds the number of precomputed-but-unconsumed results
    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # cost.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers, one per worker
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        data = {}, False
        # Attribute names must be native strings on Python 3 (a bytes name
        # makes getattr raise TypeError); this also matches the simple,
        # non-worker variant of this check.
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            data = staging.pop(rev, None)
            if data is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, data = sidedataq.get()
                while r != rev:
                    staging[r] = data
                    r, data = sidedataq.get()
            # free one slot for the workers
            tokens.release()
        sidedata, has_copies_info = data
        new_flag = 0
        if has_copies_info:
            new_flag = sidedataflag.REVIDX_HASCOPIESINFO
        return False, (), sidedata, new_flag, 0

    return sidedata_companion
931
931
932
932
933 def _get_simple_sidedata_adder(srcrepo, destrepo):
933 def _get_simple_sidedata_adder(srcrepo, destrepo):
934 """The simple version of the sidedata computation
934 """The simple version of the sidedata computation
935
935
936 It just compute it in the same thread on request"""
936 It just compute it in the same thread on request"""
937
937
938 def sidedatacompanion(revlog, rev):
938 def sidedatacompanion(revlog, rev):
939 sidedata, has_copies_info = {}, False
939 sidedata, has_copies_info = {}, False
940 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
940 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
941 sidedata, has_copies_info = _getsidedata(srcrepo, rev)
941 sidedata, has_copies_info = _getsidedata(srcrepo, rev)
942 new_flag = 0
942 new_flag = 0
943 if has_copies_info:
943 if has_copies_info:
944 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
944 new_flag = sidedataflag.REVIDX_HASCOPIESINFO
945
945
946 return False, (), sidedata, new_flag, 0
946 return False, (), sidedata, new_flag, 0
947
947
948 return sidedatacompanion
948 return sidedatacompanion
949
949
950
950
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion stripping copy-tracing sidedata."""

    def sidedatacompanion(revlog, rev):
        keys_to_drop = ()
        is_changelog = util.safehasattr(revlog, 'filteredrevs')
        if is_changelog and revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
            keys_to_drop = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, keys_to_drop, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now