changing-files: document the various sets...
marmoute
r46078:1f50bcc9 default
metadata.py

@@ -1,424 +1,479 @@
# metadata.py -- code related to various metadata computation and access.
#
# Copyright 2019 Google, Inc <martinvonz@google.com>
# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function

import multiprocessing

from . import (
    error,
    node,
    pycompat,
    util,
)

from .revlogutils import (
    flagutil as sidedataflag,
    sidedata as sidedatamod,
)


class ChangingFiles(object):
26 """A class recording the changes made to a file by a revision
26 """A class recording the changes made to a file by a changeset
27
28 Actions performed on files are gathered into 3 sets:
29
30 - added: files actively added in the changeset.
31 - removed: files removed in the revision
32 - touched: files affected by the merge
33
34 and copies information is held by 2 mappings
35
36 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
37 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
38
39 See their inline help for details.
27 """
40 """
28
41
    def __init__(
        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
    ):
        self._added = set(added)
        self._removed = set(removed)
        self._touched = set(touched)
        self._touched.update(self._added)
        self._touched.update(self._removed)
        self._p1_copies = dict(p1_copies)
        self._p2_copies = dict(p2_copies)

    @property
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the
        changeset's parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        self._added.add(filename)
        self._touched.add(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @property
    def removed(self):
76 """files actively removed by the changeset
77
78 In case of merge this will only contain the set of files removing "new"
79 content. For any file absent in the current changeset:
80
81 a) If the file exists in both parents, it is clearly "actively" removed
82 by this changeset.
83
84 b) If a file exists in only one parent and in none of the common
85 ancestors, then the file was newly added in one of the merged branches
86 and then got "actively" removed.
87
88 c) If a file exists in only one parent and at least one of the common
89 ancestors using the same filenode, then the file was unchanged on one
90 side and deleted on the other side. The merge "passively" propagated
91 that deletion, but didn't "actively" remove the file. In this case the
92 file is *not* included in the `removed` set.
93
94 d) If a file exists in only one parent and at least one of the common
95 ancestors using a different filenode, then the file was changed on one
96 side and removed on the other side. The merge process "actively"
97 decided to drop the new change and delete the file. Unlike in the
98 previous case, (c), the file included in the `removed` set.
99
100 Summary table for merge:
101
102 case | exists in parents | exists in gca || removed
103 (a) | both | * || yes
104 (b) | one | none || yes
105 (c) | one | same filenode || no
106 (d) | one | new filenode || yes
107 """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        self._removed.add(filename)
        self._touched.add(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @property
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @property
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @property
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)


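# Illustrative sketch (hypothetical; not part of this changeset): how the
# ChangingFiles API above fits together. Added and removed files are
# automatically folded into `touched`, and copy records are plain
# {dest: source} mappings.
def _example_changing_files():
    files = ChangingFiles()
    files.mark_added(b'new-name')
    files.mark_copied_from_p1(b'old-name', b'new-name')
    files.mark_removed(b'gone')
    files.mark_touched(b'modified')
    assert files.added == {b'new-name'}
    assert files.removed == {b'gone'}
    assert files.touched == {b'new-name', b'gone', b'modified'}
    assert files.copied_from_p1 == {b'new-name': b'old-name'}

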
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added


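# Illustrative sketch (hypothetical; not part of this changeset): the
# "added" rule used by computechangesetfilesadded, restated with plain
# sets standing in for the parents' manifests. A file counts as added
# only when no parent already has it.
def _example_added_semantics():
    parents = [{b'a', b'b'}, {b'a', b'c'}]  # filenames in p1 and p2
    files = [b'b', b'd']  # files touched by the changeset
    added = [f for f in files if not any(f in p for p in parents)]
    assert added == [b'd']  # b'b' already exists in p1, so it is not added

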
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contains the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there are two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            return True

    return deletionfromparent


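# Illustrative sketch (hypothetical; not part of this changeset): the
# summary table from `ChangingFiles.removed`, replayed against the
# three-way check performed by `deletionfromparent` above, with plain
# {filename: filenode} dicts standing in for manifests.
def _example_removal_filter_cases():
    def passively_deleted(f, m1, m2, ancestors):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma[f] == m1[f] for ma in ancestors
            )
        elif f in m2:
            return all(f in ma and ma[f] == m2[f] for ma in ancestors)
        else:
            return True

    anc = [{b'f': b'node-old'}]  # the common ancestor's manifest
    # (a) present in both parents, absent from the merge: actively removed
    assert not passively_deleted(b'f', {b'f': b'x'}, {b'f': b'x'}, anc)
    # (b) present in one parent, absent from the ancestor: actively removed
    assert not passively_deleted(b'f', {b'f': b'x'}, {}, [{}])
    # (c) unchanged on one side, deleted on the other: passively deleted
    assert passively_deleted(b'f', {b'f': b'node-old'}, {}, anc)
    # (d) changed on one side, deleted on the other: actively removed
    assert not passively_deleted(b'f', {b'f': b'node-new'}, {}, anc)

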
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    if removed:
        rf = get_removal_filter(ctx)
        removed = [r for r in removed if not rf(r)]
    return removed


def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None


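# Illustrative sketch (hypothetical; not part of this changeset): the
# copies encoding round-trips through "<file-index>\0<source>" records,
# one per line, and decoding tolerates malformed data by returning None.
def _example_copies_roundtrip():
    files = [b'copied', b'plain']
    copies = {b'copied': b'original'}
    data = encodecopies(files, copies)
    assert data == b'0\x00original'
    assert decodecopies(files, data) == copies
    assert decodecopies(files, b'garbage') is None  # unparsable payload

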
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None


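# Illustrative sketch (hypothetical; not part of this changeset): file
# sets are stored as newline-separated indices into the sorted file list,
# and out-of-range indices make decoding fail soft with None.
def _example_indices_roundtrip():
    files = [b'a', b'b', b'c']
    data = encodefileindices(files, {b'a', b'c'})
    assert data == b'0\n2'
    assert decodefileindices(files, data) == [b'a', b'c']
    assert decodefileindices(files, b'5') is None  # index out of range

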
def encode_copies_sidedata(files):
    sortedfiles = sorted(files.touched)
    sidedata = {}
    p1copies = files.copied_from_p1
    if p1copies:
        p1copies = encodecopies(sortedfiles, p1copies)
        sidedata[sidedatamod.SD_P1COPIES] = p1copies
    p2copies = files.copied_from_p2
    if p2copies:
        p2copies = encodecopies(sortedfiles, p2copies)
        sidedata[sidedatamod.SD_P2COPIES] = p2copies
    filesadded = files.added
    if filesadded:
        filesadded = encodefileindices(sortedfiles, filesadded)
        sidedata[sidedatamod.SD_FILESADDED] = filesadded
    filesremoved = files.removed
    if filesremoved:
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    if not sidedata:
        sidedata = None
    return sidedata


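# Illustrative sketch (hypothetical; not part of this changeset): only
# the non-empty categories of a ChangingFiles instance end up in the
# sidedata map, and a changeset that touches nothing encodes to None.
def _example_sidedata_encoding():
    files = ChangingFiles(
        touched=(b'modified',), added=(b'new',), p1_copies={b'new': b'old'},
    )
    sidedata = encode_copies_sidedata(files)
    assert sidedatamod.SD_P1COPIES in sidedata
    assert sidedatamod.SD_FILESADDED in sidedata
    assert sidedatamod.SD_FILESREMOVED not in sidedata
    assert encode_copies_sidedata(ChangingFiles()) is None

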
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata


def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)


def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by workers precomputing sidedata

    It reads an input queue containing revision numbers.
    It writes to an output queue containing (rev, <sidedata-map>) pairs.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. The workers need to acquire one token before fetching a task.
    They will be released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


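# Illustrative sketch (hypothetical; not part of this changeset): the
# producer/consumer protocol of _sidedata_worker above, reduced to a
# standalone demo that squares numbers instead of computing sidedata.
# The bounded semaphore caps how many computed-but-unconsumed results
# can pile up; the consumer releases one token per result it drains.
def _example_square_worker(inq, outq, tokens):
    tokens.acquire()
    item = inq.get()
    while item is not None:
        outq.put((item, item * item))
        tokens.acquire()
        item = inq.get()
    tokens.release()  # token acquired for the final None marker


def _example_worker_protocol():
    tokens = multiprocessing.BoundedSemaphore(4)
    inq = multiprocessing.Queue()
    outq = multiprocessing.Queue()
    for i in range(8):
        inq.put(i)
    inq.put(None)  # "no more tasks" marker
    proc = multiprocessing.Process(
        target=_example_square_worker, args=(inq, outq, tokens)
    )
    proc.start()
    results = {}
    for _ in range(8):
        rev, data = outq.get()
        results[rev] = data
        tokens.release()  # free a slot so the worker keeps producing
    proc.join()
    assert results == {i: i * i for i in range(8)}

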
BUFF_PER_WORKER = 50


def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42 and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
            # Is the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion


def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


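# Illustrative sketch (hypothetical; not part of this changeset): every
# companion returned in this module follows the same contract expected by
# the repository-upgrade machinery: for each revision it yields a triple
# (skip-the-revision, sidedata-keys-to-remove, sidedata-to-add).
def _example_companion_contract():
    def noop_companion(revlog, rev):
        return False, (), {}  # keep the revision, drop nothing, add nothing

    assert noop_companion(None, 0) == (False, (), {})

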
def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion