##// END OF EJS Templates
changing-files: implement equality checking...
marmoute -
r46079:df878210 default
parent child Browse files
Show More
@@ -1,479 +1,488 b''
1 # metadata.py -- code related to various metadata computation and access.
1 # metadata.py -- code related to various metadata computation and access.
2 #
2 #
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import multiprocessing
10 import multiprocessing
11
11
12 from . import (
12 from . import (
13 error,
13 error,
14 node,
14 node,
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from .revlogutils import (
19 from .revlogutils import (
20 flagutil as sidedataflag,
20 flagutil as sidedataflag,
21 sidedata as sidedatamod,
21 sidedata as sidedatamod,
22 )
22 )
23
23
24
24
class ChangingFiles(object):
    """A class recording the changes made to a file by a changeset

    Actions performed on files are gathered into 3 sets:

    - added: files actively added in the changeset.
    - removed: files removed in the revision
    - touched: files affected by the merge

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
    ):
        self._added = set(added)
        self._removed = set(removed)
        # `touched` is kept a superset of both `added` and `removed`
        self._touched = set(touched)
        self._touched.update(self._added)
        self._touched.update(self._removed)
        self._p1_copies = dict(p1_copies)
        self._p2_copies = dict(p2_copies)

    def __eq__(self, other):
        # Comparing against an unrelated type would raise AttributeError;
        # returning NotImplemented instead lets Python fall back to its
        # default handling (identity comparison).
        if not isinstance(other, ChangingFiles):
            return NotImplemented
        return (
            self.added == other.added
            and self.removed == other.removed
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

    def __ne__(self, other):
        # Python 2 does not derive `!=` from __eq__, so spell it out for
        # the py2/py3 dual support this module targets.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    @property
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        self._added.add(filename)
        self._touched.add(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @property
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) | both              |     *         ||   yes
         (b) | one               |     none      ||   yes
         (c) | one               | same filenode ||   no
         (d) | one               |  new filenode ||   yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        self._removed.add(filename)
        self._touched.add(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @property
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @property
    def copied_from_p1(self):
        # return a copy so callers cannot mutate our internal state
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @property
    def copied_from_p2(self):
        # return a copy so callers cannot mutate our internal state
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
151
160
152
161
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when it is touched by the changeset and absent
    from every parent.
    """
    return [
        f
        for f in ctx.files()
        if all(f not in p for p in ctx.parents())
    ]
161
170
162
171
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this function decides
    whether the absence comes from a parent's deletion or from the merge
    commit itself, via a simplified three-way merge of the manifest entry.
    The merge itself did *not* delete a file when either:
    - neither parent (nor the merge) contains the file, or
    - exactly one parent contains the file with the same filelog entry as
      the merge ancestor(s) — i.e. that parent left the file untouched
      while the other side deleted it.
    Deleting a file is similar to emptying it, so the changed-files list
    should look the same either way; `_filecommit` performs a very similar
    filelog-node comparison when building that list.
    """

    if x is None:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()
    else:
        p1, p2, m1, m2 = x

    @util.cachefunc
    def mas():
        # manifests of the greatest common ancestors (nullrev if none)
        heads = ctx.repo().changelog.commonancestorsheads(p1.node(), p2.node())
        if not heads:
            heads = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in heads]

    def deletionfromparent(f):
        if f in m1:
            if f in m2:
                return False
            return all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        if f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        return True

    return deletionfromparent
216
225
217
226
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    Candidates are the touched files absent from the changeset; merges then
    filter out absences merely inherited from a parent (see
    ``get_removal_filter``).
    """
    candidates = [f for f in ctx.files() if f not in ctx]
    if not candidates:
        return candidates
    inherited = get_removal_filter(ctx)
    return [f for f in candidates if not inherited(f)]
229
238
230
239
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    copies_p1 = {}
    copies_p2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    matcher = ctx._repo.narrowmatch()
    for dest in ctx.files():
        # skip files outside the narrow spec and files absent from ctx
        if not matcher(dest) or dest not in ctx:
            continue
        rename = ctx[dest].renamed()
        if rename:
            source, source_node = rename
            # attribute the copy to whichever parent holds the exact
            # source filenode
            if source in parent1 and parent1[source].filenode() == source_node:
                copies_p1[dest] = source
            elif (
                source in parent2
                and parent2[source].filenode() == source_node
            ):
                copies_p2[dest] = source
    return copies_p1, copies_p2
255
264
256
265
def encodecopies(files, copies):
    """Encode ``copies`` as newline-separated ``<index>\\0<source>`` entries.

    ``<index>`` is the position of the copy destination within ``files``.
    Raises ProgrammingError if some copy destination is not in ``files``.
    """
    encoded = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    if len(encoded) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(encoded)
267
276
268
277
def decodecopies(files, data):
    """Decode the output of ``encodecopies`` back into ``{dest: source}``.

    Returns None when ``data`` does not parse — e.g. the extra field reused
    the same key name (such as "p1copies") with a different value syntax.
    """
    if not data:
        return {}
    try:
        decoded = {}
        for entry in data.split(b'\n'):
            rawindex, source = entry.split(b'\0')
            decoded[files[int(rawindex)]] = source
        return decoded
    except (ValueError, IndexError):
        return None
284
293
285
294
def encodefileindices(files, subset):
    """Encode ``subset`` as the newline-joined decimal indices of its
    members within ``files`` (in ``files`` order)."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, name in enumerate(files) if name in wanted
    )
293
302
294
303
def decodefileindices(files, data):
    """Decode the output of ``encodefileindices`` back into a file list.

    Returns None on out-of-range indices or unparsable input — e.g. the
    extra field reused the same key name (such as "added") with a
    different value syntax.
    """
    if not data:
        return []
    try:
        decoded = []
        for chunk in data.split(b'\n'):
            idx = int(chunk)
            if not (0 <= idx < len(files)):
                return None
            decoded.append(files[idx])
        return decoded
    except (ValueError, IndexError):
        return None
310
319
311
320
def encode_copies_sidedata(files):
    """Build the sidedata mapping for a ``ChangingFiles``-like object.

    Only non-empty entries are stored; returns None when nothing needs to
    be recorded at all.
    """
    # indices in the encoded entries refer to the sorted touched files
    sortedfiles = sorted(files.touched)
    sidedata = {}
    p1copies = files.copied_from_p1
    if p1copies:
        sidedata[sidedatamod.SD_P1COPIES] = encodecopies(sortedfiles, p1copies)
    p2copies = files.copied_from_p2
    if p2copies:
        sidedata[sidedatamod.SD_P2COPIES] = encodecopies(sortedfiles, p2copies)
    added = files.added
    if added:
        sidedata[sidedatamod.SD_FILESADDED] = encodefileindices(
            sortedfiles, added
        )
    removed = files.removed
    if removed:
        sidedata[sidedatamod.SD_FILESREMOVED] = encodefileindices(
            sortedfiles, removed
        )
    return sidedata or None
334
343
335
344
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata map for revision ``rev``.

    Returns a (possibly empty) dict keyed by sidedata constants.
    """
    ctx = srcrepo[rev]
    copies = computechangesetcopies(ctx)
    added = computechangesetfilesadded(ctx)
    removed = computechangesetfilesremoved(ctx)
    sidedata = {}
    if not any([copies, added, removed]):
        return sidedata
    # encoded indices refer to the sorted file list of the changeset
    allfiles = sorted(ctx.files())
    p1copies, p2copies = copies
    p1copies = encodecopies(allfiles, p1copies)
    p2copies = encodecopies(allfiles, p2copies)
    added = encodefileindices(allfiles, added)
    removed = encodefileindices(allfiles, removed)
    # only store non-empty encodings
    if p1copies:
        sidedata[sidedatamod.SD_P1COPIES] = p1copies
    if p2copies:
        sidedata[sidedatamod.SD_P2COPIES] = p2copies
    if added:
        sidedata[sidedatamod.SD_FILESADDED] = added
    if removed:
        sidedata[sidedatamod.SD_FILESREMOVED] = removed
    return sidedata
358
367
359
368
def getsidedataadder(srcrepo, destrepo):
    """Pick the sidedata-adding companion: parallel when workers are
    enabled (and not on Windows), in-thread otherwise."""
    use_workers = srcrepo.ui.configbool(
        b'experimental', b'worker.repository-upgrade'
    )
    if use_workers and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
366
375
367
376
def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """Worker loop precomputing sidedata.

    Reads revision numbers from ``revs_queue`` and writes
    ``(rev, <sidedata-map>)`` pairs to ``sidedata_queue``.  ``None`` on the
    input queue is the stop signal.

    The ``tokens`` semaphore bounds the number of unprocessed entries: a
    token is acquired before fetching each task and released by the
    consumer of the produced data.
    """
    tokens.acquire()
    while True:
        rev = revs_queue.get()
        if rev is None:
            break
        sidedata_queue.put((rev, _getsidedata(srcrepo, rev)))
        tokens.acquire()
    # the `None` task has been consumed; give its token back
    tokens.release()
389
398
390
399
# Per-worker buffering budget: the producer/consumer semaphore in
# `_get_worker_sidedata_adder` is sized to nbworkers * BUFF_PER_WORKER,
# bounding how many precomputed sidedata entries may be in flight.
BUFF_PER_WORKER = 50
392
401
393
402
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawn a pool of worker that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    # bound the number of precomputed-but-unconsumed entries
    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it make
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not revelant. In practice, most computation
    # are fast but some are very expensive and dominate all the other smaller
    # cost.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers, one per worker
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionnary to store results for revision higher than we one we are
    # looking for. For example, if we need the sidedatamap for 42, and 43 is
    # received, when shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        """Return (False, (), sidedata) for ``rev``; sidedata is only
        computed for the changelog."""
        sidedata = {}
        # NOTE: the attribute name must be a native str — on Python 3
        # getattr() raises TypeError for a bytes name, so the previous
        # b'filteredrevs' form could never match (and was inconsistent
        # with the str form used by the other companions in this module).
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued result until we find the one we are lookig
                # for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            # one consumed entry: let a worker fetch another task
            tokens.release()
        return False, (), sidedata

    return sidedata_companion
450
459
451
460
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just compute it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        """Return (False, (), sidedata) for ``rev``; sidedata is only
        computed for the changelog."""
        is_changelog = util.safehasattr(revlog, 'filteredrevs')
        sidedata = _getsidedata(srcrepo, rev) if is_changelog else {}
        return False, (), sidedata

    return sidedatacompanion
464
473
465
474
def getsidedataremover(srcrepo, destrepo):
    """Return a companion that strips copy-tracing sidedata entries from
    changelog revisions that carry them."""

    def sidedatacompanion(revlog, rev):
        keys_to_drop = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                keys_to_drop = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, keys_to_drop, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now