git: decode node IDs back into Python strings (issue6349)...
Hollis Blanchard
r45472:fb2936c5 default
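
The one functional hunk in this change is about Python 3 string types: _createdb() sets db.text_factory = bytes, so node IDs read back from the possible_heads table come out as bytes, while the current heads collected from pygit2 (Oid.hex) are str. The set comparison that decides whether the changelog needs re-walking therefore never matches, and every call re-indexes the repository. Below is a minimal standalone sketch of that mismatch; the in-memory database and the 'deadbeef' node are illustrative only and not part of the patch.

import sqlite3

db = sqlite3.connect(':memory:')
db.text_factory = bytes  # same setting as _createdb() below
db.execute('CREATE TABLE possible_heads (node TEXT NOT NULL)')
db.execute('INSERT INTO possible_heads (node) VALUES(?)', ('deadbeef' * 5,))

# Rows come back as bytes because of text_factory, but pygit2's Oid.hex
# values are str, so the "did the heads change?" comparison always fails.
cache_heads = {x[0] for x in db.execute('SELECT node FROM possible_heads')}
cur_cache_heads = {'deadbeef' * 5}
print(cache_heads == cur_cache_heads)  # False: bytes vs. str

# Decoding each node first -- which is what the patched comprehension does
# via pycompat.sysstr (a plain decode for these ASCII hex IDs) -- makes the
# comparison meaningful again.
cache_heads = {
    row[0].decode('ascii')
    for row in db.execute('SELECT node FROM possible_heads')
}
print(cache_heads == cur_cache_heads)  # True: the re-walk can be skipped
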
@@ -1,350 +1,353 @@
from __future__ import absolute_import

import collections
import os
import sqlite3

from mercurial.i18n import _

from mercurial import (
    encoding,
    error,
    node as nodemod,
    pycompat,
)

from . import gitutil


pygit2 = gitutil.get_pygit2()

_CURRENT_SCHEMA_VERSION = 1
_SCHEMA = (
    """
CREATE TABLE refs (
  -- node and name are unique together. There may be more than one name for
  -- a given node, and there may be no name at all for a given node (in the
  -- case of an anonymous hg head).
  node TEXT NOT NULL,
  name TEXT
);

-- The "possible heads" of the repository, which we use to figure out
-- if we need to re-walk the changelog.
CREATE TABLE possible_heads (
  node TEXT NOT NULL
);

-- The topological heads of the changelog, which hg depends on.
CREATE TABLE heads (
  node TEXT NOT NULL
);

-- A total ordering of the changelog
CREATE TABLE changelog (
  rev INTEGER NOT NULL PRIMARY KEY,
  node TEXT NOT NULL,
  p1 TEXT,
  p2 TEXT
);

CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);

-- Changed files for each commit, which lets us dynamically build
-- filelogs.
CREATE TABLE changedfiles (
  node TEXT NOT NULL,
  filename TEXT NOT NULL,
  -- 40 zeroes for deletions
  filenode TEXT NOT NULL,
  -- to handle filelog parentage:
  p1node TEXT,
  p1filenode TEXT,
  p2node TEXT,
  p2filenode TEXT
);

CREATE INDEX changedfiles_nodes_idx
  ON changedfiles(node);

PRAGMA user_version=%d
"""
    % _CURRENT_SCHEMA_VERSION
)


def _createdb(path):
    # print('open db', path)
    # import traceback
    # traceback.print_stack()
    db = sqlite3.connect(encoding.strfromlocal(path))
    db.text_factory = bytes

    res = db.execute('PRAGMA user_version').fetchone()[0]

    # New database.
    if res == 0:
        for statement in _SCHEMA.split(';'):
            db.execute(statement.strip())

        db.commit()

    elif res == _CURRENT_SCHEMA_VERSION:
        pass

    else:
        raise error.Abort(_(b'sqlite database has unrecognized version'))

    db.execute('PRAGMA journal_mode=WAL')

    return db


_OUR_ORDER = ()
if pygit2:
    _OUR_ORDER = (
        pygit2.GIT_SORT_TOPOLOGICAL
        | pygit2.GIT_SORT_TIME
        | pygit2.GIT_SORT_REVERSE
    )

_DIFF_FLAGS = 1 << 21  # GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2


def _find_nearest_ancestor_introducing_node(
    db, gitrepo, file_path, walk_start, filenode
):
    """Find the nearest ancestor that introduces a file node.

    Args:
      db: a handle to our sqlite database.
      gitrepo: A pygit2.Repository instance.
      file_path: the path of a file in the repo
      walk_start: a pygit2.Oid that is a commit where we should start walking
        for our nearest ancestor.

    Returns:
      A hexlified SHA that is the commit ID of the next-nearest parent.
    """
    assert isinstance(file_path, str), 'file_path must be str, got %r' % type(
        file_path
    )
    assert isinstance(filenode, str), 'filenode must be str, got %r' % type(
        filenode
    )
    parent_options = {
        row[0].decode('ascii')
        for row in db.execute(
            'SELECT node FROM changedfiles '
            'WHERE filename = ? AND filenode = ?',
            (file_path, filenode),
        )
    }
    inner_walker = gitrepo.walk(walk_start, _OUR_ORDER)
    for w in inner_walker:
        if w.id.hex in parent_options:
            return w.id.hex
    raise error.ProgrammingError(
        'Unable to find introducing commit for %s node %s from %s',
        (file_path, filenode, walk_start),
    )


def fill_in_filelog(gitrepo, db, startcommit, path, startfilenode):
    """Given a starting commit and path, fill in a filelog's parent pointers.

    Args:
      gitrepo: a pygit2.Repository
      db: a handle to our sqlite database
      startcommit: a hexlified node id for the commit to start at
      path: the path of the file whose parent pointers we should fill in.
      filenode: the hexlified node id of the file at startcommit

    TODO: make filenode optional
    """
    assert isinstance(
        startcommit, str
    ), 'startcommit must be str, got %r' % type(startcommit)
    assert isinstance(
        startfilenode, str
    ), 'startfilenode must be str, got %r' % type(startfilenode)
    visit = collections.deque([(startcommit, startfilenode)])
    while visit:
        cnode, filenode = visit.popleft()
        commit = gitrepo[cnode]
        parents = []
        for parent in commit.parents:
            t = parent.tree
            for comp in path.split('/'):
                try:
                    t = gitrepo[t[comp].id]
                except KeyError:
                    break
            else:
                introducer = _find_nearest_ancestor_introducing_node(
                    db, gitrepo, path, parent.id, t.id.hex
                )
                parents.append((introducer, t.id.hex))
        p1node = p1fnode = p2node = p2fnode = gitutil.nullgit
        for par, parfnode in parents:
            found = int(
                db.execute(
                    'SELECT COUNT(*) FROM changedfiles WHERE '
                    'node = ? AND filename = ? AND filenode = ? AND '
                    'p1node NOT NULL',
                    (par, path, parfnode),
                ).fetchone()[0]
            )
            if found == 0:
                assert par is not None
                visit.append((par, parfnode))
        if parents:
            p1node, p1fnode = parents[0]
        if len(parents) == 2:
            p2node, p2fnode = parents[1]
        if len(parents) > 2:
            raise error.ProgrammingError(
                b"git support can't handle octopus merges"
            )
        db.execute(
            'UPDATE changedfiles SET '
            'p1node = ?, p1filenode = ?, p2node = ?, p2filenode = ? '
            'WHERE node = ? AND filename = ? AND filenode = ?',
            (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode),
        )
    db.commit()


def _index_repo(gitrepo, db, progress_factory=lambda *args, **kwargs: None):
    # Identify all references so we can tell the walker to visit all of them.
    all_refs = gitrepo.listall_references()
    possible_heads = set()
    prog = progress_factory(b'refs')
    for pos, ref in enumerate(all_refs):
        if prog is not None:
            prog.update(pos)
        if not (
            ref.startswith('refs/heads/')  # local branch
            or ref.startswith('refs/tags/')  # tag
            or ref.startswith('refs/remotes/')  # remote branch
            or ref.startswith('refs/hg/')  # from this extension
        ):
            continue
        try:
            start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT)
        except ValueError:
            # No commit to be found, so we don't care for hg's purposes.
            continue
        possible_heads.add(start.id)
    # Optimization: if the list of heads hasn't changed, don't
    # reindex the changelog. This doesn't matter on small
    # repositories, but on even moderately deep histories (eg cpython)
    # this is a very important performance win.
    #
    # TODO: we should figure out how to incrementally index history
    # (preferably by detecting rewinds!) so that we don't have to do a
    # full changelog walk every time a new commit is created.
-    cache_heads = {x[0] for x in db.execute('SELECT node FROM possible_heads')}
+    cache_heads = {
+        pycompat.sysstr(x[0])
+        for x in db.execute('SELECT node FROM possible_heads')
+    }
    walker = None
    cur_cache_heads = {h.hex for h in possible_heads}
    if cur_cache_heads == cache_heads:
        return
    for start in possible_heads:
        if walker is None:
            walker = gitrepo.walk(start, _OUR_ORDER)
        else:
            walker.push(start)

    # Empty out the existing changelog. Even for large-ish histories
    # we can do the top-level "walk all the commits" dance very
    # quickly as long as we don't need to figure out the changed files
    # list.
    db.execute('DELETE FROM changelog')
    if prog is not None:
        prog.complete()
    prog = progress_factory(b'commits')
    # This walker is sure to visit all the revisions in history, but
    # only once.
    for pos, commit in enumerate(walker):
        if prog is not None:
            prog.update(pos)
        p1 = p2 = nodemod.nullhex
        if len(commit.parents) > 2:
            raise error.ProgrammingError(
                (
                    b"git support can't handle octopus merges, "
                    b"found a commit with %d parents :("
                )
                % len(commit.parents)
            )
        if commit.parents:
            p1 = commit.parents[0].id.hex
        if len(commit.parents) == 2:
            p2 = commit.parents[1].id.hex
        db.execute(
            'INSERT INTO changelog (rev, node, p1, p2) VALUES(?, ?, ?, ?)',
            (pos, commit.id.hex, p1, p2),
        )

        num_changedfiles = db.execute(
            "SELECT COUNT(*) from changedfiles WHERE node = ?",
            (commit.id.hex,),
        ).fetchone()[0]
        if not num_changedfiles:
            files = {}
            # I *think* we only need to check p1 for changed files
            # (and therefore linkrevs), because any node that would
            # actually have this commit as a linkrev would be
            # completely new in this rev.
            p1 = commit.parents[0].id.hex if commit.parents else None
            if p1 is not None:
                patchgen = gitrepo.diff(p1, commit.id.hex, flags=_DIFF_FLAGS)
            else:
                patchgen = commit.tree.diff_to_tree(
                    swap=True, flags=_DIFF_FLAGS
                )
            new_files = (p.delta.new_file for p in patchgen)
            files = {
                nf.path: nf.id.hex
                for nf in new_files
                if nf.id.raw != nodemod.nullid
            }
            for p, n in files.items():
                # We intentionally set NULLs for any file parentage
                # information so it'll get demand-computed later. We
                # used to do it right here, and it was _very_ slow.
                db.execute(
                    'INSERT INTO changedfiles ('
                    'node, filename, filenode, p1node, p1filenode, p2node, '
                    'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
                    (commit.id.hex, p, n, None, None, None, None),
                )
    db.execute('DELETE FROM heads')
    db.execute('DELETE FROM possible_heads')
    for hid in possible_heads:
        h = hid.hex
        db.execute('INSERT INTO possible_heads (node) VALUES(?)', (h,))
        haschild = db.execute(
            'SELECT COUNT(*) FROM changelog WHERE p1 = ? OR p2 = ?', (h, h)
        ).fetchone()[0]
        if not haschild:
            db.execute('INSERT INTO heads (node) VALUES(?)', (h,))

    db.commit()
    if prog is not None:
        prog.complete()


def get_index(gitrepo, progress_factory=lambda *args, **kwargs: None):
    cachepath = os.path.join(
        pycompat.fsencode(gitrepo.path), b'..', b'.hg', b'cache'
    )
    if not os.path.exists(cachepath):
        os.makedirs(cachepath)
    dbpath = os.path.join(cachepath, b'git-commits.sqlite')
    db = _createdb(dbpath)
    # TODO check against gitrepo heads before doing a full index
    # TODO thread a ui.progress call into this layer
    _index_repo(gitrepo, db, progress_factory)
    return db
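
To see the effect of the fix end to end, here is a rough usage sketch, not from the patch: it assumes pygit2 is installed and that this module is importable as hgext.git.index (its location in the Mercurial tree); the repository path is a placeholder, and get_index() creates the sibling .hg/cache directory for the sqlite file if it is missing.

import pygit2

from hgext.git import index

# Placeholder path; any local git repository will do.
gitrepo = pygit2.Repository('/path/to/repo/.git')

db = index.get_index(gitrepo)  # first call walks the full changelog
db = index.get_index(gitrepo)  # second call: the cached heads now decode to
                               # str and compare equal to the pygit2 heads,
                               # so _index_repo() returns before re-walking
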