upstream/mercurial-mirror Commit - r52618:bbe59cc5

1

use std::ascii::escape_default;

1

use std::ascii::escape_default;

2

use std::borrow::Cow;

2

use std::borrow::Cow;

3

use std::collections::BTreeMap;

3

use std::collections::BTreeMap;

4

use std::fmt::{Debug, Formatter};

4

use std::fmt::{Debug, Formatter};

5

use std::{iter, str};

5

use std::{iter, str};

6

7

use chrono::{DateTime, FixedOffset, NaiveDateTime};

7

use chrono::{DateTime, FixedOffset, NaiveDateTime};

8

use itertools::{Either, Itertools};

8

use itertools::{Either, Itertools};

9

10

use crate::errors::HgError;

10

use crate::errors::HgError;

11

use crate::revlog::Index;

11

use crate::revlog::Revision;

12

use crate::revlog::Revision;

12

use crate::revlog::{Node, NodePrefix};

13

use crate::revlog::{Node, NodePrefix};

13

use crate::revlog::{Revlog, RevlogEntry, RevlogError};

14

use crate::revlog::{Revlog, RevlogEntry, RevlogError};

14

use crate::utils::hg_path::HgPath;

15

use crate::utils::hg_path::HgPath;

15

use crate::vfs::Vfs;

16

use crate::vfs::Vfs;

16

use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};

17

use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};

17

18

/// A specialized `Revlog` to work with changelog data format.

19

/// A specialized `Revlog` to work with changelog data format.

19

pub struct Changelog {

20

pub struct Changelog {

20

/// The generic `revlog` format.

21

/// The generic `revlog` format.

21

pub(crate) revlog: Revlog,

22

pub(crate) revlog: Revlog,

22

}

23

}

23

24

impl Changelog {

25

impl Changelog {

25

/// Open the `changelog` of a repository given by its root.

26

/// Open the `changelog` of a repository given by its root.

26

pub fn open(

27

pub fn open(

27

store_vfs: &Vfs,

28

store_vfs: &Vfs,

28

options: RevlogOpenOptions,

29

options: RevlogOpenOptions,

29

) -> Result<Self, HgError> {

30

) -> Result<Self, HgError> {

30

let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;

31

let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;

31

Ok(Self { revlog })

32

Ok(Self { revlog })

32

}

33

}

33

34

/// Return the `ChangelogRevisionData` for the given node ID.

35

/// Return the `ChangelogRevisionData` for the given node ID.

35

pub fn data_for_node(

36

pub fn data_for_node(

36

&self,

37

&self,

37

node: NodePrefix,

38

node: NodePrefix,

38

) -> Result<ChangelogRevisionData, RevlogError> {

39

) -> Result<ChangelogRevisionData, RevlogError> {

39

let rev = self.revlog.rev_from_node(node)?;

40

let rev = self.revlog.rev_from_node(node)?;

40

self.entry_for_checked_rev(rev)?.data()

41

self.entry_for_checked_rev(rev)?.data()

41

}

42

}

42

43

/// Return the [`ChangelogEntry`] for the given revision number.

44

/// Return the [`ChangelogEntry`] for the given revision number.

44

pub fn entry_for_rev(

45

pub fn entry_for_rev(

45

&self,

46

&self,

46

rev: UncheckedRevision,

47

rev: UncheckedRevision,

47

) -> Result<ChangelogEntry, RevlogError> {

48

) -> Result<ChangelogEntry, RevlogError> {

48

let revlog_entry = self.revlog.get_entry(rev)?;

49

let revlog_entry = self.revlog.get_entry(rev)?;

49

Ok(ChangelogEntry { revlog_entry })

50

Ok(ChangelogEntry { revlog_entry })

50

}

51

}

51

52

/// Same as [`Self::entry_for_rev`] for checked revisions.

53

/// Same as [`Self::entry_for_rev`] for checked revisions.

53

fn entry_for_checked_rev(

54

fn entry_for_checked_rev(

54

&self,

55

&self,

55

rev: Revision,

56

rev: Revision,

56

) -> Result<ChangelogEntry, RevlogError> {

57

) -> Result<ChangelogEntry, RevlogError> {

57

let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;

58

let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;

58

Ok(ChangelogEntry { revlog_entry })

59

Ok(ChangelogEntry { revlog_entry })

59

}

60

}

60

61

/// Return the [`ChangelogRevisionData`] for the given revision number.

62

/// Return the [`ChangelogRevisionData`] for the given revision number.

62

///

63

///

63

/// This is a useful shortcut in case the caller does not need the

64

/// This is a useful shortcut in case the caller does not need the

64

/// generic revlog information (parents, hashes etc). Otherwise

65

/// generic revlog information (parents, hashes etc). Otherwise

65

/// consider taking a [`ChangelogEntry`] with

66

/// consider taking a [`ChangelogEntry`] with

66

/// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.

67

/// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.

67

pub fn data_for_rev(

68

pub fn data_for_rev(

68

&self,

69

&self,

69

rev: UncheckedRevision,

70

rev: UncheckedRevision,

70

) -> Result<ChangelogRevisionData, RevlogError> {

71

) -> Result<ChangelogRevisionData, RevlogError> {

71

self.entry_for_rev(rev)?.data()

72

self.entry_for_rev(rev)?.data()

72

}

73

}

73

74

pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {

75

pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {

75

self.revlog.node_from_rev(rev)

76

self.revlog.node_from_rev(rev)

76

}

77

}

77

78

pub fn rev_from_node(

79

pub fn rev_from_node(

79

&self,

80

&self,

80

node: NodePrefix,

81

node: NodePrefix,

81

) -> Result<Revision, RevlogError> {

82

) -> Result<Revision, RevlogError> {

82

self.revlog.rev_from_node(node)

83

self.revlog.rev_from_node(node)

83

}

84

}

85

86

pub fn get_index(&self) -> &Index {

87

&self.revlog.index

88

}

84

}

89

}

85

90

86

impl Graph for Changelog {

91

impl Graph for Changelog {

87

fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {

92

fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {

88

self.revlog.parents(rev)

93

self.revlog.parents(rev)

89

}

94

}

90

}

95

}

91

96

92

/// A specialized `RevlogEntry` for `changelog` data format

97

/// A specialized `RevlogEntry` for `changelog` data format

93

///

98

///

94

/// This is a `RevlogEntry` with the added semantics that the associated

99

/// This is a `RevlogEntry` with the added semantics that the associated

95

/// data should meet the requirements for `changelog`, materialized by

100

/// data should meet the requirements for `changelog`, materialized by

96

/// the fact that `data()` constructs a `ChangelogRevisionData`.

101

/// the fact that `data()` constructs a `ChangelogRevisionData`.

97

/// In case that promise would be broken, the `data` method returns an error.

102

/// In case that promise would be broken, the `data` method returns an error.

98

#[derive(Clone)]

103

#[derive(Clone)]

99

pub struct ChangelogEntry<'changelog> {

104

pub struct ChangelogEntry<'changelog> {

100

/// Same data, as a generic `RevlogEntry`.

105

/// Same data, as a generic `RevlogEntry`.

101

pub(crate) revlog_entry: RevlogEntry<'changelog>,

106

pub(crate) revlog_entry: RevlogEntry<'changelog>,

102

}

107

}

103

108

104

impl<'changelog> ChangelogEntry<'changelog> {

109

impl<'changelog> ChangelogEntry<'changelog> {

105

pub fn data<'a>(

110

pub fn data<'a>(

106

&'a self,

111

&'a self,

107

) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {

112

) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {

108

let bytes = self.revlog_entry.data()?;

113

let bytes = self.revlog_entry.data()?;

109

if bytes.is_empty() {

114

if bytes.is_empty() {

110

Ok(ChangelogRevisionData::null())

115

Ok(ChangelogRevisionData::null())

111

} else {

116

} else {

112

Ok(ChangelogRevisionData::new(bytes).map_err(|err| {

117

Ok(ChangelogRevisionData::new(bytes).map_err(|err| {

113

RevlogError::Other(HgError::CorruptedRepository(format!(

118

RevlogError::Other(HgError::CorruptedRepository(format!(

114

"Invalid changelog data for revision {}: {:?}",

119

"Invalid changelog data for revision {}: {:?}",

115

self.revlog_entry.revision(),

120

self.revlog_entry.revision(),

116

err

121

err

117

)))

122

)))

118

})?)

123

})?)

119

}

124

}

120

}

125

}

121

126

122

/// Obtain a reference to the underlying `RevlogEntry`.

127

/// Obtain a reference to the underlying `RevlogEntry`.

123

///

128

///

124

/// This allows the caller to access the information that is common

129

/// This allows the caller to access the information that is common

125

/// to all revlog entries: revision number, node id, parent revisions etc.

130

/// to all revlog entries: revision number, node id, parent revisions etc.

126

pub fn as_revlog_entry(&self) -> &RevlogEntry {

131

pub fn as_revlog_entry(&self) -> &RevlogEntry {

127

&self.revlog_entry

132

&self.revlog_entry

128

}

133

}

129

134

130

pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {

135

pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {

131

Ok(self

136

Ok(self

132

.revlog_entry

137

.revlog_entry

133

.p1_entry()?

138

.p1_entry()?

134

.map(|revlog_entry| Self { revlog_entry }))

139

.map(|revlog_entry| Self { revlog_entry }))

135

}

140

}

136

141

137

pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {

142

pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {

138

Ok(self

143

Ok(self

139

.revlog_entry

144

.revlog_entry

140

.p2_entry()?

145

.p2_entry()?

141

.map(|revlog_entry| Self { revlog_entry }))

146

.map(|revlog_entry| Self { revlog_entry }))

142

}

147

}

143

}

148

}

144

149

145

/// `Changelog` entry which knows how to interpret the `changelog` data bytes.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// The data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// The end offset for the hex manifest (not including the newline)
    manifest_end: usize,
    /// The end offset for the user+email (not including the newline)
    user_end: usize,
    /// The end offset for the timestamp+timezone+extras (not including the
    /// newline)
    timestamp_end: usize,
    /// The end offset for the file list (not including the newline)
    files_end: usize,
}

160

165

161

impl<'changelog> ChangelogRevisionData<'changelog> {

166

impl<'changelog> ChangelogRevisionData<'changelog> {

162

fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {

167

fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {

163

let mut line_iter = bytes.split(|b| b == &b'\n');

168

let mut line_iter = bytes.split(|b| b == &b'\n');

164

let manifest_end = line_iter

169

let manifest_end = line_iter

165

.next()

170

.next()

166

.expect("Empty iterator from split()?")

171

.expect("Empty iterator from split()?")

167

.len();

172

.len();

168

let user_slice = line_iter.next().ok_or_else(|| {

173

let user_slice = line_iter.next().ok_or_else(|| {

169

HgError::corrupted("Changeset data truncated after manifest line")

174

HgError::corrupted("Changeset data truncated after manifest line")

170

})?;

175

})?;

171

let user_end = manifest_end + 1 + user_slice.len();

176

let user_end = manifest_end + 1 + user_slice.len();

172

let timestamp_slice = line_iter.next().ok_or_else(|| {

177

let timestamp_slice = line_iter.next().ok_or_else(|| {

173

HgError::corrupted("Changeset data truncated after user line")

178

HgError::corrupted("Changeset data truncated after user line")

174

})?;

179

})?;

175

let timestamp_end = user_end + 1 + timestamp_slice.len();

180

let timestamp_end = user_end + 1 + timestamp_slice.len();

176

let mut files_end = timestamp_end + 1;

181

let mut files_end = timestamp_end + 1;

177

loop {

182

loop {

178

let line = line_iter.next().ok_or_else(|| {

183

let line = line_iter.next().ok_or_else(|| {

179

HgError::corrupted("Changeset data truncated in files list")

184

HgError::corrupted("Changeset data truncated in files list")

180

})?;

185

})?;

181

if line.is_empty() {

186

if line.is_empty() {

182

if files_end == bytes.len() {

187

if files_end == bytes.len() {

183

// The list of files ended with a single newline (there

188

// The list of files ended with a single newline (there

184

// should be two)

189

// should be two)

185

return Err(HgError::corrupted(

190

return Err(HgError::corrupted(

186

"Changeset data truncated after files list",

191

"Changeset data truncated after files list",

187

));

192

));

188

}

193

}

189

files_end -= 1;

194

files_end -= 1;

190

break;

195

break;

191

}

196

}

192

files_end += line.len() + 1;

197

files_end += line.len() + 1;

193

}

198

}

194

199

195

Ok(Self {

200

Ok(Self {

196

bytes,

201

bytes,

197

manifest_end,

202

manifest_end,

198

user_end,

203

user_end,

199

timestamp_end,

204

timestamp_end,

200

files_end,

205

files_end,

201

})

206

})

202

}

207

}

203

208

204

fn null() -> Self {

209

fn null() -> Self {

205

Self::new(Cow::Borrowed(

210

Self::new(Cow::Borrowed(

206

b"0000000000000000000000000000000000000000\n\n0 0\n\n",

211

b"0000000000000000000000000000000000000000\n\n0 0\n\n",

207

))

212

))

208

.unwrap()

213

.unwrap()

209

}

214

}

210

215

211

/// Return an iterator over the lines of the entry.

216

/// Return an iterator over the lines of the entry.

212

pub fn lines(&self) -> impl Iterator<Item = &[u8]> {

217

pub fn lines(&self) -> impl Iterator<Item = &[u8]> {

213

self.bytes.split(|b| b == &b'\n')

218

self.bytes.split(|b| b == &b'\n')

214

}

219

}

215

220

216

/// Return the node id of the `manifest` referenced by this `changelog`

221

/// Return the node id of the `manifest` referenced by this `changelog`

217

/// entry.

222

/// entry.

218

pub fn manifest_node(&self) -> Result<Node, HgError> {

223

pub fn manifest_node(&self) -> Result<Node, HgError> {

219

let manifest_node_hex = &self.bytes[..self.manifest_end];

224

let manifest_node_hex = &self.bytes[..self.manifest_end];

220

Node::from_hex_for_repo(manifest_node_hex)

225

Node::from_hex_for_repo(manifest_node_hex)

221

}

226

}

222

227

223

/// The full user string (usually a name followed by an email enclosed in

228

/// The full user string (usually a name followed by an email enclosed in

224

/// angle brackets)

229

/// angle brackets)

225

pub fn user(&self) -> &[u8] {

230

pub fn user(&self) -> &[u8] {

226

&self.bytes[self.manifest_end + 1..self.user_end]

231

&self.bytes[self.manifest_end + 1..self.user_end]

227

}

232

}

228

233

229

/// The full timestamp line (timestamp in seconds, offset in seconds, and

234

/// The full timestamp line (timestamp in seconds, offset in seconds, and

230

/// possibly extras)

235

/// possibly extras)

231

// TODO: We should expose this in a more useful way

236

// TODO: We should expose this in a more useful way

232

pub fn timestamp_line(&self) -> &[u8] {

237

pub fn timestamp_line(&self) -> &[u8] {

233

&self.bytes[self.user_end + 1..self.timestamp_end]

238

&self.bytes[self.user_end + 1..self.timestamp_end]

234

}

239

}

235

240

236

/// Parsed timestamp.

241

/// Parsed timestamp.

237

pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {

242

pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {

238

parse_timestamp(self.timestamp_line())

243

parse_timestamp(self.timestamp_line())

239

}

244

}

240

245

241

/// Optional commit extras.

246

/// Optional commit extras.

242

pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

247

pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

243

parse_timestamp_line_extra(self.timestamp_line())

248

parse_timestamp_line_extra(self.timestamp_line())

244

}

249

}

245

250

246

/// The files changed in this revision.

251

/// The files changed in this revision.

247

pub fn files(&self) -> impl Iterator<Item = &HgPath> {

252

pub fn files(&self) -> impl Iterator<Item = &HgPath> {

248

if self.timestamp_end == self.files_end {

253

if self.timestamp_end == self.files_end {

249

Either::Left(iter::empty())

254

Either::Left(iter::empty())

250

} else {

255

} else {

251

Either::Right(

256

Either::Right(

252

self.bytes[self.timestamp_end + 1..self.files_end]

257

self.bytes[self.timestamp_end + 1..self.files_end]

253

.split(|b| b == &b'\n')

258

.split(|b| b == &b'\n')

254

.map(HgPath::new),

259

.map(HgPath::new),

255

)

260

)

256

}

261

}

257

}

262

}

258

263

259

/// The change description.

264

/// The change description.

260

pub fn description(&self) -> &[u8] {

265

pub fn description(&self) -> &[u8] {

261

&self.bytes[self.files_end + 2..]

266

&self.bytes[self.files_end + 2..]

262

}

267

}

263

}

268

}

264

269

265

impl Debug for ChangelogRevisionData<'_> {

270

impl Debug for ChangelogRevisionData<'_> {

266

fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {

271

fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {

267

f.debug_struct("ChangelogRevisionData")

272

f.debug_struct("ChangelogRevisionData")

268

.field("bytes", &debug_bytes(&self.bytes))

273

.field("bytes", &debug_bytes(&self.bytes))

269

.field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))

274

.field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))

270

.field(

275

.field(

271

"user",

276

"user",

272

&debug_bytes(

277

&debug_bytes(

273

&self.bytes[self.manifest_end + 1..self.user_end],

278

&self.bytes[self.manifest_end + 1..self.user_end],

274

),

279

),

275

)

280

)

276

.field(

281

.field(

277

"timestamp",

282

"timestamp",

278

&debug_bytes(

283

&debug_bytes(

279

&self.bytes[self.user_end + 1..self.timestamp_end],

284

&self.bytes[self.user_end + 1..self.timestamp_end],

280

),

285

),

281

)

286

)

282

.field(

287

.field(

283

"files",

288

"files",

284

&debug_bytes(

289

&debug_bytes(

285

&self.bytes[self.timestamp_end + 1..self.files_end],

290

&self.bytes[self.timestamp_end + 1..self.files_end],

286

),

291

),

287

)

292

)

288

.field(

293

.field(

289

"description",

294

"description",

290

&debug_bytes(&self.bytes[self.files_end + 2..]),

295

&debug_bytes(&self.bytes[self.files_end + 2..]),

291

)

296

)

292

.finish()

297

.finish()

293

}

298

}

294

}

299

}

295

300

296

/// Render arbitrary bytes as a printable ASCII string for debug output.
///
/// Every byte goes through [`std::ascii::escape_default`], so printable
/// ASCII is kept as-is and everything else becomes an escape sequence
/// such as `\n` or `\xff`.
fn debug_bytes(bytes: &[u8]) -> String {
    // `escape_default` only ever yields ASCII bytes, so each one maps
    // directly to a `char` without any UTF-8 validation step.
    bytes
        .iter()
        .flat_map(|b| escape_default(*b))
        .map(char::from)
        .collect()
}

302

307

303

/// Parse the raw bytes of the timestamp line from a changelog entry.

308

/// Parse the raw bytes of the timestamp line from a changelog entry.

304

///

309

///

305

/// According to the documentation in `hg help dates` and the

310

/// According to the documentation in `hg help dates` and the

306

/// implementation in `changelog.py`, the format of the timestamp line

311

/// implementation in `changelog.py`, the format of the timestamp line

307

/// is `time tz extra\n` where:

312

/// is `time tz extra\n` where:

308

///

313

///

309

/// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp

314

/// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp

310

/// as seconds since the UNIX epoch.

315

/// as seconds since the UNIX epoch.

311

///

316

///

312

/// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting

317

/// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting

313

/// seconds WEST of UTC (so negative for timezones east of UTC, which is the

318

/// seconds WEST of UTC (so negative for timezones east of UTC, which is the

314

/// opposite of the sign in ISO 8601 timestamps).

319

/// opposite of the sign in ISO 8601 timestamps).

315

///

320

///

316

/// - `extra` is an optional set of NUL-delimited key-value pairs, with the key

321

/// - `extra` is an optional set of NUL-delimited key-value pairs, with the key

317

/// and value in each pair separated by an ASCII colon. Keys are limited to

322

/// and value in each pair separated by an ASCII colon. Keys are limited to

318

/// ASCII letters, digits, hyphens, and underscores, whereas values can be

323

/// ASCII letters, digits, hyphens, and underscores, whereas values can be

319

/// arbitrary bytes.

324

/// arbitrary bytes.

320

fn parse_timestamp(

325

fn parse_timestamp(

321

timestamp_line: &[u8],

326

timestamp_line: &[u8],

322

) -> Result<DateTime<FixedOffset>, HgError> {

327

) -> Result<DateTime<FixedOffset>, HgError> {

323

let mut parts = timestamp_line.splitn(3, |c| *c == b' ');

328

let mut parts = timestamp_line.splitn(3, |c| *c == b' ');

324

329

325

let timestamp_bytes = parts

330

let timestamp_bytes = parts

326

.next()

331

.next()

327

.ok_or_else(|| HgError::corrupted("missing timestamp"))?;

332

.ok_or_else(|| HgError::corrupted("missing timestamp"))?;

328

let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {

333

let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {

329

HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))

334

HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))

330

})?;

335

})?;

331

let timestamp_utc = timestamp_str

336

let timestamp_utc = timestamp_str

332

.parse()

337

.parse()

333

.map_err(|e| {

338

.map_err(|e| {

334

HgError::corrupted(format!("failed to parse timestamp: {e}"))

339

HgError::corrupted(format!("failed to parse timestamp: {e}"))

335

})

340

})

336

.and_then(|secs| {

341

.and_then(|secs| {

337

NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {

342

NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {

338

HgError::corrupted(format!(

343

HgError::corrupted(format!(

339

"integer timestamp out of valid range: {secs}"

344

"integer timestamp out of valid range: {secs}"

340

))

345

))

341

})

346

})

342

})

347

})

343

// Attempt to parse the timestamp as a float if we can't parse

348

// Attempt to parse the timestamp as a float if we can't parse

344

// it as an int. It doesn't seem like float timestamps are actually

349

// it as an int. It doesn't seem like float timestamps are actually

345

// used in practice, but the Python code supports them.

350

// used in practice, but the Python code supports them.

346

.or_else(|_| parse_float_timestamp(timestamp_str))?;

351

.or_else(|_| parse_float_timestamp(timestamp_str))?;

347

352

348

let timezone_bytes = parts

353

let timezone_bytes = parts

349

.next()

354

.next()

350

.ok_or_else(|| HgError::corrupted("missing timezone"))?;

355

.ok_or_else(|| HgError::corrupted("missing timezone"))?;

351

let timezone_secs: i32 = str::from_utf8(timezone_bytes)

356

let timezone_secs: i32 = str::from_utf8(timezone_bytes)

352

.map_err(|e| {

357

.map_err(|e| {

353

HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))

358

HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))

354

})?

359

})?

355

.parse()

360

.parse()

356

.map_err(|e| {

361

.map_err(|e| {

357

HgError::corrupted(format!("timezone is not an integer: {e}"))

362

HgError::corrupted(format!("timezone is not an integer: {e}"))

358

})?;

363

})?;

359

let timezone = FixedOffset::west_opt(timezone_secs)

364

let timezone = FixedOffset::west_opt(timezone_secs)

360

.ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;

365

.ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;

361

366

362

Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))

367

Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))

363

}

368

}

364

369

365

/// Attempt to parse the given string as floating-point timestamp, and

370

/// Attempt to parse the given string as floating-point timestamp, and

366

/// convert the result into a `chrono::NaiveDateTime`.

371

/// convert the result into a `chrono::NaiveDateTime`.

367

fn parse_float_timestamp(

372

fn parse_float_timestamp(

368

timestamp_str: &str,

373

timestamp_str: &str,

369

) -> Result<NaiveDateTime, HgError> {

374

) -> Result<NaiveDateTime, HgError> {

370

let timestamp = timestamp_str.parse::<f64>().map_err(|e| {

375

let timestamp = timestamp_str.parse::<f64>().map_err(|e| {

371

HgError::corrupted(format!("failed to parse timestamp: {e}"))

376

HgError::corrupted(format!("failed to parse timestamp: {e}"))

372

})?;

377

})?;

373

378

374

// To construct a `NaiveDateTime` we'll need to convert the float

379

// To construct a `NaiveDateTime` we'll need to convert the float

375

// into signed integer seconds and unsigned integer nanoseconds.

380

// into signed integer seconds and unsigned integer nanoseconds.

376

let mut secs = timestamp.trunc() as i64;

381

let mut secs = timestamp.trunc() as i64;

377

let mut subsecs = timestamp.fract();

382

let mut subsecs = timestamp.fract();

378

383

379

// If the timestamp is negative, we need to express the fractional

384

// If the timestamp is negative, we need to express the fractional

380

// component as positive nanoseconds since the previous second.

385

// component as positive nanoseconds since the previous second.

381

if timestamp < 0.0 {

386

if timestamp < 0.0 {

382

secs -= 1;

387

secs -= 1;

383

subsecs += 1.0;

388

subsecs += 1.0;

384

}

389

}

385

390

386

// This cast should be safe because the fractional component is

391

// This cast should be safe because the fractional component is

387

// by definition less than 1.0, so this value should not exceed

392

// by definition less than 1.0, so this value should not exceed

388

// 1 billion, which is representable as an f64 without loss of

393

// 1 billion, which is representable as an f64 without loss of

389

// precision and should fit into a u32 without overflowing.

394

// precision and should fit into a u32 without overflowing.

390

//

395

//

391

// (Any loss of precision in the fractional component will have

396

// (Any loss of precision in the fractional component will have

392

// already happened at the time of initial parsing; in general,

397

// already happened at the time of initial parsing; in general,

393

// f64s are insufficiently precise to provide nanosecond-level

398

// f64s are insufficiently precise to provide nanosecond-level

394

// precision with present-day timestamps.)

399

// precision with present-day timestamps.)

395

let nsecs = (subsecs * 1_000_000_000.0) as u32;

400

let nsecs = (subsecs * 1_000_000_000.0) as u32;

396

401

397

NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {

402

NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {

398

HgError::corrupted(format!(

403

HgError::corrupted(format!(

399

"float timestamp out of valid range: {timestamp}"

404

"float timestamp out of valid range: {timestamp}"

400

))

405

))

401

})

406

})

402

}

407

}

403

408

404

/// Decode changeset extra fields.

409

/// Decode changeset extra fields.

405

///

410

///

406

/// Extras are null-delimited key-value pairs where the key consists of ASCII

411

/// Extras are null-delimited key-value pairs where the key consists of ASCII

407

/// alphanumeric characters plus hyphens and underscores, and the value can

412

/// alphanumeric characters plus hyphens and underscores, and the value can

408

/// contain arbitrary bytes.

413

/// contain arbitrary bytes.

409

fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

414

fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

410

extra

415

extra

411

.split(|c| *c == b'\0')

416

.split(|c| *c == b'\0')

412

.map(|pair| {

417

.map(|pair| {

413

let pair = unescape_extra(pair);

418

let pair = unescape_extra(pair);

414

let mut iter = pair.splitn(2, |c| *c == b':');

419

let mut iter = pair.splitn(2, |c| *c == b':');

415

420

416

let key_bytes =

421

let key_bytes =

417

iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {

422

iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {

418

HgError::corrupted("empty key in changeset extras")

423

HgError::corrupted("empty key in changeset extras")

419

})?;

424

})?;

420

425

421

let key = str::from_utf8(key_bytes)

426

let key = str::from_utf8(key_bytes)

422

.ok()

427

.ok()

423

.filter(|k| {

428

.filter(|k| {

424

k.chars().all(|c| {

429

k.chars().all(|c| {

425

c.is_ascii_alphanumeric() || c == '_' || c == '-'

430

c.is_ascii_alphanumeric() || c == '_' || c == '-'

426

})

431

})

427

})

432

})

428

.ok_or_else(|| {

433

.ok_or_else(|| {

429

let key = String::from_utf8_lossy(key_bytes);

434

let key = String::from_utf8_lossy(key_bytes);

430

HgError::corrupted(format!(

435

HgError::corrupted(format!(

431

"invalid key in changeset extras: {key}",

436

"invalid key in changeset extras: {key}",

432

))

437

))

433

})?

438

})?

434

.to_string();

439

.to_string();

435

440

436

let value = iter.next().map(Into::into).ok_or_else(|| {

441

let value = iter.next().map(Into::into).ok_or_else(|| {

437

HgError::corrupted(format!(

442

HgError::corrupted(format!(

438

"missing value for changeset extra: {key}"

443

"missing value for changeset extra: {key}"

439

))

444

))

440

})?;

445

})?;

441

446

442

Ok((key, value))

447

Ok((key, value))

443

})

448

})

444

.collect()

449

.collect()

445

}

450

}

446

451

447

/// Parse the extra fields from a changeset's timestamp line.

452

/// Parse the extra fields from a changeset's timestamp line.

448

fn parse_timestamp_line_extra(

453

fn parse_timestamp_line_extra(

449

timestamp_line: &[u8],

454

timestamp_line: &[u8],

450

) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

455

) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

451

Ok(timestamp_line

456

Ok(timestamp_line

452

.splitn(3, |c| *c == b' ')

457

.splitn(3, |c| *c == b' ')

453

.nth(2)

458

.nth(2)

454

.map(decode_extra)

459

.map(decode_extra)

455

.transpose()?

460

.transpose()?

456

.unwrap_or_default())

461

.unwrap_or_default())

457

}

462

}

458

463

459

/// Decode Mercurial's escaping for changelog extras.
///
/// The `_string_escape` function in `changelog.py` only escapes 4 characters
/// (null, backslash, newline, and carriage return) so we only decode those.
///
/// The Python code also includes a workaround for decoding escaped nuls
/// that are followed by an ASCII octal digit, since Python's built-in
/// `string_escape` codec will interpret that as an escaped octal byte value.
/// That workaround is omitted here since we don't support decoding octal.
///
/// Unrecognized escape sequences and a lone trailing backslash are copied
/// to the output unchanged.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    // Decoding never produces more bytes than it consumes, so the input
    // length is a sufficient capacity.
    let mut output = Vec::with_capacity(bytes.len());
    let mut input = bytes.iter().copied();

    // `while let` rather than `for`: the loop body pulls the byte following
    // a backslash from the same iterator.
    while let Some(c) = input.next() {
        if c != b'\\' {
            output.push(c);
            continue;
        }

        match input.next() {
            Some(b'0') => output.push(b'\0'),
            Some(b'\\') => output.push(b'\\'),
            Some(b'n') => output.push(b'\n'),
            Some(b'r') => output.push(b'\r'),
            // The following cases should never occur in theory because any
            // backslashes in the original input should have been escaped
            // with another backslash, so it should not be possible to
            // observe an escape sequence other than the 4 above.
            Some(c) => output.extend_from_slice(&[b'\\', c]),
            None => output.push(b'\\'),
        }
    }

    output
}

494

499

495

#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::Vfs;
    use crate::NULL_REVISION;
    use pretty_assertions::assert_eq;

    /// Truncated or malformed changeset data must be rejected.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err(),);
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err(),);
    }

    /// A well-formed changeset exposes each of its fields.
    #[test]
    fn test_create_changelogrevisiondata() {
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    /// The null revision resolves to the null changeset data.
    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                .unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION.into())?,
            ChangelogRevisionData::null()
        );
        // same with the intermediate entry object
        assert_eq!(
            changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }

    /// The null changeset lists no files.
    #[test]
    fn test_empty_files_list() {
        assert!(ChangelogRevisionData::null()
            .files()
            .collect_vec()
            .is_empty());
    }

    #[test]
    fn test_unescape_basic() {
        // '\0', '\\', '\n', and '\r' are correctly unescaped.
        let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
        let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_unsupported_sequence() {
        // Other escape sequences are left unaltered.
        // The range is inclusive so that byte 255 is exercised as well.
        for c in 0u8..=255 {
            match c {
                b'0' | b'\\' | b'n' | b'r' => continue,
                c => {
                    let expected = &[b'\\', c][..];
                    let unescaped = unescape_extra(expected);
                    assert_eq!(expected, &unescaped[..]);
                }
            }
        }
    }

    #[test]
    fn test_unescape_trailing_backslash() {
        // Trailing backslashes are OK.
        let expected = br"hi\";
        let unescaped = unescape_extra(expected);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_nul_followed_by_octal() {
        // Escaped NUL chars followed by octal digits are decoded correctly.
        let expected = b"\x0012";
        let escaped = br"\012";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_parse_float_timestamp() {
        let test_cases = [
            // Zero should map to the UNIX epoch.
            ("0.0", "1970-01-01 00:00:00"),
            // Negative zero should be the same as positive zero.
            ("-0.0", "1970-01-01 00:00:00"),
            // Values without fractional components should work like integers.
            // (Assuming the timestamp is within the limits of f64 precision.)
            ("1115154970.0", "2005-05-03 21:16:10"),
            // We expect some loss of precision in the fractional component
            // when parsing arbitrary floating-point values.
            ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
            // But representable f64 values should parse losslessly.
            ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
            // Negative fractional components are subtracted from the epoch.
            ("-1.333", "1969-12-31 23:59:58.667"),
        ];

        for (input, expected) in test_cases {
            let res = parse_float_timestamp(input).unwrap().to_string();
            assert_eq!(res, expected);
        }
    }

    /// Test-only inverse of `unescape_extra`: escapes null, backslash,
    /// newline and carriage return, and copies every other byte as-is.
    fn escape_extra(bytes: &[u8]) -> Vec<u8> {
        let mut output = Vec::with_capacity(bytes.len());

        for c in bytes.iter().copied() {
            output.extend_from_slice(match c {
                b'\0' => &b"\\0"[..],
                b'\\' => &b"\\\\"[..],
                b'\n' => &b"\\n"[..],
                b'\r' => &b"\\r"[..],
                _ => {
                    output.push(c);
                    continue;
                }
            });
        }

        output
    }

    /// Test-only encoder for extras: each pair becomes an escaped
    /// `key:value` item, and items are separated by a null byte.
    fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
    where
        K: AsRef<[u8]>,
        V: AsRef<[u8]>,
    {
        let extras = pairs.into_iter().map(|(k, v)| {
            escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
        });
        // Use fully-qualified syntax to avoid a future naming conflict with
        // the standard library: https://github.com/rust-lang/rust/issues/79524
        Itertools::intersperse(extras, b"\0".to_vec()).concat()
    }

    /// Extras fixture shared by the round-trip tests: plain, hyphenated and
    /// underscored keys, an empty value, and a value containing every byte.
    fn sample_extra() -> BTreeMap<String, Vec<u8>> {
        [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>()
    }

    #[test]
    fn test_decode_extra() {
        let extra = sample_extra();

        let encoded = encode_extra(&extra);
        let decoded = decode_extra(&encoded).unwrap();

        assert_eq!(extra, decoded);
    }

    #[test]
    fn test_corrupt_extra() {
        let test_cases = [
            (&b""[..], "empty input"),
            (&b"\0"[..], "unexpected null byte"),
            (&b":empty-key"[..], "empty key"),
            (&b"\0leading-null:"[..], "leading null"),
            (&b"trailing-null:\0"[..], "trailing null"),
            (&b"missing-value"[..], "missing value"),
            (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
            (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
        ];

        for (extra, msg) in test_cases {
            assert!(
                decode_extra(extra).is_err(),
                "corrupt extra should have failed to parse: {}",
                msg
            );
        }
    }

    #[test]
    fn test_parse_timestamp_line() {
        let extra = sample_extra();

        let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
        line.extend_from_slice(&encode_extra(&extra));

        let timestamp = parse_timestamp(&line).unwrap();
        assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

        let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
        assert_eq!(extra, parsed_extra);
    }
}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             use std::ascii::escape_default;
             use std::borrow::Cow;
             use std::collections::BTreeMap;
             use std::fmt::{Debug, Formatter};
             use std::{iter, str};
             use chrono::{DateTime, FixedOffset, NaiveDateTime};
             use itertools::{Either, Itertools};
             use crate::errors::HgError;
+            use crate::revlog::Index;
             use crate::revlog::Revision;
             use crate::revlog::{Node, NodePrefix};
             use crate::revlog::{Revlog, RevlogEntry, RevlogError};
             use crate::utils::hg_path::HgPath;
             use crate::vfs::Vfs;
             use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
             /// A specialized `Revlog` to work with changelog data format.
             ///
             /// This is a thin wrapper: the only state is the underlying generic
             /// [`Revlog`], which `Changelog::open` reads from `00changelog.i`.
             pub struct Changelog {
                 /// The generic `revlog` format.
                 pub(crate) revlog: Revlog,
             }
             impl Changelog {
                 /// Open the changelog revlog (`00changelog.i`) through the given store
                 /// VFS, using the supplied revlog open options.
                 pub fn open(
                     store_vfs: &Vfs,
                     options: RevlogOpenOptions,
                 ) -> Result<Self, HgError> {
                     let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
                     Ok(Self { revlog })
                 }
                 /// Return the `ChangelogRevisionData` for the given node ID.
                 ///
                 /// The node (prefix) is first resolved to a revision number; any
                 /// resolution failure is propagated as a `RevlogError`.
                 pub fn data_for_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<ChangelogRevisionData, RevlogError> {
                     let rev = self.revlog.rev_from_node(node)?;
                     self.entry_for_checked_rev(rev)?.data()
                 }
                 /// Return the [`ChangelogEntry`] for the given revision number.
                 pub fn entry_for_rev(
                     &self,
                     rev: UncheckedRevision,
                 ) -> Result<ChangelogEntry, RevlogError> {
                     let revlog_entry = self.revlog.get_entry(rev)?;
                     Ok(ChangelogEntry { revlog_entry })
                 }
                 /// Same as [`Self::entry_for_rev`] for checked revisions.
                 fn entry_for_checked_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<ChangelogEntry, RevlogError> {
                     let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
                     Ok(ChangelogEntry { revlog_entry })
                 }
                 /// Return the [`ChangelogRevisionData`] for the given revision number.
                 ///
                 /// This is a useful shortcut in case the caller does not need the
                 /// generic revlog information (parents, hashes etc). Otherwise
                 /// consider taking a [`ChangelogEntry`] with
                 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
                 pub fn data_for_rev(
                     &self,
                     rev: UncheckedRevision,
                 ) -> Result<ChangelogRevisionData, RevlogError> {
                     self.entry_for_rev(rev)?.data()
                 }
                 /// Look up the node ID of the given revision by delegating to the
                 /// underlying revlog; `None` is whatever that lookup reports as absent.
                 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
                     self.revlog.node_from_rev(rev)
                 }
                 /// Resolve a node ID (or prefix) to its revision number by delegating
                 /// to the underlying revlog.
                 pub fn rev_from_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     self.revlog.rev_from_node(node)
                 }
                 /// Borrow the index of the underlying revlog.
+                pub fn get_index(&self) -> &Index {
+                    &self.revlog.index
+                }
             }
             /// Graph access for the changelog: parent lookup is forwarded unchanged to
             /// the underlying revlog.
             impl Graph for Changelog {
                 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
                     self.revlog.parents(rev)
                 }
             }
             /// A specialized `RevlogEntry` for `changelog` data format
             ///
             /// This is a `RevlogEntry` with the added semantics that the associated
             /// data should meet the requirements for `changelog`, materialized by
             /// the fact that `data()` constructs a `ChangelogRevisionData`.
             /// In case that promise would be broken, the `data` method returns an error.
             ///
             /// The `'changelog` lifetime parameter is passed straight through to the
             /// wrapped `RevlogEntry`.
             #[derive(Clone)]
             pub struct ChangelogEntry<'changelog> {
                 /// Same data, as a generic `RevlogEntry`.
                 pub(crate) revlog_entry: RevlogEntry<'changelog>,
             }
             impl<'changelog> ChangelogEntry<'changelog> {
                 /// Decode the payload of this entry as changelog data.
                 ///
                 /// An empty payload yields [`ChangelogRevisionData::null`]. A payload
                 /// that cannot be split into the changelog sections is reported as a
                 /// corrupted-repository error that names the revision.
                 pub fn data<'a>(
                     &'a self,
                 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
                     let raw = self.revlog_entry.data()?;
                     if raw.is_empty() {
                         return Ok(ChangelogRevisionData::null());
                     }
                     match ChangelogRevisionData::new(raw) {
                         Ok(parsed) => Ok(parsed),
                         Err(err) => {
                             let message = format!(
                                 "Invalid changelog data for revision {}: {:?}",
                                 self.revlog_entry.revision(),
                                 err
                             );
                             Err(RevlogError::Other(HgError::CorruptedRepository(
                                 message,
                             )))
                         }
                     }
                 }
                 /// Borrow the wrapped generic `RevlogEntry`.
                 ///
                 /// Use this for everything that is not changelog-specific: revision
                 /// number, node id, parent revisions etc.
                 pub fn as_revlog_entry(&self) -> &RevlogEntry {
                     &self.revlog_entry
                 }
                 /// The first parent as a changelog entry, if the revlog reports one.
                 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
                     match self.revlog_entry.p1_entry()? {
                         Some(revlog_entry) => Ok(Some(Self { revlog_entry })),
                         None => Ok(None),
                     }
                 }
                 /// The second parent as a changelog entry, if the revlog reports one.
                 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
                     match self.revlog_entry.p2_entry()? {
                         Some(revlog_entry) => Ok(Some(Self { revlog_entry })),
                         None => Ok(None),
                     }
                 }
             }
             /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
             ///
             /// All `*_end` fields are byte offsets into `bytes`; the accessors slice
             /// `bytes` between consecutive offsets, skipping the separating newline.
             #[derive(PartialEq)]
             pub struct ChangelogRevisionData<'changelog> {
                 /// The data bytes of the `changelog` entry.
                 bytes: Cow<'changelog, [u8]>,
                 /// The end offset for the hex manifest (not including the newline)
                 manifest_end: usize,
                 /// The end offset for the user+email (not including the newline)
                 user_end: usize,
                 /// The end offset for the timestamp+timezone+extras (not including the
                 /// newline)
                 timestamp_end: usize,
                 /// The end offset for the file list (not including the newline).
                 /// Equal to `timestamp_end` when the file list is empty.
                 files_end: usize,
             }
             impl<'changelog> ChangelogRevisionData<'changelog> {
                 /// Split raw changelog bytes into their sections.
                 ///
                 /// The data is scanned line by line: manifest, user, timestamp, then
                 /// the file list, which must be terminated by an empty line. Only
                 /// the section end offsets are recorded; `bytes` is stored as-is.
                 ///
                 /// Returns a corruption error if the data ends before any of these
                 /// sections is complete.
                 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
                     let mut line_iter = bytes.split(|b| b == &b'\n');
                     // `split` always yields at least one (possibly empty) item.
                     let manifest_end = line_iter
                         .next()
                         .expect("Empty iterator from split()?")
                         .len();
                     let user_slice = line_iter.next().ok_or_else(|| {
                         HgError::corrupted("Changeset data truncated after manifest line")
                     })?;
                     // `+ 1` skips the newline that ended the previous line.
                     let user_end = manifest_end + 1 + user_slice.len();
                     let timestamp_slice = line_iter.next().ok_or_else(|| {
                         HgError::corrupted("Changeset data truncated after user line")
                     })?;
                     let timestamp_end = user_end + 1 + timestamp_slice.len();
                     // Offset of the first byte after the timestamp line's newline;
                     // advanced past each file name (and its newline) below.
                     let mut files_end = timestamp_end + 1;
                     loop {
                         let line = line_iter.next().ok_or_else(|| {
                             HgError::corrupted("Changeset data truncated in files list")
                         })?;
                         // An empty line marks the end of the file list.
                         if line.is_empty() {
                             if files_end == bytes.len() {
                                 // The list of files ended with a single newline (there
                                 // should be two)
                                 return Err(HgError::corrupted(
                                     "Changeset data truncated after files list",
                                 ));
                             }
                             // Step back onto the newline that ended the last file
                             // name (or the timestamp line when there are no files).
                             files_end -= 1;
                             break;
                         }
                         files_end += line.len() + 1;
                     }
                     Ok(Self {
                         bytes,
                         manifest_end,
                         user_end,
                         timestamp_end,
                         files_end,
                     })
                 }
                 /// Data for the null revision: an all-zero manifest node, empty user,
                 /// timestamp line `0 0`, no files and an empty description.
                 fn null() -> Self {
                     Self::new(Cow::Borrowed(
                         b"0000000000000000000000000000000000000000\n\n0 0\n\n",
                     ))
                     .unwrap()
                 }
                 /// Return an iterator over the lines of the entry.
                 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
                     self.bytes.split(|b| b == &b'\n')
                 }
                 /// Return the node id of the `manifest` referenced by this `changelog`
                 /// entry.
                 pub fn manifest_node(&self) -> Result<Node, HgError> {
                     let manifest_node_hex = &self.bytes[..self.manifest_end];
                     Node::from_hex_for_repo(manifest_node_hex)
                 }
                 /// The full user string (usually a name followed by an email enclosed in
                 /// angle brackets)
                 pub fn user(&self) -> &[u8] {
                     &self.bytes[self.manifest_end + 1..self.user_end]
                 }
                 /// The full timestamp line (timestamp in seconds, offset in seconds, and
                 /// possibly extras)
                 // TODO: We should expose this in a more useful way
                 pub fn timestamp_line(&self) -> &[u8] {
                     &self.bytes[self.user_end + 1..self.timestamp_end]
                 }
                 /// Parsed timestamp.
                 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
                     parse_timestamp(self.timestamp_line())
                 }
                 /// Optional commit extras.
                 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
                     parse_timestamp_line_extra(self.timestamp_line())
                 }
                 /// The files changed in this revision.
                 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
                     // Equal offsets mean an empty file list; the range below would
                     // be inverted in that case, so it must not be sliced.
                     if self.timestamp_end == self.files_end {
                         Either::Left(iter::empty())
                     } else {
                         Either::Right(
                             self.bytes[self.timestamp_end + 1..self.files_end]
                                 .split(|b| b == &b'\n')
                                 .map(HgPath::new),
                         )
                     }
                 }
                 /// The change description.
                 pub fn description(&self) -> &[u8] {
                     // `+ 2` skips the two newlines that separate the file list from
                     // the description.
                     &self.bytes[self.files_end + 2..]
                 }
             }
             impl Debug for ChangelogRevisionData<'_> {
                 /// Print the raw bytes followed by each section, with every byte
                 /// rendered through `debug_bytes`.
                 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
                     // An empty file list is stored as `files_end == timestamp_end`
                     // (see `files()`). The range `timestamp_end + 1..files_end` is
                     // then inverted and slicing it would panic, so use an empty
                     // slice instead.
                     let files: &[u8] = if self.timestamp_end == self.files_end {
                         &[]
                     } else {
                         &self.bytes[self.timestamp_end + 1..self.files_end]
                     };
                     f.debug_struct("ChangelogRevisionData")
                         .field("bytes", &debug_bytes(&self.bytes))
                         .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
                         .field(
                             "user",
                             &debug_bytes(
                                 &self.bytes[self.manifest_end + 1..self.user_end],
                             ),
                         )
                         .field(
                             "timestamp",
                             &debug_bytes(
                                 &self.bytes[self.user_end + 1..self.timestamp_end],
                             ),
                         )
                         .field("files", &debug_bytes(files))
                         .field(
                             "description",
                             &debug_bytes(&self.bytes[self.files_end + 2..]),
                         )
                         .finish()
                 }
             }
             /// Render arbitrary bytes as a printable string for debug output.
             ///
             /// Each byte goes through [`escape_default`], which only ever emits ASCII,
             /// so the escaped bytes are collected straight into a `String` without an
             /// intermediate buffer or a lossy UTF-8 conversion.
             fn debug_bytes(bytes: &[u8]) -> String {
                 bytes
                     .iter()
                     .flat_map(|b| escape_default(*b))
                     .map(char::from)
                     .collect()
             }
             /// Parse the raw bytes of the timestamp line from a changelog entry.
             ///
             /// According to the documentation in `hg help dates` and the
             /// implementation in `changelog.py`, the format of the timestamp line
             /// is `time tz extra\n` where:
             ///
             /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
             ///   as seconds since the UNIX epoch.
             ///
             /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
             ///   seconds WEST of UTC (so negative for timezones east of UTC, which is the
             ///   opposite of the sign in ISO 8601 timestamps).
             ///
             /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
             ///   and value in each pair separated by an ASCII colon. Keys are limited to
             ///   ASCII letters, digits, hyphens, and underscores, whereas values can be
             ///   arbitrary bytes.
             fn parse_timestamp(
                 timestamp_line: &[u8],
             ) -> Result<DateTime<FixedOffset>, HgError> {
                 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
                 let timestamp_bytes = parts
                     .next()
                     .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
                 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
                     HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
                 })?;
                 let timestamp_utc = timestamp_str
                     .parse()
                     .map_err(|e| {
                         HgError::corrupted(format!("failed to parse timestamp: {e}"))
                     })
                     .and_then(|secs| {
                         NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
                             HgError::corrupted(format!(
                                 "integer timestamp out of valid range: {secs}"
                             ))
                         })
                     })
                     // Attempt to parse the timestamp as a float if we can't parse
                     // it as an int. It doesn't seem like float timestamps are actually
                     // used in practice, but the Python code supports them.
                     .or_else(|_| parse_float_timestamp(timestamp_str))?;
                 let timezone_bytes = parts
                     .next()
                     .ok_or_else(|| HgError::corrupted("missing timezone"))?;
                 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
                     .map_err(|e| {
                         HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
                     })?
                     .parse()
                     .map_err(|e| {
                         HgError::corrupted(format!("timezone is not an integer: {e}"))
                     })?;
                 let timezone = FixedOffset::west_opt(timezone_secs)
                     .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
                 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
             }
             /// Attempt to parse the given string as floating-point timestamp, and
             /// convert the result into a `chrono::NaiveDateTime`.
             ///
             /// # Errors
             ///
             /// Returns a corruption error when the string is not a float, when it is
             /// not finite (`NaN`, `inf`), or when the value lies outside the range
             /// `NaiveDateTime` can represent.
             fn parse_float_timestamp(
                 timestamp_str: &str,
             ) -> Result<NaiveDateTime, HgError> {
                 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
                     HgError::corrupted(format!("failed to parse timestamp: {e}"))
                 })?;
                 // `f64`'s parser accepts "NaN" and "inf". The casts below would turn
                 // NaN into 0 seconds (silently yielding the epoch), so reject
                 // non-finite values explicitly.
                 if !timestamp.is_finite() {
                     return Err(HgError::corrupted(format!(
                         "float timestamp out of valid range: {timestamp}"
                     )));
                 }
                 // To construct a `NaiveDateTime` we'll need to convert the float
                 // into signed integer seconds and unsigned integer nanoseconds.
                 // The cast saturates for values outside the `i64` range; those are
                 // rejected by `from_timestamp_opt` below.
                 let mut secs = timestamp.trunc() as i64;
                 let mut subsecs = timestamp.fract();
                 // If the fractional component is negative, express it as positive
                 // nanoseconds since the previous second. Testing the fraction
                 // (rather than the whole timestamp) keeps negative whole numbers
                 // such as `-1.0` from borrowing a second they do not need, and
                 // avoids decrementing a saturated `i64::MIN`.
                 if subsecs < 0.0 {
                     secs -= 1;
                     subsecs += 1.0;
                 }
                 // The fractional component is less than 1.0 by definition, but
                 // `subsecs + 1.0` can round up to exactly 1.0 for tiny negative
                 // fractions, hence the clamp to the largest valid nanosecond count.
                 //
                 // (Any loss of precision in the fractional component will have
                 // already happened at the time of initial parsing; in general,
                 // f64s are insufficiently precise to provide nanosecond-level
                 // precision with present-day timestamps.)
                 let nsecs = ((subsecs * 1_000_000_000.0) as u32).min(999_999_999);
                 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
                     HgError::corrupted(format!(
                         "float timestamp out of valid range: {timestamp}"
                     ))
                 })
             }
             /// Decode the `extra` section of a changeset into a key-value map.
             ///
             /// The input is a sequence of NUL-delimited `key:value` pairs. Each pair is
             /// unescaped first; the key must be non-empty and made only of ASCII
             /// alphanumerics, hyphens and underscores, while the value may contain
             /// arbitrary bytes. The first malformed pair aborts decoding with a
             /// corruption error.
             fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
                 let mut decoded = BTreeMap::new();
                 for raw_pair in extra.split(|c| *c == b'\0') {
                     let unescaped = unescape_extra(raw_pair);
                     // Only the first colon separates key from value; later colons
                     // belong to the value.
                     let mut fields = unescaped.splitn(2, |c| *c == b':');
                     let key_bytes = match fields.next() {
                         Some(candidate) if !candidate.is_empty() => candidate,
                         _ => {
                             return Err(HgError::corrupted(
                                 "empty key in changeset extras",
                             ))
                         }
                     };
                     let key = match str::from_utf8(key_bytes) {
                         Ok(text)
                             if text.chars().all(|c| {
                                 c.is_ascii_alphanumeric() || c == '_' || c == '-'
                             }) =>
                         {
                             text.to_string()
                         }
                         _ => {
                             let key = String::from_utf8_lossy(key_bytes);
                             return Err(HgError::corrupted(format!(
                                 "invalid key in changeset extras: {key}",
                             )));
                         }
                     };
                     let value = match fields.next() {
                         Some(raw_value) => raw_value.to_vec(),
                         None => {
                             return Err(HgError::corrupted(format!(
                                 "missing value for changeset extra: {key}"
                             )))
                         }
                     };
                     decoded.insert(key, value);
                 }
                 Ok(decoded)
             }
             /// Parse the extra fields from a changeset's timestamp line.
             fn parse_timestamp_line_extra(
                 timestamp_line: &[u8],
             ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
                 Ok(timestamp_line
                     .splitn(3, |c| *c == b' ')
                     .nth(2)
                     .map(decode_extra)
                     .transpose()?
                     .unwrap_or_default())
             }
             /// Undo Mercurial's escaping of changelog extras.
             ///
             /// `_string_escape` in `changelog.py` escapes exactly four bytes (NUL,
             /// backslash, newline and carriage return), so those are the only escape
             /// sequences decoded here; anything else is copied through untouched.
             ///
             /// The Python side also carries a workaround for an escaped NUL followed by
             /// an ASCII octal digit, because Python's `string_escape` codec would read
             /// that as an octal byte value. No such workaround is needed here since
             /// octal escapes are not decoded at all.
             fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
                 let mut decoded = Vec::with_capacity(bytes.len());
                 let mut rest = bytes;
                 while let Some((&first, tail)) = rest.split_first() {
                     rest = tail;
                     if first != b'\\' {
                         decoded.push(first);
                         continue;
                     }
                     match rest.split_first() {
                         // A lone trailing backslash is kept as-is.
                         None => decoded.push(b'\\'),
                         Some((&code, after)) => {
                             rest = after;
                             match code {
                                 b'0' => decoded.push(b'\0'),
                                 b'\\' => decoded.push(b'\\'),
                                 b'n' => decoded.push(b'\n'),
                                 b'r' => decoded.push(b'\r'),
                                 // Should not occur in theory: every backslash in
                                 // the original input is escaped with another one,
                                 // so only the four sequences above can appear.
                                 // Preserve the two bytes unchanged.
                                 other => {
                                     decoded.push(b'\\');
                                     decoded.push(other);
                                 }
                             }
                         }
                     }
                 }
                 decoded
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::vfs::Vfs;
                 use crate::NULL_REVISION;
                 use pretty_assertions::assert_eq;
                 /// Truncated changelog data must be rejected at every stage.
                 #[test]
                 fn test_create_changelogrevisiondata_invalid() {
                     // Completely empty (previously this repeated the `b"abcd"` case
                     // below and never exercised empty input)
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
                     // No newline after manifest
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
                     // No newline after user
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
                     // No newline after timestamp
                     assert!(
                         ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
                     );
                     // Missing newline after files
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(
                         b"abcd\n\n0 0\nfile1\nfile2"
                     ))
                     .is_err(),);
                     // Only one newline after files
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(
                         b"abcd\n\n0 0\nfile1\nfile2\n"
                     ))
                     .is_err(),);
                 }
                 /// A well-formed entry is split into manifest, user, timestamp line,
                 /// file list and description.
                 #[test]
                 fn test_create_changelogrevisiondata() {
                     // NOTE(review): as shown here the continuation lines of the byte
                     // literal carry leading whitespace and have no blank line before
                     // the description, which would not match the assertions below.
                     // This looks like a rendering artifact; verify against the
                     // actual file before relying on it.
                     let data = ChangelogRevisionData::new(Cow::Borrowed(
                         b"0123456789abcdef0123456789abcdef01234567
             Some One <someone@example.com>
 0
             file1
             file2
             some
             commit
             message",
                     ))
                     .unwrap();
                     assert_eq!(
                         data.manifest_node().unwrap(),
                         Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                             .unwrap()
                     );
                     assert_eq!(data.user(), b"Some One <someone@example.com>");
                     assert_eq!(data.timestamp_line(), b"0 0");
                     assert_eq!(
                         data.files().collect_vec(),
                         vec![HgPath::new("file1"), HgPath::new("file2")]
                     );
                     assert_eq!(data.description(), b"some\ncommit\nmessage");
                 }
                 /// The null revision decodes to `ChangelogRevisionData::null()` through
                 /// both lookup paths.
                 #[test]
                 fn test_data_from_rev_null() -> Result<(), RevlogError> {
                     // an empty revlog will be enough for this case
                     let dir = tempfile::tempdir().unwrap();
                     let index_path = dir.path().join("foo.i");
                     std::fs::write(index_path, b"").unwrap();
                     let vfs = Vfs { base: dir.path() };
                     let changelog = Changelog {
                         revlog: Revlog::open(
                             &vfs,
                             "foo.i",
                             None,
                             RevlogOpenOptions::new(),
                         )
                         .unwrap(),
                     };
                     // direct shortcut
                     let direct = changelog.data_for_rev(NULL_REVISION.into())?;
                     assert_eq!(direct, ChangelogRevisionData::null());
                     // same with the intermediate entry object
                     let entry = changelog.entry_for_rev(NULL_REVISION.into())?;
                     assert_eq!(entry.data()?, ChangelogRevisionData::null());
                     Ok(())
                 }
                 /// The null revision data lists no files.
                 #[test]
                 fn test_empty_files_list() {
                     let null_data = ChangelogRevisionData::null();
                     let listed = null_data.files().collect_vec();
                     assert!(listed.is_empty());
                 }
                 /// The four supported escape sequences decode to their raw bytes.
                 #[test]
                 fn test_unescape_basic() {
                     // '\0', '\\', '\n', and '\r' are correctly unescaped.
                     let input = br"AAA\0BBB\\CCC\nDDD\rEEE";
                     let want = b"AAA\0BBB\\CCC\nDDD\rEEE";
                     let got = unescape_extra(input);
                     assert_eq!(&want[..], &got[..]);
                 }
                 /// A backslash followed by any byte other than the four supported
                 /// escape codes passes through unchanged.
                 #[test]
                 fn test_unescape_unsupported_sequence() {
                     // Other escape sequences are left unaltered. The range is
                     // inclusive so that byte 0xFF is exercised as well.
                     for c in 0u8..=255 {
                         match c {
                             b'0' | b'\\' | b'n' | b'r' => continue,
                             c => {
                                 let expected = &[b'\\', c][..];
                                 let unescaped = unescape_extra(expected);
                                 assert_eq!(expected, &unescaped[..]);
                             }
                         }
                     }
                 }
                 /// A backslash at the very end of the input is preserved.
                 #[test]
                 fn test_unescape_trailing_backslash() {
                     // Trailing backslashes are OK.
                     let input = br"hi\";
                     let got = unescape_extra(input);
                     assert_eq!(&input[..], &got[..]);
                 }
                 /// `\0` followed by digits is a NUL plus literal digits, not an octal
                 /// escape.
                 #[test]
                 fn test_unescape_nul_followed_by_octal() {
                     // Escaped NUL chars followed by octal digits are decoded correctly.
                     let input = br"\012";
                     let want = b"\x0012";
                     let got = unescape_extra(input);
                     assert_eq!(&want[..], &got[..]);
                 }
                 /// Float timestamps map to the expected `NaiveDateTime` rendering.
                 #[test]
                 fn test_parse_float_timestamp() {
                     // Each case is (raw float timestamp, expected rendering).
                     let cases = [
                         // Zero should map to the UNIX epoch.
                         ("0.0", "1970-01-01 00:00:00"),
                         // Negative zero should be the same as positive zero.
                         ("-0.0", "1970-01-01 00:00:00"),
                         // Values without fractional components should work like integers.
                         // (Assuming the timestamp is within the limits of f64 precision.)
                         ("1115154970.0", "2005-05-03 21:16:10"),
                         // We expect some loss of precision in the fractional component
                         // when parsing arbitrary floating-point values.
                         ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
                         // But representable f64 values should parse losslessly.
                         ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
                         // Negative fractional components are subtracted from the epoch.
                         ("-1.333", "1969-12-31 23:59:58.667"),
                     ];
                     for (raw, rendered) in cases {
                         let parsed = parse_float_timestamp(raw).unwrap();
                         assert_eq!(parsed.to_string(), rendered);
                     }
                 }
                 /// Test helper: escape NUL, backslash, newline and carriage return with
                 /// a leading backslash; every other byte is copied unchanged.
                 fn escape_extra(bytes: &[u8]) -> Vec<u8> {
                     let mut escaped = Vec::with_capacity(bytes.len());
                     for &byte in bytes {
                         match byte {
                             b'\0' => escaped.extend_from_slice(b"\\0"),
                             b'\\' => escaped.extend_from_slice(b"\\\\"),
                             b'\n' => escaped.extend_from_slice(b"\\n"),
                             b'\r' => escaped.extend_from_slice(b"\\r"),
                             plain => escaped.push(plain),
                         }
                     }
                     escaped
                 }
                 /// Test helper: serializes key/value pairs into a single byte string.
                 ///
                 /// Each pair is laid out as `key:value` and passed through
                 /// `escape_extra`; the escaped pairs are then joined with a single
                 /// NUL byte between consecutive pairs.
                 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
                 where
                     K: AsRef<[u8]>,
                     V: AsRef<[u8]>,
                 {
                     let escaped_pairs = pairs.into_iter().map(|(key, value)| {
                         let (key, value) = (key.as_ref(), value.as_ref());
                         let mut raw = Vec::with_capacity(key.len() + 1 + value.len());
                         raw.extend_from_slice(key);
                         raw.push(b':');
                         raw.extend_from_slice(value);
                         escape_extra(&raw)
                     });
                     let separator = vec![b'\0'];
                     // Use fully-qualified syntax to avoid a future naming conflict with
                     // the standard library: https://github.com/rust-lang/rust/issues/79524
                     Itertools::intersperse(escaped_pairs, separator).concat()
                 }
                 /// Round-trip check: a map encoded with `encode_extra` must decode back
                 /// to an equal map, including an empty value and a value containing
                 /// every possible byte.
                 #[test]
                 fn test_decode_extra() {
                     let every_byte: Vec<u8> = (0u8..=255).collect();
                     let extra: BTreeMap<String, Vec<u8>> = BTreeMap::from([
                         (String::from("branch"), b"default".to_vec()),
                         (String::from("key-with-hyphens"), b"value1".to_vec()),
                         (String::from("key_with_underscores"), b"value2".to_vec()),
                         (String::from("empty-value"), Vec::new()),
                         (String::from("binary-value"), every_byte),
                     ]);

                     let encoded = encode_extra(&extra);
                     let decoded = decode_extra(&encoded).unwrap();

                     assert_eq!(extra, decoded);
                 }
                 /// Every malformed input listed below must be rejected by
                 /// `decode_extra`; the second tuple field names the case in the
                 /// failure message.
                 #[test]
                 fn test_corrupt_extra() {
                     let corrupt_inputs: [(&[u8], &str); 8] = [
                         (b"", "empty input"),
                         (b"\0", "unexpected null byte"),
                         (b":empty-key", "empty key"),
                         (b"\0leading-null:", "leading null"),
                         (b"trailing-null:\0", "trailing null"),
                         (b"missing-value", "missing value"),
                         (b"$!@# non-alphanum-key:", "non-alphanumeric key"),
                         (b"\xF0\x9F\xA6\x80 non-ascii-key:", "non-ASCII key"),
                     ];
                     for (raw, description) in corrupt_inputs {
                         let outcome = decode_extra(raw);
                         assert!(
                             outcome.is_err(),
                             "corrupt extra should have failed to parse: {}",
                             description
                         );
                     }
                 }
                 /// Parses one line made of `1115154970 28800 ` followed by encoded
                 /// extras, and checks both the resulting timestamp and the extras
                 /// recovered from that same line.
                 #[test]
                 fn test_parse_timestamp_line() {
                     let every_byte: Vec<u8> = (0u8..=255).collect();
                     let extra: BTreeMap<String, Vec<u8>> = BTreeMap::from([
                         (String::from("branch"), b"default".to_vec()),
                         (String::from("key-with-hyphens"), b"value1".to_vec()),
                         (String::from("key_with_underscores"), b"value2".to_vec()),
                         (String::from("empty-value"), Vec::new()),
                         (String::from("binary-value"), every_byte),
                     ]);

                     // Prefix with the two numeric fields, then append the extras.
                     let mut line = b"1115154970 28800 ".to_vec();
                     line.extend(encode_extra(&extra));

                     let timestamp = parse_timestamp(&line).unwrap();
                     assert_eq!(timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

                     let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
                     assert_eq!(extra, parsed_extra);
                 }
             }