upstream/mercurial-mirror Commit - r52286:6603a144

1

use std::ascii::escape_default;

1

use std::ascii::escape_default;

2

use std::borrow::Cow;

2

use std::borrow::Cow;

3

use std::collections::BTreeMap;

3

use std::collections::BTreeMap;

4

use std::fmt::{Debug, Formatter};

4

use std::fmt::{Debug, Formatter};

5

use std::{iter, str};

5

use std::{iter, str};

6

7

use chrono::{DateTime, FixedOffset, NaiveDateTime};

7

use chrono::{DateTime, FixedOffset, NaiveDateTime};

8

use itertools::{Either, Itertools};

8

use itertools::{Either, Itertools};

9

10

use crate::errors::HgError;

10

use crate::errors::HgError;

11

use crate::revlog::Revision;

11

use crate::revlog::Revision;

12

use crate::revlog::{Node, NodePrefix};

12

use crate::revlog::{Node, NodePrefix};

13

use crate::revlog::{Revlog, RevlogEntry, RevlogError};

13

use crate::revlog::{Revlog, RevlogEntry, RevlogError};

14

use crate::utils::hg_path::HgPath;

14

use crate::utils::hg_path::HgPath;

15

use crate::vfs::Vfs;

15

use crate::vfs::Vfs;

16

use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};

16

use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};

17

18

/// A specialized `Revlog` to work with changelog data format.

18

/// A specialized `Revlog` to work with changelog data format.

19

pub struct Changelog {

19

pub struct Changelog {

20

/// The generic `revlog` format.

20

/// The generic `revlog` format.

21

pub(crate) revlog: Revlog,

21

pub(crate) revlog: Revlog,

22

}

22

}

23

24

impl Changelog {

24

impl Changelog {

25

/// Open the `changelog` of a repository given by its root.

25

/// Open the `changelog` of a repository given by its root.

26

pub fn open(

26

pub fn open(

27

store_vfs: &Vfs,

27

store_vfs: &Vfs,

28

options: RevlogOpenOptions,

28

options: RevlogOpenOptions,

29

) -> Result<Self, HgError> {

29

) -> Result<Self, HgError> {

30

let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;

30

let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;

31

Ok(Self { revlog })

31

Ok(Self { revlog })

32

}

32

}

33

34

/// Return the `ChangelogRevisionData` for the given node ID.

34

/// Return the `ChangelogRevisionData` for the given node ID.

35

pub fn data_for_node(

35

pub fn data_for_node(

36

&self,

36

&self,

37

node: NodePrefix,

37

node: NodePrefix,

38

) -> Result<ChangelogRevisionData, RevlogError> {

38

) -> Result<ChangelogRevisionData, RevlogError> {

39

let rev = self.revlog.rev_from_node(node)?;

39

let rev = self.revlog.rev_from_node(node)?;

40

self.entry_for_checked_rev(rev)?.data()

40

self.entry_for_checked_rev(rev)?.data()

41

}

41

}

42

43

/// Return the [`ChangelogEntry`] for the given revision number.

43

/// Return the [`ChangelogEntry`] for the given revision number.

44

pub fn entry_for_rev(

44

pub fn entry_for_rev(

45

&self,

45

&self,

46

rev: UncheckedRevision,

46

rev: UncheckedRevision,

47

) -> Result<ChangelogEntry, RevlogError> {

47

) -> Result<ChangelogEntry, RevlogError> {

48

let revlog_entry = self.revlog.get_entry(rev)?;

48

let revlog_entry = self.revlog.get_entry(rev)?;

49

Ok(ChangelogEntry { revlog_entry })

49

Ok(ChangelogEntry { revlog_entry })

50

}

50

}

51

52

/// Same as [`Self::entry_for_rev`] for checked revisions.

52

/// Same as [`Self::entry_for_rev`] for checked revisions.

53

fn entry_for_checked_rev(

53

fn entry_for_checked_rev(

54

&self,

54

&self,

55

rev: Revision,

55

rev: Revision,

56

) -> Result<ChangelogEntry, RevlogError> {

56

) -> Result<ChangelogEntry, RevlogError> {

57

let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;

57

let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;

58

Ok(ChangelogEntry { revlog_entry })

58

Ok(ChangelogEntry { revlog_entry })

59

}

59

}

60

61

/// Return the [`ChangelogRevisionData`] for the given revision number.

61

/// Return the [`ChangelogRevisionData`] for the given revision number.

62

///

62

///

63

/// This is a useful shortcut in case the caller does not need the

63

/// This is a useful shortcut in case the caller does not need the

64

/// generic revlog information (parents, hashes etc). Otherwise

64

/// generic revlog information (parents, hashes etc). Otherwise

65

/// consider taking a [`ChangelogEntry`] with

65

/// consider taking a [`ChangelogEntry`] with

66

/// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.

66

/// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.

67

pub fn data_for_rev(

67

pub fn data_for_rev(

68

&self,

68

&self,

69

rev: UncheckedRevision,

69

rev: UncheckedRevision,

70

) -> Result<ChangelogRevisionData, RevlogError> {

70

) -> Result<ChangelogRevisionData, RevlogError> {

71

self.entry_for_rev(rev)?.data()

71

self.entry_for_rev(rev)?.data()

72

}

72

}

73

74

pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {

74

pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {

75

self.revlog.node_from_rev(rev)

75

self.revlog.node_from_rev(rev)

76

}

76

}

77

78

pub fn rev_from_node(

78

pub fn rev_from_node(

79

&self,

79

&self,

80

node: NodePrefix,

80

node: NodePrefix,

81

) -> Result<Revision, RevlogError> {

81

) -> Result<Revision, RevlogError> {

82

self.revlog.rev_from_node(node)

82

self.revlog.rev_from_node(node)

83

}

83

}

84

}

84

}

85

86

impl Graph for Changelog {

86

impl Graph for Changelog {

87

fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {

87

fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {

88

self.revlog.parents(rev)

88

self.revlog.parents(rev)

89

}

89

}

90

}

90

}

91

92

/// A specialized `RevlogEntry` for `changelog` data format

92

/// A specialized `RevlogEntry` for `changelog` data format

93

///

93

///

94

/// This is a `RevlogEntry` with the added semantics that the associated

94

/// This is a `RevlogEntry` with the added semantics that the associated

95

/// data should meet the requirements for `changelog`, materialized by

95

/// data should meet the requirements for `changelog`, materialized by

96

/// the fact that `data()` constructs a `ChangelogRevisionData`.

96

/// the fact that `data()` constructs a `ChangelogRevisionData`.

97

/// In case that promise would be broken, the `data` method returns an error.

97

/// In case that promise would be broken, the `data` method returns an error.

98

#[derive(Clone)]

98

#[derive(Clone)]

99

pub struct ChangelogEntry<'changelog> {

99

pub struct ChangelogEntry<'changelog> {

100

/// Same data, as a generic `RevlogEntry`.

100

/// Same data, as a generic `RevlogEntry`.

101

pub(crate) revlog_entry: RevlogEntry<'changelog>,

101

pub(crate) revlog_entry: RevlogEntry<'changelog>,

102

}

102

}

103

104

impl<'changelog> ChangelogEntry<'changelog> {

104

impl<'changelog> ChangelogEntry<'changelog> {

105

pub fn data<'a>(

105

pub fn data<'a>(

106

&'a self,

106

&'a self,

107

) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {

107

) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {

108

let bytes = self.revlog_entry.data()?;

108

let bytes = self.revlog_entry.data()?;

109

if bytes.is_empty() {

109

if bytes.is_empty() {

110

Ok(ChangelogRevisionData::null())

110

Ok(ChangelogRevisionData::null())

111

} else {

111

} else {

112

Ok(ChangelogRevisionData::new(bytes).map_err(|err| {

112

Ok(ChangelogRevisionData::new(bytes).map_err(|err| {

113

RevlogError::Other(HgError::CorruptedRepository(format!(

113

RevlogError::Other(HgError::CorruptedRepository(format!(

114

"Invalid changelog data for revision {}: {:?}",

114

"Invalid changelog data for revision {}: {:?}",

115

self.revlog_entry.revision(),

115

self.revlog_entry.revision(),

116

err

116

err

117

)))

117

)))

118

})?)

118

})?)

119

}

119

}

120

}

120

}

121

122

/// Obtain a reference to the underlying `RevlogEntry`.

122

/// Obtain a reference to the underlying `RevlogEntry`.

123

///

123

///

124

/// This allows the caller to access the information that is common

124

/// This allows the caller to access the information that is common

125

/// to all revlog entries: revision number, node id, parent revisions etc.

125

/// to all revlog entries: revision number, node id, parent revisions etc.

126

pub fn as_revlog_entry(&self) -> &RevlogEntry {

126

pub fn as_revlog_entry(&self) -> &RevlogEntry {

127

&self.revlog_entry

127

&self.revlog_entry

128

}

128

}

129

130

pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {

130

pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {

131

Ok(self

131

Ok(self

132

.revlog_entry

132

.revlog_entry

133

.p1_entry()?

133

.p1_entry()?

134

.map(|revlog_entry| Self { revlog_entry }))

134

.map(|revlog_entry| Self { revlog_entry }))

135

}

135

}

136

137

pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {

137

pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {

138

Ok(self

138

Ok(self

139

.revlog_entry

139

.revlog_entry

140

.p2_entry()?

140

.p2_entry()?

141

.map(|revlog_entry| Self { revlog_entry }))

141

.map(|revlog_entry| Self { revlog_entry }))

142

}

142

}

143

}

143

}

144

145

/// `Changelog` entry which knows how to interpret the `changelog` data bytes.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// The data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// The end offset for the hex manifest (not including the newline)
    manifest_end: usize,
    /// The end offset for the user+email (not including the newline)
    user_end: usize,
    /// The end offset for the timestamp+timezone+extras (not including the
    /// newline)
    timestamp_end: usize,
    /// The end offset for the file list (not including the newline)
    files_end: usize,
}

160

161

impl<'changelog> ChangelogRevisionData<'changelog> {

161

impl<'changelog> ChangelogRevisionData<'changelog> {

162

fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {

162

fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {

163

let mut line_iter = bytes.split(|b| b == &b'\n');

163

let mut line_iter = bytes.split(|b| b == &b'\n');

164

let manifest_end = line_iter

164

let manifest_end = line_iter

165

.next()

165

.next()

166

.expect("Empty iterator from split()?")

166

.expect("Empty iterator from split()?")

167

.len();

167

.len();

168

let user_slice = line_iter.next().ok_or_else(|| {

168

let user_slice = line_iter.next().ok_or_else(|| {

169

HgError::corrupted("Changeset data truncated after manifest line")

169

HgError::corrupted("Changeset data truncated after manifest line")

170

})?;

170

})?;

171

let user_end = manifest_end + 1 + user_slice.len();

171

let user_end = manifest_end + 1 + user_slice.len();

172

let timestamp_slice = line_iter.next().ok_or_else(|| {

172

let timestamp_slice = line_iter.next().ok_or_else(|| {

173

HgError::corrupted("Changeset data truncated after user line")

173

HgError::corrupted("Changeset data truncated after user line")

174

})?;

174

})?;

175

let timestamp_end = user_end + 1 + timestamp_slice.len();

175

let timestamp_end = user_end + 1 + timestamp_slice.len();

176

let mut files_end = timestamp_end + 1;

176

let mut files_end = timestamp_end + 1;

177

loop {

177

loop {

178

let line = line_iter.next().ok_or_else(|| {

178

let line = line_iter.next().ok_or_else(|| {

179

HgError::corrupted("Changeset data truncated in files list")

179

HgError::corrupted("Changeset data truncated in files list")

180

})?;

180

})?;

181

if line.is_empty() {

181

if line.is_empty() {

182

if files_end == bytes.len() {

182

if files_end == bytes.len() {

183

// The list of files ended with a single newline (there

183

// The list of files ended with a single newline (there

184

// should be two)

184

// should be two)

185

return Err(HgError::corrupted(

185

return Err(HgError::corrupted(

186

"Changeset data truncated after files list",

186

"Changeset data truncated after files list",

187

));

187

));

188

}

188

}

189

files_end -= 1;

189

files_end -= 1;

190

break;

190

break;

191

}

191

}

192

files_end += line.len() + 1;

192

files_end += line.len() + 1;

193

}

193

}

194

195

Ok(Self {

195

Ok(Self {

196

bytes,

196

bytes,

197

manifest_end,

197

manifest_end,

198

user_end,

198

user_end,

199

timestamp_end,

199

timestamp_end,

200

files_end,

200

files_end,

201

})

201

})

202

}

202

}

203

204

fn null() -> Self {

204

fn null() -> Self {

205

Self::new(Cow::Borrowed(

205

Self::new(Cow::Borrowed(

206

b"0000000000000000000000000000000000000000\n\n0 0\n\n",

206

b"0000000000000000000000000000000000000000\n\n0 0\n\n",

207

))

207

))

208

.unwrap()

208

.unwrap()

209

}

209

}

210

211

/// Return an iterator over the lines of the entry.

211

/// Return an iterator over the lines of the entry.

212

pub fn lines(&self) -> impl Iterator<Item = &[u8]> {

212

pub fn lines(&self) -> impl Iterator<Item = &[u8]> {

213

self.bytes.split(|b| b == &b'\n')

213

self.bytes.split(|b| b == &b'\n')

214

}

214

}

215

216

/// Return the node id of the `manifest` referenced by this `changelog`

216

/// Return the node id of the `manifest` referenced by this `changelog`

217

/// entry.

217

/// entry.

218

pub fn manifest_node(&self) -> Result<Node, HgError> {

218

pub fn manifest_node(&self) -> Result<Node, HgError> {

219

let manifest_node_hex = &self.bytes[..self.manifest_end];

219

let manifest_node_hex = &self.bytes[..self.manifest_end];

220

Node::from_hex_for_repo(manifest_node_hex)

220

Node::from_hex_for_repo(manifest_node_hex)

221

}

221

}

222

223

/// The full user string (usually a name followed by an email enclosed in

223

/// The full user string (usually a name followed by an email enclosed in

224

/// angle brackets)

224

/// angle brackets)

225

pub fn user(&self) -> &[u8] {

225

pub fn user(&self) -> &[u8] {

226

&self.bytes[self.manifest_end + 1..self.user_end]

226

&self.bytes[self.manifest_end + 1..self.user_end]

227

}

227

}

228

229

/// The full timestamp line (timestamp in seconds, offset in seconds, and

229

/// The full timestamp line (timestamp in seconds, offset in seconds, and

230

/// possibly extras)

230

/// possibly extras)

231

// TODO: We should expose this in a more useful way

231

// TODO: We should expose this in a more useful way

232

pub fn timestamp_line(&self) -> &[u8] {

232

pub fn timestamp_line(&self) -> &[u8] {

233

&self.bytes[self.user_end + 1..self.timestamp_end]

233

&self.bytes[self.user_end + 1..self.timestamp_end]

234

}

234

}

235

236

/// Parsed timestamp ~~line, including optional extras~~.

236

/// Parsed timestamp.

237

pub fn ~~parsed_~~timestamp(&self) -> Result<~~TimestampAndExtra~~, HgError> {

237

pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {

238

Timestamp~~AndExtra~~::~~from_bytes~~(self.timestamp_line())

238

parse_timestamp(self.timestamp_line())

239

}

240

241

/// Optional commit extras.

242

pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

243

parse_timestamp_line_extra(self.timestamp_line())

239

}

244

}

240

245

241

/// The files changed in this revision.

246

/// The files changed in this revision.

242

pub fn files(&self) -> impl Iterator<Item = &HgPath> {

247

pub fn files(&self) -> impl Iterator<Item = &HgPath> {

243

if self.timestamp_end == self.files_end {

248

if self.timestamp_end == self.files_end {

244

Either::Left(iter::empty())

249

Either::Left(iter::empty())

245

} else {

250

} else {

246

Either::Right(

251

Either::Right(

247

self.bytes[self.timestamp_end + 1..self.files_end]

252

self.bytes[self.timestamp_end + 1..self.files_end]

248

.split(|b| b == &b'\n')

253

.split(|b| b == &b'\n')

249

.map(HgPath::new),

254

.map(HgPath::new),

250

)

255

)

251

}

256

}

252

}

257

}

253

258

254

/// The change description.

259

/// The change description.

255

pub fn description(&self) -> &[u8] {

260

pub fn description(&self) -> &[u8] {

256

&self.bytes[self.files_end + 2..]

261

&self.bytes[self.files_end + 2..]

257

}

262

}

258

}

263

}

259

264

260

impl Debug for ChangelogRevisionData<'_> {

265

impl Debug for ChangelogRevisionData<'_> {

261

fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {

266

fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {

262

f.debug_struct("ChangelogRevisionData")

267

f.debug_struct("ChangelogRevisionData")

263

.field("bytes", &debug_bytes(&self.bytes))

268

.field("bytes", &debug_bytes(&self.bytes))

264

.field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))

269

.field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))

265

.field(

270

.field(

266

"user",

271

"user",

267

&debug_bytes(

272

&debug_bytes(

268

&self.bytes[self.manifest_end + 1..self.user_end],

273

&self.bytes[self.manifest_end + 1..self.user_end],

269

),

274

),

270

)

275

)

271

.field(

276

.field(

272

"timestamp",

277

"timestamp",

273

&debug_bytes(

278

&debug_bytes(

274

&self.bytes[self.user_end + 1..self.timestamp_end],

279

&self.bytes[self.user_end + 1..self.timestamp_end],

275

),

280

),

276

)

281

)

277

.field(

282

.field(

278

"files",

283

"files",

279

&debug_bytes(

284

&debug_bytes(

280

&self.bytes[self.timestamp_end + 1..self.files_end],

285

&self.bytes[self.timestamp_end + 1..self.files_end],

281

),

286

),

282

)

287

)

283

.field(

288

.field(

284

"description",

289

"description",

285

&debug_bytes(&self.bytes[self.files_end + 2..]),

290

&debug_bytes(&self.bytes[self.files_end + 2..]),

286

)

291

)

287

.finish()

292

.finish()

288

}

293

}

289

}

294

}

290

295

291

fn debug_bytes(bytes: &[u8]) -> String {

296

fn debug_bytes(bytes: &[u8]) -> String {

292

String::from_utf8_lossy(

297

String::from_utf8_lossy(

293

&bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),

298

&bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),

294

)

299

)

295

.to_string()

300

.to_string()

296

}

301

}

297

302

298

/// Parsed timestamp line, including the timestamp and optional extras.

303

/// Parse the raw bytes of the timestamp line from a changelog entry.

299

#[derive(Clone, Debug)]

304

///

300

pub struct TimestampAndExtra {

305

/// According to the documentation in `hg help dates` and the

301

pub timestamp: DateTime<FixedOffset>,

306

/// implementation in `changelog.py`, the format of the timestamp line

302

pub extra: BTreeMap<String, Vec<u8>>,

307

/// is `time tz extra\n` where:

303

}

308

///

309

/// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp

310

/// as seconds since the UNIX epoch.

311

///

312

/// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting

313

/// seconds WEST of UTC (so negative for timezones east of UTC, which is the

314

/// opposite of the sign in ISO 8601 timestamps).

315

///

316

/// - `extra` is an optional set of NUL-delimited key-value pairs, with the key

317

/// and value in each pair separated by an ASCII colon. Keys are limited to

318

/// ASCII letters, digits, hyphens, and underscores, whereas values can be

319

/// arbitrary bytes.

320

fn parse_timestamp(

321

timestamp_line: &[u8],

322

) -> Result<DateTime<FixedOffset>, HgError> {

323

let mut parts = timestamp_line.splitn(3, |c| *c == b' ');

304

324

305

impl TimestampAndExtra {

325

let timestamp_bytes = parts

306

/// Parse the raw bytes of the timestamp line from a changelog entry.

326

.next()

307

///

327

.ok_or_else(|| HgError::corrupted("missing timestamp"))?;

308

/// According to the documentation in `hg help dates` and the

328

let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {

309

/// implementation in `changelog.py`, the format of the timestamp line

329

HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))

310

/// is `time tz extra\n` where:

330

})?;

311

///

331

let timestamp_utc = timestamp_str

312

/// - `time` is an ASCII-encoded signed int or float denoting a UTC

332

.parse()

313

/// timestamp as seconds since the UNIX epoch.

333

.map_err(|e| {

314

///

334

HgError::corrupted(format!("failed to parse timestamp: {e}"))

315

/// - `tz` is the timezone offset as an ASCII-encoded signed integer

335

})

316

/// denoting seconds WEST of UTC (so negative for timezones east of UTC,

336

.and_then(|secs| {

317

/// which is the opposite of the sign in ISO 8601 timestamps).

337

NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {

318

///

338

HgError::corrupted(format!(

319

/// - `extra` is an optional set of NUL-delimited key-value pairs, with the

339

"integer timestamp out of valid range: {secs}"

320

/// key and value in each pair separated by an ASCII colon. Keys are

340

))

321

/// limited to ASCII letters, digits, hyphens, and underscores, whereas

322

/// values can be arbitrary bytes.

323

fn from_bytes(line: &[u8]) -> Result<Self, HgError> {

324

let mut parts = line.splitn(3, |c| *c == b' ');

325

326

let timestamp_bytes = parts

327

.next()

328

.ok_or_else(|| HgError::corrupted("missing timestamp"))?;

329

let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {

330

HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))

331

})?;

332

let timestamp_utc = timestamp_str

333

.parse()

334

.map_err(|e| {

335

HgError::corrupted(format!("failed to parse timestamp: {e}"))

336

})

341

})

337

.and_then(|secs| {

342

})

338

NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {

343

// Attempt to parse the timestamp as a float if we can't parse

339

HgError::corrupted(format!(

344

// it as an int. It doesn't seem like float timestamps are actually

340

"integer timestamp out of valid range: {secs}"

345

// used in practice, but the Python code supports them.

341

))

346

.or_else(|_| parse_float_timestamp(timestamp_str))?;

342

})

343

})

344

// Attempt to parse the timestamp as a float if we can't parse

345

// it as an int. It doesn't seem like float timestamps are actually

346

// used in practice, but the Python code supports them.

347

.or_else(|_| parse_float_timestamp(timestamp_str))?;

348

347

349

let timezone_bytes = parts

348

let timezone_bytes = parts

350

.next()

349

.next()

351

.ok_or_else(|| HgError::corrupted("missing timezone"))?;

350

.ok_or_else(|| HgError::corrupted("missing timezone"))?;

352

let timezone_secs: i32 = str::from_utf8(timezone_bytes)

351

let timezone_secs: i32 = str::from_utf8(timezone_bytes)

353

.map_err(|e| {

352

.map_err(|e| {

354

HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))

353

HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))

355

})?

354

})?

356

.parse()

355

.parse()

357

.map_err(|e| {

356

.map_err(|e| {

358

HgError::corrupted(format!("timezone is not an integer: {e}"))

357

HgError::corrupted(format!("timezone is not an integer: {e}"))

359

})?;

358

})?;

360

let timezone =

359

let timezone = FixedOffset::west_opt(timezone_secs)

361

FixedOffset::west_opt(timezone_secs).ok_or_else(|| {

360

.ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;

362

HgError::corrupted("timezone offset out of bounds")

363

})?;

364

361

365

let timestamp =

362

Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))

366

DateTime::from_naive_utc_and_offset(timestamp_utc, timezone);

367

let extra = parts

368

.next()

369

.map(parse_extra)

370

.transpose()?

371

.unwrap_or_default();

372

373

Ok(Self { timestamp, extra })

374

}

375

}

363

}

376

364

377

/// Attempt to parse the given string as floating-point timestamp, and

365

/// Attempt to parse the given string as floating-point timestamp, and

378

/// convert the result into a `chrono::NaiveDateTime`.

366

/// convert the result into a `chrono::NaiveDateTime`.

379

fn parse_float_timestamp(

367

fn parse_float_timestamp(

380

timestamp_str: &str,

368

timestamp_str: &str,

381

) -> Result<NaiveDateTime, HgError> {

369

) -> Result<NaiveDateTime, HgError> {

382

let timestamp = timestamp_str.parse::<f64>().map_err(|e| {

370

let timestamp = timestamp_str.parse::<f64>().map_err(|e| {

383

HgError::corrupted(format!("failed to parse timestamp: {e}"))

371

HgError::corrupted(format!("failed to parse timestamp: {e}"))

384

})?;

372

})?;

385

373

386

// To construct a `NaiveDateTime` we'll need to convert the float

374

// To construct a `NaiveDateTime` we'll need to convert the float

387

// into signed integer seconds and unsigned integer nanoseconds.

375

// into signed integer seconds and unsigned integer nanoseconds.

388

let mut secs = timestamp.trunc() as i64;

376

let mut secs = timestamp.trunc() as i64;

389

let mut subsecs = timestamp.fract();

377

let mut subsecs = timestamp.fract();

390

378

391

// If the timestamp is negative, we need to express the fractional

379

// If the timestamp is negative, we need to express the fractional

392

// component as positive nanoseconds since the previous second.

380

// component as positive nanoseconds since the previous second.

393

if timestamp < 0.0 {

381

if timestamp < 0.0 {

394

secs -= 1;

382

secs -= 1;

395

subsecs += 1.0;

383

subsecs += 1.0;

396

}

384

}

397

385

398

// This cast should be safe because the fractional component is

386

// This cast should be safe because the fractional component is

399

// by definition less than 1.0, so this value should not exceed

387

// by definition less than 1.0, so this value should not exceed

400

// 1 billion, which is representable as an f64 without loss of

388

// 1 billion, which is representable as an f64 without loss of

401

// precision and should fit into a u32 without overflowing.

389

// precision and should fit into a u32 without overflowing.

402

//

390

//

403

// (Any loss of precision in the fractional component will have

391

// (Any loss of precision in the fractional component will have

404

// already happened at the time of initial parsing; in general,

392

// already happened at the time of initial parsing; in general,

405

// f64s are insufficiently precise to provide nanosecond-level

393

// f64s are insufficiently precise to provide nanosecond-level

406

// precision with present-day timestamps.)

394

// precision with present-day timestamps.)

407

let nsecs = (subsecs * 1_000_000_000.0) as u32;

395

let nsecs = (subsecs * 1_000_000_000.0) as u32;

408

396

409

NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {

397

NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {

410

HgError::corrupted(format!(

398

HgError::corrupted(format!(

411

"float timestamp out of valid range: {timestamp}"

399

"float timestamp out of valid range: {timestamp}"

412

))

400

))

413

})

401

})

414

}

402

}

415

403

416

/// Parse the "extra" fields from a changeset's timestamp line.

404

/// Decode changeset extra fields.

417

///

405

///

418

/// Extras are null-delimited key-value pairs where the key consists of ASCII

406

/// Extras are null-delimited key-value pairs where the key consists of ASCII

419

/// alphanumeric characters plus hyphens and underscores, and the value can

407

/// alphanumeric characters plus hyphens and underscores, and the value can

420

/// contain arbitrary bytes.

408

/// contain arbitrary bytes.

421

fn ~~pars~~e_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

409

fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

422

extra

410

extra

423

.split(|c| *c == b'\0')

411

.split(|c| *c == b'\0')

424

.map(|pair| {

412

.map(|pair| {

425

let pair = unescape_extra(pair);

413

let pair = unescape_extra(pair);

426

let mut iter = pair.splitn(2, |c| *c == b':');

414

let mut iter = pair.splitn(2, |c| *c == b':');

427

415

428

let key_bytes =

416

let key_bytes =

429

iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {

417

iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {

430

HgError::corrupted("empty key in changeset extras")

418

HgError::corrupted("empty key in changeset extras")

431

})?;

419

})?;

432

420

433

let key = str::from_utf8(key_bytes)

421

let key = str::from_utf8(key_bytes)

434

.ok()

422

.ok()

435

.filter(|k| {

423

.filter(|k| {

436

k.chars().all(|c| {

424

k.chars().all(|c| {

437

c.is_ascii_alphanumeric() || c == '_' || c == '-'

425

c.is_ascii_alphanumeric() || c == '_' || c == '-'

438

})

426

})

439

})

427

})

440

.ok_or_else(|| {

428

.ok_or_else(|| {

441

let key = String::from_utf8_lossy(key_bytes);

429

let key = String::from_utf8_lossy(key_bytes);

442

HgError::corrupted(format!(

430

HgError::corrupted(format!(

443

"invalid key in changeset extras: {key}",

431

"invalid key in changeset extras: {key}",

444

))

432

))

445

})?

433

})?

446

.to_string();

434

.to_string();

447

435

448

let value = iter.next().map(Into::into).ok_or_else(|| {

436

let value = iter.next().map(Into::into).ok_or_else(|| {

449

HgError::corrupted(format!(

437

HgError::corrupted(format!(

450

"missing value for changeset extra: {key}"

438

"missing value for changeset extra: {key}"

451

))

439

))

452

})?;

440

})?;

453

441

454

Ok((key, value))

442

Ok((key, value))

455

})

443

})

456

.collect()

444

.collect()

457

}

445

}

458

446

447

/// Parse the extra fields from a changeset's timestamp line.

448

fn parse_timestamp_line_extra(

449

timestamp_line: &[u8],

450

) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

451

Ok(timestamp_line

452

.splitn(3, |c| *c == b' ')

453

.nth(2)

454

.map(decode_extra)

455

.transpose()?

456

.unwrap_or_default())

457

}

458

459

/// Decode Mercurial's escaping for changelog extras.
///
/// The `_string_escape` function in `changelog.py` only escapes 4 characters
/// (null, backslash, newline, and carriage return) so we only decode those.
///
/// The Python code also includes a workaround for decoding escaped nuls
/// that are followed by an ASCII octal digit, since Python's built-in
/// `string_escape` codec will interpret that as an escaped octal byte value.
/// That workaround is omitted here since we don't support decoding octal.
///
/// Unknown escape sequences and a trailing lone backslash are copied to the
/// output unchanged.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut output = Vec::with_capacity(bytes.len());
    let mut input = bytes.iter().copied();

    // `while let` rather than `for`: the body pulls the escaped byte from
    // the same iterator.
    while let Some(c) = input.next() {
        if c != b'\\' {
            output.push(c);
            continue;
        }

        match input.next() {
            Some(b'0') => output.push(b'\0'),
            Some(b'\\') => output.push(b'\\'),
            Some(b'n') => output.push(b'\n'),
            Some(b'r') => output.push(b'\r'),
            // The following cases should never occur in theory because any
            // backslashes in the original input should have been escaped
            // with another backslash, so it should not be possible to
            // observe an escape sequence other than the 4 above.
            Some(c) => output.extend_from_slice(&[b'\\', c]),
            None => output.push(b'\\'),
        }
    }

    output
}

494

495

#[cfg(test)]

495

#[cfg(test)]

496

mod tests {

496

mod tests {

497

use super::*;

497

use super::*;

498

use crate::vfs::Vfs;

498

use crate::vfs::Vfs;

499

use crate::NULL_REVISION;

499

use crate::NULL_REVISION;

500

use pretty_assertions::assert_eq;

500

use pretty_assertions::assert_eq;

501

502

#[test]
fn test_create_changelogrevisiondata_invalid() {
    // Each entry is (what is wrong, malformed changeset bytes); every one
    // of them must be rejected by `ChangelogRevisionData::new`.
    //
    // NOTE(review): the "completely empty" case uses the same bytes as the
    // "no newline after manifest" case; presumably `b""` was intended --
    // confirm against `ChangelogRevisionData::new` before changing it.
    let malformed = [
        ("completely empty", &b"abcd"[..]),
        ("no newline after manifest", &b"abcd"[..]),
        ("no newline after user", &b"abcd\n"[..]),
        ("no newline after timestamp", &b"abcd\n\n0 0"[..]),
        ("missing newline after files", &b"abcd\n\n0 0\nfile1\nfile2"[..]),
        (
            "only one newline after files",
            &b"abcd\n\n0 0\nfile1\nfile2\n"[..],
        ),
    ];

    for (problem, bytes) in malformed {
        let parsed = ChangelogRevisionData::new(Cow::Borrowed(bytes));
        assert!(parsed.is_err(), "{}", problem);
    }
}

525

526

#[test]
fn test_create_changelogrevisiondata() {
    // A well-formed changeset: manifest node, user, timestamp line, two
    // files, a blank separator line, then a three-line description.
    let raw: &[u8] = b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message";
    let data = ChangelogRevisionData::new(Cow::Borrowed(raw)).unwrap();

    let expected_manifest =
        Node::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap();
    assert_eq!(data.manifest_node().unwrap(), expected_manifest);

    assert_eq!(data.user(), b"Some One <someone@example.com>");
    assert_eq!(data.timestamp_line(), b"0 0");

    let expected_files = vec![HgPath::new("file1"), HgPath::new("file2")];
    assert_eq!(data.files().collect_vec(), expected_files);

    assert_eq!(data.description(), b"some\ncommit\nmessage");
}

553

554

#[test]
fn test_data_from_rev_null() -> Result<(), RevlogError> {
    // The null revision needs no stored data, so an empty index file is
    // sufficient to open the revlog.
    let scratch = tempfile::tempdir().unwrap();
    std::fs::write(scratch.path().join("foo.i"), b"").unwrap();
    let vfs = Vfs {
        base: scratch.path(),
    };
    let revlog =
        Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new()).unwrap();
    let changelog = Changelog { revlog };

    // Direct lookup of the revision data.
    let direct = changelog.data_for_rev(NULL_REVISION.into())?;
    assert_eq!(direct, ChangelogRevisionData::null());

    // Same lookup, going through the intermediate entry object.
    let via_entry = changelog.entry_for_rev(NULL_REVISION.into())?.data()?;
    assert_eq!(via_entry, ChangelogRevisionData::null());

    Ok(())
}

576

577

#[test]
fn test_empty_files_list() {
    // The null changeset data must report no files at all.
    let null_data = ChangelogRevisionData::null();
    let files = null_data.files().collect_vec();
    assert!(files.is_empty());
}

584

585

#[test]
fn test_unescape_basic() {
    // The four supported escapes ('\0', '\\', '\n', '\r') decode to the
    // bytes they stand for; everything around them is left alone.
    let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
    let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";

    let unescaped = unescape_extra(escaped);

    assert_eq!(&expected[..], unescaped.as_slice());
}

593

594

#[test]
fn test_unescape_unsupported_sequence() {
    // Any backslash-prefixed byte other than the four supported escapes
    // must come back unaltered. The range is inclusive so that byte 0xFF is
    // exercised too (an exclusive `0u8..255` silently skips it).
    for c in 0u8..=255 {
        if matches!(c, b'0' | b'\\' | b'n' | b'r') {
            // Supported escapes are covered by `test_unescape_basic`.
            continue;
        }

        let expected = &[b'\\', c][..];
        let unescaped = unescape_extra(expected);
        assert_eq!(expected, &unescaped[..]);
    }
}

608

609

#[test]
fn test_unescape_trailing_backslash() {
    // A backslash at the very end of the input has nothing to escape and
    // must be kept as a literal backslash.
    let input = br"hi\";
    let unescaped = unescape_extra(input);
    assert_eq!(&input[..], unescaped.as_slice());
}

616

617

#[test]
fn test_unescape_nul_followed_by_octal() {
    // An escaped NUL followed by octal digits decodes to a NUL byte and
    // then the digits themselves; it is not read as one octal escape.
    let escaped = br"\012";
    let expected = [b'\0', b'1', b'2'];

    let unescaped = unescape_extra(escaped);

    assert_eq!(&expected[..], unescaped.as_slice());
}

625

626

#[test]
fn test_parse_float_timestamp() {
    // (textual float timestamp, expected rendering of the parsed value)
    let test_cases = [
        // Zero is the UNIX epoch.
        ("0.0", "1970-01-01 00:00:00"),
        // Negative zero behaves exactly like positive zero.
        ("-0.0", "1970-01-01 00:00:00"),
        // Without a fractional part the value acts like an integer
        // (as long as it fits within f64 precision).
        ("1115154970.0", "2005-05-03 21:16:10"),
        // Arbitrary fractional parts lose some precision when parsed...
        ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
        // ...whereas values exactly representable as f64 round-trip.
        ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
        // A negative fractional part is subtracted from the epoch.
        ("-1.333", "1969-12-31 23:59:58.667"),
    ];

    for (input, expected) in test_cases {
        let parsed = parse_float_timestamp(input).unwrap();
        assert_eq!(parsed.to_string(), expected);
    }
}

650

651

/// Test helper: escape NUL, backslash, newline and carriage return as the
/// two-byte sequences `\0`, `\\`, `\n` and `\r`. All other bytes are
/// copied through unchanged.
fn escape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut escaped = Vec::with_capacity(bytes.len());

    for &byte in bytes {
        match byte {
            b'\0' => escaped.extend_from_slice(b"\\0"),
            b'\\' => escaped.extend_from_slice(b"\\\\"),
            b'\n' => escaped.extend_from_slice(b"\\n"),
            b'\r' => escaped.extend_from_slice(b"\\r"),
            plain => escaped.push(plain),
        }
    }

    escaped
}

669

670

fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>

670

fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>

671

where

671

where

672

K: AsRef<[u8]>,

672

K: AsRef<[u8]>,

673

V: AsRef<[u8]>,

673

V: AsRef<[u8]>,

674

{

674

{

675

let extras = pairs.into_iter().map(|(k, v)| {

675

let extras = pairs.into_iter().map(|(k, v)| {

676

escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())

676

escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())

677

});

677

});

678

// Use fully-qualified syntax to avoid a future naming conflict with

678

// Use fully-qualified syntax to avoid a future naming conflict with

679

// the standard library: https://github.com/rust-lang/rust/issues/79524

679

// the standard library: https://github.com/rust-lang/rust/issues/79524

680

Itertools::intersperse(extras, b"\0".to_vec()).concat()

680

Itertools::intersperse(extras, b"\0".to_vec()).concat()

681

}

681

}

682

683

#[test]
fn test_decode_extra() {
    // Round trip: encoding a map and decoding the result must give back
    // the same map, including an empty value and a value holding every
    // possible byte.
    let extra: BTreeMap<String, Vec<u8>> = BTreeMap::from([
        ("branch".to_string(), b"default".to_vec()),
        ("key-with-hyphens".to_string(), b"value1".to_vec()),
        ("key_with_underscores".to_string(), b"value2".to_vec()),
        ("empty-value".to_string(), b"".to_vec()),
        ("binary-value".to_string(), (0u8..=255).collect::<Vec<_>>()),
    ]);

    let encoded = encode_extra(&extra);
    let decoded = decode_extra(&encoded).unwrap();

    assert_eq!(extra, decoded);
}

700

701

#[test]
fn test_corrupt_extra() {
    // (malformed extras bytes, short description of the corruption)
    let malformed = [
        (&b""[..], "empty input"),
        (&b"\0"[..], "unexpected null byte"),
        (&b":empty-key"[..], "empty key"),
        (&b"\0leading-null:"[..], "leading null"),
        (&b"trailing-null:\0"[..], "trailing null"),
        (&b"missing-value"[..], "missing value"),
        (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
        (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
    ];

    for (raw, reason) in malformed {
        let outcome = decode_extra(raw);
        assert!(
            outcome.is_err(),
            "corrupt extra should have failed to parse: {}",
            reason
        );
    }
}

722

723

#[test]
fn test_parse_timestamp_line() {
    // A full timestamp line is "<seconds> <offset> <encoded extras>"; the
    // timestamp and the extras are parsed by two separate functions.
    let extra: BTreeMap<String, Vec<u8>> = BTreeMap::from([
        ("branch".to_string(), b"default".to_vec()),
        ("key-with-hyphens".to_string(), b"value1".to_vec()),
        ("key_with_underscores".to_string(), b"value2".to_vec()),
        ("empty-value".to_string(), b"".to_vec()),
        ("binary-value".to_string(), (0u8..=255).collect::<Vec<_>>()),
    ]);

    let encoded_extra = encode_extra(&extra);
    let line: Vec<u8> =
        [&b"1115154970 28800 "[..], &encoded_extra[..]].concat();

    let timestamp = parse_timestamp(&line).unwrap();
    assert_eq!(timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

    let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
    assert_eq!(extra, parsed_extra);
}

746

}

744

}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             use std::ascii::escape_default;
             use std::borrow::Cow;
             use std::collections::BTreeMap;
             use std::fmt::{Debug, Formatter};
             use std::{iter, str};
             use chrono::{DateTime, FixedOffset, NaiveDateTime};
             use itertools::{Either, Itertools};
             use crate::errors::HgError;
             use crate::revlog::Revision;
             use crate::revlog::{Node, NodePrefix};
             use crate::revlog::{Revlog, RevlogEntry, RevlogError};
             use crate::utils::hg_path::HgPath;
             use crate::vfs::Vfs;
             use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
             /// A specialized `Revlog` to work with changelog data format.
             ///
             /// The methods of this type delegate to the wrapped generic [`Revlog`] and
             /// wrap its entries as [`ChangelogEntry`] values.
             pub struct Changelog {
                 /// The generic `revlog` format.
                 pub(crate) revlog: Revlog,
             }
             impl Changelog {
                 /// Open the `changelog` of a repository given by its root.
                 ///
                 /// The changelog is opened as the revlog whose index file is
                 /// `00changelog.i`, resolved through `store_vfs`.
                 pub fn open(
                     store_vfs: &Vfs,
                     options: RevlogOpenOptions,
                 ) -> Result<Self, HgError> {
                     let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
                     Ok(Self { revlog })
                 }
                 /// Return the `ChangelogRevisionData` for the given node ID.
                 ///
                 /// The node (prefix) is first resolved to a revision number; any
                 /// lookup failure is propagated as a `RevlogError`.
                 pub fn data_for_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<ChangelogRevisionData, RevlogError> {
                     let rev = self.revlog.rev_from_node(node)?;
                     self.entry_for_checked_rev(rev)?.data()
                 }
                 /// Return the [`ChangelogEntry`] for the given revision number.
                 pub fn entry_for_rev(
                     &self,
                     rev: UncheckedRevision,
                 ) -> Result<ChangelogEntry, RevlogError> {
                     let revlog_entry = self.revlog.get_entry(rev)?;
                     Ok(ChangelogEntry { revlog_entry })
                 }
                 /// Same as [`Self::entry_for_rev`] for checked revisions.
                 fn entry_for_checked_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<ChangelogEntry, RevlogError> {
                     let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
                     Ok(ChangelogEntry { revlog_entry })
                 }
                 /// Return the [`ChangelogRevisionData`] for the given revision number.
                 ///
                 /// This is a useful shortcut in case the caller does not need the
                 /// generic revlog information (parents, hashes etc). Otherwise
                 /// consider taking a [`ChangelogEntry`] with
                 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
                 pub fn data_for_rev(
                     &self,
                     rev: UncheckedRevision,
                 ) -> Result<ChangelogRevisionData, RevlogError> {
                     self.entry_for_rev(rev)?.data()
                 }
                 /// Return the node ID for the given revision number, as reported by
                 /// the underlying revlog.
                 // NOTE(review): `None` presumably means the revision is unknown to
                 // the revlog; confirm against `Revlog::node_from_rev`.
                 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
                     self.revlog.node_from_rev(rev)
                 }
                 /// Return the revision number matching the given node prefix, as
                 /// reported by the underlying revlog.
                 pub fn rev_from_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     self.revlog.rev_from_node(node)
                 }
             }
             /// Graph access for the changelog: parent lookup is delegated unchanged to
             /// the underlying revlog.
             impl Graph for Changelog {
                 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
                     self.revlog.parents(rev)
                 }
             }
             /// A specialized `RevlogEntry` for `changelog` data format
             ///
             /// This is a `RevlogEntry` with the added semantics that the associated
             /// data should meet the requirements for `changelog`, materialized by
             /// the fact that `data()` constructs a `ChangelogRevisionData`.
             /// In case that promise would be broken, the `data` method returns an error.
             ///
             /// The generic entry stays reachable through `as_revlog_entry()`.
             #[derive(Clone)]
             pub struct ChangelogEntry<'changelog> {
                 /// Same data, as a generic `RevlogEntry`.
                 pub(crate) revlog_entry: RevlogEntry<'changelog>,
             }
             impl<'changelog> ChangelogEntry<'changelog> {
                 /// Read this entry's bytes and interpret them as changelog data.
                 ///
                 /// Empty data yields the null `ChangelogRevisionData`. Data that cannot
                 /// be interpreted is reported as a corrupted-repository error naming
                 /// the revision.
                 pub fn data<'a>(
                     &'a self,
                 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
                     let bytes = self.revlog_entry.data()?;
                     if bytes.is_empty() {
                         return Ok(ChangelogRevisionData::null());
                     }
                     ChangelogRevisionData::new(bytes).map_err(|err| {
                         let message = format!(
                             "Invalid changelog data for revision {}: {:?}",
                             self.revlog_entry.revision(),
                             err
                         );
                         RevlogError::Other(HgError::CorruptedRepository(message))
                     })
                 }
                 /// Borrow the underlying `RevlogEntry`.
                 ///
                 /// Use this to reach the information shared by all revlog entries:
                 /// revision number, node id, parent revisions etc.
                 pub fn as_revlog_entry(&self) -> &RevlogEntry {
                     &self.revlog_entry
                 }
                 /// Return the first parent as a changelog entry, if the underlying
                 /// revlog entry reports one.
                 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
                     let first_parent = self.revlog_entry.p1_entry()?;
                     Ok(first_parent.map(|revlog_entry| Self { revlog_entry }))
                 }
                 /// Return the second parent as a changelog entry, if the underlying
                 /// revlog entry reports one.
                 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
                     let second_parent = self.revlog_entry.p2_entry()?;
                     Ok(second_parent.map(|revlog_entry| Self { revlog_entry }))
                 }
             }
             /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
             ///
             /// The bytes are newline-separated: a hex manifest node line, a user line,
             /// a timestamp line, zero or more file lines, an empty line, and then the
             /// description. The `*_end` fields are byte offsets into `bytes` marking
             /// where each of the leading sections stops.
             #[derive(PartialEq)]
             pub struct ChangelogRevisionData<'changelog> {
                 /// The data bytes of the `changelog` entry.
                 bytes: Cow<'changelog, [u8]>,
                 /// The end offset for the hex manifest (not including the newline)
                 manifest_end: usize,
                 /// The end offset for the user+email (not including the newline)
                 user_end: usize,
                 /// The end offset for the timestamp+timezone+extras (not including the
                 /// newline)
                 timestamp_end: usize,
                 /// The end offset for the file list (not including the newline).
                 /// Equal to `timestamp_end` when the file list is empty.
                 files_end: usize,
             }
             impl<'changelog> ChangelogRevisionData<'changelog> {
                 /// Locate the section boundaries of a raw changelog entry.
                 ///
                 /// Walks the newline-separated lines of `bytes` once, recording the end
                 /// offset of the manifest, user and timestamp lines and of the file
                 /// list. Returns a corrupted-data `HgError` if the data stops before
                 /// the empty line that terminates the file list, or if nothing (not
                 /// even a newline) follows that empty line.
                 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
                     let mut line_iter = bytes.split(|b| b == &b'\n');
                     // `split` always yields at least one (possibly empty) slice.
                     let manifest_end = line_iter
                         .next()
                         .expect("Empty iterator from split()?")
                         .len();
                     let user_slice = line_iter.next().ok_or_else(|| {
                         HgError::corrupted("Changeset data truncated after manifest line")
                     })?;
                     // The `+ 1` accounts for the newline separating the two lines.
                     let user_end = manifest_end + 1 + user_slice.len();
                     let timestamp_slice = line_iter.next().ok_or_else(|| {
                         HgError::corrupted("Changeset data truncated after user line")
                     })?;
                     let timestamp_end = user_end + 1 + timestamp_slice.len();
                     // While scanning, `files_end` is the offset at which the next line
                     // starts; it is pulled back by one when the empty line is found.
                     let mut files_end = timestamp_end + 1;
                     loop {
                         let line = line_iter.next().ok_or_else(|| {
                             HgError::corrupted("Changeset data truncated in files list")
                         })?;
                         if line.is_empty() {
                             if files_end == bytes.len() {
                                 // The list of files ended with a single newline (there
                                 // should be two)
                                 return Err(HgError::corrupted(
                                     "Changeset data truncated after files list",
                                 ));
                             }
                             // Step back over the newline that ended the last file line
                             // (or the timestamp line when there are no files).
                             files_end -= 1;
                             break;
                         }
                         files_end += line.len() + 1;
                     }
                     Ok(Self {
                         bytes,
                         manifest_end,
                         user_end,
                         timestamp_end,
                         files_end,
                     })
                 }
                 /// Build the data of the null revision: an all-zero manifest node, an
                 /// empty user, a `0 0` timestamp line, no files and no description.
                 fn null() -> Self {
                     const NULL_ENTRY: &[u8] =
                         b"0000000000000000000000000000000000000000\n\n0 0\n\n";
                     Self::new(Cow::Borrowed(NULL_ENTRY)).unwrap()
                 }
                 /// Iterate over the newline-separated lines of the whole entry.
                 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
                     self.bytes.split(|&byte| byte == b'\n')
                 }
                 /// Return the node id of the `manifest` referenced by this `changelog`
                 /// entry, decoded from the hex digits on the first line.
                 pub fn manifest_node(&self) -> Result<Node, HgError> {
                     Node::from_hex_for_repo(&self.bytes[..self.manifest_end])
                 }
                 /// The full user string (usually a name followed by an email enclosed in
                 /// angle brackets), i.e. the second line of the entry.
                 pub fn user(&self) -> &[u8] {
                     // Skip the newline that terminates the manifest line.
                     let start = self.manifest_end + 1;
                     &self.bytes[start..self.user_end]
                 }
                 /// The raw timestamp line (timestamp in seconds, offset in seconds, and
                 /// possibly extras), i.e. the third line of the entry.
                 // TODO: We should expose this in a more useful way
                 pub fn timestamp_line(&self) -> &[u8] {
                     // Skip the newline that terminates the user line.
                     let start = self.user_end + 1;
                     &self.bytes[start..self.timestamp_end]
                 }
-                /// Parsed timestamp line, including optional extras.
+                /// Parsed timestamp.
-                pub fn parsed_timestamp(&self) -> Result<TimestampAndExtra, HgError> {
+                pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
-                    TimestampAndExtra::from_bytes(self.timestamp_line())
+                    parse_timestamp(self.timestamp_line())
+                }
+                /// Optional commit extras.
+                pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
+                    parse_timestamp_line_extra(self.timestamp_line())
                 }
                 /// The files changed in this revision.
                 ///
                 /// Yields nothing when the file list is empty, which is recorded as
                 /// `files_end == timestamp_end`.
                 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
                     let (list_start, list_end) = (self.timestamp_end, self.files_end);
                     if list_start != list_end {
                         // Skip the newline that terminates the timestamp line.
                         let listing = &self.bytes[list_start + 1..list_end];
                         Either::Right(listing.split(|&b| b == b'\n').map(HgPath::new))
                     } else {
                         Either::Left(iter::empty())
                     }
                 }
                 /// The change description: everything after the empty line that ends
                 /// the file list.
                 pub fn description(&self) -> &[u8] {
                     // Skip the two newlines that follow the file list.
                     let start = self.files_end + 2;
                     &self.bytes[start..]
                 }
             }
             /// Human-readable dump of the entry, with every section shown as an
             /// escaped ASCII string.
             impl Debug for ChangelogRevisionData<'_> {
                 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
                     // An entry without files has `files_end == timestamp_end` (see
                     // `files()`), so slicing `timestamp_end + 1..files_end` would
                     // start past its end and panic. Show an empty list instead.
                     let files: &[u8] = if self.timestamp_end == self.files_end {
                         &[]
                     } else {
                         &self.bytes[self.timestamp_end + 1..self.files_end]
                     };
                     f.debug_struct("ChangelogRevisionData")
                         .field("bytes", &debug_bytes(&self.bytes))
                         .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
                         .field(
                             "user",
                             &debug_bytes(
                                 &self.bytes[self.manifest_end + 1..self.user_end],
                             ),
                         )
                         .field(
                             "timestamp",
                             &debug_bytes(
                                 &self.bytes[self.user_end + 1..self.timestamp_end],
                             ),
                         )
                         .field("files", &debug_bytes(files))
                         .field(
                             "description",
                             &debug_bytes(&self.bytes[self.files_end + 2..]),
                         )
                         .finish()
                 }
             }
             /// Render arbitrary bytes as a printable string for debug output.
             ///
             /// Each byte goes through `std::ascii::escape_default`, so printable ASCII
             /// is kept as-is and everything else becomes a backslash escape. The
             /// escaped output is pure ASCII, which lets it be collected straight into
             /// a `String` without an intermediate buffer or a UTF-8 validation pass.
             fn debug_bytes(bytes: &[u8]) -> String {
                 bytes
                     .iter()
                     .flat_map(|b| escape_default(*b))
                     .map(char::from)
                     .collect()
             }
-            /// Parsed timestamp line, including the timestamp and optional extras.
+            /// Parse the raw bytes of the timestamp line from a changelog entry.
-            #[derive(Clone, Debug)]
+            ///
-            pub struct TimestampAndExtra {
+            /// According to the documentation in `hg help dates` and the
-                pub timestamp: DateTime<FixedOffset>,
+            /// implementation in `changelog.py`, the format of the timestamp line
-                pub extra: BTreeMap<String, Vec<u8>>,
+            /// is `time tz extra\n` where:
+            ///
+            /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
+            ///   as seconds since the UNIX epoch.
+            ///
+            /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
+            ///   seconds WEST of UTC (so negative for timezones east of UTC, which is the
+            ///   opposite of the sign in ISO 8601 timestamps).
+            ///
+            /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
+            ///   and value in each pair separated by an ASCII colon. Keys are limited to
+            ///   ASCII letters, digits, hyphens, and underscores, whereas values can be
+            ///   arbitrary bytes.
+            fn parse_timestamp(
+                timestamp_line: &[u8],
+            ) -> Result<DateTime<FixedOffset>, HgError> {
+                let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
-            impl TimestampAndExtra {
+                let timestamp_bytes = parts
-                /// Parse the raw bytes of the timestamp line from a changelog entry.
+                    .next()
-                ///
+                    .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
-                /// According to the documentation in `hg help dates` and the
+                let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
-                /// implementation in `changelog.py`, the format of the timestamp line
+                    HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
-                /// is `time tz extra\n` where:
+                })?;
-                ///
+                let timestamp_utc = timestamp_str
-                /// - `time` is an ASCII-encoded signed int or float denoting a UTC
+                    .parse()
-                ///   timestamp as seconds since the UNIX epoch.
+                    .map_err(|e| {
-                ///
+                        HgError::corrupted(format!("failed to parse timestamp: {e}"))
-                /// - `tz` is the timezone offset as an ASCII-encoded signed integer
+                    })
-                ///   denoting seconds WEST of UTC (so negative for timezones east of UTC,
+                    .and_then(|secs| {
-                ///   which is the opposite of the sign in ISO 8601 timestamps).
+                        NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
-                ///
+                            HgError::corrupted(format!(
-                /// - `extra` is an optional set of NUL-delimited key-value pairs, with the
+                                "integer timestamp out of valid range: {secs}"
-                ///   key and value in each pair separated by an ASCII colon. Keys are
+                            ))
-                ///   limited to ASCII letters, digits, hyphens, and underscores, whereas
-                ///   values can be arbitrary bytes.
-                fn from_bytes(line: &[u8]) -> Result<Self, HgError> {
-                    let mut parts = line.splitn(3, |c| *c == b' ');
-                    let timestamp_bytes = parts
-                        .next()
-                        .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
-                    let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
-                        HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
-                    })?;
-                    let timestamp_utc = timestamp_str
-                        .parse()
-                        .map_err(|e| {
-                            HgError::corrupted(format!("failed to parse timestamp: {e}"))
                         })
-                        .and_then(|secs| {
+                    })
-                            NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
+                    // Attempt to parse the timestamp as a float if we can't parse
-                                HgError::corrupted(format!(
+                    // it as an int. It doesn't seem like float timestamps are actually
-                                    "integer timestamp out of valid range: {secs}"
+                    // used in practice, but the Python code supports them.
-                                ))
+                    .or_else(|_| parse_float_timestamp(timestamp_str))?;
-                            })
-                        })
-                        // Attempt to parse the timestamp as a float if we can't parse
-                        // it as an int. It doesn't seem like float timestamps are actually
-                        // used in practice, but the Python code supports them.
-                        .or_else(|_| parse_float_timestamp(timestamp_str))?;
-                    let timezone_bytes = parts
+                let timezone_bytes = parts
-                        .next()
+                    .next()
-                        .ok_or_else(|| HgError::corrupted("missing timezone"))?;
+                    .ok_or_else(|| HgError::corrupted("missing timezone"))?;
-                    let timezone_secs: i32 = str::from_utf8(timezone_bytes)
+                let timezone_secs: i32 = str::from_utf8(timezone_bytes)
-                        .map_err(|e| {
+                    .map_err(|e| {
-                            HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
+                        HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
-                        })?
+                    })?
-                        .parse()
+                    .parse()
-                        .map_err(|e| {
+                    .map_err(|e| {
-                            HgError::corrupted(format!("timezone is not an integer: {e}"))
+                        HgError::corrupted(format!("timezone is not an integer: {e}"))
-                        })?;
+                    })?;
-                    let timezone =
+                let timezone = FixedOffset::west_opt(timezone_secs)
-                        FixedOffset::west_opt(timezone_secs).ok_or_else(|| {
+                    .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
-                            HgError::corrupted("timezone offset out of bounds")
-                        })?;
-                    let timestamp =
+                Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
-                        DateTime::from_naive_utc_and_offset(timestamp_utc, timezone);
-                    let extra = parts
-                        .next()
-                        .map(parse_extra)
-                        .transpose()?
-                        .unwrap_or_default();
-                    Ok(Self { timestamp, extra })
             }
             /// Attempt to parse the given string as floating-point timestamp, and
             /// convert the result into a `chrono::NaiveDateTime`.
             ///
             /// Returns a corrupted-data `HgError` if the string is not a float, if it
             /// is not finite (`nan`, `inf`), or if the resulting instant is outside
             /// the range `NaiveDateTime` can represent.
             fn parse_float_timestamp(
                 timestamp_str: &str,
             ) -> Result<NaiveDateTime, HgError> {
                 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
                     HgError::corrupted(format!("failed to parse timestamp: {e}"))
                 })?;
                 // `f64::from_str` accepts "nan" and "inf". The `as` casts below would
                 // quietly turn NaN into 0 (the epoch), so reject such values up front.
                 if !timestamp.is_finite() {
                     return Err(HgError::corrupted(format!(
                         "float timestamp out of valid range: {timestamp}"
                     )));
                 }
                 // To construct a `NaiveDateTime` we'll need to convert the float
                 // into signed integer seconds and unsigned integer nanoseconds.
                 // (The cast saturates for values beyond the `i64` range, which are
                 // then rejected by `from_timestamp_opt` below.)
                 let mut secs = timestamp.trunc() as i64;
                 let mut subsecs = timestamp.fract();
                 // If the fractional component is negative, we need to express it as
                 // positive nanoseconds since the previous second. Testing the
                 // fraction rather than the whole value keeps negative whole-second
                 // timestamps such as `-1.0` intact: borrowing a second from them
                 // would yield a full 1_000_000_000 ns, which chrono only accepts as
                 // a leap second.
                 if subsecs < 0.0 {
                     secs -= 1;
                     subsecs += 1.0;
                 }
                 // This cast should be safe because the fractional component is
                 // by definition less than 1.0, so this value should not exceed
                 // 1 billion, which is representable as an f64 without loss of
                 // precision and should fit into a u32 without overflowing.
                 //
                 // (Any loss of precision in the fractional component will have
                 // already happened at the time of initial parsing; in general,
                 // f64s are insufficiently precise to provide nanosecond-level
                 // precision with present-day timestamps.)
                 let mut nsecs = (subsecs * 1_000_000_000.0) as u32;
                 // Adding 1.0 to a tiny negative fraction can round up to exactly
                 // 1.0; carry that into the seconds instead of passing a
                 // leap-second-style nanosecond count.
                 if nsecs >= 1_000_000_000 {
                     secs += 1;
                     nsecs -= 1_000_000_000;
                 }
                 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
                     HgError::corrupted(format!(
                         "float timestamp out of valid range: {timestamp}"
                     ))
                 })
             }
-            /// Parse the "extra" fields from a changeset's timestamp line.
+            /// Decode changeset extra fields.
             ///
             /// Extras are null-delimited key-value pairs where the key consists of ASCII
             /// alphanumeric characters plus hyphens and underscores, and the value can
             /// contain arbitrary bytes.
-            fn parse_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
+            fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
                 extra
                     .split(|c| *c == b'\0')
                     .map(|pair| {
                         let pair = unescape_extra(pair);
                         let mut iter = pair.splitn(2, |c| *c == b':');
                         let key_bytes =
                             iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
                                 HgError::corrupted("empty key in changeset extras")
                             })?;
                         let key = str::from_utf8(key_bytes)
                             .ok()
                             .filter(|k| {
                                 k.chars().all(|c| {
                                     c.is_ascii_alphanumeric() || c == '_' || c == '-'
                                 })
                             })
                             .ok_or_else(|| {
                                 let key = String::from_utf8_lossy(key_bytes);
                                 HgError::corrupted(format!(
                                     "invalid key in changeset extras: {key}",
                                 ))
                             })?
                             .to_string();
                         let value = iter.next().map(Into::into).ok_or_else(|| {
                             HgError::corrupted(format!(
                                 "missing value for changeset extra: {key}"
                             ))
                         })?;
                         Ok((key, value))
                     })
                     .collect()
             }
+            /// Parse the extra fields from a changeset's timestamp line.
+            fn parse_timestamp_line_extra(
+                timestamp_line: &[u8],
+            ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
+                Ok(timestamp_line
+                    .splitn(3, |c| *c == b' ')
+                    .nth(2)
+                    .map(decode_extra)
+                    .transpose()?
+                    .unwrap_or_default())
+            }
             /// Undo the escaping Mercurial applies to changelog extras.
             ///
             /// Only the 4 sequences produced by `_string_escape` in `changelog.py` are
             /// decoded: `\0` (null), `\\` (backslash), `\n` (newline) and `\r`
             /// (carriage return). Any other backslash sequence, including a lone
             /// trailing backslash, is copied through unchanged.
             ///
             /// The Python code also carries a workaround for escaped nuls followed by
             /// an ASCII octal digit, because Python's built-in `string_escape` codec
             /// would read that as an octal byte value. No such workaround is needed
             /// here since octal escapes are never decoded.
             fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
                 let mut decoded = Vec::with_capacity(bytes.len());
                 let mut rest = bytes;
                 loop {
                     match rest {
                         [] => break,
                         // A backslash with nothing after it is kept verbatim.
                         [b'\\'] => {
                             decoded.push(b'\\');
                             break;
                         }
                         [b'\\', escaped, tail @ ..] => {
                             match *escaped {
                                 b'0' => decoded.push(b'\0'),
                                 b'\\' => decoded.push(b'\\'),
                                 b'n' => decoded.push(b'\n'),
                                 b'r' => decoded.push(b'\r'),
                                 // Should never occur in theory: every backslash in
                                 // the original input is itself escaped, so only the
                                 // 4 sequences above can be observed. Keep both bytes.
                                 other => decoded.extend_from_slice(&[b'\\', other]),
                             }
                             rest = tail;
                         }
                         [plain, tail @ ..] => {
                             decoded.push(*plain);
                             rest = tail;
                         }
                     }
                 }
                 decoded
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::vfs::Vfs;
                 use crate::NULL_REVISION;
                 use pretty_assertions::assert_eq;
                 /// Truncated or empty changelog data must be rejected by
                 /// `ChangelogRevisionData::new`.
                 #[test]
                 fn test_create_changelogrevisiondata_invalid() {
                     // Completely empty
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
                     // No newline after manifest
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
                     // No newline after user
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
                     // No newline after timestamp
                     assert!(
                         ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
                     );
                     // Missing newline after files
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(
                         b"abcd\n\n0 0\nfile1\nfile2"
                     ))
                     .is_err());
                     // Only one newline after files
                     assert!(ChangelogRevisionData::new(Cow::Borrowed(
                         b"abcd\n\n0 0\nfile1\nfile2\n"
                     ))
                     .is_err());
                 }
                 /// A well-formed entry is split into manifest, user, timestamp line,
                 /// files and description.
                 #[test]
                 fn test_create_changelogrevisiondata() {
                     // The entry is spelled with explicit `\n` escapes (and `\`
                     // line continuations, which swallow the following indentation)
                     // so its content does not depend on how the source is indented.
                     let data = ChangelogRevisionData::new(Cow::Borrowed(
                         b"0123456789abcdef0123456789abcdef01234567\n\
                           Some One <someone@example.com>\n\
                           0 0\n\
                           file1\n\
                           file2\n\
                           \n\
                           some\n\
                           commit\n\
                           message",
                     ))
                     .unwrap();
                     assert_eq!(
                         data.manifest_node().unwrap(),
                         Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                             .unwrap()
                     );
                     assert_eq!(data.user(), b"Some One <someone@example.com>");
                     assert_eq!(data.timestamp_line(), b"0 0");
                     assert_eq!(
                         data.files().collect_vec(),
                         vec![HgPath::new("file1"), HgPath::new("file2")]
                     );
                     assert_eq!(data.description(), b"some\ncommit\nmessage");
                 }
                 /// The null revision yields the null changelog data, both through the
                 /// shortcut accessor and through an intermediate entry.
                 #[test]
                 fn test_data_from_rev_null() -> Result<(), RevlogError> {
                     // an empty revlog will be enough for this case
                     let temp = tempfile::tempdir().unwrap();
                     let index_path = temp.path().join("foo.i");
                     std::fs::write(index_path, b"").unwrap();
                     let vfs = Vfs { base: temp.path() };
                     let changelog = Changelog {
                         revlog: Revlog::open(
                             &vfs,
                             "foo.i",
                             None,
                             RevlogOpenOptions::new(),
                         )
                         .unwrap(),
                     };
                     let direct = changelog.data_for_rev(NULL_REVISION.into())?;
                     assert_eq!(direct, ChangelogRevisionData::null());
                     // same with the intermediate entry object
                     let via_entry =
                         changelog.entry_for_rev(NULL_REVISION.into())?.data()?;
                     assert_eq!(via_entry, ChangelogRevisionData::null());
                     Ok(())
                 }
                 /// The null entry lists no files.
                 #[test]
                 fn test_empty_files_list() {
                     let null_data = ChangelogRevisionData::null();
                     assert!(null_data.files().next().is_none());
                 }
                 #[test]
                 fn test_unescape_basic() {
                     // The four supported sequences (`\0`, `\\`, `\n`, `\r`) must each
                     // decode to the single byte they stand for.
                     let input: &[u8] = br"AAA\0BBB\\CCC\nDDD\rEEE";
                     let want: &[u8] = b"AAA\0BBB\\CCC\nDDD\rEEE";
                     let got = unescape_extra(input);
                     assert_eq!(want, &got[..]);
                 }
                 #[test]
                 fn test_unescape_unsupported_sequence() {
                     // Other escape sequences are left unaltered. The range is
                     // inclusive so that a backslash followed by 0xFF is checked too.
                     for c in u8::MIN..=u8::MAX {
                         // These four are real escapes (see `test_unescape_basic`),
                         // so they are expected to change and are skipped here.
                         if matches!(c, b'0' | b'\\' | b'n' | b'r') {
                             continue;
                         }
                         let expected = &[b'\\', c][..];
                         let unescaped = unescape_extra(expected);
                         assert_eq!(expected, &unescaped[..]);
                     }
                 }
                 #[test]
                 fn test_unescape_trailing_backslash() {
                     // A lone backslash at the very end of the input is kept as-is.
                     let input: &[u8] = br"hi\";
                     let got = unescape_extra(input);
                     assert_eq!(input, &got[..]);
                 }
                 #[test]
                 fn test_unescape_nul_followed_by_octal() {
                     // `\0` directly followed by digits must decode to a NUL byte and
                     // then the digits themselves, not be read as one octal escape.
                     let input: &[u8] = br"\012";
                     let want: &[u8] = b"\012";
                     let got = unescape_extra(input);
                     assert_eq!(want, &got[..]);
                 }
                 #[test]
                 fn test_parse_float_timestamp() {
                     // Each case pairs a textual float timestamp with the expected
                     // `to_string()` rendering of the parsed value.
                     let cases: [(&str, &str); 6] = [
                         // Zero is the UNIX epoch.
                         ("0.0", "1970-01-01 00:00:00"),
                         // Negative zero is no different from positive zero.
                         ("-0.0", "1970-01-01 00:00:00"),
                         // Without a fractional part the value behaves like an
                         // integer (as long as it fits in f64 precision).
                         ("1115154970.0", "2005-05-03 21:16:10"),
                         // Arbitrary fractional digits lose some precision when
                         // going through floating point.
                         ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
                         // Values that f64 can represent come back unchanged.
                         ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
                         // A negative fractional value counts back from the epoch.
                         ("-1.333", "1969-12-31 23:59:58.667"),
                     ];
                     for (raw, rendered) in cases {
                         let parsed = parse_float_timestamp(raw).unwrap();
                         assert_eq!(parsed.to_string(), rendered);
                     }
                 }
                 /// Test helper: backslash-escapes a byte string.
                 ///
                 /// NUL, backslash, line feed and carriage return become the two-byte
                 /// sequences `\0`, `\\`, `\n` and `\r`; every other byte is copied
                 /// through unchanged.
                 fn escape_extra(bytes: &[u8]) -> Vec<u8> {
                     let mut escaped = Vec::with_capacity(bytes.len());
                     for &byte in bytes {
                         match byte {
                             b'\0' => escaped.extend_from_slice(b"\\0"),
                             b'\\' => escaped.extend_from_slice(b"\\\\"),
                             b'\n' => escaped.extend_from_slice(b"\\n"),
                             b'\r' => escaped.extend_from_slice(b"\\r"),
                             plain => escaped.push(plain),
                         }
                     }
                     escaped
                 }
                 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
                 where
                     K: AsRef<[u8]>,
                     V: AsRef<[u8]>,
                 {
                     let extras = pairs.into_iter().map(|(k, v)| {
                         escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
                     });
                     // Use fully-qualified syntax to avoid a future naming conflict with
                     // the standard library: https://github.com/rust-lang/rust/issues/79524
                     Itertools::intersperse(extras, b"\0".to_vec()).concat()
                 }
                 // Round-trip check for extras: a map with hyphenated and underscored
                 // keys, an empty value and a value holding every byte 0..=255 is
                 // serialized with `encode_extra`, decoded again, and compared with
                 // the original map.
                 //
                 // NOTE(review): lines starting with `-`/`+` are diff markers showing
                 // the rename from `parse_extra` to `decode_extra`; presumably only
                 // the `+` side is meant to remain - confirm before building.
                 #[test]
-                fn test_parse_extra() {
+                fn test_decode_extra() {
                     let extra = [
                         ("branch".into(), b"default".to_vec()),
                         ("key-with-hyphens".into(), b"value1".to_vec()),
                         ("key_with_underscores".into(), b"value2".to_vec()),
                         ("empty-value".into(), b"".to_vec()),
                         ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
                     ]
                     .into_iter()
                     .collect::<BTreeMap<String, Vec<u8>>>();
                     let encoded = encode_extra(&extra);
-                    let parsed = parse_extra(&encoded).unwrap();
+                    let decoded = decode_extra(&encoded).unwrap();
-                    assert_eq!(extra, parsed);
+                    assert_eq!(extra, decoded);
                 }
                 // Every malformed input below must be rejected by the extras
                 // decoder. Each case pairs the raw bytes with a short label that is
                 // only used in the assertion message to identify the failing case.
                 //
                 // NOTE(review): lines starting with `-`/`+` are diff markers showing
                 // the rename from `parse_extra` to `decode_extra`; presumably only
                 // the `+` side is meant to remain - confirm before building.
                 #[test]
                 fn test_corrupt_extra() {
                     let test_cases = [
                         (&b""[..], "empty input"),
                         (&b"\0"[..], "unexpected null byte"),
                         (&b":empty-key"[..], "empty key"),
                         (&b"\0leading-null:"[..], "leading null"),
                         (&b"trailing-null:\0"[..], "trailing null"),
                         (&b"missing-value"[..], "missing value"),
                         (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
                         (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
                     ];
                     for (extra, msg) in test_cases {
                         assert!(
-                            parse_extra(&extra).is_err(),
+                            decode_extra(&extra).is_err(),
                             "corrupt extra should have failed to parse: {}",
                             msg
                         );
                     }
                 }
                 // Parses a full timestamp line: the literal `1115154970 28800 `
                 // followed by extras serialized with `encode_extra`. The timestamp
                 // must render as RFC 3339 `2005-05-03T13:16:10-08:00` and the
                 // decoded extras must equal the original map.
                 //
                 // NOTE(review): lines starting with `-`/`+` are diff markers and the
                 // two sides are interleaved. The `-` side reads both values from
                 // `TimestampAndExtra::from_bytes`; the `+` side calls
                 // `parse_timestamp` and `parse_timestamp_line_extra` separately.
                 // Presumably only the `+` side is meant to remain - confirm before
                 // building.
                 #[test]
                 fn test_parse_timestamp_line() {
                     let extra = [
                         ("branch".into(), b"default".to_vec()),
                         ("key-with-hyphens".into(), b"value1".to_vec()),
                         ("key_with_underscores".into(), b"value2".to_vec()),
                         ("empty-value".into(), b"".to_vec()),
                         ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
                     ]
                     .into_iter()
                     .collect::<BTreeMap<String, Vec<u8>>>();
                     // Two numeric fields and a space, then the encoded extras.
                     let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
                     line.extend_from_slice(&encode_extra(&extra));
-                    let parsed = TimestampAndExtra::from_bytes(&line).unwrap();
+                    let timestamp = parse_timestamp(&line).unwrap();
+                    assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");
-                    assert_eq!(
+                    let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
-                        &parsed.timestamp.to_rfc3339(),
+                    assert_eq!(extra, parsed_extra);
-                        "2005-05-03T13:16:10-08:00"
-                    );
-                    assert_eq!(extra, parsed.extra);
                 }
             }