Show More
@@ -1,756 +1,753 | |||||
1 | use std::ascii::escape_default; |
|
1 | use std::ascii::escape_default; | |
2 | use std::borrow::Cow; |
|
2 | use std::borrow::Cow; | |
3 | use std::collections::BTreeMap; |
|
3 | use std::collections::BTreeMap; | |
4 | use std::fmt::{Debug, Formatter}; |
|
4 | use std::fmt::{Debug, Formatter}; | |
5 | use std::{iter, str}; |
|
5 | use std::{iter, str}; | |
6 |
|
6 | |||
7 | use chrono::{DateTime, FixedOffset, Utc}; |
|
7 | use chrono::{DateTime, FixedOffset, Utc}; | |
8 | use itertools::{Either, Itertools}; |
|
8 | use itertools::{Either, Itertools}; | |
9 |
|
9 | |||
10 | use crate::errors::HgError; |
|
10 | use crate::errors::HgError; | |
11 | use crate::revlog::Index; |
|
11 | use crate::revlog::Index; | |
12 | use crate::revlog::Revision; |
|
12 | use crate::revlog::Revision; | |
13 | use crate::revlog::{Node, NodePrefix}; |
|
13 | use crate::revlog::{Node, NodePrefix}; | |
14 | use crate::revlog::{Revlog, RevlogEntry, RevlogError}; |
|
14 | use crate::revlog::{Revlog, RevlogEntry, RevlogError}; | |
15 | use crate::utils::hg_path::HgPath; |
|
15 | use crate::utils::hg_path::HgPath; | |
16 | use crate::vfs::VfsImpl; |
|
16 | use crate::vfs::VfsImpl; | |
17 | use crate::{Graph, GraphError, UncheckedRevision}; |
|
17 | use crate::{Graph, GraphError, UncheckedRevision}; | |
18 |
|
18 | |||
19 | use super::options::RevlogOpenOptions; |
|
19 | use super::options::RevlogOpenOptions; | |
20 |
|
20 | |||
21 | /// A specialized `Revlog` to work with changelog data format. |
|
21 | /// A specialized `Revlog` to work with changelog data format. | |
22 | pub struct Changelog { |
|
22 | pub struct Changelog { | |
23 | /// The generic `revlog` format. |
|
23 | /// The generic `revlog` format. | |
24 | pub(crate) revlog: Revlog, |
|
24 | pub(crate) revlog: Revlog, | |
25 | } |
|
25 | } | |
26 |
|
26 | |||
27 | impl Changelog { |
|
27 | impl Changelog { | |
28 | /// Open the `changelog` of a repository given by its root. |
|
28 | /// Open the `changelog` of a repository given by its root. | |
29 | pub fn open( |
|
29 | pub fn open( | |
30 | store_vfs: &VfsImpl, |
|
30 | store_vfs: &VfsImpl, | |
31 | options: RevlogOpenOptions, |
|
31 | options: RevlogOpenOptions, | |
32 | ) -> Result<Self, HgError> { |
|
32 | ) -> Result<Self, HgError> { | |
33 | let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?; |
|
33 | let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?; | |
34 | Ok(Self { revlog }) |
|
34 | Ok(Self { revlog }) | |
35 | } |
|
35 | } | |
36 |
|
36 | |||
37 | /// Return the `ChangelogRevisionData` for the given node ID. |
|
37 | /// Return the `ChangelogRevisionData` for the given node ID. | |
38 | pub fn data_for_node( |
|
38 | pub fn data_for_node( | |
39 | &self, |
|
39 | &self, | |
40 | node: NodePrefix, |
|
40 | node: NodePrefix, | |
41 | ) -> Result<ChangelogRevisionData, RevlogError> { |
|
41 | ) -> Result<ChangelogRevisionData, RevlogError> { | |
42 | let rev = self.revlog.rev_from_node(node)?; |
|
42 | let rev = self.revlog.rev_from_node(node)?; | |
43 | self.entry_for_checked_rev(rev)?.data() |
|
43 | self.entry_for_checked_rev(rev)?.data() | |
44 | } |
|
44 | } | |
45 |
|
45 | |||
46 | /// Return the [`ChangelogEntry`] for the given revision number. |
|
46 | /// Return the [`ChangelogEntry`] for the given revision number. | |
47 | pub fn entry_for_rev( |
|
47 | pub fn entry_for_rev( | |
48 | &self, |
|
48 | &self, | |
49 | rev: UncheckedRevision, |
|
49 | rev: UncheckedRevision, | |
50 | ) -> Result<ChangelogEntry, RevlogError> { |
|
50 | ) -> Result<ChangelogEntry, RevlogError> { | |
51 | let revlog_entry = self.revlog.get_entry(rev)?; |
|
51 | let revlog_entry = self.revlog.get_entry(rev)?; | |
52 | Ok(ChangelogEntry { revlog_entry }) |
|
52 | Ok(ChangelogEntry { revlog_entry }) | |
53 | } |
|
53 | } | |
54 |
|
54 | |||
55 | /// Same as [`Self::entry_for_rev`] for checked revisions. |
|
55 | /// Same as [`Self::entry_for_rev`] for checked revisions. | |
56 | fn entry_for_checked_rev( |
|
56 | fn entry_for_checked_rev( | |
57 | &self, |
|
57 | &self, | |
58 | rev: Revision, |
|
58 | rev: Revision, | |
59 | ) -> Result<ChangelogEntry, RevlogError> { |
|
59 | ) -> Result<ChangelogEntry, RevlogError> { | |
60 | let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?; |
|
60 | let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?; | |
61 | Ok(ChangelogEntry { revlog_entry }) |
|
61 | Ok(ChangelogEntry { revlog_entry }) | |
62 | } |
|
62 | } | |
63 |
|
63 | |||
64 | /// Return the [`ChangelogRevisionData`] for the given revision number. |
|
64 | /// Return the [`ChangelogRevisionData`] for the given revision number. | |
65 | /// |
|
65 | /// | |
66 | /// This is a useful shortcut in case the caller does not need the |
|
66 | /// This is a useful shortcut in case the caller does not need the | |
67 | /// generic revlog information (parents, hashes etc). Otherwise |
|
67 | /// generic revlog information (parents, hashes etc). Otherwise | |
68 | /// consider taking a [`ChangelogEntry`] with |
|
68 | /// consider taking a [`ChangelogEntry`] with | |
69 | /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. |
|
69 | /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. | |
70 | pub fn data_for_rev( |
|
70 | pub fn data_for_rev( | |
71 | &self, |
|
71 | &self, | |
72 | rev: UncheckedRevision, |
|
72 | rev: UncheckedRevision, | |
73 | ) -> Result<ChangelogRevisionData, RevlogError> { |
|
73 | ) -> Result<ChangelogRevisionData, RevlogError> { | |
74 | self.entry_for_rev(rev)?.data() |
|
74 | self.entry_for_rev(rev)?.data() | |
75 | } |
|
75 | } | |
76 |
|
76 | |||
77 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { |
|
77 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { | |
78 | self.revlog.node_from_rev(rev) |
|
78 | self.revlog.node_from_rev(rev) | |
79 | } |
|
79 | } | |
80 |
|
80 | |||
81 | pub fn rev_from_node( |
|
81 | pub fn rev_from_node( | |
82 | &self, |
|
82 | &self, | |
83 | node: NodePrefix, |
|
83 | node: NodePrefix, | |
84 | ) -> Result<Revision, RevlogError> { |
|
84 | ) -> Result<Revision, RevlogError> { | |
85 | self.revlog.rev_from_node(node) |
|
85 | self.revlog.rev_from_node(node) | |
86 | } |
|
86 | } | |
87 |
|
87 | |||
88 | pub fn get_index(&self) -> &Index { |
|
88 | pub fn get_index(&self) -> &Index { | |
89 | self.revlog.index() |
|
89 | self.revlog.index() | |
90 | } |
|
90 | } | |
91 | } |
|
91 | } | |
92 |
|
92 | |||
impl Graph for Changelog {
    /// Delegate parent lookup to the underlying generic revlog.
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        self.revlog.parents(rev)
    }
}
98 |
|
98 | |||
99 | /// A specialized `RevlogEntry` for `changelog` data format |
|
99 | /// A specialized `RevlogEntry` for `changelog` data format | |
100 | /// |
|
100 | /// | |
101 | /// This is a `RevlogEntry` with the added semantics that the associated |
|
101 | /// This is a `RevlogEntry` with the added semantics that the associated | |
102 | /// data should meet the requirements for `changelog`, materialized by |
|
102 | /// data should meet the requirements for `changelog`, materialized by | |
103 | /// the fact that `data()` constructs a `ChangelogRevisionData`. |
|
103 | /// the fact that `data()` constructs a `ChangelogRevisionData`. | |
104 | /// In case that promise would be broken, the `data` method returns an error. |
|
104 | /// In case that promise would be broken, the `data` method returns an error. | |
105 | #[derive(Clone)] |
|
105 | #[derive(Clone)] | |
106 | pub struct ChangelogEntry<'changelog> { |
|
106 | pub struct ChangelogEntry<'changelog> { | |
107 | /// Same data, as a generic `RevlogEntry`. |
|
107 | /// Same data, as a generic `RevlogEntry`. | |
108 | pub(crate) revlog_entry: RevlogEntry<'changelog>, |
|
108 | pub(crate) revlog_entry: RevlogEntry<'changelog>, | |
109 | } |
|
109 | } | |
110 |
|
110 | |||
111 | impl<'changelog> ChangelogEntry<'changelog> { |
|
111 | impl<'changelog> ChangelogEntry<'changelog> { | |
112 | pub fn data<'a>( |
|
112 | pub fn data<'a>( | |
113 | &'a self, |
|
113 | &'a self, | |
114 | ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> { |
|
114 | ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> { | |
115 | let bytes = self.revlog_entry.data()?; |
|
115 | let bytes = self.revlog_entry.data()?; | |
116 | if bytes.is_empty() { |
|
116 | if bytes.is_empty() { | |
117 | Ok(ChangelogRevisionData::null()) |
|
117 | Ok(ChangelogRevisionData::null()) | |
118 | } else { |
|
118 | } else { | |
119 | Ok(ChangelogRevisionData::new(bytes).map_err(|err| { |
|
119 | Ok(ChangelogRevisionData::new(bytes).map_err(|err| { | |
120 | RevlogError::Other(HgError::CorruptedRepository(format!( |
|
120 | RevlogError::Other(HgError::CorruptedRepository(format!( | |
121 | "Invalid changelog data for revision {}: {:?}", |
|
121 | "Invalid changelog data for revision {}: {:?}", | |
122 | self.revlog_entry.revision(), |
|
122 | self.revlog_entry.revision(), | |
123 | err |
|
123 | err | |
124 | ))) |
|
124 | ))) | |
125 | })?) |
|
125 | })?) | |
126 | } |
|
126 | } | |
127 | } |
|
127 | } | |
128 |
|
128 | |||
129 | /// Obtain a reference to the underlying `RevlogEntry`. |
|
129 | /// Obtain a reference to the underlying `RevlogEntry`. | |
130 | /// |
|
130 | /// | |
131 | /// This allows the caller to access the information that is common |
|
131 | /// This allows the caller to access the information that is common | |
132 | /// to all revlog entries: revision number, node id, parent revisions etc. |
|
132 | /// to all revlog entries: revision number, node id, parent revisions etc. | |
133 | pub fn as_revlog_entry(&self) -> &RevlogEntry { |
|
133 | pub fn as_revlog_entry(&self) -> &RevlogEntry { | |
134 | &self.revlog_entry |
|
134 | &self.revlog_entry | |
135 | } |
|
135 | } | |
136 |
|
136 | |||
137 | pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { |
|
137 | pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { | |
138 | Ok(self |
|
138 | Ok(self | |
139 | .revlog_entry |
|
139 | .revlog_entry | |
140 | .p1_entry()? |
|
140 | .p1_entry()? | |
141 | .map(|revlog_entry| Self { revlog_entry })) |
|
141 | .map(|revlog_entry| Self { revlog_entry })) | |
142 | } |
|
142 | } | |
143 |
|
143 | |||
144 | pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { |
|
144 | pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { | |
145 | Ok(self |
|
145 | Ok(self | |
146 | .revlog_entry |
|
146 | .revlog_entry | |
147 | .p2_entry()? |
|
147 | .p2_entry()? | |
148 | .map(|revlog_entry| Self { revlog_entry })) |
|
148 | .map(|revlog_entry| Self { revlog_entry })) | |
149 | } |
|
149 | } | |
150 | } |
|
150 | } | |
151 |
|
151 | |||
/// `Changelog` entry which knows how to interpret the `changelog` data bytes.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// The raw bytes of the changelog entry, either borrowed or owned.
    bytes: Cow<'changelog, [u8]>,
    /// The end offset for the hex manifest (not including the newline)
    manifest_end: usize,
    /// The end offset for the user+email (not including the newline)
    user_end: usize,
    /// The end offset for the timestamp+timezone+extras (not including the
    /// newline)
    timestamp_end: usize,
    /// The end offset for the file list (not including the newline)
    files_end: usize,
}
167 |
|
167 | |||
impl<'changelog> ChangelogRevisionData<'changelog> {
    /// Parse raw changelog bytes, recording the end offset of each section
    /// (manifest line, user line, timestamp line, files list) so accessors
    /// can slice them out later without re-parsing.
    ///
    /// Returns a corruption error if any expected section is missing.
    fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
        let mut line_iter = bytes.split(|b| b == &b'\n');
        // First line: hex manifest node id. `split` always yields at least
        // one (possibly empty) item, hence the `expect`.
        let manifest_end = line_iter
            .next()
            .expect("Empty iterator from split()?")
            .len();
        let user_slice = line_iter.next().ok_or_else(|| {
            HgError::corrupted("Changeset data truncated after manifest line")
        })?;
        // The `+ 1` accounts for the newline terminating the previous line.
        let user_end = manifest_end + 1 + user_slice.len();
        let timestamp_slice = line_iter.next().ok_or_else(|| {
            HgError::corrupted("Changeset data truncated after user line")
        })?;
        let timestamp_end = user_end + 1 + timestamp_slice.len();
        // Walk the files list: one path per line, terminated by an empty
        // line (i.e. a double newline before the description).
        let mut files_end = timestamp_end + 1;
        loop {
            let line = line_iter.next().ok_or_else(|| {
                HgError::corrupted("Changeset data truncated in files list")
            })?;
            if line.is_empty() {
                if files_end == bytes.len() {
                    // The list of files ended with a single newline (there
                    // should be two)
                    return Err(HgError::corrupted(
                        "Changeset data truncated after files list",
                    ));
                }
                // Back off the trailing newline we optimistically counted.
                files_end -= 1;
                break;
            }
            files_end += line.len() + 1;
        }

        Ok(Self {
            bytes,
            manifest_end,
            user_end,
            timestamp_end,
            files_end,
        })
    }

    /// Canonical data for the null revision: null manifest id, empty user,
    /// epoch timestamp, no files, empty description.
    fn null() -> Self {
        Self::new(Cow::Borrowed(
            b"0000000000000000000000000000000000000000\n\n0 0\n\n",
        ))
        .unwrap()
    }

    /// Return an iterator over the lines of the entry.
    pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
        self.bytes.split(|b| b == &b'\n')
    }

    /// Return the node id of the `manifest` referenced by this `changelog`
    /// entry.
    pub fn manifest_node(&self) -> Result<Node, HgError> {
        let manifest_node_hex = &self.bytes[..self.manifest_end];
        Node::from_hex_for_repo(manifest_node_hex)
    }

    /// The full user string (usually a name followed by an email enclosed in
    /// angle brackets)
    pub fn user(&self) -> &[u8] {
        &self.bytes[self.manifest_end + 1..self.user_end]
    }

    /// The full timestamp line (timestamp in seconds, offset in seconds, and
    /// possibly extras)
    // TODO: We should expose this in a more useful way
    pub fn timestamp_line(&self) -> &[u8] {
        &self.bytes[self.user_end + 1..self.timestamp_end]
    }

    /// Parsed timestamp.
    pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
        parse_timestamp(self.timestamp_line())
    }

    /// Optional commit extras.
    pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
        parse_timestamp_line_extra(self.timestamp_line())
    }

    /// The files changed in this revision.
    pub fn files(&self) -> impl Iterator<Item = &HgPath> {
        // An empty files section means timestamp_end == files_end; the
        // `Either` lets both arms share one concrete return type.
        if self.timestamp_end == self.files_end {
            Either::Left(iter::empty())
        } else {
            Either::Right(
                self.bytes[self.timestamp_end + 1..self.files_end]
                    .split(|b| b == &b'\n')
                    .map(HgPath::new),
            )
        }
    }

    /// The change description.
    pub fn description(&self) -> &[u8] {
        // `+ 2` skips the double newline separating files from description.
        &self.bytes[self.files_end + 2..]
    }
}
271 |
|
271 | |||
272 | impl Debug for ChangelogRevisionData<'_> { |
|
272 | impl Debug for ChangelogRevisionData<'_> { | |
273 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
|
273 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | |
274 | f.debug_struct("ChangelogRevisionData") |
|
274 | f.debug_struct("ChangelogRevisionData") | |
275 | .field("bytes", &debug_bytes(&self.bytes)) |
|
275 | .field("bytes", &debug_bytes(&self.bytes)) | |
276 | .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) |
|
276 | .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) | |
277 | .field( |
|
277 | .field( | |
278 | "user", |
|
278 | "user", | |
279 | &debug_bytes( |
|
279 | &debug_bytes( | |
280 | &self.bytes[self.manifest_end + 1..self.user_end], |
|
280 | &self.bytes[self.manifest_end + 1..self.user_end], | |
281 | ), |
|
281 | ), | |
282 | ) |
|
282 | ) | |
283 | .field( |
|
283 | .field( | |
284 | "timestamp", |
|
284 | "timestamp", | |
285 | &debug_bytes( |
|
285 | &debug_bytes( | |
286 | &self.bytes[self.user_end + 1..self.timestamp_end], |
|
286 | &self.bytes[self.user_end + 1..self.timestamp_end], | |
287 | ), |
|
287 | ), | |
288 | ) |
|
288 | ) | |
289 | .field( |
|
289 | .field( | |
290 | "files", |
|
290 | "files", | |
291 | &debug_bytes( |
|
291 | &debug_bytes( | |
292 | &self.bytes[self.timestamp_end + 1..self.files_end], |
|
292 | &self.bytes[self.timestamp_end + 1..self.files_end], | |
293 | ), |
|
293 | ), | |
294 | ) |
|
294 | ) | |
295 | .field( |
|
295 | .field( | |
296 | "description", |
|
296 | "description", | |
297 | &debug_bytes(&self.bytes[self.files_end + 2..]), |
|
297 | &debug_bytes(&self.bytes[self.files_end + 2..]), | |
298 | ) |
|
298 | ) | |
299 | .finish() |
|
299 | .finish() | |
300 | } |
|
300 | } | |
301 | } |
|
301 | } | |
302 |
|
302 | |||
/// Render raw bytes as a printable `String`, escaping control and
/// non-ASCII bytes the way `std::ascii::escape_default` does.
fn debug_bytes(bytes: &[u8]) -> String {
    // Plain stdlib `collect` suffices here; no need for itertools'
    // `collect_vec`. The escaped output is pure ASCII, so the lossy
    // UTF-8 conversion never actually replaces anything.
    let escaped: Vec<u8> =
        bytes.iter().flat_map(|b| escape_default(*b)).collect();
    String::from_utf8_lossy(&escaped).to_string()
}
309 |
|
309 | |||
/// Parse the raw bytes of the timestamp line from a changelog entry.
///
/// According to the documentation in `hg help dates` and the
/// implementation in `changelog.py`, the format of the timestamp line
/// is `time tz extra\n` where:
///
/// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
///   as seconds since the UNIX epoch.
///
/// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
///   seconds WEST of UTC (so negative for timezones east of UTC, which is the
///   opposite of the sign in ISO 8601 timestamps).
///
/// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
///   and value in each pair separated by an ASCII colon. Keys are limited to
///   ASCII letters, digits, hyphens, and underscores, whereas values can be
///   arbitrary bytes.
fn parse_timestamp(
    timestamp_line: &[u8],
) -> Result<DateTime<FixedOffset>, HgError> {
    // At most 3 fields: time, tz, extras (extras may contain spaces).
    let mut parts = timestamp_line.splitn(3, |c| *c == b' ');

    let timestamp_bytes = parts
        .next()
        .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
    let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
        HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
    })?;
    // Try integer seconds first; each failure mode gets its own
    // corruption message.
    let timestamp_utc = timestamp_str
        .parse()
        .map_err(|e| {
            HgError::corrupted(format!("failed to parse timestamp: {e}"))
        })
        .and_then(|secs| {
            DateTime::from_timestamp(secs, 0).ok_or_else(|| {
                HgError::corrupted(format!(
                    "integer timestamp out of valid range: {secs}"
                ))
            })
        })
        // Attempt to parse the timestamp as a float if we can't parse
        // it as an int. It doesn't seem like float timestamps are actually
        // used in practice, but the Python code supports them.
        .or_else(|_| parse_float_timestamp(timestamp_str))?;

    let timezone_bytes = parts
        .next()
        .ok_or_else(|| HgError::corrupted("missing timezone"))?;
    let timezone_secs: i32 = str::from_utf8(timezone_bytes)
        .map_err(|e| {
            HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
        })?
        .parse()
        .map_err(|e| {
            HgError::corrupted(format!("timezone is not an integer: {e}"))
        })?;
    // `west_opt` matches Mercurial's "seconds west of UTC" convention
    // (opposite sign from ISO 8601).
    let timezone = FixedOffset::west_opt(timezone_secs)
        .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;

    Ok(timestamp_utc.with_timezone(&timezone))
}
374 |
|
371 | |||
/// Attempt to parse the given string as a floating-point timestamp, and
/// convert the result into a `chrono::DateTime<Utc>`.
fn parse_float_timestamp(
    timestamp_str: &str,
) -> Result<DateTime<Utc>, HgError> {
    let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
        HgError::corrupted(format!("failed to parse timestamp: {e}"))
    })?;

    // To construct a `DateTime` we'll need to convert the float
    // into signed integer seconds and unsigned integer nanoseconds.
    let mut secs = timestamp.trunc() as i64;
    let mut subsecs = timestamp.fract();

    // If the timestamp is negative, we need to express the fractional
    // component as positive nanoseconds since the previous second.
    if timestamp < 0.0 {
        secs -= 1;
        subsecs += 1.0;
    }

    // This cast should be safe because the fractional component is
    // by definition less than 1.0, so this value should not exceed
    // 1 billion, which is representable as an f64 without loss of
    // precision and should fit into a u32 without overflowing.
    //
    // (Any loss of precision in the fractional component will have
    // already happened at the time of initial parsing; in general,
    // f64s are insufficiently precise to provide nanosecond-level
    // precision with present-day timestamps.)
    let nsecs = (subsecs * 1_000_000_000.0) as u32;

    DateTime::from_timestamp(secs, nsecs).ok_or_else(|| {
        HgError::corrupted(format!(
            "float timestamp out of valid range: {timestamp}"
        ))
    })
}
413 |
|
410 | |||
414 | /// Decode changeset extra fields. |
|
411 | /// Decode changeset extra fields. | |
415 | /// |
|
412 | /// | |
416 | /// Extras are null-delimited key-value pairs where the key consists of ASCII |
|
413 | /// Extras are null-delimited key-value pairs where the key consists of ASCII | |
417 | /// alphanumeric characters plus hyphens and underscores, and the value can |
|
414 | /// alphanumeric characters plus hyphens and underscores, and the value can | |
418 | /// contain arbitrary bytes. |
|
415 | /// contain arbitrary bytes. | |
419 | fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> { |
|
416 | fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | |
420 | extra |
|
417 | extra | |
421 | .split(|c| *c == b'\0') |
|
418 | .split(|c| *c == b'\0') | |
422 | .map(|pair| { |
|
419 | .map(|pair| { | |
423 | let pair = unescape_extra(pair); |
|
420 | let pair = unescape_extra(pair); | |
424 | let mut iter = pair.splitn(2, |c| *c == b':'); |
|
421 | let mut iter = pair.splitn(2, |c| *c == b':'); | |
425 |
|
422 | |||
426 | let key_bytes = |
|
423 | let key_bytes = | |
427 | iter.next().filter(|k| !k.is_empty()).ok_or_else(|| { |
|
424 | iter.next().filter(|k| !k.is_empty()).ok_or_else(|| { | |
428 | HgError::corrupted("empty key in changeset extras") |
|
425 | HgError::corrupted("empty key in changeset extras") | |
429 | })?; |
|
426 | })?; | |
430 |
|
427 | |||
431 | let key = str::from_utf8(key_bytes) |
|
428 | let key = str::from_utf8(key_bytes) | |
432 | .ok() |
|
429 | .ok() | |
433 | .filter(|k| { |
|
430 | .filter(|k| { | |
434 | k.chars().all(|c| { |
|
431 | k.chars().all(|c| { | |
435 | c.is_ascii_alphanumeric() || c == '_' || c == '-' |
|
432 | c.is_ascii_alphanumeric() || c == '_' || c == '-' | |
436 | }) |
|
433 | }) | |
437 | }) |
|
434 | }) | |
438 | .ok_or_else(|| { |
|
435 | .ok_or_else(|| { | |
439 | let key = String::from_utf8_lossy(key_bytes); |
|
436 | let key = String::from_utf8_lossy(key_bytes); | |
440 | HgError::corrupted(format!( |
|
437 | HgError::corrupted(format!( | |
441 | "invalid key in changeset extras: {key}", |
|
438 | "invalid key in changeset extras: {key}", | |
442 | )) |
|
439 | )) | |
443 | })? |
|
440 | })? | |
444 | .to_string(); |
|
441 | .to_string(); | |
445 |
|
442 | |||
446 | let value = iter.next().map(Into::into).ok_or_else(|| { |
|
443 | let value = iter.next().map(Into::into).ok_or_else(|| { | |
447 | HgError::corrupted(format!( |
|
444 | HgError::corrupted(format!( | |
448 | "missing value for changeset extra: {key}" |
|
445 | "missing value for changeset extra: {key}" | |
449 | )) |
|
446 | )) | |
450 | })?; |
|
447 | })?; | |
451 |
|
448 | |||
452 | Ok((key, value)) |
|
449 | Ok((key, value)) | |
453 | }) |
|
450 | }) | |
454 | .collect() |
|
451 | .collect() | |
455 | } |
|
452 | } | |
456 |
|
453 | |||
457 | /// Parse the extra fields from a changeset's timestamp line. |
|
454 | /// Parse the extra fields from a changeset's timestamp line. | |
458 | fn parse_timestamp_line_extra( |
|
455 | fn parse_timestamp_line_extra( | |
459 | timestamp_line: &[u8], |
|
456 | timestamp_line: &[u8], | |
460 | ) -> Result<BTreeMap<String, Vec<u8>>, HgError> { |
|
457 | ) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | |
461 | Ok(timestamp_line |
|
458 | Ok(timestamp_line | |
462 | .splitn(3, |c| *c == b' ') |
|
459 | .splitn(3, |c| *c == b' ') | |
463 | .nth(2) |
|
460 | .nth(2) | |
464 | .map(decode_extra) |
|
461 | .map(decode_extra) | |
465 | .transpose()? |
|
462 | .transpose()? | |
466 | .unwrap_or_default()) |
|
463 | .unwrap_or_default()) | |
467 | } |
|
464 | } | |
468 |
|
465 | |||
469 | /// Decode Mercurial's escaping for changelog extras. |
|
466 | /// Decode Mercurial's escaping for changelog extras. | |
470 | /// |
|
467 | /// | |
471 | /// The `_string_escape` function in `changelog.py` only escapes 4 characters |
|
468 | /// The `_string_escape` function in `changelog.py` only escapes 4 characters | |
472 | /// (null, backslash, newline, and carriage return) so we only decode those. |
|
469 | /// (null, backslash, newline, and carriage return) so we only decode those. | |
473 | /// |
|
470 | /// | |
474 | /// The Python code also includes a workaround for decoding escaped nuls |
|
471 | /// The Python code also includes a workaround for decoding escaped nuls | |
475 | /// that are followed by an ASCII octal digit, since Python's built-in |
|
472 | /// that are followed by an ASCII octal digit, since Python's built-in | |
476 | /// `string_escape` codec will interpret that as an escaped octal byte value. |
|
473 | /// `string_escape` codec will interpret that as an escaped octal byte value. | |
477 | /// That workaround is omitted here since we don't support decoding octal. |
|
474 | /// That workaround is omitted here since we don't support decoding octal. | |
478 | fn unescape_extra(bytes: &[u8]) -> Vec<u8> { |
|
475 | fn unescape_extra(bytes: &[u8]) -> Vec<u8> { | |
479 | let mut output = Vec::with_capacity(bytes.len()); |
|
476 | let mut output = Vec::with_capacity(bytes.len()); | |
480 | let mut input = bytes.iter().copied(); |
|
477 | let mut input = bytes.iter().copied(); | |
481 |
|
478 | |||
482 | while let Some(c) = input.next() { |
|
479 | while let Some(c) = input.next() { | |
483 | if c != b'\\' { |
|
480 | if c != b'\\' { | |
484 | output.push(c); |
|
481 | output.push(c); | |
485 | continue; |
|
482 | continue; | |
486 | } |
|
483 | } | |
487 |
|
484 | |||
488 | match input.next() { |
|
485 | match input.next() { | |
489 | Some(b'0') => output.push(b'\0'), |
|
486 | Some(b'0') => output.push(b'\0'), | |
490 | Some(b'\\') => output.push(b'\\'), |
|
487 | Some(b'\\') => output.push(b'\\'), | |
491 | Some(b'n') => output.push(b'\n'), |
|
488 | Some(b'n') => output.push(b'\n'), | |
492 | Some(b'r') => output.push(b'\r'), |
|
489 | Some(b'r') => output.push(b'\r'), | |
493 | // The following cases should never occur in theory because any |
|
490 | // The following cases should never occur in theory because any | |
494 | // backslashes in the original input should have been escaped |
|
491 | // backslashes in the original input should have been escaped | |
495 | // with another backslash, so it should not be possible to |
|
492 | // with another backslash, so it should not be possible to | |
496 | // observe an escape sequence other than the 4 above. |
|
493 | // observe an escape sequence other than the 4 above. | |
497 | Some(c) => output.extend_from_slice(&[b'\\', c]), |
|
494 | Some(c) => output.extend_from_slice(&[b'\\', c]), | |
498 | None => output.push(b'\\'), |
|
495 | None => output.push(b'\\'), | |
499 | } |
|
496 | } | |
500 | } |
|
497 | } | |
501 |
|
498 | |||
502 | output |
|
499 | output | |
503 | } |
|
500 | } | |
504 |
|
501 | |||
505 | #[cfg(test)] |
|
502 | #[cfg(test)] | |
506 | mod tests { |
|
503 | mod tests { | |
507 | use super::*; |
|
504 | use super::*; | |
508 | use crate::vfs::VfsImpl; |
|
505 | use crate::vfs::VfsImpl; | |
509 | use crate::NULL_REVISION; |
|
506 | use crate::NULL_REVISION; | |
510 | use pretty_assertions::assert_eq; |
|
507 | use pretty_assertions::assert_eq; | |
511 |
|
508 | |||
512 | #[test] |
|
509 | #[test] | |
513 | fn test_create_changelogrevisiondata_invalid() { |
|
510 | fn test_create_changelogrevisiondata_invalid() { | |
514 | // Completely empty |
|
511 | // Completely empty | |
515 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); |
|
512 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); | |
516 | // No newline after manifest |
|
513 | // No newline after manifest | |
517 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); |
|
514 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); | |
518 | // No newline after user |
|
515 | // No newline after user | |
519 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err()); |
|
516 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err()); | |
520 | // No newline after timestamp |
|
517 | // No newline after timestamp | |
521 | assert!( |
|
518 | assert!( | |
522 | ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err() |
|
519 | ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err() | |
523 | ); |
|
520 | ); | |
524 | // Missing newline after files |
|
521 | // Missing newline after files | |
525 | assert!(ChangelogRevisionData::new(Cow::Borrowed( |
|
522 | assert!(ChangelogRevisionData::new(Cow::Borrowed( | |
526 | b"abcd\n\n0 0\nfile1\nfile2" |
|
523 | b"abcd\n\n0 0\nfile1\nfile2" | |
527 | )) |
|
524 | )) | |
528 | .is_err(),); |
|
525 | .is_err(),); | |
529 | // Only one newline after files |
|
526 | // Only one newline after files | |
530 | assert!(ChangelogRevisionData::new(Cow::Borrowed( |
|
527 | assert!(ChangelogRevisionData::new(Cow::Borrowed( | |
531 | b"abcd\n\n0 0\nfile1\nfile2\n" |
|
528 | b"abcd\n\n0 0\nfile1\nfile2\n" | |
532 | )) |
|
529 | )) | |
533 | .is_err(),); |
|
530 | .is_err(),); | |
534 | } |
|
531 | } | |
535 |
|
532 | |||
536 | #[test] |
|
533 | #[test] | |
537 | fn test_create_changelogrevisiondata() { |
|
534 | fn test_create_changelogrevisiondata() { | |
538 | let data = ChangelogRevisionData::new(Cow::Borrowed( |
|
535 | let data = ChangelogRevisionData::new(Cow::Borrowed( | |
539 | b"0123456789abcdef0123456789abcdef01234567 |
|
536 | b"0123456789abcdef0123456789abcdef01234567 | |
540 | Some One <someone@example.com> |
|
537 | Some One <someone@example.com> | |
541 | 0 0 |
|
538 | 0 0 | |
542 | file1 |
|
539 | file1 | |
543 | file2 |
|
540 | file2 | |
544 |
|
541 | |||
545 | some |
|
542 | some | |
546 | commit |
|
543 | commit | |
547 | message", |
|
544 | message", | |
548 | )) |
|
545 | )) | |
549 | .unwrap(); |
|
546 | .unwrap(); | |
550 | assert_eq!( |
|
547 | assert_eq!( | |
551 | data.manifest_node().unwrap(), |
|
548 | data.manifest_node().unwrap(), | |
552 | Node::from_hex("0123456789abcdef0123456789abcdef01234567") |
|
549 | Node::from_hex("0123456789abcdef0123456789abcdef01234567") | |
553 | .unwrap() |
|
550 | .unwrap() | |
554 | ); |
|
551 | ); | |
555 | assert_eq!(data.user(), b"Some One <someone@example.com>"); |
|
552 | assert_eq!(data.user(), b"Some One <someone@example.com>"); | |
556 | assert_eq!(data.timestamp_line(), b"0 0"); |
|
553 | assert_eq!(data.timestamp_line(), b"0 0"); | |
557 | assert_eq!( |
|
554 | assert_eq!( | |
558 | data.files().collect_vec(), |
|
555 | data.files().collect_vec(), | |
559 | vec![HgPath::new("file1"), HgPath::new("file2")] |
|
556 | vec![HgPath::new("file1"), HgPath::new("file2")] | |
560 | ); |
|
557 | ); | |
561 | assert_eq!(data.description(), b"some\ncommit\nmessage"); |
|
558 | assert_eq!(data.description(), b"some\ncommit\nmessage"); | |
562 | } |
|
559 | } | |
563 |
|
560 | |||
564 | #[test] |
|
561 | #[test] | |
565 | fn test_data_from_rev_null() -> Result<(), RevlogError> { |
|
562 | fn test_data_from_rev_null() -> Result<(), RevlogError> { | |
566 | // an empty revlog will be enough for this case |
|
563 | // an empty revlog will be enough for this case | |
567 | let temp = tempfile::tempdir().unwrap(); |
|
564 | let temp = tempfile::tempdir().unwrap(); | |
568 | let vfs = VfsImpl { |
|
565 | let vfs = VfsImpl { | |
569 | base: temp.path().to_owned(), |
|
566 | base: temp.path().to_owned(), | |
570 | }; |
|
567 | }; | |
571 | std::fs::write(temp.path().join("foo.i"), b"").unwrap(); |
|
568 | std::fs::write(temp.path().join("foo.i"), b"").unwrap(); | |
572 | let revlog = |
|
569 | let revlog = | |
573 | Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default()) |
|
570 | Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default()) | |
574 | .unwrap(); |
|
571 | .unwrap(); | |
575 |
|
572 | |||
576 | let changelog = Changelog { revlog }; |
|
573 | let changelog = Changelog { revlog }; | |
577 | assert_eq!( |
|
574 | assert_eq!( | |
578 | changelog.data_for_rev(NULL_REVISION.into())?, |
|
575 | changelog.data_for_rev(NULL_REVISION.into())?, | |
579 | ChangelogRevisionData::null() |
|
576 | ChangelogRevisionData::null() | |
580 | ); |
|
577 | ); | |
581 | // same with the intermediate entry object |
|
578 | // same with the intermediate entry object | |
582 | assert_eq!( |
|
579 | assert_eq!( | |
583 | changelog.entry_for_rev(NULL_REVISION.into())?.data()?, |
|
580 | changelog.entry_for_rev(NULL_REVISION.into())?.data()?, | |
584 | ChangelogRevisionData::null() |
|
581 | ChangelogRevisionData::null() | |
585 | ); |
|
582 | ); | |
586 | Ok(()) |
|
583 | Ok(()) | |
587 | } |
|
584 | } | |
588 |
|
585 | |||
589 | #[test] |
|
586 | #[test] | |
590 | fn test_empty_files_list() { |
|
587 | fn test_empty_files_list() { | |
591 | assert!(ChangelogRevisionData::null() |
|
588 | assert!(ChangelogRevisionData::null() | |
592 | .files() |
|
589 | .files() | |
593 | .collect_vec() |
|
590 | .collect_vec() | |
594 | .is_empty()); |
|
591 | .is_empty()); | |
595 | } |
|
592 | } | |
596 |
|
593 | |||
597 | #[test] |
|
594 | #[test] | |
598 | fn test_unescape_basic() { |
|
595 | fn test_unescape_basic() { | |
599 | // '\0', '\\', '\n', and '\r' are correctly unescaped. |
|
596 | // '\0', '\\', '\n', and '\r' are correctly unescaped. | |
600 | let expected = b"AAA\0BBB\\CCC\nDDD\rEEE"; |
|
597 | let expected = b"AAA\0BBB\\CCC\nDDD\rEEE"; | |
601 | let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE"; |
|
598 | let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE"; | |
602 | let unescaped = unescape_extra(escaped); |
|
599 | let unescaped = unescape_extra(escaped); | |
603 | assert_eq!(&expected[..], &unescaped[..]); |
|
600 | assert_eq!(&expected[..], &unescaped[..]); | |
604 | } |
|
601 | } | |
605 |
|
602 | |||
606 | #[test] |
|
603 | #[test] | |
607 | fn test_unescape_unsupported_sequence() { |
|
604 | fn test_unescape_unsupported_sequence() { | |
608 | // Other escape sequences are left unaltered. |
|
605 | // Other escape sequences are left unaltered. | |
609 | for c in 0u8..255 { |
|
606 | for c in 0u8..255 { | |
610 | match c { |
|
607 | match c { | |
611 | b'0' | b'\\' | b'n' | b'r' => continue, |
|
608 | b'0' | b'\\' | b'n' | b'r' => continue, | |
612 | c => { |
|
609 | c => { | |
613 | let expected = &[b'\\', c][..]; |
|
610 | let expected = &[b'\\', c][..]; | |
614 | let unescaped = unescape_extra(expected); |
|
611 | let unescaped = unescape_extra(expected); | |
615 | assert_eq!(expected, &unescaped[..]); |
|
612 | assert_eq!(expected, &unescaped[..]); | |
616 | } |
|
613 | } | |
617 | } |
|
614 | } | |
618 | } |
|
615 | } | |
619 | } |
|
616 | } | |
620 |
|
617 | |||
621 | #[test] |
|
618 | #[test] | |
622 | fn test_unescape_trailing_backslash() { |
|
619 | fn test_unescape_trailing_backslash() { | |
623 | // Trailing backslashes are OK. |
|
620 | // Trailing backslashes are OK. | |
624 | let expected = br"hi\"; |
|
621 | let expected = br"hi\"; | |
625 | let unescaped = unescape_extra(expected); |
|
622 | let unescaped = unescape_extra(expected); | |
626 | assert_eq!(&expected[..], &unescaped[..]); |
|
623 | assert_eq!(&expected[..], &unescaped[..]); | |
627 | } |
|
624 | } | |
628 |
|
625 | |||
629 | #[test] |
|
626 | #[test] | |
630 | fn test_unescape_nul_followed_by_octal() { |
|
627 | fn test_unescape_nul_followed_by_octal() { | |
631 | // Escaped NUL chars followed by octal digits are decoded correctly. |
|
628 | // Escaped NUL chars followed by octal digits are decoded correctly. | |
632 | let expected = b"\x0012"; |
|
629 | let expected = b"\x0012"; | |
633 | let escaped = br"\012"; |
|
630 | let escaped = br"\012"; | |
634 | let unescaped = unescape_extra(escaped); |
|
631 | let unescaped = unescape_extra(escaped); | |
635 | assert_eq!(&expected[..], &unescaped[..]); |
|
632 | assert_eq!(&expected[..], &unescaped[..]); | |
636 | } |
|
633 | } | |
637 |
|
634 | |||
638 | #[test] |
|
635 | #[test] | |
639 | fn test_parse_float_timestamp() { |
|
636 | fn test_parse_float_timestamp() { | |
640 | let test_cases = [ |
|
637 | let test_cases = [ | |
641 | // Zero should map to the UNIX epoch. |
|
638 | // Zero should map to the UNIX epoch. | |
642 | ("0.0", "1970-01-01 00:00:00 UTC"), |
|
639 | ("0.0", "1970-01-01 00:00:00 UTC"), | |
643 | // Negative zero should be the same as positive zero. |
|
640 | // Negative zero should be the same as positive zero. | |
644 | ("-0.0", "1970-01-01 00:00:00 UTC"), |
|
641 | ("-0.0", "1970-01-01 00:00:00 UTC"), | |
645 | // Values without fractional components should work like integers. |
|
642 | // Values without fractional components should work like integers. | |
646 | // (Assuming the timestamp is within the limits of f64 precision.) |
|
643 | // (Assuming the timestamp is within the limits of f64 precision.) | |
647 | ("1115154970.0", "2005-05-03 21:16:10 UTC"), |
|
644 | ("1115154970.0", "2005-05-03 21:16:10 UTC"), | |
648 | // We expect some loss of precision in the fractional component |
|
645 | // We expect some loss of precision in the fractional component | |
649 | // when parsing arbitrary floating-point values. |
|
646 | // when parsing arbitrary floating-point values. | |
650 | ("1115154970.123456789", "2005-05-03 21:16:10.123456716 UTC"), |
|
647 | ("1115154970.123456789", "2005-05-03 21:16:10.123456716 UTC"), | |
651 | // But representable f64 values should parse losslessly. |
|
648 | // But representable f64 values should parse losslessly. | |
652 | ("1115154970.123456716", "2005-05-03 21:16:10.123456716 UTC"), |
|
649 | ("1115154970.123456716", "2005-05-03 21:16:10.123456716 UTC"), | |
653 | // Negative fractional components are subtracted from the epoch. |
|
650 | // Negative fractional components are subtracted from the epoch. | |
654 | ("-1.333", "1969-12-31 23:59:58.667 UTC"), |
|
651 | ("-1.333", "1969-12-31 23:59:58.667 UTC"), | |
655 | ]; |
|
652 | ]; | |
656 |
|
653 | |||
657 | for (input, expected) in test_cases { |
|
654 | for (input, expected) in test_cases { | |
658 | let res = parse_float_timestamp(input).unwrap().to_string(); |
|
655 | let res = parse_float_timestamp(input).unwrap().to_string(); | |
659 | assert_eq!(res, expected); |
|
656 | assert_eq!(res, expected); | |
660 | } |
|
657 | } | |
661 | } |
|
658 | } | |
662 |
|
659 | |||
663 | fn escape_extra(bytes: &[u8]) -> Vec<u8> { |
|
660 | fn escape_extra(bytes: &[u8]) -> Vec<u8> { | |
664 | let mut output = Vec::with_capacity(bytes.len()); |
|
661 | let mut output = Vec::with_capacity(bytes.len()); | |
665 |
|
662 | |||
666 | for c in bytes.iter().copied() { |
|
663 | for c in bytes.iter().copied() { | |
667 | output.extend_from_slice(match c { |
|
664 | output.extend_from_slice(match c { | |
668 | b'\0' => &b"\\0"[..], |
|
665 | b'\0' => &b"\\0"[..], | |
669 | b'\\' => &b"\\\\"[..], |
|
666 | b'\\' => &b"\\\\"[..], | |
670 | b'\n' => &b"\\n"[..], |
|
667 | b'\n' => &b"\\n"[..], | |
671 | b'\r' => &b"\\r"[..], |
|
668 | b'\r' => &b"\\r"[..], | |
672 | _ => { |
|
669 | _ => { | |
673 | output.push(c); |
|
670 | output.push(c); | |
674 | continue; |
|
671 | continue; | |
675 | } |
|
672 | } | |
676 | }); |
|
673 | }); | |
677 | } |
|
674 | } | |
678 |
|
675 | |||
679 | output |
|
676 | output | |
680 | } |
|
677 | } | |
681 |
|
678 | |||
682 | fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8> |
|
679 | fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8> | |
683 | where |
|
680 | where | |
684 | K: AsRef<[u8]>, |
|
681 | K: AsRef<[u8]>, | |
685 | V: AsRef<[u8]>, |
|
682 | V: AsRef<[u8]>, | |
686 | { |
|
683 | { | |
687 | let extras = pairs.into_iter().map(|(k, v)| { |
|
684 | let extras = pairs.into_iter().map(|(k, v)| { | |
688 | escape_extra(&[k.as_ref(), b":", v.as_ref()].concat()) |
|
685 | escape_extra(&[k.as_ref(), b":", v.as_ref()].concat()) | |
689 | }); |
|
686 | }); | |
690 | // Use fully-qualified syntax to avoid a future naming conflict with |
|
687 | // Use fully-qualified syntax to avoid a future naming conflict with | |
691 | // the standard library: https://github.com/rust-lang/rust/issues/79524 |
|
688 | // the standard library: https://github.com/rust-lang/rust/issues/79524 | |
692 | Itertools::intersperse(extras, b"\0".to_vec()).concat() |
|
689 | Itertools::intersperse(extras, b"\0".to_vec()).concat() | |
693 | } |
|
690 | } | |
694 |
|
691 | |||
695 | #[test] |
|
692 | #[test] | |
696 | fn test_decode_extra() { |
|
693 | fn test_decode_extra() { | |
697 | let extra = [ |
|
694 | let extra = [ | |
698 | ("branch".into(), b"default".to_vec()), |
|
695 | ("branch".into(), b"default".to_vec()), | |
699 | ("key-with-hyphens".into(), b"value1".to_vec()), |
|
696 | ("key-with-hyphens".into(), b"value1".to_vec()), | |
700 | ("key_with_underscores".into(), b"value2".to_vec()), |
|
697 | ("key_with_underscores".into(), b"value2".to_vec()), | |
701 | ("empty-value".into(), b"".to_vec()), |
|
698 | ("empty-value".into(), b"".to_vec()), | |
702 | ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()), |
|
699 | ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()), | |
703 | ] |
|
700 | ] | |
704 | .into_iter() |
|
701 | .into_iter() | |
705 | .collect::<BTreeMap<String, Vec<u8>>>(); |
|
702 | .collect::<BTreeMap<String, Vec<u8>>>(); | |
706 |
|
703 | |||
707 | let encoded = encode_extra(&extra); |
|
704 | let encoded = encode_extra(&extra); | |
708 | let decoded = decode_extra(&encoded).unwrap(); |
|
705 | let decoded = decode_extra(&encoded).unwrap(); | |
709 |
|
706 | |||
710 | assert_eq!(extra, decoded); |
|
707 | assert_eq!(extra, decoded); | |
711 | } |
|
708 | } | |
712 |
|
709 | |||
713 | #[test] |
|
710 | #[test] | |
714 | fn test_corrupt_extra() { |
|
711 | fn test_corrupt_extra() { | |
715 | let test_cases = [ |
|
712 | let test_cases = [ | |
716 | (&b""[..], "empty input"), |
|
713 | (&b""[..], "empty input"), | |
717 | (&b"\0"[..], "unexpected null byte"), |
|
714 | (&b"\0"[..], "unexpected null byte"), | |
718 | (&b":empty-key"[..], "empty key"), |
|
715 | (&b":empty-key"[..], "empty key"), | |
719 | (&b"\0leading-null:"[..], "leading null"), |
|
716 | (&b"\0leading-null:"[..], "leading null"), | |
720 | (&b"trailing-null:\0"[..], "trailing null"), |
|
717 | (&b"trailing-null:\0"[..], "trailing null"), | |
721 | (&b"missing-value"[..], "missing value"), |
|
718 | (&b"missing-value"[..], "missing value"), | |
722 | (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"), |
|
719 | (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"), | |
723 | (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"), |
|
720 | (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"), | |
724 | ]; |
|
721 | ]; | |
725 |
|
722 | |||
726 | for (extra, msg) in test_cases { |
|
723 | for (extra, msg) in test_cases { | |
727 | assert!( |
|
724 | assert!( | |
728 | decode_extra(extra).is_err(), |
|
725 | decode_extra(extra).is_err(), | |
729 | "corrupt extra should have failed to parse: {}", |
|
726 | "corrupt extra should have failed to parse: {}", | |
730 | msg |
|
727 | msg | |
731 | ); |
|
728 | ); | |
732 | } |
|
729 | } | |
733 | } |
|
730 | } | |
734 |
|
731 | |||
735 | #[test] |
|
732 | #[test] | |
736 | fn test_parse_timestamp_line() { |
|
733 | fn test_parse_timestamp_line() { | |
737 | let extra = [ |
|
734 | let extra = [ | |
738 | ("branch".into(), b"default".to_vec()), |
|
735 | ("branch".into(), b"default".to_vec()), | |
739 | ("key-with-hyphens".into(), b"value1".to_vec()), |
|
736 | ("key-with-hyphens".into(), b"value1".to_vec()), | |
740 | ("key_with_underscores".into(), b"value2".to_vec()), |
|
737 | ("key_with_underscores".into(), b"value2".to_vec()), | |
741 | ("empty-value".into(), b"".to_vec()), |
|
738 | ("empty-value".into(), b"".to_vec()), | |
742 | ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()), |
|
739 | ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()), | |
743 | ] |
|
740 | ] | |
744 | .into_iter() |
|
741 | .into_iter() | |
745 | .collect::<BTreeMap<String, Vec<u8>>>(); |
|
742 | .collect::<BTreeMap<String, Vec<u8>>>(); | |
746 |
|
743 | |||
747 | let mut line: Vec<u8> = b"1115154970 28800 ".to_vec(); |
|
744 | let mut line: Vec<u8> = b"1115154970 28800 ".to_vec(); | |
748 | line.extend_from_slice(&encode_extra(&extra)); |
|
745 | line.extend_from_slice(&encode_extra(&extra)); | |
749 |
|
746 | |||
750 | let timestamp = parse_timestamp(&line).unwrap(); |
|
747 | let timestamp = parse_timestamp(&line).unwrap(); | |
751 | assert_eq!(×tamp.to_rfc3339(), "2005-05-03T13:16:10-08:00"); |
|
748 | assert_eq!(×tamp.to_rfc3339(), "2005-05-03T13:16:10-08:00"); | |
752 |
|
749 | |||
753 | let parsed_extra = parse_timestamp_line_extra(&line).unwrap(); |
|
750 | let parsed_extra = parse_timestamp_line_extra(&line).unwrap(); | |
754 | assert_eq!(extra, parsed_extra); |
|
751 | assert_eq!(extra, parsed_extra); | |
755 | } |
|
752 | } | |
756 | } |
|
753 | } |
General Comments 0
You need to be logged in to leave comments.
Login now