Show More
@@ -1,764 +1,767 | |||||
1 | use std::ascii::escape_default; |
|
1 | use std::ascii::escape_default; | |
2 | use std::borrow::Cow; |
|
2 | use std::borrow::Cow; | |
3 | use std::collections::BTreeMap; |
|
3 | use std::collections::BTreeMap; | |
4 | use std::fmt::{Debug, Formatter}; |
|
4 | use std::fmt::{Debug, Formatter}; | |
5 | use std::{iter, str}; |
|
5 | use std::{iter, str}; | |
6 |
|
6 | |||
7 |
use chrono::{DateTime, FixedOffset, |
|
7 | use chrono::{DateTime, FixedOffset, Utc}; | |
8 | use itertools::{Either, Itertools}; |
|
8 | use itertools::{Either, Itertools}; | |
9 |
|
9 | |||
10 | use crate::errors::HgError; |
|
10 | use crate::errors::HgError; | |
11 | use crate::revlog::Index; |
|
11 | use crate::revlog::Index; | |
12 | use crate::revlog::Revision; |
|
12 | use crate::revlog::Revision; | |
13 | use crate::revlog::{Node, NodePrefix}; |
|
13 | use crate::revlog::{Node, NodePrefix}; | |
14 | use crate::revlog::{Revlog, RevlogEntry, RevlogError}; |
|
14 | use crate::revlog::{Revlog, RevlogEntry, RevlogError}; | |
15 | use crate::utils::hg_path::HgPath; |
|
15 | use crate::utils::hg_path::HgPath; | |
16 | use crate::vfs::VfsImpl; |
|
16 | use crate::vfs::VfsImpl; | |
17 | use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision}; |
|
17 | use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision}; | |
18 |
|
18 | |||
/// A specialized `Revlog` to work with changelog data format.
pub struct Changelog {
    /// The generic `revlog` format.
    // All changelog-specific behavior is layered on top of this field;
    // `pub(crate)` lets sibling modules reach the raw revlog when needed.
    pub(crate) revlog: Revlog,
}
24 |
|
24 | |||
25 | impl Changelog { |
|
25 | impl Changelog { | |
26 | /// Open the `changelog` of a repository given by its root. |
|
26 | /// Open the `changelog` of a repository given by its root. | |
27 | pub fn open( |
|
27 | pub fn open( | |
28 | store_vfs: &VfsImpl, |
|
28 | store_vfs: &VfsImpl, | |
29 | options: RevlogOpenOptions, |
|
29 | options: RevlogOpenOptions, | |
30 | ) -> Result<Self, HgError> { |
|
30 | ) -> Result<Self, HgError> { | |
31 | let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?; |
|
31 | let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?; | |
32 | Ok(Self { revlog }) |
|
32 | Ok(Self { revlog }) | |
33 | } |
|
33 | } | |
34 |
|
34 | |||
35 | /// Return the `ChangelogRevisionData` for the given node ID. |
|
35 | /// Return the `ChangelogRevisionData` for the given node ID. | |
36 | pub fn data_for_node( |
|
36 | pub fn data_for_node( | |
37 | &self, |
|
37 | &self, | |
38 | node: NodePrefix, |
|
38 | node: NodePrefix, | |
39 | ) -> Result<ChangelogRevisionData, RevlogError> { |
|
39 | ) -> Result<ChangelogRevisionData, RevlogError> { | |
40 | let rev = self.revlog.rev_from_node(node)?; |
|
40 | let rev = self.revlog.rev_from_node(node)?; | |
41 | self.entry_for_checked_rev(rev)?.data() |
|
41 | self.entry_for_checked_rev(rev)?.data() | |
42 | } |
|
42 | } | |
43 |
|
43 | |||
44 | /// Return the [`ChangelogEntry`] for the given revision number. |
|
44 | /// Return the [`ChangelogEntry`] for the given revision number. | |
45 | pub fn entry_for_rev( |
|
45 | pub fn entry_for_rev( | |
46 | &self, |
|
46 | &self, | |
47 | rev: UncheckedRevision, |
|
47 | rev: UncheckedRevision, | |
48 | ) -> Result<ChangelogEntry, RevlogError> { |
|
48 | ) -> Result<ChangelogEntry, RevlogError> { | |
49 | let revlog_entry = self.revlog.get_entry(rev)?; |
|
49 | let revlog_entry = self.revlog.get_entry(rev)?; | |
50 | Ok(ChangelogEntry { revlog_entry }) |
|
50 | Ok(ChangelogEntry { revlog_entry }) | |
51 | } |
|
51 | } | |
52 |
|
52 | |||
53 | /// Same as [`Self::entry_for_rev`] for checked revisions. |
|
53 | /// Same as [`Self::entry_for_rev`] for checked revisions. | |
54 | fn entry_for_checked_rev( |
|
54 | fn entry_for_checked_rev( | |
55 | &self, |
|
55 | &self, | |
56 | rev: Revision, |
|
56 | rev: Revision, | |
57 | ) -> Result<ChangelogEntry, RevlogError> { |
|
57 | ) -> Result<ChangelogEntry, RevlogError> { | |
58 | let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?; |
|
58 | let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?; | |
59 | Ok(ChangelogEntry { revlog_entry }) |
|
59 | Ok(ChangelogEntry { revlog_entry }) | |
60 | } |
|
60 | } | |
61 |
|
61 | |||
62 | /// Return the [`ChangelogRevisionData`] for the given revision number. |
|
62 | /// Return the [`ChangelogRevisionData`] for the given revision number. | |
63 | /// |
|
63 | /// | |
64 | /// This is a useful shortcut in case the caller does not need the |
|
64 | /// This is a useful shortcut in case the caller does not need the | |
65 | /// generic revlog information (parents, hashes etc). Otherwise |
|
65 | /// generic revlog information (parents, hashes etc). Otherwise | |
66 | /// consider taking a [`ChangelogEntry`] with |
|
66 | /// consider taking a [`ChangelogEntry`] with | |
67 | /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. |
|
67 | /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. | |
68 | pub fn data_for_rev( |
|
68 | pub fn data_for_rev( | |
69 | &self, |
|
69 | &self, | |
70 | rev: UncheckedRevision, |
|
70 | rev: UncheckedRevision, | |
71 | ) -> Result<ChangelogRevisionData, RevlogError> { |
|
71 | ) -> Result<ChangelogRevisionData, RevlogError> { | |
72 | self.entry_for_rev(rev)?.data() |
|
72 | self.entry_for_rev(rev)?.data() | |
73 | } |
|
73 | } | |
74 |
|
74 | |||
75 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { |
|
75 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { | |
76 | self.revlog.node_from_rev(rev) |
|
76 | self.revlog.node_from_rev(rev) | |
77 | } |
|
77 | } | |
78 |
|
78 | |||
79 | pub fn rev_from_node( |
|
79 | pub fn rev_from_node( | |
80 | &self, |
|
80 | &self, | |
81 | node: NodePrefix, |
|
81 | node: NodePrefix, | |
82 | ) -> Result<Revision, RevlogError> { |
|
82 | ) -> Result<Revision, RevlogError> { | |
83 | self.revlog.rev_from_node(node) |
|
83 | self.revlog.rev_from_node(node) | |
84 | } |
|
84 | } | |
85 |
|
85 | |||
86 | pub fn get_index(&self) -> &Index { |
|
86 | pub fn get_index(&self) -> &Index { | |
87 | &self.revlog.index |
|
87 | &self.revlog.index | |
88 | } |
|
88 | } | |
89 | } |
|
89 | } | |
90 |
|
90 | |||
impl Graph for Changelog {
    // Parent lookup is delegated verbatim to the underlying revlog.
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        self.revlog.parents(rev)
    }
}
96 |
|
96 | |||
/// A specialized `RevlogEntry` for `changelog` data format
///
/// This is a `RevlogEntry` with the added semantics that the associated
/// data should meet the requirements for `changelog`, materialized by
/// the fact that `data()` constructs a `ChangelogRevisionData`.
/// In case that promise would be broken, the `data` method returns an error.
#[derive(Clone)]
pub struct ChangelogEntry<'changelog> {
    /// Same data, as a generic `RevlogEntry`.
    // Borrows from the changelog for the `'changelog` lifetime.
    pub(crate) revlog_entry: RevlogEntry<'changelog>,
}
108 |
|
108 | |||
109 | impl<'changelog> ChangelogEntry<'changelog> { |
|
109 | impl<'changelog> ChangelogEntry<'changelog> { | |
110 | pub fn data<'a>( |
|
110 | pub fn data<'a>( | |
111 | &'a self, |
|
111 | &'a self, | |
112 | ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> { |
|
112 | ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> { | |
113 | let bytes = self.revlog_entry.data()?; |
|
113 | let bytes = self.revlog_entry.data()?; | |
114 | if bytes.is_empty() { |
|
114 | if bytes.is_empty() { | |
115 | Ok(ChangelogRevisionData::null()) |
|
115 | Ok(ChangelogRevisionData::null()) | |
116 | } else { |
|
116 | } else { | |
117 | Ok(ChangelogRevisionData::new(bytes).map_err(|err| { |
|
117 | Ok(ChangelogRevisionData::new(bytes).map_err(|err| { | |
118 | RevlogError::Other(HgError::CorruptedRepository(format!( |
|
118 | RevlogError::Other(HgError::CorruptedRepository(format!( | |
119 | "Invalid changelog data for revision {}: {:?}", |
|
119 | "Invalid changelog data for revision {}: {:?}", | |
120 | self.revlog_entry.revision(), |
|
120 | self.revlog_entry.revision(), | |
121 | err |
|
121 | err | |
122 | ))) |
|
122 | ))) | |
123 | })?) |
|
123 | })?) | |
124 | } |
|
124 | } | |
125 | } |
|
125 | } | |
126 |
|
126 | |||
127 | /// Obtain a reference to the underlying `RevlogEntry`. |
|
127 | /// Obtain a reference to the underlying `RevlogEntry`. | |
128 | /// |
|
128 | /// | |
129 | /// This allows the caller to access the information that is common |
|
129 | /// This allows the caller to access the information that is common | |
130 | /// to all revlog entries: revision number, node id, parent revisions etc. |
|
130 | /// to all revlog entries: revision number, node id, parent revisions etc. | |
131 | pub fn as_revlog_entry(&self) -> &RevlogEntry { |
|
131 | pub fn as_revlog_entry(&self) -> &RevlogEntry { | |
132 | &self.revlog_entry |
|
132 | &self.revlog_entry | |
133 | } |
|
133 | } | |
134 |
|
134 | |||
135 | pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { |
|
135 | pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { | |
136 | Ok(self |
|
136 | Ok(self | |
137 | .revlog_entry |
|
137 | .revlog_entry | |
138 | .p1_entry()? |
|
138 | .p1_entry()? | |
139 | .map(|revlog_entry| Self { revlog_entry })) |
|
139 | .map(|revlog_entry| Self { revlog_entry })) | |
140 | } |
|
140 | } | |
141 |
|
141 | |||
142 | pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { |
|
142 | pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { | |
143 | Ok(self |
|
143 | Ok(self | |
144 | .revlog_entry |
|
144 | .revlog_entry | |
145 | .p2_entry()? |
|
145 | .p2_entry()? | |
146 | .map(|revlog_entry| Self { revlog_entry })) |
|
146 | .map(|revlog_entry| Self { revlog_entry })) | |
147 | } |
|
147 | } | |
148 | } |
|
148 | } | |
149 |
|
149 | |||
/// `Changelog` entry which knows how to interpret the `changelog` data bytes.
// All `*_end` fields are byte offsets into `bytes`, computed once by
// `new()` so the accessor methods can slice without re-parsing.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// The data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// The end offset for the hex manifest (not including the newline)
    manifest_end: usize,
    /// The end offset for the user+email (not including the newline)
    user_end: usize,
    /// The end offset for the timestamp+timezone+extras (not including the
    /// newline)
    timestamp_end: usize,
    /// The end offset for the file list (not including the newline)
    files_end: usize,
}
165 |
|
165 | |||
166 | impl<'changelog> ChangelogRevisionData<'changelog> { |
|
166 | impl<'changelog> ChangelogRevisionData<'changelog> { | |
167 | fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> { |
|
167 | fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> { | |
168 | let mut line_iter = bytes.split(|b| b == &b'\n'); |
|
168 | let mut line_iter = bytes.split(|b| b == &b'\n'); | |
169 | let manifest_end = line_iter |
|
169 | let manifest_end = line_iter | |
170 | .next() |
|
170 | .next() | |
171 | .expect("Empty iterator from split()?") |
|
171 | .expect("Empty iterator from split()?") | |
172 | .len(); |
|
172 | .len(); | |
173 | let user_slice = line_iter.next().ok_or_else(|| { |
|
173 | let user_slice = line_iter.next().ok_or_else(|| { | |
174 | HgError::corrupted("Changeset data truncated after manifest line") |
|
174 | HgError::corrupted("Changeset data truncated after manifest line") | |
175 | })?; |
|
175 | })?; | |
176 | let user_end = manifest_end + 1 + user_slice.len(); |
|
176 | let user_end = manifest_end + 1 + user_slice.len(); | |
177 | let timestamp_slice = line_iter.next().ok_or_else(|| { |
|
177 | let timestamp_slice = line_iter.next().ok_or_else(|| { | |
178 | HgError::corrupted("Changeset data truncated after user line") |
|
178 | HgError::corrupted("Changeset data truncated after user line") | |
179 | })?; |
|
179 | })?; | |
180 | let timestamp_end = user_end + 1 + timestamp_slice.len(); |
|
180 | let timestamp_end = user_end + 1 + timestamp_slice.len(); | |
181 | let mut files_end = timestamp_end + 1; |
|
181 | let mut files_end = timestamp_end + 1; | |
182 | loop { |
|
182 | loop { | |
183 | let line = line_iter.next().ok_or_else(|| { |
|
183 | let line = line_iter.next().ok_or_else(|| { | |
184 | HgError::corrupted("Changeset data truncated in files list") |
|
184 | HgError::corrupted("Changeset data truncated in files list") | |
185 | })?; |
|
185 | })?; | |
186 | if line.is_empty() { |
|
186 | if line.is_empty() { | |
187 | if files_end == bytes.len() { |
|
187 | if files_end == bytes.len() { | |
188 | // The list of files ended with a single newline (there |
|
188 | // The list of files ended with a single newline (there | |
189 | // should be two) |
|
189 | // should be two) | |
190 | return Err(HgError::corrupted( |
|
190 | return Err(HgError::corrupted( | |
191 | "Changeset data truncated after files list", |
|
191 | "Changeset data truncated after files list", | |
192 | )); |
|
192 | )); | |
193 | } |
|
193 | } | |
194 | files_end -= 1; |
|
194 | files_end -= 1; | |
195 | break; |
|
195 | break; | |
196 | } |
|
196 | } | |
197 | files_end += line.len() + 1; |
|
197 | files_end += line.len() + 1; | |
198 | } |
|
198 | } | |
199 |
|
199 | |||
200 | Ok(Self { |
|
200 | Ok(Self { | |
201 | bytes, |
|
201 | bytes, | |
202 | manifest_end, |
|
202 | manifest_end, | |
203 | user_end, |
|
203 | user_end, | |
204 | timestamp_end, |
|
204 | timestamp_end, | |
205 | files_end, |
|
205 | files_end, | |
206 | }) |
|
206 | }) | |
207 | } |
|
207 | } | |
208 |
|
208 | |||
209 | fn null() -> Self { |
|
209 | fn null() -> Self { | |
210 | Self::new(Cow::Borrowed( |
|
210 | Self::new(Cow::Borrowed( | |
211 | b"0000000000000000000000000000000000000000\n\n0 0\n\n", |
|
211 | b"0000000000000000000000000000000000000000\n\n0 0\n\n", | |
212 | )) |
|
212 | )) | |
213 | .unwrap() |
|
213 | .unwrap() | |
214 | } |
|
214 | } | |
215 |
|
215 | |||
216 | /// Return an iterator over the lines of the entry. |
|
216 | /// Return an iterator over the lines of the entry. | |
217 | pub fn lines(&self) -> impl Iterator<Item = &[u8]> { |
|
217 | pub fn lines(&self) -> impl Iterator<Item = &[u8]> { | |
218 | self.bytes.split(|b| b == &b'\n') |
|
218 | self.bytes.split(|b| b == &b'\n') | |
219 | } |
|
219 | } | |
220 |
|
220 | |||
221 | /// Return the node id of the `manifest` referenced by this `changelog` |
|
221 | /// Return the node id of the `manifest` referenced by this `changelog` | |
222 | /// entry. |
|
222 | /// entry. | |
223 | pub fn manifest_node(&self) -> Result<Node, HgError> { |
|
223 | pub fn manifest_node(&self) -> Result<Node, HgError> { | |
224 | let manifest_node_hex = &self.bytes[..self.manifest_end]; |
|
224 | let manifest_node_hex = &self.bytes[..self.manifest_end]; | |
225 | Node::from_hex_for_repo(manifest_node_hex) |
|
225 | Node::from_hex_for_repo(manifest_node_hex) | |
226 | } |
|
226 | } | |
227 |
|
227 | |||
228 | /// The full user string (usually a name followed by an email enclosed in |
|
228 | /// The full user string (usually a name followed by an email enclosed in | |
229 | /// angle brackets) |
|
229 | /// angle brackets) | |
230 | pub fn user(&self) -> &[u8] { |
|
230 | pub fn user(&self) -> &[u8] { | |
231 | &self.bytes[self.manifest_end + 1..self.user_end] |
|
231 | &self.bytes[self.manifest_end + 1..self.user_end] | |
232 | } |
|
232 | } | |
233 |
|
233 | |||
234 | /// The full timestamp line (timestamp in seconds, offset in seconds, and |
|
234 | /// The full timestamp line (timestamp in seconds, offset in seconds, and | |
235 | /// possibly extras) |
|
235 | /// possibly extras) | |
236 | // TODO: We should expose this in a more useful way |
|
236 | // TODO: We should expose this in a more useful way | |
237 | pub fn timestamp_line(&self) -> &[u8] { |
|
237 | pub fn timestamp_line(&self) -> &[u8] { | |
238 | &self.bytes[self.user_end + 1..self.timestamp_end] |
|
238 | &self.bytes[self.user_end + 1..self.timestamp_end] | |
239 | } |
|
239 | } | |
240 |
|
240 | |||
241 | /// Parsed timestamp. |
|
241 | /// Parsed timestamp. | |
242 | pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> { |
|
242 | pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> { | |
243 | parse_timestamp(self.timestamp_line()) |
|
243 | parse_timestamp(self.timestamp_line()) | |
244 | } |
|
244 | } | |
245 |
|
245 | |||
246 | /// Optional commit extras. |
|
246 | /// Optional commit extras. | |
247 | pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> { |
|
247 | pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | |
248 | parse_timestamp_line_extra(self.timestamp_line()) |
|
248 | parse_timestamp_line_extra(self.timestamp_line()) | |
249 | } |
|
249 | } | |
250 |
|
250 | |||
251 | /// The files changed in this revision. |
|
251 | /// The files changed in this revision. | |
252 | pub fn files(&self) -> impl Iterator<Item = &HgPath> { |
|
252 | pub fn files(&self) -> impl Iterator<Item = &HgPath> { | |
253 | if self.timestamp_end == self.files_end { |
|
253 | if self.timestamp_end == self.files_end { | |
254 | Either::Left(iter::empty()) |
|
254 | Either::Left(iter::empty()) | |
255 | } else { |
|
255 | } else { | |
256 | Either::Right( |
|
256 | Either::Right( | |
257 | self.bytes[self.timestamp_end + 1..self.files_end] |
|
257 | self.bytes[self.timestamp_end + 1..self.files_end] | |
258 | .split(|b| b == &b'\n') |
|
258 | .split(|b| b == &b'\n') | |
259 | .map(HgPath::new), |
|
259 | .map(HgPath::new), | |
260 | ) |
|
260 | ) | |
261 | } |
|
261 | } | |
262 | } |
|
262 | } | |
263 |
|
263 | |||
264 | /// The change description. |
|
264 | /// The change description. | |
265 | pub fn description(&self) -> &[u8] { |
|
265 | pub fn description(&self) -> &[u8] { | |
266 | &self.bytes[self.files_end + 2..] |
|
266 | &self.bytes[self.files_end + 2..] | |
267 | } |
|
267 | } | |
268 | } |
|
268 | } | |
269 |
|
269 | |||
270 | impl Debug for ChangelogRevisionData<'_> { |
|
270 | impl Debug for ChangelogRevisionData<'_> { | |
271 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
|
271 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | |
272 | f.debug_struct("ChangelogRevisionData") |
|
272 | f.debug_struct("ChangelogRevisionData") | |
273 | .field("bytes", &debug_bytes(&self.bytes)) |
|
273 | .field("bytes", &debug_bytes(&self.bytes)) | |
274 | .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) |
|
274 | .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) | |
275 | .field( |
|
275 | .field( | |
276 | "user", |
|
276 | "user", | |
277 | &debug_bytes( |
|
277 | &debug_bytes( | |
278 | &self.bytes[self.manifest_end + 1..self.user_end], |
|
278 | &self.bytes[self.manifest_end + 1..self.user_end], | |
279 | ), |
|
279 | ), | |
280 | ) |
|
280 | ) | |
281 | .field( |
|
281 | .field( | |
282 | "timestamp", |
|
282 | "timestamp", | |
283 | &debug_bytes( |
|
283 | &debug_bytes( | |
284 | &self.bytes[self.user_end + 1..self.timestamp_end], |
|
284 | &self.bytes[self.user_end + 1..self.timestamp_end], | |
285 | ), |
|
285 | ), | |
286 | ) |
|
286 | ) | |
287 | .field( |
|
287 | .field( | |
288 | "files", |
|
288 | "files", | |
289 | &debug_bytes( |
|
289 | &debug_bytes( | |
290 | &self.bytes[self.timestamp_end + 1..self.files_end], |
|
290 | &self.bytes[self.timestamp_end + 1..self.files_end], | |
291 | ), |
|
291 | ), | |
292 | ) |
|
292 | ) | |
293 | .field( |
|
293 | .field( | |
294 | "description", |
|
294 | "description", | |
295 | &debug_bytes(&self.bytes[self.files_end + 2..]), |
|
295 | &debug_bytes(&self.bytes[self.files_end + 2..]), | |
296 | ) |
|
296 | ) | |
297 | .finish() |
|
297 | .finish() | |
298 | } |
|
298 | } | |
299 | } |
|
299 | } | |
300 |
|
300 | |||
/// Render a byte slice as a printable string, escaping non-ASCII and
/// control bytes (`\n`, `\xNN`, ...) via `std::ascii::escape_default`.
fn debug_bytes(bytes: &[u8]) -> String {
    let escaped: Vec<u8> =
        bytes.iter().flat_map(|b| escape_default(*b)).collect();
    String::from_utf8_lossy(&escaped).to_string()
}
307 |
|
307 | |||
/// Parse the raw bytes of the timestamp line from a changelog entry.
///
/// According to the documentation in `hg help dates` and the
/// implementation in `changelog.py`, the format of the timestamp line
/// is `time tz extra\n` where:
///
/// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
///   as seconds since the UNIX epoch.
///
/// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
///   seconds WEST of UTC (so negative for timezones east of UTC, which is the
///   opposite of the sign in ISO 8601 timestamps).
///
/// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
///   and value in each pair separated by an ASCII colon. Keys are limited to
///   ASCII letters, digits, hyphens, and underscores, whereas values can be
///   arbitrary bytes.
fn parse_timestamp(
    timestamp_line: &[u8],
) -> Result<DateTime<FixedOffset>, HgError> {
    // At most three space-separated fields: time, tz, and extras
    // (extras are not parsed here; see `parse_timestamp_line_extra`).
    let mut parts = timestamp_line.splitn(3, |c| *c == b' ');

    let timestamp_bytes = parts
        .next()
        .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
    let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
        HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
    })?;
    // First try to interpret the field as integer seconds since the epoch.
    let timestamp_utc = timestamp_str
        .parse()
        .map_err(|e| {
            HgError::corrupted(format!("failed to parse timestamp: {e}"))
        })
        .and_then(|secs| {
            DateTime::from_timestamp(secs, 0).ok_or_else(|| {
                HgError::corrupted(format!(
                    "integer timestamp out of valid range: {secs}"
                ))
            })
        })
        // Attempt to parse the timestamp as a float if we can't parse
        // it as an int. It doesn't seem like float timestamps are actually
        // used in practice, but the Python code supports them.
        .or_else(|_| parse_float_timestamp(timestamp_str))?;

    let timezone_bytes = parts
        .next()
        .ok_or_else(|| HgError::corrupted("missing timezone"))?;
    let timezone_secs: i32 = str::from_utf8(timezone_bytes)
        .map_err(|e| {
            HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
        })?
        .parse()
        .map_err(|e| {
            HgError::corrupted(format!("timezone is not an integer: {e}"))
        })?;
    // The stored offset is seconds WEST of UTC, hence `west_opt` rather
    // than `east_opt`.
    let timezone = FixedOffset::west_opt(timezone_secs)
        .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;

    // Re-attach the commit's local offset to the UTC instant.
    Ok(DateTime::from_naive_utc_and_offset(
        timestamp_utc.naive_utc(),
        timezone,
    ))
}
369 |
|
372 | |||
/// Attempt to parse the given string as floating-point timestamp, and
/// convert the result into a UTC `chrono::DateTime`.
fn parse_float_timestamp(
    timestamp_str: &str,
) -> Result<DateTime<Utc>, HgError> {
    let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
        HgError::corrupted(format!("failed to parse timestamp: {e}"))
    })?;

    // To construct a `NaiveDateTime` we'll need to convert the float
    // into signed integer seconds and unsigned integer nanoseconds.
    let mut secs = timestamp.trunc() as i64;
    let mut subsecs = timestamp.fract();

    // If the timestamp is negative, we need to express the fractional
    // component as positive nanoseconds since the previous second.
    // NOTE(review): for negative whole numbers (fract() == -0.0) this
    // yields nsecs == 1_000_000_000, i.e. chrono's leap-second
    // representation of the instant — apparently intentional upstream;
    // confirm before changing.
    if timestamp < 0.0 {
        secs -= 1;
        subsecs += 1.0;
    }

    // This cast should be safe because the fractional component is
    // by definition less than 1.0, so this value should not exceed
    // 1 billion, which is representable as an f64 without loss of
    // precision and should fit into a u32 without overflowing.
    //
    // (Any loss of precision in the fractional component will have
    // already happened at the time of initial parsing; in general,
    // f64s are insufficiently precise to provide nanosecond-level
    // precision with present-day timestamps.)
    let nsecs = (subsecs * 1_000_000_000.0) as u32;

    DateTime::from_timestamp(secs, nsecs).ok_or_else(|| {
        HgError::corrupted(format!(
            "float timestamp out of valid range: {timestamp}"
        ))
    })
}
408 |
|
411 | |||
409 | /// Decode changeset extra fields. |
|
412 | /// Decode changeset extra fields. | |
410 | /// |
|
413 | /// | |
411 | /// Extras are null-delimited key-value pairs where the key consists of ASCII |
|
414 | /// Extras are null-delimited key-value pairs where the key consists of ASCII | |
412 | /// alphanumeric characters plus hyphens and underscores, and the value can |
|
415 | /// alphanumeric characters plus hyphens and underscores, and the value can | |
413 | /// contain arbitrary bytes. |
|
416 | /// contain arbitrary bytes. | |
414 | fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> { |
|
417 | fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | |
415 | extra |
|
418 | extra | |
416 | .split(|c| *c == b'\0') |
|
419 | .split(|c| *c == b'\0') | |
417 | .map(|pair| { |
|
420 | .map(|pair| { | |
418 | let pair = unescape_extra(pair); |
|
421 | let pair = unescape_extra(pair); | |
419 | let mut iter = pair.splitn(2, |c| *c == b':'); |
|
422 | let mut iter = pair.splitn(2, |c| *c == b':'); | |
420 |
|
423 | |||
421 | let key_bytes = |
|
424 | let key_bytes = | |
422 | iter.next().filter(|k| !k.is_empty()).ok_or_else(|| { |
|
425 | iter.next().filter(|k| !k.is_empty()).ok_or_else(|| { | |
423 | HgError::corrupted("empty key in changeset extras") |
|
426 | HgError::corrupted("empty key in changeset extras") | |
424 | })?; |
|
427 | })?; | |
425 |
|
428 | |||
426 | let key = str::from_utf8(key_bytes) |
|
429 | let key = str::from_utf8(key_bytes) | |
427 | .ok() |
|
430 | .ok() | |
428 | .filter(|k| { |
|
431 | .filter(|k| { | |
429 | k.chars().all(|c| { |
|
432 | k.chars().all(|c| { | |
430 | c.is_ascii_alphanumeric() || c == '_' || c == '-' |
|
433 | c.is_ascii_alphanumeric() || c == '_' || c == '-' | |
431 | }) |
|
434 | }) | |
432 | }) |
|
435 | }) | |
433 | .ok_or_else(|| { |
|
436 | .ok_or_else(|| { | |
434 | let key = String::from_utf8_lossy(key_bytes); |
|
437 | let key = String::from_utf8_lossy(key_bytes); | |
435 | HgError::corrupted(format!( |
|
438 | HgError::corrupted(format!( | |
436 | "invalid key in changeset extras: {key}", |
|
439 | "invalid key in changeset extras: {key}", | |
437 | )) |
|
440 | )) | |
438 | })? |
|
441 | })? | |
439 | .to_string(); |
|
442 | .to_string(); | |
440 |
|
443 | |||
441 | let value = iter.next().map(Into::into).ok_or_else(|| { |
|
444 | let value = iter.next().map(Into::into).ok_or_else(|| { | |
442 | HgError::corrupted(format!( |
|
445 | HgError::corrupted(format!( | |
443 | "missing value for changeset extra: {key}" |
|
446 | "missing value for changeset extra: {key}" | |
444 | )) |
|
447 | )) | |
445 | })?; |
|
448 | })?; | |
446 |
|
449 | |||
447 | Ok((key, value)) |
|
450 | Ok((key, value)) | |
448 | }) |
|
451 | }) | |
449 | .collect() |
|
452 | .collect() | |
450 | } |
|
453 | } | |
451 |
|
454 | |||
452 | /// Parse the extra fields from a changeset's timestamp line. |
|
455 | /// Parse the extra fields from a changeset's timestamp line. | |
453 | fn parse_timestamp_line_extra( |
|
456 | fn parse_timestamp_line_extra( | |
454 | timestamp_line: &[u8], |
|
457 | timestamp_line: &[u8], | |
455 | ) -> Result<BTreeMap<String, Vec<u8>>, HgError> { |
|
458 | ) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | |
456 | Ok(timestamp_line |
|
459 | Ok(timestamp_line | |
457 | .splitn(3, |c| *c == b' ') |
|
460 | .splitn(3, |c| *c == b' ') | |
458 | .nth(2) |
|
461 | .nth(2) | |
459 | .map(decode_extra) |
|
462 | .map(decode_extra) | |
460 | .transpose()? |
|
463 | .transpose()? | |
461 | .unwrap_or_default()) |
|
464 | .unwrap_or_default()) | |
462 | } |
|
465 | } | |
463 |
|
466 | |||
/// Decode Mercurial's escaping for changelog extras.
///
/// The `_string_escape` function in `changelog.py` only escapes 4 characters
/// (null, backslash, newline, and carriage return) so we only decode those.
///
/// The Python code also includes a workaround for decoding escaped nuls
/// that are followed by an ASCII octal digit, since Python's built-in
/// `string_escape` codec will interpret that as an escaped octal byte value.
/// That workaround is omitted here since we don't support decoding octal.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut output = Vec::with_capacity(bytes.len());
    let mut rest = bytes;

    while let Some((&first, tail)) = rest.split_first() {
        // Ordinary bytes pass through untouched.
        if first != b'\\' {
            output.push(first);
            rest = tail;
            continue;
        }

        match tail.split_first() {
            Some((&escaped, remainder)) => {
                match escaped {
                    b'0' => output.push(b'\0'),
                    b'\\' => output.push(b'\\'),
                    b'n' => output.push(b'\n'),
                    b'r' => output.push(b'\r'),
                    // The following case should never occur in theory
                    // because any backslashes in the original input should
                    // have been escaped with another backslash, so it should
                    // not be possible to observe an escape sequence other
                    // than the 4 above. Pass it through verbatim.
                    other => output.extend_from_slice(&[b'\\', other]),
                }
                rest = remainder;
            }
            None => {
                // A lone trailing backslash is kept as-is.
                output.push(b'\\');
                rest = tail;
            }
        }
    }

    output
}
499 |
|
502 | |||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::VfsImpl;
    use crate::{
        RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
        NULL_REVISION,
    };
    use pretty_assertions::assert_eq;

    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        // NOTE(review): this input duplicates the "no newline after
        // manifest" case below; a truly empty input would be b"". Confirm
        // that `new(b"")` is also rejected before changing it.
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err());
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err());
    }

    #[test]
    fn test_create_changelogrevisiondata() {
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = VfsImpl {
            base: temp.path().to_owned(),
        };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        std::fs::write(temp.path().join("foo.d"), b"").unwrap();
        let revlog = Revlog::open(
            &vfs,
            "foo.i",
            None,
            RevlogOpenOptions::new(
                false,
                RevlogDataConfig::default(),
                RevlogDeltaConfig::default(),
                RevlogFeatureConfig::default(),
            ),
        )
        .unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION.into())?,
            ChangelogRevisionData::null()
        );
        // same with the intermediate entry object
        assert_eq!(
            changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }

    #[test]
    fn test_empty_files_list() {
        assert!(ChangelogRevisionData::null()
            .files()
            .collect_vec()
            .is_empty());
    }

    #[test]
    fn test_unescape_basic() {
        // '\0', '\\', '\n', and '\r' are correctly unescaped.
        let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
        let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_unsupported_sequence() {
        // Other escape sequences are left unaltered.
        // Inclusive range so that byte 0xFF is exercised as well; the
        // previous `0u8..255` exclusive range silently skipped it.
        for c in 0u8..=255 {
            match c {
                b'0' | b'\\' | b'n' | b'r' => continue,
                c => {
                    let expected = &[b'\\', c][..];
                    let unescaped = unescape_extra(expected);
                    assert_eq!(expected, &unescaped[..]);
                }
            }
        }
    }

    #[test]
    fn test_unescape_trailing_backslash() {
        // Trailing backslashes are OK.
        let expected = br"hi\";
        let unescaped = unescape_extra(expected);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_nul_followed_by_octal() {
        // Escaped NUL chars followed by octal digits are decoded correctly.
        let expected = b"\x0012";
        let escaped = br"\012";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_parse_float_timestamp() {
        let test_cases = [
            // Zero should map to the UNIX epoch.
            ("0.0", "1970-01-01 00:00:00 UTC"),
            // Negative zero should be the same as positive zero.
            ("-0.0", "1970-01-01 00:00:00 UTC"),
            // Values without fractional components should work like integers.
            // (Assuming the timestamp is within the limits of f64 precision.)
            ("1115154970.0", "2005-05-03 21:16:10 UTC"),
            // We expect some loss of precision in the fractional component
            // when parsing arbitrary floating-point values.
            ("1115154970.123456789", "2005-05-03 21:16:10.123456716 UTC"),
            // But representable f64 values should parse losslessly.
            ("1115154970.123456716", "2005-05-03 21:16:10.123456716 UTC"),
            // Negative fractional components are subtracted from the epoch.
            ("-1.333", "1969-12-31 23:59:58.667 UTC"),
        ];

        for (input, expected) in test_cases {
            let res = parse_float_timestamp(input).unwrap().to_string();
            assert_eq!(res, expected);
        }
    }

    /// Test-only inverse of `unescape_extra`: escape the 4 special bytes
    /// (null, backslash, newline, carriage return) and pass everything else
    /// through unchanged.
    fn escape_extra(bytes: &[u8]) -> Vec<u8> {
        let mut output = Vec::with_capacity(bytes.len());

        for c in bytes.iter().copied() {
            output.extend_from_slice(match c {
                b'\0' => &b"\\0"[..],
                b'\\' => &b"\\\\"[..],
                b'\n' => &b"\\n"[..],
                b'\r' => &b"\\r"[..],
                _ => {
                    output.push(c);
                    continue;
                }
            });
        }

        output
    }

    /// Test-only encoder producing the on-disk extras format: escaped
    /// `key:value` pairs joined by null bytes.
    fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
    where
        K: AsRef<[u8]>,
        V: AsRef<[u8]>,
    {
        let extras = pairs.into_iter().map(|(k, v)| {
            escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
        });
        // Use fully-qualified syntax to avoid a future naming conflict with
        // the standard library: https://github.com/rust-lang/rust/issues/79524
        Itertools::intersperse(extras, b"\0".to_vec()).concat()
    }

    #[test]
    fn test_decode_extra() {
        let extra = [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>();

        // Round-trip: encoding then decoding must reproduce the input map.
        let encoded = encode_extra(&extra);
        let decoded = decode_extra(&encoded).unwrap();

        assert_eq!(extra, decoded);
    }

    #[test]
    fn test_corrupt_extra() {
        let test_cases = [
            (&b""[..], "empty input"),
            (&b"\0"[..], "unexpected null byte"),
            (&b":empty-key"[..], "empty key"),
            (&b"\0leading-null:"[..], "leading null"),
            (&b"trailing-null:\0"[..], "trailing null"),
            (&b"missing-value"[..], "missing value"),
            (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
            (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
        ];

        for (extra, msg) in test_cases {
            assert!(
                decode_extra(extra).is_err(),
                "corrupt extra should have failed to parse: {}",
                msg
            );
        }
    }

    #[test]
    fn test_parse_timestamp_line() {
        let extra = [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>();

        // "1115154970 28800" is 2005-05-03 21:16:10 UTC at offset UTC-08:00.
        let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
        line.extend_from_slice(&encode_extra(&extra));

        let timestamp = parse_timestamp(&line).unwrap();
        assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

        let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
        assert_eq!(extra, parsed_extra);
    }
}
General Comments 0
You need to be logged in to leave comments.
Login now