##// END OF EJS Templates
changing-files: rework the way we store changed files in side-data...
marmoute -
r46211:9a6b409b default
parent child Browse files
Show More
@@ -239,3 +239,75 b' 1. Hash the parent nodes'
239 2. Hash the fulltext of the revision
239 2. Hash the fulltext of the revision
240
240
241 The 20 byte node ids of the parents are fed into the hasher in ascending order.
241 The 20 byte node ids of the parents are fed into the hasher in ascending order.
242
243 Changed Files side-data
244 =======================
245
246 (This feature is in active development and its behavior is not frozen yet. It
247 should not be used in any production repository)
248
249 When the `exp-copies-sidedata-changeset` requirement is in use, information
250 related to the changed files will be stored as "side-data" for every changeset
251 in the changelog.
252
253 These data contain the following information:
254
255 * set of files actively added by the changeset
256 * set of files actively removed by the changeset
257 * set of files actively merged by the changeset
258 * set of files actively touched by the changeset
259 * mapping of copy-source, copy-destination from first parent (p1)
260 * mapping of copy-source, copy-destination from second parent (p2)
261
262 The block itself is big-endian data, formatted in three sections: header, index,
263 and data. See below for details:
264
265 Header:
266
267 4 bytes: unsigned integer
268
269 total number of entries in the index
270
271 Index:
272
273 The index contains an entry for every involved filename. It is sorted by
274 filename. Each entry uses the following format:
275
276 1 byte: bits field
277
278 This byte holds two different bit fields:
279
280 The 2 lower bits carry copy information:
281
282 `00`: file has no copy information,
283 `10`: file is copied from a p1 source,
284 `11`: file is copied from a p2 source.
285
286 The 3 next bits carry action information.
287
288 `000`: file was untouched, it exists in the index as a copy source,
289 `001`: file was actively added
290 `010`: file was actively merged
291 `011`: file was actively removed
292 `100`: reserved for future use
293 `101`: file was actively touched in any other way
294
295 (The remaining 3 high bits are unused)
296
297 4 bytes: unsigned integer
298
299 Address (in bytes) of the end of the associated filename in the data
300 block. (This is the address of the first byte not part of the filename)
301
302 The start of the filename can be retrieved by reading that field for the
303 previous index entry. The filename of the first entry starts at zero.
304
305 4 bytes: unsigned integer
306
307 Index (in this very index) of the source of the copy (when a copy is
308 happening). If no copy is happening the value of this field is
309 irrelevant and could have any value. It is set to zero by convention.
310
311 Data:
312
313 raw bytes block containing all filenames concatenated without any separator.
@@ -8,6 +8,7 b''
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import multiprocessing
10 import multiprocessing
11 import struct
11
12
12 from . import (
13 from . import (
13 error,
14 error,
@@ -373,54 +374,112 b' def decodefileindices(files, data):'
373 return None
374 return None
374
375
375
376
377 # see mercurial/helptext/internals/revlogs.txt for details about the format
378
379 ACTION_MASK = int("111" "00", 2)
380 # note: untouched file used as copy source will appear as `000` for this mask.
381 ADDED_FLAG = int("001" "00", 2)
382 MERGED_FLAG = int("010" "00", 2)
383 REMOVED_FLAG = int("011" "00", 2)
384 # `100` is reserved for future use
385 TOUCHED_FLAG = int("101" "00", 2)
386
387 COPIED_MASK = int("11", 2)
388 COPIED_FROM_P1_FLAG = int("10", 2)
389 COPIED_FROM_P2_FLAG = int("11", 2)
390
391 # structure is <flag><filename-end><copy-source>
392 INDEX_HEADER = struct.Struct(">L")
393 INDEX_ENTRY = struct.Struct(">bLL")
394
395
376 def encode_files_sidedata(files):
396 def encode_files_sidedata(files):
377 sortedfiles = sorted(files.touched)
397 all_files = set(files.touched)
378 sidedata = {}
398 all_files.update(files.copied_from_p1.values())
379 p1copies = files.copied_from_p1
399 all_files.update(files.copied_from_p2.values())
380 if p1copies:
400 all_files = sorted(all_files)
381 p1copies = encodecopies(sortedfiles, p1copies)
401 file_idx = {f: i for (i, f) in enumerate(all_files)}
382 sidedata[sidedatamod.SD_P1COPIES] = p1copies
402 file_idx[None] = 0
383 p2copies = files.copied_from_p2
403
384 if p2copies:
404 chunks = [INDEX_HEADER.pack(len(all_files))]
385 p2copies = encodecopies(sortedfiles, p2copies)
405
386 sidedata[sidedatamod.SD_P2COPIES] = p2copies
406 filename_length = 0
387 filesadded = files.added
407 for f in all_files:
388 if filesadded:
408 filename_size = len(f)
389 filesadded = encodefileindices(sortedfiles, filesadded)
409 filename_length += filename_size
390 sidedata[sidedatamod.SD_FILESADDED] = filesadded
410 flag = 0
391 filesremoved = files.removed
411 if f in files.added:
392 if filesremoved:
412 flag |= ADDED_FLAG
393 filesremoved = encodefileindices(sortedfiles, filesremoved)
413 elif f in files.merged:
394 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
414 flag |= MERGED_FLAG
395 if not sidedata:
415 elif f in files.removed:
396 sidedata = None
416 flag |= REMOVED_FLAG
397 return sidedata
417 elif f in files.touched:
418 flag |= TOUCHED_FLAG
419
420 copy = None
421 if f in files.copied_from_p1:
422 flag |= COPIED_FROM_P1_FLAG
423 copy = files.copied_from_p1.get(f)
424 elif f in files.copied_from_p2:
425 copy = files.copied_from_p2.get(f)
426 flag |= COPIED_FROM_P2_FLAG
427 copy_idx = file_idx[copy]
428 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
429 chunks.extend(all_files)
430 return {sidedatamod.SD_FILES: b''.join(chunks)}
398
431
399
432
400 def decode_files_sidedata(changelogrevision, sidedata):
433 def decode_files_sidedata(changelogrevision, sidedata):
401 """Return a ChangingFiles instance from a changelogrevision using sidata
434 md = ChangingFiles()
402 """
435 raw = sidedata.get(sidedatamod.SD_FILES)
403 touched = changelogrevision.files
436
437 if raw is None:
438 return md
439
440 copies = []
441 all_files = []
404
442
405 rawindices = sidedata.get(sidedatamod.SD_FILESADDED)
443 assert len(raw) >= INDEX_HEADER.size
406 added = decodefileindices(touched, rawindices)
444 total_files = INDEX_HEADER.unpack_from(raw, 0)[0]
407
445
408 rawindices = sidedata.get(sidedatamod.SD_FILESREMOVED)
446 offset = INDEX_HEADER.size
409 removed = decodefileindices(touched, rawindices)
447 file_offset_base = offset + (INDEX_ENTRY.size * total_files)
448 file_offset_last = file_offset_base
449
450 assert len(raw) >= file_offset_base
410
451
411 rawcopies = sidedata.get(sidedatamod.SD_P1COPIES)
452 for idx in range(total_files):
412 p1_copies = decodecopies(touched, rawcopies)
453 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
413
454 file_end += file_offset_base
414 rawcopies = sidedata.get(sidedatamod.SD_P2COPIES)
455 filename = raw[file_offset_last:file_end]
415 p2_copies = decodecopies(touched, rawcopies)
456 filesize = file_end - file_offset_last
457 assert len(filename) == filesize
458 offset += INDEX_ENTRY.size
459 file_offset_last = file_end
460 all_files.append(filename)
461 if flag & ACTION_MASK == ADDED_FLAG:
462 md.mark_added(filename)
463 elif flag & ACTION_MASK == MERGED_FLAG:
464 md.mark_merged(filename)
465 elif flag & ACTION_MASK == REMOVED_FLAG:
466 md.mark_removed(filename)
467 elif flag & ACTION_MASK == TOUCHED_FLAG:
468 md.mark_touched(filename)
416
469
417 return ChangingFiles(
470 copied = None
418 touched=touched,
471 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
419 added=added,
472 copied = md.mark_copied_from_p1
420 removed=removed,
473 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
421 p1_copies=p1_copies,
474 copied = md.mark_copied_from_p2
422 p2_copies=p2_copies,
475
423 )
476 if copied is not None:
477 copies.append((copied, filename, copy_idx))
478
479 for copied, filename, copy_idx in copies:
480 copied(all_files[copy_idx], filename)
481
482 return md
424
483
425
484
426 def _getsidedata(srcrepo, rev):
485 def _getsidedata(srcrepo, rev):
@@ -428,23 +487,15 b' def _getsidedata(srcrepo, rev):'
428 filescopies = computechangesetcopies(ctx)
487 filescopies = computechangesetcopies(ctx)
429 filesadded = computechangesetfilesadded(ctx)
488 filesadded = computechangesetfilesadded(ctx)
430 filesremoved = computechangesetfilesremoved(ctx)
489 filesremoved = computechangesetfilesremoved(ctx)
431 sidedata = {}
490 filesmerged = computechangesetfilesmerged(ctx)
432 if any([filescopies, filesadded, filesremoved]):
491 files = ChangingFiles()
433 sortedfiles = sorted(ctx.files())
492 files.update_touched(ctx.files())
434 p1copies, p2copies = filescopies
493 files.update_added(filesadded)
435 p1copies = encodecopies(sortedfiles, p1copies)
494 files.update_removed(filesremoved)
436 p2copies = encodecopies(sortedfiles, p2copies)
495 files.update_merged(filesmerged)
437 filesadded = encodefileindices(sortedfiles, filesadded)
496 files.update_copies_from_p1(filescopies[0])
438 filesremoved = encodefileindices(sortedfiles, filesremoved)
497 files.update_copies_from_p2(filescopies[1])
439 if p1copies:
498 return encode_files_sidedata(files)
440 sidedata[sidedatamod.SD_P1COPIES] = p1copies
441 if p2copies:
442 sidedata[sidedatamod.SD_P2COPIES] = p2copies
443 if filesadded:
444 sidedata[sidedatamod.SD_FILESADDED] = filesadded
445 if filesremoved:
446 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
447 return sidedata
448
499
449
500
450 def getsidedataadder(srcrepo, destrepo):
501 def getsidedataadder(srcrepo, destrepo):
@@ -53,6 +53,7 b' SD_P1COPIES = 8'
53 SD_P2COPIES = 9
53 SD_P2COPIES = 9
54 SD_FILESADDED = 10
54 SD_FILESADDED = 10
55 SD_FILESREMOVED = 11
55 SD_FILESREMOVED = 11
56 SD_FILES = 12
56
57
57 # internal format constant
58 # internal format constant
58 SIDEDATA_HEADER = struct.Struct('>H')
59 SIDEDATA_HEADER = struct.Struct('>H')
@@ -79,11 +79,9 b' Check that copies are recorded correctly'
79 2\x00a (esc)
79 2\x00a (esc)
80 #else
80 #else
81 $ hg debugsidedata -c -v -- -1
81 $ hg debugsidedata -c -v -- -1
82 2 sidedata entries
82 1 sidedata entries
83 entry-0010 size 11
83 entry-0014 size 44
84 '0\x00a\n1\x00a\n2\x00a'
84 '\x00\x00\x00\x04\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00abcd'
85 entry-0012 size 5
86 '0\n1\n2'
87 #endif
85 #endif
88
86
89 $ hg showcopies
87 $ hg showcopies
@@ -117,13 +115,9 b' Check that renames are recorded correctl'
117
115
118 #else
116 #else
119 $ hg debugsidedata -c -v -- -1
117 $ hg debugsidedata -c -v -- -1
120 3 sidedata entries
118 1 sidedata entries
121 entry-0010 size 3
119 entry-0014 size 25
122 '1\x00b'
120 '\x00\x00\x00\x02\x0c\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x03\x00\x00\x00\x00bb2'
123 entry-0012 size 1
124 '1'
125 entry-0013 size 1
126 '0'
127 #endif
121 #endif
128
122
129 $ hg showcopies
123 $ hg showcopies
@@ -165,8 +159,8 b' even though there is no filelog entry.'
165 #else
159 #else
166 $ hg debugsidedata -c -v -- -1
160 $ hg debugsidedata -c -v -- -1
167 1 sidedata entries
161 1 sidedata entries
168 entry-0010 size 4
162 entry-0014 size 25
169 '0\x00b2'
163 '\x00\x00\x00\x02\x00\x00\x00\x00\x02\x00\x00\x00\x00\x16\x00\x00\x00\x03\x00\x00\x00\x00b2c'
170 #endif
164 #endif
171
165
172 $ hg showcopies
166 $ hg showcopies
@@ -221,13 +215,9 b" File 'f' exists only in p1, so 'i' shoul"
221
215
222 #else
216 #else
223 $ hg debugsidedata -c -v -- -1
217 $ hg debugsidedata -c -v -- -1
224 3 sidedata entries
218 1 sidedata entries
225 entry-0010 size 7
219 entry-0014 size 64
226 '0\x00a\n2\x00f'
220 '\x00\x00\x00\x06\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x06\x00\x00\x00\x04\x00\x00\x00\x00\x07\x00\x00\x00\x05\x00\x00\x00\x01\x06\x00\x00\x00\x06\x00\x00\x00\x02adfghi'
227 entry-0011 size 3
228 '1\x00d'
229 entry-0012 size 5
230 '0\n1\n2'
231 #endif
221 #endif
232
222
233 $ hg showcopies
223 $ hg showcopies
@@ -250,11 +240,9 b' Test writing to both changeset and filel'
250 #else
240 #else
251 $ hg ci -m 'copy a to j'
241 $ hg ci -m 'copy a to j'
252 $ hg debugsidedata -c -v -- -1
242 $ hg debugsidedata -c -v -- -1
253 2 sidedata entries
243 1 sidedata entries
254 entry-0010 size 3
244 entry-0014 size 24
255 '0\x00a'
245 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
256 entry-0012 size 1
257 '0'
258 #endif
246 #endif
259 $ hg debugdata j 0
247 $ hg debugdata j 0
260 \x01 (esc)
248 \x01 (esc)
@@ -281,11 +269,9 b' copy information on to the filelog'
281 $ hg ci --amend -m 'copy a to j, v2'
269 $ hg ci --amend -m 'copy a to j, v2'
282 saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
270 saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
283 $ hg debugsidedata -c -v -- -1
271 $ hg debugsidedata -c -v -- -1
284 2 sidedata entries
272 1 sidedata entries
285 entry-0010 size 3
273 entry-0014 size 24
286 '0\x00a'
274 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
287 entry-0012 size 1
288 '0'
289 #endif
275 #endif
290 $ hg showcopies --config experimental.copies.read-from=filelog-only
276 $ hg showcopies --config experimental.copies.read-from=filelog-only
291 a -> j
277 a -> j
@@ -304,6 +290,9 b" won't have to fall back to reading from "
304 #else
290 #else
305 $ hg ci -m 'modify j'
291 $ hg ci -m 'modify j'
306 $ hg debugsidedata -c -v -- -1
292 $ hg debugsidedata -c -v -- -1
293 1 sidedata entries
294 entry-0014 size 14
295 '\x00\x00\x00\x01\x14\x00\x00\x00\x01\x00\x00\x00\x00j'
307 #endif
296 #endif
308
297
309 Test writing only to filelog
298 Test writing only to filelog
@@ -318,11 +307,9 b' Test writing only to filelog'
318 #else
307 #else
319 $ hg ci -m 'copy a to k'
308 $ hg ci -m 'copy a to k'
320 $ hg debugsidedata -c -v -- -1
309 $ hg debugsidedata -c -v -- -1
321 2 sidedata entries
310 1 sidedata entries
322 entry-0010 size 3
311 entry-0014 size 24
323 '0\x00a'
312 '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00ak'
324 entry-0012 size 1
325 '0'
326 #endif
313 #endif
327
314
328 $ hg debugdata k 0
315 $ hg debugdata k 0
@@ -439,10 +426,10 b' downgrading (keeping some sidedata)'
439 compression-level: default default default
426 compression-level: default default default
440 $ hg debugsidedata -c -- 0
427 $ hg debugsidedata -c -- 0
441 1 sidedata entries
428 1 sidedata entries
442 entry-0012 size 1
429 entry-0014 size 14
443 $ hg debugsidedata -c -- 1
430 $ hg debugsidedata -c -- 1
444 1 sidedata entries
431 1 sidedata entries
445 entry-0013 size 1
432 entry-0014 size 14
446 $ hg debugsidedata -m -- 0
433 $ hg debugsidedata -m -- 0
447 $ cat << EOF > .hg/hgrc
434 $ cat << EOF > .hg/hgrc
448 > [format]
435 > [format]
@@ -463,7 +450,11 b' downgrading (keeping some sidedata)'
463 compression: zlib zlib zlib
450 compression: zlib zlib zlib
464 compression-level: default default default
451 compression-level: default default default
465 $ hg debugsidedata -c -- 0
452 $ hg debugsidedata -c -- 0
453 1 sidedata entries
454 entry-0014 size 14
466 $ hg debugsidedata -c -- 1
455 $ hg debugsidedata -c -- 1
456 1 sidedata entries
457 entry-0014 size 14
467 $ hg debugsidedata -m -- 0
458 $ hg debugsidedata -m -- 0
468
459
469 upgrading
460 upgrading
@@ -487,10 +478,10 b' upgrading'
487 compression-level: default default default
478 compression-level: default default default
488 $ hg debugsidedata -c -- 0
479 $ hg debugsidedata -c -- 0
489 1 sidedata entries
480 1 sidedata entries
490 entry-0012 size 1
481 entry-0014 size 14
491 $ hg debugsidedata -c -- 1
482 $ hg debugsidedata -c -- 1
492 1 sidedata entries
483 1 sidedata entries
493 entry-0013 size 1
484 entry-0014 size 14
494 $ hg debugsidedata -m -- 0
485 $ hg debugsidedata -m -- 0
495
486
496 #endif
487 #endif
General Comments 0
You need to be logged in to leave comments. Login now