##// END OF EJS Templates
merge: move the filtering of ambiguous files to a dedicated function...
Raphaël Gomès -
r52951:f5742367 default
parent child Browse files
Show More
@@ -10,6 +10,7 from __future__ import annotations
10 10 import collections
11 11 import struct
12 12 import typing
13 from typing import Dict, Optional, Tuple
13 14
14 15 from .i18n import _
15 16 from .node import nullrev
@@ -2164,70 +2165,8 def _update(
2164 2165 mresult.len((mergestatemod.ACTION_GET,)) if wantfiledata else 0
2165 2166 )
2166 2167 with repo.dirstate.changing_parents(repo):
2167 ### Filter Filedata
2168 #
2169 # We gathered "cache" information for the clean file while
2170 # updating them: mtime, size and mode.
2171 #
2172 # At the time this comment is written, they are various issues
2173 # with how we gather the `mode` and `mtime` information (see
2174 # the comment in `batchget`).
2175 #
2176 # We are going to smooth one of this issue here : mtime ambiguity.
2177 #
2178 # i.e. even if the mtime gathered during `batchget` was
2179 # correct[1] a change happening right after it could change the
2180 # content while keeping the same mtime[2].
2181 #
2182 # When we reach the current code, the "on disk" part of the
2183 # update operation is finished. We still assume that no other
2184 # process raced that "on disk" part, but we want to at least
2185 # prevent later file change to alter the content of the file
2186 # right after the update operation. So quickly that the same
2187 # mtime is record for the operation.
2188 # To prevent such ambiguity to happens, we will only keep the
2189 # "file data" for files with mtime that are stricly in the past,
2190 # i.e. whose mtime is strictly lower than the current time.
2191 #
2192 # This protect us from race conditions from operation that could
2193 # run right after this one, especially other Mercurial
2194 # operation that could be waiting for the wlock to touch files
2195 # content and the dirstate.
2196 #
2197 # In an ideal world, we could only get reliable information in
2198 # `getfiledata` (from `getbatch`), however the current approach
2199 # have been a successful compromise since many years.
2200 #
2201 # At the time this comment is written, not using any "cache"
2202 # file data at all here would not be viable. As it would result is
2203 # a very large amount of work (equivalent to the previous `hg
2204 # update` during the next status after an update).
2205 #
2206 # [1] the current code cannot grantee that the `mtime` and
2207 # `mode` are correct, but the result is "okay in practice".
2208 # (see the comment in `batchget`). #
2209 #
2210 # [2] using nano-second precision can greatly help here because
2211 # it makes the "different write with same mtime" issue
2212 # virtually vanish. However, dirstate v1 cannot store such
2213 # precision and a bunch of python-runtime, operating-system and
2214 # filesystem does not provide use with such precision, so we
2215 # have to operate as if it wasn't available.
2216 2168 if getfiledata:
2217 ambiguous_mtime = {}
2218 now = timestamp.get_fs_now(repo.vfs)
2219 if now is None:
2220 # we can't write to the FS, so we won't actually update
2221 # the dirstate content anyway, no need to put cache
2222 # information.
2223 getfiledata = None
2224 else:
2225 now_sec = now[0]
2226 for f, m in getfiledata.items():
2227 if m is not None and m[2][0] >= now_sec:
2228 ambiguous_mtime[f] = (m[0], m[1], None)
2229 for f, m in ambiguous_mtime.items():
2230 getfiledata[f] = m
2169 getfiledata = filter_ambiguous_files(repo, getfiledata)
2231 2170
2232 2171 repo.setparents(fp1, fp2)
2233 2172 mergestatemod.recordupdates(
@@ -2253,6 +2192,74 def _update(
2253 2192 return stats
2254 2193
2255 2194
2195 # filename -> (mode, size, timestamp)
2196 FileData = Dict[bytes, Optional[Tuple[int, int, Optional[timestamp.timestamp]]]]
2197
2198
def filter_ambiguous_files(repo, file_data: FileData) -> Optional[FileData]:
    """Drop the cached mtime of files whose mtime is not strictly in the past.

    `file_data` maps a filename to `(mode, size, timestamp)` (or to None).
    It is modified in place and returned. None is returned instead when
    `timestamp.get_fs_now(repo.vfs)` returns None.

    We've gathered "cache" information for the clean files while updating
    them: their mtime, size and mode.

    At the time this comment is written, there are various issues with how we
    gather the `mode` and `mtime` information (see the comment in `batchget`).

    We are going to smooth one of these issues here: mtime ambiguity.

    i.e. even if the mtime gathered during `batchget` was correct[1] a change
    happening right after it could change the content while keeping
    the same mtime[2].

    When we reach the current code, the "on disk" part of the update operation
    is finished. We still assume that no other process raced that "on disk"
    part, but we want to at least prevent later file changes from altering the
    contents of the file right after the update operation so quickly that the
    same mtime is recorded for the operation.
    To prevent such ambiguities from happening, we will only keep the
    "file data" for files with mtimes that are strictly in the past,
    i.e. whose mtime is strictly lower than the current time.

    This protects us from race conditions from operations that could run right
    after this one, especially other Mercurial operations that could be waiting
    for the wlock to touch files contents and the dirstate.

    In an ideal world, we could only get reliable information in `getfiledata`
    (from `getbatch`), however the current approach has been a successful
    compromise for many years.

    At the time this comment is written, not using any "cache" file data at all
    here would not be viable, as it would result in a very large amount of work
    (equivalent to the previous `hg update` during the next status after an
    update).

    [1] the current code cannot guarantee that the `mtime` and `mode`
        are correct, but the result is "okay in practice".
        (see the comment in `batchget`)

    [2] using nano-second precision can greatly help here because it makes the
        "different write with same mtime" issue virtually vanish. However,
        dirstate v1 cannot store such precision and a bunch of python-runtime,
        operating-system and filesystem parts do not provide us with such
        precision, so we have to operate as if it wasn't available."""
    now = timestamp.get_fs_now(repo.vfs)
    if now is None:
        # we can't write to the FS, so we won't actually update
        # the dirstate content anyway, no need to put cache
        # information.
        return None

    # Only whole seconds are compared (see [2] in the docstring).
    now_sec = now[0]

    # Collect the replacements first: the mapping must not be modified while
    # it is being iterated. Entries whose timestamp is already None carry no
    # mtime to invalidate, so they are left alone.
    ambiguous_mtime: FileData = {
        f: (m[0], m[1], None)
        for f, m in file_data.items()
        if m is not None and m[2] is not None and m[2][0] >= now_sec
    }
    file_data.update(ambiguous_mtime)
    return file_data
2261
2262
2256 2263 def merge(ctx, labels=None, force=False, wc=None):
2257 2264 """Merge another topological branch into the working copy.
2258 2265
General Comments 0
You need to be logged in to leave comments. Login now