Show More
@@ -10,6 +10,7 from __future__ import annotations | |||
|
10 | 10 | import collections |
|
11 | 11 | import struct |
|
12 | 12 | import typing |
|
13 | from typing import Dict, Optional, Tuple | |
|
13 | 14 | |
|
14 | 15 | from .i18n import _ |
|
15 | 16 | from .node import nullrev |
@@ -2164,70 +2165,8 def _update( | |||
|
2164 | 2165 | mresult.len((mergestatemod.ACTION_GET,)) if wantfiledata else 0 |
|
2165 | 2166 | ) |
|
2166 | 2167 | with repo.dirstate.changing_parents(repo): |
|
2167 | ### Filter Filedata | |
|
2168 | # | |
|
2169 | # We gathered "cache" information for the clean file while | |
|
2170 | # updating them: mtime, size and mode. | |
|
2171 | # | |
|
2172 | # At the time this comment is written, they are various issues | |
|
2173 | # with how we gather the `mode` and `mtime` information (see | |
|
2174 | # the comment in `batchget`). | |
|
2175 | # | |
|
2176 | # We are going to smooth one of this issue here : mtime ambiguity. | |
|
2177 | # | |
|
2178 | # i.e. even if the mtime gathered during `batchget` was | |
|
2179 | # correct[1] a change happening right after it could change the | |
|
2180 | # content while keeping the same mtime[2]. | |
|
2181 | # | |
|
2182 | # When we reach the current code, the "on disk" part of the | |
|
2183 | # update operation is finished. We still assume that no other | |
|
2184 | # process raced that "on disk" part, but we want to at least | |
|
2185 | # prevent later file change to alter the content of the file | |
|
2186 | # right after the update operation. So quickly that the same | |
|
2187 | # mtime is record for the operation. | |
|
2188 | # To prevent such ambiguity to happens, we will only keep the | |
|
2189 | # "file data" for files with mtime that are stricly in the past, | |
|
2190 | # i.e. whose mtime is strictly lower than the current time. | |
|
2191 | # | |
|
2192 | # This protect us from race conditions from operation that could | |
|
2193 | # run right after this one, especially other Mercurial | |
|
2194 | # operation that could be waiting for the wlock to touch files | |
|
2195 | # content and the dirstate. | |
|
2196 | # | |
|
2197 | # In an ideal world, we could only get reliable information in | |
|
2198 | # `getfiledata` (from `getbatch`), however the current approach | |
|
2199 | # have been a successful compromise since many years. | |
|
2200 | # | |
|
2201 | # At the time this comment is written, not using any "cache" | |
|
2202 | # file data at all here would not be viable. As it would result is | |
|
2203 | # a very large amount of work (equivalent to the previous `hg | |
|
2204 | # update` during the next status after an update). | |
|
2205 | # | |
|
2206 | # [1] the current code cannot grantee that the `mtime` and | |
|
2207 | # `mode` are correct, but the result is "okay in practice". | |
|
2208 | # (see the comment in `batchget`). # | |
|
2209 | # | |
|
2210 | # [2] using nano-second precision can greatly help here because | |
|
2211 | # it makes the "different write with same mtime" issue | |
|
2212 | # virtually vanish. However, dirstate v1 cannot store such | |
|
2213 | # precision and a bunch of python-runtime, operating-system and | |
|
2214 | # filesystem does not provide use with such precision, so we | |
|
2215 | # have to operate as if it wasn't available. | |
|
2216 | 2168 | if getfiledata: |
|
2217 | ambiguous_mtime = {} | |
|
2218 | now = timestamp.get_fs_now(repo.vfs) | |
|
2219 | if now is None: | |
|
2220 | # we can't write to the FS, so we won't actually update | |
|
2221 | # the dirstate content anyway, no need to put cache | |
|
2222 | # information. | |
|
2223 | getfiledata = None | |
|
2224 | else: | |
|
2225 | now_sec = now[0] | |
|
2226 | for f, m in getfiledata.items(): | |
|
2227 | if m is not None and m[2][0] >= now_sec: | |
|
2228 | ambiguous_mtime[f] = (m[0], m[1], None) | |
|
2229 | for f, m in ambiguous_mtime.items(): | |
|
2230 | getfiledata[f] = m | |
|
2169 | getfiledata = filter_ambiguous_files(repo, getfiledata) | |
|
2231 | 2170 | |
|
2232 | 2171 | repo.setparents(fp1, fp2) |
|
2233 | 2172 | mergestatemod.recordupdates( |
@@ -2253,6 +2192,74 def _update( | |||
|
2253 | 2192 | return stats |
|
2254 | 2193 | |
|
2255 | 2194 | |
|
2195 | # filename -> (mode, size, timestamp) | |
|
2196 | FileData = Dict[bytes, Optional[Tuple[int, int, Optional[timestamp.timestamp]]]] | |
|
2197 | ||
|
2198 | ||
|
2199 | def filter_ambiguous_files(repo, file_data: FileData) -> Optional[FileData]: | |
|
2200 | """We've gathered "cache" information for the clean files while updating | |
|
2201 | them: their mtime, size and mode. | |
|
2202 | ||
|
2203 | At the time this comment is written, there are various issues with how we | |
|
2204 | gather the `mode` and `mtime` information (see the comment in `batchget`). | |
|
2205 | ||
|
2206 | We are going to smooth one of these issues here: mtime ambiguity. | |
|
2207 | ||
|
2208 | i.e. even if the mtime gathered during `batchget` was correct[1] a change | |
|
2209 | happening right after it could change the content while keeping | |
|
2210 | the same mtime[2]. | |
|
2211 | ||
|
2212 | When we reach the current code, the "on disk" part of the update operation | |
|
2213 | is finished. We still assume that no other process raced that "on disk" | |
|
2214 | part, but we want to at least prevent later file changes from altering the | |
|
2215 | contents of the file right after the update operation so quickly that the | |
|
2216 | same mtime is recorded for the operation. | |
|
2217 | To prevent such ambiguities from happening, we will only keep the | |
|
2218 | "file data" for files with mtimes that are strictly in the past, | |
|
2219 | i.e. whose mtime is strictly lower than the current time. | |
|
2220 | ||
|
2221 | This protects us from race conditions from operations that could run right | |
|
2222 | after this one, especially other Mercurial operations that could be waiting | |
|
2223 | for the wlock to touch file contents and the dirstate. | |
|
2224 | ||
|
2225 | In an ideal world, we could only get reliable information in `getfiledata` | |
|
2226 | (from `getbatch`), however the current approach has been a successful | |
|
2227 | compromise for many years. | |
|
2228 | ||
|
2229 | At the time this comment is written, not using any "cache" file data at all | |
|
2230 | here would not be viable, as it would result in a very large amount of work | |
|
2231 | (equivalent to the previous `hg update` during the next status after an | |
|
2232 | update). | |
|
2233 | ||
|
2234 | [1] the current code cannot guarantee that the `mtime` and `mode` | |
|
2235 | are correct, but the result is "okay in practice". | |
|
2236 | (see the comment in `batchget`) | |
|
2237 | ||
|
2238 | [2] using nano-second precision can greatly help here because it makes the | |
|
2239 | "different write with same mtime" issue virtually vanish. However, | |
|
2240 | dirstate v1 cannot store such precision and a bunch of python-runtime, | |
|
2241 | operating-system and filesystem parts do not provide us with such | |
|
2242 | precision, so we have to operate as if it wasn't available.""" | |
|
2243 | ambiguous_mtime: FileData = {} | |
|
2244 | now = timestamp.get_fs_now(repo.vfs) | |
|
2245 | if fs_now_result is None: | |
|
2246 | # we can't write to the FS, so we won't actually update | |
|
2247 | # the dirstate content anyway, no need to put cache | |
|
2248 | # information. | |
|
2249 | return None | |
|
2250 | else: | |
|
2251 | now_sec = now[0] | |
|
2252 | now, timed_out = fs_now_result | |
|
2253 | if timed_out: | |
|
2254 | fast_enough_fs = False | |
|
2255 | for f, m in file_data.items(): | |
|
2256 | if m is not None and m[2][0] >= now_sec: | |
|
2257 | ambiguous_mtime[f] = (m[0], m[1], None) | |
|
2258 | for f, m in ambiguous_mtime.items(): | |
|
2259 | file_data[f] = m | |
|
2260 | return file_data | |
|
2261 | ||
|
2262 | ||
|
2256 | 2263 | def merge(ctx, labels=None, force=False, wc=None): |
|
2257 | 2264 | """Merge another topological branch into the working copy. |
|
2258 | 2265 |
General Comments 0
You need to be logged in to leave comments.
Login now