branching: merge stable into default
Raphaël Gomès
r49646:834c9382 merge default
@@ -1,1335 +1,1335 b''
1 1 # aws.py - Automation code for Amazon Web Services
2 2 #
3 3 # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # no-check-code because Python 3 native.
9 9
10 10 import contextlib
11 11 import copy
12 12 import hashlib
13 13 import json
14 14 import os
15 15 import pathlib
16 16 import subprocess
17 17 import time
18 18
19 19 import boto3
20 20 import botocore.exceptions
21 21
22 22 from .linux import BOOTSTRAP_DEBIAN
23 23 from .ssh import (
24 24 exec_command as ssh_exec_command,
25 25 wait_for_ssh,
26 26 )
27 27 from .winrm import (
28 28 run_powershell,
29 29 wait_for_winrm,
30 30 )
31 31
32 32
33 33 SOURCE_ROOT = pathlib.Path(
34 34 os.path.abspath(__file__)
35 35 ).parent.parent.parent.parent
36 36
37 37 INSTALL_WINDOWS_DEPENDENCIES = (
38 38 SOURCE_ROOT / 'contrib' / 'install-windows-dependencies.ps1'
39 39 )
40 40
41 41
42 42 INSTANCE_TYPES_WITH_STORAGE = {
43 43 'c5d',
44 44 'd2',
45 45 'h1',
46 46 'i3',
47 47 'm5ad',
48 48 'm5d',
49 49 'r5d',
50 50 'r5ad',
51 51 'x1',
52 52 'z1d',
53 53 }
54 54
55 55
56 56 AMAZON_ACCOUNT_ID = '801119661308'
57 57 DEBIAN_ACCOUNT_ID = '379101102735'
58 58 DEBIAN_ACCOUNT_ID_2 = '136693071363'
59 59 UBUNTU_ACCOUNT_ID = '099720109477'
60 60
61 61
62 62 WINDOWS_BASE_IMAGE_NAME = 'Windows_Server-2019-English-Full-Base-*'
63 63
64 64
65 65 KEY_PAIRS = {
66 66 'automation',
67 67 }
68 68
69 69
70 70 SECURITY_GROUPS = {
71 71 'linux-dev-1': {
72 72 'description': 'Mercurial Linux instances that perform build/test automation',
73 73 'ingress': [
74 74 {
75 75 'FromPort': 22,
76 76 'ToPort': 22,
77 77 'IpProtocol': 'tcp',
78 78 'IpRanges': [
79 79 {
80 80 'CidrIp': '0.0.0.0/0',
81 81 'Description': 'SSH from entire Internet',
82 82 },
83 83 ],
84 84 },
85 85 ],
86 86 },
87 87 'windows-dev-1': {
88 88 'description': 'Mercurial Windows instances that perform build automation',
89 89 'ingress': [
90 90 {
91 91 'FromPort': 22,
92 92 'ToPort': 22,
93 93 'IpProtocol': 'tcp',
94 94 'IpRanges': [
95 95 {
96 96 'CidrIp': '0.0.0.0/0',
97 97 'Description': 'SSH from entire Internet',
98 98 },
99 99 ],
100 100 },
101 101 {
102 102 'FromPort': 3389,
103 103 'ToPort': 3389,
104 104 'IpProtocol': 'tcp',
105 105 'IpRanges': [
106 106 {
107 107 'CidrIp': '0.0.0.0/0',
108 108 'Description': 'RDP from entire Internet',
109 109 },
110 110 ],
111 111 },
112 112 {
113 113 'FromPort': 5985,
114 114 'ToPort': 5986,
115 115 'IpProtocol': 'tcp',
116 116 'IpRanges': [
117 117 {
118 118 'CidrIp': '0.0.0.0/0',
119 119 'Description': 'PowerShell Remoting (Windows Remote Management)',
120 120 },
121 121 ],
122 122 },
123 123 ],
124 124 },
125 125 }
126 126
127 127
128 128 IAM_ROLES = {
129 129 'ephemeral-ec2-role-1': {
130 130 'description': 'Mercurial temporary EC2 instances',
131 131 'policy_arns': [
132 132 'arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM',
133 133 ],
134 134 },
135 135 }
136 136
137 137
138 138 ASSUME_ROLE_POLICY_DOCUMENT = '''
139 139 {
140 140 "Version": "2012-10-17",
141 141 "Statement": [
142 142 {
143 143 "Effect": "Allow",
144 144 "Principal": {
145 145 "Service": "ec2.amazonaws.com"
146 146 },
147 147 "Action": "sts:AssumeRole"
148 148 }
149 149 ]
150 150 }
151 151 '''.strip()
152 152
153 153
154 154 IAM_INSTANCE_PROFILES = {
155 155 'ephemeral-ec2-1': {
156 156 'roles': [
157 157 'ephemeral-ec2-role-1',
158 158 ],
159 159 }
160 160 }
161 161
162 162
163 163 # User Data for Windows EC2 instance. Mainly used to set the password
164 164 # and configure WinRM.
165 165 # Inspired by the User Data script used by Packer
166 166 # (from https://www.packer.io/intro/getting-started/build-image.html).
167 167 WINDOWS_USER_DATA = r'''
168 168 <powershell>
169 169
170 170 # TODO enable this once we figure out what is failing.
171 171 #$ErrorActionPreference = "stop"
172 172
173 173 # Set administrator password
174 174 net user Administrator "%s"
175 175 wmic useraccount where "name='Administrator'" set PasswordExpires=FALSE
176 176
177 177 # First, make sure WinRM can't be connected to
178 178 netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new enable=yes action=block
179 179
180 180 # Delete any existing WinRM listeners
181 181 winrm delete winrm/config/listener?Address=*+Transport=HTTP 2>$Null
182 182 winrm delete winrm/config/listener?Address=*+Transport=HTTPS 2>$Null
183 183
184 184 # Create a new WinRM listener and configure
185 185 winrm create winrm/config/listener?Address=*+Transport=HTTP
186 186 winrm set winrm/config/winrs '@{MaxMemoryPerShellMB="0"}'
187 187 winrm set winrm/config '@{MaxTimeoutms="7200000"}'
188 188 winrm set winrm/config/service '@{AllowUnencrypted="true"}'
189 189 winrm set winrm/config/service '@{MaxConcurrentOperationsPerUser="12000"}'
190 190 winrm set winrm/config/service/auth '@{Basic="true"}'
191 191 winrm set winrm/config/client/auth '@{Basic="true"}'
192 192
193 193 # Configure UAC to allow privilege elevation in remote shells
194 194 $Key = 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System'
195 195 $Setting = 'LocalAccountTokenFilterPolicy'
196 196 Set-ItemProperty -Path $Key -Name $Setting -Value 1 -Force
197 197
198 198 # Avoid long usernames in the temp directory path because the '~' causes extra quoting in ssh output
199 199 [System.Environment]::SetEnvironmentVariable('TMP', 'C:\Temp', [System.EnvironmentVariableTarget]::User)
200 200 [System.Environment]::SetEnvironmentVariable('TEMP', 'C:\Temp', [System.EnvironmentVariableTarget]::User)
201 201
202 202 # Configure and restart the WinRM Service; Enable the required firewall exception
203 203 Stop-Service -Name WinRM
204 204 Set-Service -Name WinRM -StartupType Automatic
205 205 netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new action=allow localip=any remoteip=any
206 206 Start-Service -Name WinRM
207 207
208 208 # Disable firewall on private network interfaces so prompts don't appear.
209 209 Set-NetFirewallProfile -Name private -Enabled false
210 210 </powershell>
211 211 '''.lstrip()
212 212
213 213
214 214 WINDOWS_BOOTSTRAP_POWERSHELL = '''
215 215 Write-Output "installing PowerShell dependencies"
216 216 Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force
217 217 Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
218 218 Install-Module -Name OpenSSHUtils -RequiredVersion 0.0.2.0
219 219
220 220 Write-Output "installing OpenSSH server"
221 221 Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
222 222 # Various tools will attempt to use older versions of .NET. So we enable
223 223 # the feature that provides them so it doesn't have to be auto-enabled
224 224 # later.
225 225 Write-Output "enabling .NET Framework feature"
226 226 Install-WindowsFeature -Name Net-Framework-Core
227 227 '''
228 228
229 229
230 230 class AWSConnection:
231 231 """Manages the state of a connection with AWS."""
232 232
233 233 def __init__(self, automation, region: str, ensure_ec2_state: bool = True):
234 234 self.automation = automation
235 235 self.local_state_path = automation.state_path
236 236
237 237 self.prefix = 'hg-'
238 238
239 239 self.session = boto3.session.Session(region_name=region)
240 240 self.ec2client = self.session.client('ec2')
241 241 self.ec2resource = self.session.resource('ec2')
242 242 self.iamclient = self.session.client('iam')
243 243 self.iamresource = self.session.resource('iam')
244 244 self.security_groups = {}
245 245
246 246 if ensure_ec2_state:
247 247 ensure_key_pairs(automation.state_path, self.ec2resource)
248 248 self.security_groups = ensure_security_groups(self.ec2resource)
249 249 ensure_iam_state(self.iamclient, self.iamresource)
250 250
251 251 def key_pair_path_private(self, name):
252 252 """Path to a key pair private key file."""
253 253 return self.local_state_path / 'keys' / ('keypair-%s' % name)
254 254
255 255 def key_pair_path_public(self, name):
256 256 return self.local_state_path / 'keys' / ('keypair-%s.pub' % name)
257 257
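A minimal usage sketch (hedged: `automation` is an `HGAutomation` instance from the package root, and the region value simply mirrors the CLI default rather than a requirement):

    conn = AWSConnection(automation, region='us-west-2')
    # Local private key file for the shared 'automation' key pair.
    print(conn.key_pair_path_private('automation'))
    # Resolved EC2 SecurityGroup resources, keyed by unprefixed name.
    print(conn.security_groups['linux-dev-1'].id)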
258 258
259 259 def rsa_key_fingerprint(p: pathlib.Path):
260 260 """Compute the fingerprint of an RSA private key."""
261 261
262 262 # TODO use rsa package.
263 263 res = subprocess.run(
264 264 [
265 265 'openssl',
266 266 'pkcs8',
267 267 '-in',
268 268 str(p),
269 269 '-nocrypt',
270 270 '-topk8',
271 271 '-outform',
272 272 'DER',
273 273 ],
274 274 capture_output=True,
275 275 check=True,
276 276 )
277 277
278 278 sha1 = hashlib.sha1(res.stdout).hexdigest()
279 279 return ':'.join(a + b for a, b in zip(sha1[::2], sha1[1::2]))
280 280
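The TODO above asks for a library-based replacement of the `openssl` subprocess. A sketch of the equivalent computation using the third-party `cryptography` package (an assumption; this module does not currently depend on it):

    import hashlib

    from cryptography.hazmat.primitives import serialization

    def rsa_key_fingerprint_py(p):
        # Re-serialize the PEM private key as unencrypted PKCS#8 DER,
        # matching `openssl pkcs8 -topk8 -nocrypt -outform DER` above.
        with open(p, 'rb') as fh:
            key = serialization.load_pem_private_key(fh.read(), password=None)
        der = key.private_bytes(
            serialization.Encoding.DER,
            serialization.PrivateFormat.PKCS8,
            serialization.NoEncryption(),
        )
        sha1 = hashlib.sha1(der).hexdigest()
        return ':'.join(a + b for a, b in zip(sha1[::2], sha1[1::2]))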
281 281
282 282 def ensure_key_pairs(state_path: pathlib.Path, ec2resource, prefix='hg-'):
283 283 remote_existing = {}
284 284
285 285 for kpi in ec2resource.key_pairs.all():
286 286 if kpi.name.startswith(prefix):
287 287 remote_existing[kpi.name[len(prefix) :]] = kpi.key_fingerprint
288 288
289 289 # Validate that we have these keys locally.
290 290 key_path = state_path / 'keys'
291 291 key_path.mkdir(exist_ok=True, mode=0o700)
292 292
293 293 def remove_remote(name):
294 294 print('deleting key pair %s' % name)
295 295 key = ec2resource.KeyPair(name)
296 296 key.delete()
297 297
298 298 def remove_local(name):
299 299 pub_full = key_path / ('keypair-%s.pub' % name)
300 300 priv_full = key_path / ('keypair-%s' % name)
301 301
302 302 print('removing %s' % pub_full)
303 303 pub_full.unlink()
304 304 print('removing %s' % priv_full)
305 305 priv_full.unlink()
306 306
307 307 local_existing = {}
308 308
309 309 for f in sorted(os.listdir(key_path)):
310 310 if not f.startswith('keypair-') or not f.endswith('.pub'):
311 311 continue
312 312
313 313 name = f[len('keypair-') : -len('.pub')]
314 314
315 315 pub_full = key_path / f
316 316 priv_full = key_path / ('keypair-%s' % name)
317 317
318 318 with open(pub_full, 'r', encoding='ascii') as fh:
319 319 data = fh.read()
320 320
321 321 if not data.startswith('ssh-rsa '):
322 322 print(
323 323 'unexpected format for key pair file: %s; removing' % pub_full
324 324 )
325 325 pub_full.unlink()
326 326 priv_full.unlink()
327 327 continue
328 328
329 329 local_existing[name] = rsa_key_fingerprint(priv_full)
330 330
331 331 for name in sorted(set(remote_existing) | set(local_existing)):
332 332 if name not in local_existing:
333 333 actual = '%s%s' % (prefix, name)
334 334 print('remote key %s does not exist locally' % name)
335 335 remove_remote(actual)
336 336 del remote_existing[name]
337 337
338 338 elif name not in remote_existing:
339 339 print('local key %s does not exist remotely' % name)
340 340 remove_local(name)
341 341 del local_existing[name]
342 342
343 343 elif remote_existing[name] != local_existing[name]:
344 344 print(
345 345 'key fingerprint mismatch for %s; '
346 346 'removing from local and remote' % name
347 347 )
348 348 remove_local(name)
349 349 remove_remote('%s%s' % (prefix, name))
350 350 del local_existing[name]
351 351 del remote_existing[name]
352 352
353 353 missing = KEY_PAIRS - set(remote_existing)
354 354
355 355 for name in sorted(missing):
356 356 actual = '%s%s' % (prefix, name)
357 357 print('creating key pair %s' % actual)
358 358
359 359 priv_full = key_path / ('keypair-%s' % name)
360 360 pub_full = key_path / ('keypair-%s.pub' % name)
361 361
362 362 kp = ec2resource.create_key_pair(KeyName=actual)
363 363
364 364 with priv_full.open('w', encoding='ascii') as fh:
365 365 fh.write(kp.key_material)
366 366 fh.write('\n')
367 367
368 368 priv_full.chmod(0o0600)
369 369
370 370 # SSH public key can be extracted via `ssh-keygen`.
371 371 with pub_full.open('w', encoding='ascii') as fh:
372 372 subprocess.run(
373 373 ['ssh-keygen', '-y', '-f', str(priv_full)],
374 374 stdout=fh,
375 375 check=True,
376 376 )
377 377
378 378 pub_full.chmod(0o0600)
379 379
380 380
381 381 def delete_instance_profile(profile):
382 382 for role in profile.roles:
383 383 print(
384 384 'removing role %s from instance profile %s'
385 385 % (role.name, profile.name)
386 386 )
387 387 profile.remove_role(RoleName=role.name)
388 388
389 389 print('deleting instance profile %s' % profile.name)
390 390 profile.delete()
391 391
392 392
393 393 def ensure_iam_state(iamclient, iamresource, prefix='hg-'):
394 394 """Ensure IAM state is in sync with our canonical definition."""
395 395
396 396 remote_profiles = {}
397 397
398 398 for profile in iamresource.instance_profiles.all():
399 399 if profile.name.startswith(prefix):
400 400 remote_profiles[profile.name[len(prefix) :]] = profile
401 401
402 402 for name in sorted(set(remote_profiles) - set(IAM_INSTANCE_PROFILES)):
403 403 delete_instance_profile(remote_profiles[name])
404 404 del remote_profiles[name]
405 405
406 406 remote_roles = {}
407 407
408 408 for role in iamresource.roles.all():
409 409 if role.name.startswith(prefix):
410 410 remote_roles[role.name[len(prefix) :]] = role
411 411
412 412 for name in sorted(set(remote_roles) - set(IAM_ROLES)):
413 413 role = remote_roles[name]
414 414
415 415 print('removing role %s' % role.name)
416 416 role.delete()
417 417 del remote_roles[name]
418 418
419 419 # We've purged remote state that doesn't belong. Create missing
420 420 # instance profiles and roles.
421 421 for name in sorted(set(IAM_INSTANCE_PROFILES) - set(remote_profiles)):
422 422 actual = '%s%s' % (prefix, name)
423 423 print('creating IAM instance profile %s' % actual)
424 424
425 425 profile = iamresource.create_instance_profile(
426 426 InstanceProfileName=actual
427 427 )
428 428 remote_profiles[name] = profile
429 429
430 430 waiter = iamclient.get_waiter('instance_profile_exists')
431 431 waiter.wait(InstanceProfileName=actual)
432 432 print('IAM instance profile %s is available' % actual)
433 433
434 434 for name in sorted(set(IAM_ROLES) - set(remote_roles)):
435 435 entry = IAM_ROLES[name]
436 436
437 437 actual = '%s%s' % (prefix, name)
438 438 print('creating IAM role %s' % actual)
439 439
440 440 role = iamresource.create_role(
441 441 RoleName=actual,
442 442 Description=entry['description'],
443 443 AssumeRolePolicyDocument=ASSUME_ROLE_POLICY_DOCUMENT,
444 444 )
445 445
446 446 waiter = iamclient.get_waiter('role_exists')
447 447 waiter.wait(RoleName=actual)
448 448 print('IAM role %s is available' % actual)
449 449
450 450 remote_roles[name] = role
451 451
452 452 for arn in entry['policy_arns']:
453 453 print('attaching policy %s to %s' % (arn, role.name))
454 454 role.attach_policy(PolicyArn=arn)
455 455
456 456 # Now reconcile state of profiles.
457 457 for name, meta in sorted(IAM_INSTANCE_PROFILES.items()):
458 458 profile = remote_profiles[name]
459 459 wanted = {'%s%s' % (prefix, role) for role in meta['roles']}
460 460 have = {role.name for role in profile.roles}
461 461
462 462 for role in sorted(have - wanted):
463 463 print('removing role %s from %s' % (role, profile.name))
464 464 profile.remove_role(RoleName=role)
465 465
466 466 for role in sorted(wanted - have):
467 467 print('adding role %s to %s' % (role, profile.name))
468 468 profile.add_role(RoleName=role)
469 469
470 470
471 471 def find_image(ec2resource, owner_id, name, reverse_sort_field=None):
472 472 """Find an AMI by its owner ID and name."""
473 473
474 474 images = ec2resource.images.filter(
475 475 Filters=[
476 476 {
477 477 'Name': 'owner-id',
478 478 'Values': [owner_id],
479 479 },
480 480 {
481 481 'Name': 'state',
482 482 'Values': ['available'],
483 483 },
484 484 {
485 485 'Name': 'image-type',
486 486 'Values': ['machine'],
487 487 },
488 488 {
489 489 'Name': 'name',
490 490 'Values': [name],
491 491 },
492 492 ]
493 493 )
494 494
495 495 if reverse_sort_field:
496 496 images = sorted(
497 497 images,
498 498 key=lambda image: getattr(image, reverse_sort_field),
499 499 reverse=True,
500 500 )
501 501
502 502 for image in images:
503 503 return image
504 504
505 505 raise Exception('unable to find image for %s' % name)
506 506
507 507
508 508 def ensure_security_groups(ec2resource, prefix='hg-'):
509 509 """Ensure all necessary Mercurial security groups are present.
510 510
511 511 All security groups are prefixed with ``hg-`` by default. Any security
512 512 groups that have this prefix but aren't in our list are deleted.
513 513 """
514 514 existing = {}
515 515
516 516 for group in ec2resource.security_groups.all():
517 517 if group.group_name.startswith(prefix):
518 518 existing[group.group_name[len(prefix) :]] = group
519 519
520 520 purge = set(existing) - set(SECURITY_GROUPS)
521 521
522 522 for name in sorted(purge):
523 523 group = existing[name]
524 524 print('removing legacy security group: %s' % group.group_name)
525 525 group.delete()
526 526
527 527 security_groups = {}
528 528
529 529 for name, group in sorted(SECURITY_GROUPS.items()):
530 530 if name in existing:
531 531 security_groups[name] = existing[name]
532 532 continue
533 533
534 534 actual = '%s%s' % (prefix, name)
535 535 print('adding security group %s' % actual)
536 536
537 537 group_res = ec2resource.create_security_group(
538 538 Description=group['description'],
539 539 GroupName=actual,
540 540 )
541 541
542 542 group_res.authorize_ingress(
543 543 IpPermissions=group['ingress'],
544 544 )
545 545
546 546 security_groups[name] = group_res
547 547
548 548 return security_groups
549 549
550 550
551 551 def terminate_ec2_instances(ec2resource, prefix='hg-'):
552 552 """Terminate all EC2 instances managed by us."""
553 553 waiting = []
554 554
555 555 for instance in ec2resource.instances.all():
556 556 if instance.state['Name'] == 'terminated':
557 557 continue
558 558
559 559 for tag in instance.tags or []:
560 560 if tag['Key'] == 'Name' and tag['Value'].startswith(prefix):
561 561 print('terminating %s' % instance.id)
562 562 instance.terminate()
563 563 waiting.append(instance)
564 564
565 565 for instance in waiting:
566 566 instance.wait_until_terminated()
567 567
568 568
569 569 def remove_resources(c, prefix='hg-'):
570 570 """Purge all of our resources in this EC2 region."""
571 571 ec2resource = c.ec2resource
572 572 iamresource = c.iamresource
573 573
574 574 terminate_ec2_instances(ec2resource, prefix=prefix)
575 575
576 576 for image in ec2resource.images.filter(Owners=['self']):
577 577 if image.name.startswith(prefix):
578 578 remove_ami(ec2resource, image)
579 579
580 580 for group in ec2resource.security_groups.all():
581 581 if group.group_name.startswith(prefix):
582 582 print('removing security group %s' % group.group_name)
583 583 group.delete()
584 584
585 585 for profile in iamresource.instance_profiles.all():
586 586 if profile.name.startswith(prefix):
587 587 delete_instance_profile(profile)
588 588
589 589 for role in iamresource.roles.all():
590 590 if role.name.startswith(prefix):
591 591 for p in role.attached_policies.all():
592 592 print('detaching policy %s from %s' % (p.arn, role.name))
593 593 role.detach_policy(PolicyArn=p.arn)
594 594
595 595 print('removing role %s' % role.name)
596 596 role.delete()
597 597
598 598
599 599 def wait_for_ip_addresses(instances):
600 600 """Wait for the public IP addresses of an iterable of instances."""
601 601 for instance in instances:
602 602 while True:
603 603 if not instance.public_ip_address:
604 604 time.sleep(2)
605 605 instance.reload()
606 606 continue
607 607
608 608 print(
609 609 'public IP address for %s: %s'
610 610 % (instance.id, instance.public_ip_address)
611 611 )
612 612 break
613 613
614 614
615 615 def remove_ami(ec2resource, image):
616 616 """Remove an AMI and its underlying snapshots."""
617 617 snapshots = []
618 618
619 619 for device in image.block_device_mappings:
620 620 if 'Ebs' in device:
621 621 snapshots.append(ec2resource.Snapshot(device['Ebs']['SnapshotId']))
622 622
623 623 print('deregistering %s' % image.id)
624 624 image.deregister()
625 625
626 626 for snapshot in snapshots:
627 627 print('deleting snapshot %s' % snapshot.id)
628 628 snapshot.delete()
629 629
630 630
631 631 def wait_for_ssm(ssmclient, instances):
632 632 """Wait for SSM to come online for an iterable of instance IDs."""
633 633 while True:
634 634 res = ssmclient.describe_instance_information(
635 635 Filters=[
636 636 {
637 637 'Key': 'InstanceIds',
638 638 'Values': [i.id for i in instances],
639 639 },
640 640 ],
641 641 )
642 642
643 643 available = len(res['InstanceInformationList'])
644 644 wanted = len(instances)
645 645
646 646 print('%d/%d instances available in SSM' % (available, wanted))
647 647
648 648 if available == wanted:
649 649 return
650 650
651 651 time.sleep(2)
652 652
653 653
654 654 def run_ssm_command(ssmclient, instances, document_name, parameters):
655 655 """Run a PowerShell script on an EC2 instance."""
656 656
657 657 res = ssmclient.send_command(
658 658 InstanceIds=[i.id for i in instances],
659 659 DocumentName=document_name,
660 660 Parameters=parameters,
661 661 CloudWatchOutputConfig={
662 662 'CloudWatchOutputEnabled': True,
663 663 },
664 664 )
665 665
666 666 command_id = res['Command']['CommandId']
667 667
668 668 for instance in instances:
669 669 while True:
670 670 try:
671 671 res = ssmclient.get_command_invocation(
672 672 CommandId=command_id,
673 673 InstanceId=instance.id,
674 674 )
675 675 except botocore.exceptions.ClientError as e:
676 676 if e.response['Error']['Code'] == 'InvocationDoesNotExist':
677 677 print('could not find SSM command invocation; waiting')
678 678 time.sleep(1)
679 679 continue
680 680 else:
681 681 raise
682 682
683 683 if res['Status'] == 'Success':
684 684 break
685 685 elif res['Status'] in ('Pending', 'InProgress', 'Delayed'):
686 686 time.sleep(2)
687 687 else:
688 688 raise Exception(
689 689 'command failed on %s: %s' % (instance.id, res['Status'])
690 690 )
691 691
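The Windows bootstrap later in this file invokes this with the stock `AWS-RunPowerShellScript` document; a condensed sketch of that call (the script body here is illustrative only):

    run_ssm_command(
        ssmclient,
        instances,
        'AWS-RunPowerShellScript',
        {'commands': ['Write-Output "hello from SSM"']},
    )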
692 692
693 693 @contextlib.contextmanager
694 694 def temporary_ec2_instances(ec2resource, config):
695 695 """Create temporary EC2 instances.
696 696
697 697 This is a proxy to ``ec2resource.create_instances(**config)`` that takes care of
698 698 managing the lifecycle of the instances.
699 699
700 700 When the context manager exits, the instances are terminated.
701 701
702 702 The context manager evaluates to the list of data structures
703 703 describing each created instance. The instances may not be available
704 704 for work immediately: it is up to the caller to wait for the instance
705 705 to start responding.
706 706 """
707 707
708 708 ids = None
709 709
710 710 try:
711 711 res = ec2resource.create_instances(**config)
712 712
713 713 ids = [i.id for i in res]
714 714 print('started instances: %s' % ' '.join(ids))
715 715
716 716 yield res
717 717 finally:
718 718 if ids:
719 719 print('terminating instances: %s' % ' '.join(ids))
720 720 for instance in res:
721 721 instance.terminate()
722 722 print('terminated %d instances' % len(ids))
723 723
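A hedged usage sketch; the config keys mirror those assembled by ``ensure_linux_dev_ami()`` below, and the AMI ID is a placeholder:

    config = {
        'ImageId': 'ami-0123456789abcdef0',  # placeholder, not a real AMI
        'InstanceType': 't3.2xlarge',
        'KeyName': 'hg-automation',
        'MaxCount': 1,
        'MinCount': 1,
    }
    with temporary_ec2_instances(c.ec2resource, config) as instances:
        wait_for_ip_addresses(instances)
    # Instances are terminated when the with-block exits.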
724 724
725 725 @contextlib.contextmanager
726 726 def create_temp_windows_ec2_instances(
727 727 c: AWSConnection, config, bootstrap: bool = False
728 728 ):
729 729 """Create temporary Windows EC2 instances.
730 730
731 731 This is a higher-level wrapper around ``temporary_ec2_instances()`` that
732 732 configures the Windows instance for Windows Remote Management. The emitted
733 733 instances will have a ``winrm_client`` attribute containing a
734 734 ``pypsrp.client.Client`` instance bound to the instance.
735 735 """
736 736 if 'IamInstanceProfile' in config:
737 737 raise ValueError('IamInstanceProfile cannot be provided in config')
738 738 if 'UserData' in config:
739 739 raise ValueError('UserData cannot be provided in config')
740 740
741 741 password = c.automation.default_password()
742 742
743 743 config = copy.deepcopy(config)
744 744 config['IamInstanceProfile'] = {
745 745 'Name': 'hg-ephemeral-ec2-1',
746 746 }
747 747 config.setdefault('TagSpecifications', []).append(
748 748 {
749 749 'ResourceType': 'instance',
750 750 'Tags': [{'Key': 'Name', 'Value': 'hg-temp-windows'}],
751 751 }
752 752 )
753 753
754 754 if bootstrap:
755 755 config['UserData'] = WINDOWS_USER_DATA % password
756 756
757 757 with temporary_ec2_instances(c.ec2resource, config) as instances:
758 758 wait_for_ip_addresses(instances)
759 759
760 760 print('waiting for Windows Remote Management service...')
761 761
762 762 for instance in instances:
763 763 client = wait_for_winrm(
764 764 instance.public_ip_address, 'Administrator', password
765 765 )
766 766 print('established WinRM connection to %s' % instance.id)
767 767 instance.winrm_client = client
768 768
769 769 yield instances
770 770
771 771
772 772 def resolve_fingerprint(fingerprint):
773 773 fingerprint = json.dumps(fingerprint, sort_keys=True)
774 774 return hashlib.sha256(fingerprint.encode('utf-8')).hexdigest()
775 775
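Since ``json.dumps(..., sort_keys=True)`` canonicalizes key order, logically identical inputs produce the same fingerprint:

    >>> resolve_fingerprint({'b': 2, 'a': 1}) == resolve_fingerprint({'a': 1, 'b': 2})
    True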
776 776
777 777 def find_and_reconcile_image(ec2resource, name, fingerprint):
778 778 """Attempt to find an existing EC2 AMI with a name and fingerprint.
779 779
780 780 If an image with the specified fingerprint is found, it is returned.
781 781 Otherwise None is returned.
782 782
783 783 Existing images for the specified name that don't have the specified
784 784 fingerprint or are missing required metadata are deleted.
785 785 """
786 786 # Find existing AMIs with this name and delete the ones that are invalid.
787 787 # Store a reference to a good image so it can be returned once the
788 788 # image state is reconciled.
789 789 images = ec2resource.images.filter(
790 790 Filters=[{'Name': 'name', 'Values': [name]}]
791 791 )
792 792
793 793 existing_image = None
794 794
795 795 for image in images:
796 796 if image.tags is None:
797 797 print(
798 798 'image %s for %s lacks required tags; removing'
799 799 % (image.id, image.name)
800 800 )
801 801 remove_ami(ec2resource, image)
802 802 else:
803 803 tags = {t['Key']: t['Value'] for t in image.tags}
804 804
805 805 if tags.get('HGIMAGEFINGERPRINT') == fingerprint:
806 806 existing_image = image
807 807 else:
808 808 print(
809 809 'image %s for %s has wrong fingerprint; removing'
810 810 % (image.id, image.name)
811 811 )
812 812 remove_ami(ec2resource, image)
813 813
814 814 return existing_image
815 815
816 816
817 817 def create_ami_from_instance(
818 818 ec2client, instance, name, description, fingerprint
819 819 ):
820 820 """Create an AMI from a running instance.
821 821
822 822 Returns the ``ec2resource.Image`` representing the created AMI.
823 823 """
824 824 instance.stop()
825 825
826 826 ec2client.get_waiter('instance_stopped').wait(
827 827 InstanceIds=[instance.id],
828 828 WaiterConfig={
829 829 'Delay': 5,
830 830 },
831 831 )
832 832 print('%s is stopped' % instance.id)
833 833
834 834 image = instance.create_image(
835 835 Name=name,
836 836 Description=description,
837 837 )
838 838
839 839 image.create_tags(
840 840 Tags=[
841 841 {
842 842 'Key': 'HGIMAGEFINGERPRINT',
843 843 'Value': fingerprint,
844 844 },
845 845 ]
846 846 )
847 847
848 848 print('waiting for image %s' % image.id)
849 849
850 850 ec2client.get_waiter('image_available').wait(
851 851 ImageIds=[image.id],
852 852 )
853 853
854 854 print('image %s available as %s' % (image.id, image.name))
855 855
856 856 return image
857 857
858 858
859 859 def ensure_linux_dev_ami(c: AWSConnection, distro='debian10', prefix='hg-'):
860 860 """Ensures a Linux development AMI is available and up-to-date.
861 861
862 862 Returns an ``ec2.Image`` of either an existing AMI or a newly-built one.
863 863 """
864 864 ec2client = c.ec2client
865 865 ec2resource = c.ec2resource
866 866
867 867 name = '%s%s-%s' % (prefix, 'linux-dev', distro)
868 868
869 869 if distro == 'debian9':
870 870 image = find_image(
871 871 ec2resource,
872 872 DEBIAN_ACCOUNT_ID,
873 873 'debian-stretch-hvm-x86_64-gp2-2019-09-08-17994',
874 874 )
875 875 ssh_username = 'admin'
876 876 elif distro == 'debian10':
877 877 image = find_image(
878 878 ec2resource,
879 879 DEBIAN_ACCOUNT_ID_2,
880 880 'debian-10-amd64-20190909-10',
881 881 )
882 882 ssh_username = 'admin'
883 883 elif distro == 'ubuntu18.04':
884 884 image = find_image(
885 885 ec2resource,
886 886 UBUNTU_ACCOUNT_ID,
887 887 'ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20190918',
888 888 )
889 889 ssh_username = 'ubuntu'
890 890 elif distro == 'ubuntu19.04':
891 891 image = find_image(
892 892 ec2resource,
893 893 UBUNTU_ACCOUNT_ID,
894 894 'ubuntu/images/hvm-ssd/ubuntu-disco-19.04-amd64-server-20190918',
895 895 )
896 896 ssh_username = 'ubuntu'
897 897 else:
898 898 raise ValueError('unsupported Linux distro: %s' % distro)
899 899
900 900 config = {
901 901 'BlockDeviceMappings': [
902 902 {
903 903 'DeviceName': image.block_device_mappings[0]['DeviceName'],
904 904 'Ebs': {
905 905 'DeleteOnTermination': True,
906 906 'VolumeSize': 10,
907 'VolumeType': 'gp2',
907 'VolumeType': 'gp3',
908 908 },
909 909 },
910 910 ],
911 911 'EbsOptimized': True,
912 912 'ImageId': image.id,
913 913 'InstanceInitiatedShutdownBehavior': 'stop',
914 914 # 8 VCPUs for compiling Python.
915 915 'InstanceType': 't3.2xlarge',
916 916 'KeyName': '%sautomation' % prefix,
917 917 'MaxCount': 1,
918 918 'MinCount': 1,
919 919 'SecurityGroupIds': [c.security_groups['linux-dev-1'].id],
920 920 }
921 921
922 922 requirements2_path = (
923 923 pathlib.Path(__file__).parent.parent / 'linux-requirements-py2.txt'
924 924 )
925 925 requirements3_path = (
926 926 pathlib.Path(__file__).parent.parent / 'linux-requirements-py3.txt'
927 927 )
928 928 requirements35_path = (
929 929 pathlib.Path(__file__).parent.parent / 'linux-requirements-py3.5.txt'
930 930 )
931 931 with requirements2_path.open('r', encoding='utf-8') as fh:
932 932 requirements2 = fh.read()
933 933 with requirements3_path.open('r', encoding='utf-8') as fh:
934 934 requirements3 = fh.read()
935 935 with requirements35_path.open('r', encoding='utf-8') as fh:
936 936 requirements35 = fh.read()
937 937
938 938 # Compute a deterministic fingerprint to determine whether image needs to
939 939 # be regenerated.
940 940 fingerprint = resolve_fingerprint(
941 941 {
942 942 'instance_config': config,
943 943 'bootstrap_script': BOOTSTRAP_DEBIAN,
944 944 'requirements_py2': requirements2,
945 945 'requirements_py3': requirements3,
946 946 'requirements_py35': requirements35,
947 947 }
948 948 )
949 949
950 950 existing_image = find_and_reconcile_image(ec2resource, name, fingerprint)
951 951
952 952 if existing_image:
953 953 return existing_image
954 954
955 955 print('no suitable %s image found; creating one...' % name)
956 956
957 957 with temporary_ec2_instances(ec2resource, config) as instances:
958 958 wait_for_ip_addresses(instances)
959 959
960 960 instance = instances[0]
961 961
962 962 client = wait_for_ssh(
963 963 instance.public_ip_address,
964 964 22,
965 965 username=ssh_username,
966 966 key_filename=str(c.key_pair_path_private('automation')),
967 967 )
968 968
969 969 home = '/home/%s' % ssh_username
970 970
971 971 with client:
972 972 print('connecting to SSH server')
973 973 sftp = client.open_sftp()
974 974
975 975 print('uploading bootstrap files')
976 976 with sftp.open('%s/bootstrap' % home, 'wb') as fh:
977 977 fh.write(BOOTSTRAP_DEBIAN)
978 978 fh.chmod(0o0700)
979 979
980 980 with sftp.open('%s/requirements-py2.txt' % home, 'wb') as fh:
981 981 fh.write(requirements2)
982 982 fh.chmod(0o0700)
983 983
984 984 with sftp.open('%s/requirements-py3.txt' % home, 'wb') as fh:
985 985 fh.write(requirements3)
986 986 fh.chmod(0o0700)
987 987
988 988 with sftp.open('%s/requirements-py3.5.txt' % home, 'wb') as fh:
989 989 fh.write(requirements35)
990 990 fh.chmod(0o0700)
991 991
992 992 print('executing bootstrap')
993 993 chan, stdin, stdout = ssh_exec_command(
994 994 client, '%s/bootstrap' % home
995 995 )
996 996 stdin.close()
997 997
998 998 for line in stdout:
999 999 print(line, end='')
1000 1000
1001 1001 res = chan.recv_exit_status()
1002 1002 if res:
1003 1003 raise Exception('non-0 exit from bootstrap: %d' % res)
1004 1004
1005 1005 print(
1006 1006 'bootstrap completed; stopping %s to create %s'
1007 1007 % (instance.id, name)
1008 1008 )
1009 1009
1010 1010 return create_ami_from_instance(
1011 1011 ec2client,
1012 1012 instance,
1013 1013 name,
1014 1014 'Mercurial Linux development environment',
1015 1015 fingerprint,
1016 1016 )
1017 1017
1018 1018
1019 1019 @contextlib.contextmanager
1020 1020 def temporary_linux_dev_instances(
1021 1021 c: AWSConnection,
1022 1022 image,
1023 1023 instance_type,
1024 1024 prefix='hg-',
1025 1025 ensure_extra_volume=False,
1026 1026 ):
1027 1027 """Create temporary Linux development EC2 instances.
1028 1028
1029 1029 Context manager resolves to a list of ``ec2.Instance`` that were created
1030 1030 and are running.
1031 1031
1032 1032 ``ensure_extra_volume`` can be set to ``True`` to require that instances
1033 1033 have a 2nd storage volume available other than the primary AMI volume.
1034 1034 For instance types with instance storage, this does nothing special.
1035 1035 But for instance types without instance storage, an additional EBS volume
1036 1036 will be added to the instance.
1037 1037
1038 1038 Instances have an ``ssh_client`` attribute containing a paramiko SSHClient
1039 1039 instance bound to the instance.
1040 1040
1041 1041 Instances have an ``ssh_private_key_path`` attribute containing the
1042 1042 str path to the SSH private key to connect to the instance.
1043 1043 """
1044 1044
1045 1045 block_device_mappings = [
1046 1046 {
1047 1047 'DeviceName': image.block_device_mappings[0]['DeviceName'],
1048 1048 'Ebs': {
1049 1049 'DeleteOnTermination': True,
1050 1050 'VolumeSize': 12,
1051 'VolumeType': 'gp2',
1051 'VolumeType': 'gp3',
1052 1052 },
1053 1053 }
1054 1054 ]
1055 1055
1056 1056 # This is not an exhaustive list of instance types having instance storage.
1057 1057 # But it covers the instance type families this automation currently uses.
1058 1058 if ensure_extra_volume and not instance_type.startswith(
1059 1059 tuple(INSTANCE_TYPES_WITH_STORAGE)
1060 1060 ):
1061 1061 main_device = block_device_mappings[0]['DeviceName']
1062 1062
1063 1063 if main_device == 'xvda':
1064 1064 second_device = 'xvdb'
1065 1065 elif main_device == '/dev/sda1':
1066 1066 second_device = '/dev/sdb'
1067 1067 else:
1068 1068 raise ValueError(
1069 1069 'unhandled primary EBS device name: %s' % main_device
1070 1070 )
1071 1071
1072 1072 block_device_mappings.append(
1073 1073 {
1074 1074 'DeviceName': second_device,
1075 1075 'Ebs': {
1076 1076 'DeleteOnTermination': True,
1077 1077 'VolumeSize': 8,
1078 'VolumeType': 'gp2',
1078 'VolumeType': 'gp3',
1079 1079 },
1080 1080 }
1081 1081 )
1082 1082
1083 1083 config = {
1084 1084 'BlockDeviceMappings': block_device_mappings,
1085 1085 'EbsOptimized': True,
1086 1086 'ImageId': image.id,
1087 1087 'InstanceInitiatedShutdownBehavior': 'terminate',
1088 1088 'InstanceType': instance_type,
1089 1089 'KeyName': '%sautomation' % prefix,
1090 1090 'MaxCount': 1,
1091 1091 'MinCount': 1,
1092 1092 'SecurityGroupIds': [c.security_groups['linux-dev-1'].id],
1093 1093 }
1094 1094
1095 1095 with temporary_ec2_instances(c.ec2resource, config) as instances:
1096 1096 wait_for_ip_addresses(instances)
1097 1097
1098 1098 ssh_private_key_path = str(c.key_pair_path_private('automation'))
1099 1099
1100 1100 for instance in instances:
1101 1101 client = wait_for_ssh(
1102 1102 instance.public_ip_address,
1103 1103 22,
1104 1104 username='hg',
1105 1105 key_filename=ssh_private_key_path,
1106 1106 )
1107 1107
1108 1108 instance.ssh_client = client
1109 1109 instance.ssh_private_key_path = ssh_private_key_path
1110 1110
1111 1111 try:
1112 1112 yield instances
1113 1113 finally:
1114 1114 for instance in instances:
1115 1115 instance.ssh_client.close()
1116 1116
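``run_tests_linux()`` in cli.py below shows the intended call pattern; condensed here (the command run over SSH is illustrative):

    with temporary_linux_dev_instances(
        c, image, 'c5.9xlarge', ensure_extra_volume=True
    ) as insts:
        # paramiko SSHClient bound to the instance.
        insts[0].ssh_client.exec_command('uname -a')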
1117 1117
1118 1118 def ensure_windows_dev_ami(
1119 1119 c: AWSConnection,
1120 1120 prefix='hg-',
1121 1121 base_image_name=WINDOWS_BASE_IMAGE_NAME,
1122 1122 ):
1123 1123 """Ensure Windows Development AMI is available and up-to-date.
1124 1124
1125 1125 If necessary, a modern AMI will be built by starting a temporary EC2
1126 1126 instance and bootstrapping it.
1127 1127
1128 1128 Obsolete AMIs will be deleted so there is only a single AMI having the
1129 1129 desired name.
1130 1130
1131 1131 Returns an ``ec2.Image`` of either an existing AMI or a newly-built
1132 1132 one.
1133 1133 """
1134 1134 ec2client = c.ec2client
1135 1135 ec2resource = c.ec2resource
1136 1136 ssmclient = c.session.client('ssm')
1137 1137
1138 1138 name = '%s%s' % (prefix, 'windows-dev')
1139 1139
1140 1140 image = find_image(
1141 1141 ec2resource,
1142 1142 AMAZON_ACCOUNT_ID,
1143 1143 base_image_name,
1144 1144 reverse_sort_field="name",
1145 1145 )
1146 1146
1147 1147 config = {
1148 1148 'BlockDeviceMappings': [
1149 1149 {
1150 1150 'DeviceName': '/dev/sda1',
1151 1151 'Ebs': {
1152 1152 'DeleteOnTermination': True,
1153 1153 'VolumeSize': 32,
1154 'VolumeType': 'gp2',
1154 'VolumeType': 'gp3',
1155 1155 },
1156 1156 }
1157 1157 ],
1158 1158 'ImageId': image.id,
1159 1159 'InstanceInitiatedShutdownBehavior': 'stop',
1160 'InstanceType': 't3.medium',
1160 'InstanceType': 'm6i.large',
1161 1161 'KeyName': '%sautomation' % prefix,
1162 1162 'MaxCount': 1,
1163 1163 'MinCount': 1,
1164 1164 'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
1165 1165 }
1166 1166
1167 1167 commands = [
1168 1168 # Need to start the service so sshd_config is generated.
1169 1169 'Start-Service sshd',
1170 1170 'Write-Output "modifying sshd_config"',
1171 1171 r'$content = Get-Content C:\ProgramData\ssh\sshd_config',
1172 1172 '$content = $content -replace "Match Group administrators","" -replace "AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys",""',
1173 1173 r'$content | Set-Content C:\ProgramData\ssh\sshd_config',
1174 1174 'Import-Module OpenSSHUtils',
1175 1175 r'Repair-SshdConfigPermission C:\ProgramData\ssh\sshd_config -Confirm:$false',
1176 1176 'Restart-Service sshd',
1177 1177 'Write-Output "installing OpenSSH client"',
1178 1178 'Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0',
1179 1179 'Set-Service -Name sshd -StartupType "Automatic"',
1180 1180 'Write-Output "OpenSSH server running"',
1181 1181 ]
1182 1182
1183 1183 with INSTALL_WINDOWS_DEPENDENCIES.open('r', encoding='utf-8') as fh:
1184 1184 commands.extend(l.rstrip() for l in fh)
1185 1185
1186 1186 # Schedule run of EC2Launch on next boot. This ensures that UserData
1187 1187 # is executed.
1188 1188 # We disable setComputerName because it forces a reboot.
1189 1189 # We set an explicit admin password because this causes UserData to run
1190 1190 # as Administrator instead of System.
1191 1191 commands.extend(
1192 1192 [
1193 1193 r'''Set-Content -Path C:\ProgramData\Amazon\EC2-Windows\Launch\Config\LaunchConfig.json '''
1194 1194 r'''-Value '{"setComputerName": false, "setWallpaper": true, "addDnsSuffixList": true, '''
1195 1195 r'''"extendBootVolumeSize": true, "handleUserData": true, '''
1196 1196 r'''"adminPasswordType": "Specify", "adminPassword": "%s"}' '''
1197 1197 % c.automation.default_password(),
1198 1198 r'C:\ProgramData\Amazon\EC2-Windows\Launch\Scripts\InitializeInstance.ps1 '
1199 1199 r'-Schedule',
1200 1200 ]
1201 1201 )
1202 1202
1203 1203 # Disable Windows Defender when bootstrapping because it just slows
1204 1204 # things down.
1205 1205 commands.insert(0, 'Set-MpPreference -DisableRealtimeMonitoring $true')
1206 1206 commands.append('Set-MpPreference -DisableRealtimeMonitoring $false')
1207 1207
1208 1208 # Compute a deterministic fingerprint to determine whether image needs
1209 1209 # to be regenerated.
1210 1210 fingerprint = resolve_fingerprint(
1211 1211 {
1212 1212 'instance_config': config,
1213 1213 'user_data': WINDOWS_USER_DATA,
1214 1214 'initial_bootstrap': WINDOWS_BOOTSTRAP_POWERSHELL,
1215 1215 'bootstrap_commands': commands,
1216 1216 'base_image_name': base_image_name,
1217 1217 }
1218 1218 )
1219 1219
1220 1220 existing_image = find_and_reconcile_image(ec2resource, name, fingerprint)
1221 1221
1222 1222 if existing_image:
1223 1223 return existing_image
1224 1224
1225 1225 print('no suitable Windows development image found; creating one...')
1226 1226
1227 1227 with create_temp_windows_ec2_instances(
1228 1228 c, config, bootstrap=True
1229 1229 ) as instances:
1230 1230 assert len(instances) == 1
1231 1231 instance = instances[0]
1232 1232
1233 1233 wait_for_ssm(ssmclient, [instance])
1234 1234
1235 1235 # On first boot, install various Windows updates.
1236 1236 # We would ideally use PowerShell Remoting for this. However, there are
1237 1237 # trust issues that make it difficult to invoke Windows Update
1238 1238 # remotely. So we use SSM, which has a mechanism for running Windows
1239 1239 # Update.
1240 1240 print('installing Windows features...')
1241 1241 run_ssm_command(
1242 1242 ssmclient,
1243 1243 [instance],
1244 1244 'AWS-RunPowerShellScript',
1245 1245 {
1246 1246 'commands': WINDOWS_BOOTSTRAP_POWERSHELL.split('\n'),
1247 1247 },
1248 1248 )
1249 1249
1250 1250 # Reboot so all updates are fully applied.
1251 1251 #
1252 1252 # We don't use instance.reboot() here because it is asynchronous and
1253 1253 # we don't know when exactly the instance has rebooted. It could take
1254 1254 # a while to stop and we may start trying to interact with the instance
1255 1255 # before it has rebooted.
1256 1256 print('rebooting instance %s' % instance.id)
1257 1257 instance.stop()
1258 1258 ec2client.get_waiter('instance_stopped').wait(
1259 1259 InstanceIds=[instance.id],
1260 1260 WaiterConfig={
1261 1261 'Delay': 5,
1262 1262 },
1263 1263 )
1264 1264
1265 1265 instance.start()
1266 1266 wait_for_ip_addresses([instance])
1267 1267
1268 1268 # There is a race condition here between the User Data PS script running
1269 1269 # and us connecting to WinRM. This can manifest as
1270 1270 # "AuthorizationManager check failed" failures during run_powershell().
1271 1271 # TODO figure out a workaround.
1272 1272
1273 1273 print('waiting for Windows Remote Management to come back...')
1274 1274 client = wait_for_winrm(
1275 1275 instance.public_ip_address,
1276 1276 'Administrator',
1277 1277 c.automation.default_password(),
1278 1278 )
1279 1279 print('established WinRM connection to %s' % instance.id)
1280 1280 instance.winrm_client = client
1281 1281
1282 1282 print('bootstrapping instance...')
1283 1283 run_powershell(instance.winrm_client, '\n'.join(commands))
1284 1284
1285 1285 print('bootstrap completed; stopping %s to create image' % instance.id)
1286 1286 return create_ami_from_instance(
1287 1287 ec2client,
1288 1288 instance,
1289 1289 name,
1290 1290 'Mercurial Windows development environment',
1291 1291 fingerprint,
1292 1292 )
1293 1293
1294 1294
1295 1295 @contextlib.contextmanager
1296 1296 def temporary_windows_dev_instances(
1297 1297 c: AWSConnection,
1298 1298 image,
1299 1299 instance_type,
1300 1300 prefix='hg-',
1301 1301 disable_antivirus=False,
1302 1302 ):
1303 1303 """Create a temporary Windows development EC2 instance.
1304 1304
1305 1305 Context manager resolves to the list of ``EC2.Instance`` that were created.
1306 1306 """
1307 1307 config = {
1308 1308 'BlockDeviceMappings': [
1309 1309 {
1310 1310 'DeviceName': '/dev/sda1',
1311 1311 'Ebs': {
1312 1312 'DeleteOnTermination': True,
1313 1313 'VolumeSize': 32,
1314 'VolumeType': 'gp2',
1314 'VolumeType': 'gp3',
1315 1315 },
1316 1316 }
1317 1317 ],
1318 1318 'ImageId': image.id,
1319 1319 'InstanceInitiatedShutdownBehavior': 'stop',
1320 1320 'InstanceType': instance_type,
1321 1321 'KeyName': '%sautomation' % prefix,
1322 1322 'MaxCount': 1,
1323 1323 'MinCount': 1,
1324 1324 'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
1325 1325 }
1326 1326
1327 1327 with create_temp_windows_ec2_instances(c, config) as instances:
1328 1328 if disable_antivirus:
1329 1329 for instance in instances:
1330 1330 run_powershell(
1331 1331 instance.winrm_client,
1332 1332 'Set-MpPreference -DisableRealtimeMonitoring $true',
1333 1333 )
1334 1334
1335 1335 yield instances
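
cli.py drives this context manager for all Windows build and test commands; a condensed sketch (the PowerShell command is illustrative only):

    with temporary_windows_dev_instances(c, image, 'm6i.large') as insts:
        run_powershell(insts[0].winrm_client, 'Get-ChildItem C:\\')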
@@ -1,576 +1,576 b''
1 1 # cli.py - Command line interface for automation
2 2 #
3 3 # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # no-check-code because Python 3 native.
9 9
10 10 import argparse
11 11 import concurrent.futures as futures
12 12 import os
13 13 import pathlib
14 14 import time
15 15
16 16 from . import (
17 17 aws,
18 18 HGAutomation,
19 19 linux,
20 20 try_server,
21 21 windows,
22 22 )
23 23
24 24
25 25 SOURCE_ROOT = pathlib.Path(
26 26 os.path.abspath(__file__)
27 27 ).parent.parent.parent.parent
28 28 DIST_PATH = SOURCE_ROOT / 'dist'
29 29
30 30
31 31 def bootstrap_linux_dev(
32 32 hga: HGAutomation, aws_region, distros=None, parallel=False
33 33 ):
34 34 c = hga.aws_connection(aws_region)
35 35
36 36 if distros:
37 37 distros = distros.split(',')
38 38 else:
39 39 distros = sorted(linux.DISTROS)
40 40
41 41 # TODO There is a wonky interaction involving KeyboardInterrupt whereby
42 42 # the context manager that is supposed to terminate the temporary EC2
43 43 # instance doesn't run. Until we fix this, make parallel building opt-in
44 44 # so we don't orphan instances.
45 45 if parallel:
46 46 fs = []
47 47
48 48 with futures.ThreadPoolExecutor(len(distros)) as e:
49 49 for distro in distros:
50 50 fs.append(e.submit(aws.ensure_linux_dev_ami, c, distro=distro))
51 51
52 52 for f in fs:
53 53 f.result()
54 54 else:
55 55 for distro in distros:
56 56 aws.ensure_linux_dev_ami(c, distro=distro)
57 57
58 58
59 59 def bootstrap_windows_dev(hga: HGAutomation, aws_region, base_image_name):
60 60 c = hga.aws_connection(aws_region)
61 61 image = aws.ensure_windows_dev_ami(c, base_image_name=base_image_name)
62 62 print('Windows development AMI available as %s' % image.id)
63 63
64 64
65 65 def build_inno(
66 66 hga: HGAutomation,
67 67 aws_region,
68 68 python_version,
69 69 arch,
70 70 revision,
71 71 version,
72 72 base_image_name,
73 73 ):
74 74 c = hga.aws_connection(aws_region)
75 75 image = aws.ensure_windows_dev_ami(c, base_image_name=base_image_name)
76 76 DIST_PATH.mkdir(exist_ok=True)
77 77
78 78 with aws.temporary_windows_dev_instances(c, image, 't3.medium') as insts:
79 79 instance = insts[0]
80 80
81 81 windows.synchronize_hg(SOURCE_ROOT, revision, instance)
82 82
83 83 for py_version in python_version:
84 84 for a in arch:
85 85 windows.build_inno_installer(
86 86 instance.winrm_client,
87 87 py_version,
88 88 a,
89 89 DIST_PATH,
90 90 version=version,
91 91 )
92 92
93 93
94 94 def build_wix(
95 95 hga: HGAutomation,
96 96 aws_region,
97 97 python_version,
98 98 arch,
99 99 revision,
100 100 version,
101 101 base_image_name,
102 102 ):
103 103 c = hga.aws_connection(aws_region)
104 104 image = aws.ensure_windows_dev_ami(c, base_image_name=base_image_name)
105 105 DIST_PATH.mkdir(exist_ok=True)
106 106
107 107 with aws.temporary_windows_dev_instances(c, image, 't3.medium') as insts:
108 108 instance = insts[0]
109 109
110 110 windows.synchronize_hg(SOURCE_ROOT, revision, instance)
111 111
112 112 for py_version in python_version:
113 113 for a in arch:
114 114 windows.build_wix_installer(
115 115 instance.winrm_client,
116 116 py_version,
117 117 a,
118 118 DIST_PATH,
119 119 version=version,
120 120 )
121 121
122 122
123 123 def build_windows_wheel(
124 124 hga: HGAutomation,
125 125 aws_region,
126 126 python_version,
127 127 arch,
128 128 revision,
129 129 base_image_name,
130 130 ):
131 131 c = hga.aws_connection(aws_region)
132 132 image = aws.ensure_windows_dev_ami(c, base_image_name=base_image_name)
133 133 DIST_PATH.mkdir(exist_ok=True)
134 134
135 135 with aws.temporary_windows_dev_instances(c, image, 't3.medium') as insts:
136 136 instance = insts[0]
137 137
138 138 windows.synchronize_hg(SOURCE_ROOT, revision, instance)
139 139
140 140 for py_version in python_version:
141 141 for a in arch:
142 142 windows.build_wheel(
143 143 instance.winrm_client, py_version, a, DIST_PATH
144 144 )
145 145
146 146
147 147 def build_all_windows_packages(
148 148 hga: HGAutomation, aws_region, revision, version, base_image_name
149 149 ):
150 150 c = hga.aws_connection(aws_region)
151 151 image = aws.ensure_windows_dev_ami(c, base_image_name=base_image_name)
152 152 DIST_PATH.mkdir(exist_ok=True)
153 153
154 with aws.temporary_windows_dev_instances(c, image, 't3.medium') as insts:
154 with aws.temporary_windows_dev_instances(c, image, 'm6i.large') as insts:
155 155 instance = insts[0]
156 156
157 157 winrm_client = instance.winrm_client
158 158
159 159 windows.synchronize_hg(SOURCE_ROOT, revision, instance)
160 160
161 161 for py_version in ("2.7", "3.7", "3.8", "3.9", "3.10"):
162 162 for arch in ("x86", "x64"):
163 163 windows.purge_hg(winrm_client)
164 164 windows.build_wheel(
165 165 winrm_client,
166 166 python_version=py_version,
167 167 arch=arch,
168 168 dest_path=DIST_PATH,
169 169 )
170 170
171 171 for py_version in (2, 3):
172 172 for arch in ('x86', 'x64'):
173 173 windows.purge_hg(winrm_client)
174 174 windows.build_inno_installer(
175 175 winrm_client, py_version, arch, DIST_PATH, version=version
176 176 )
177 177 windows.build_wix_installer(
178 178 winrm_client, py_version, arch, DIST_PATH, version=version
179 179 )
180 180
181 181
182 182 def terminate_ec2_instances(hga: HGAutomation, aws_region):
183 183 c = hga.aws_connection(aws_region, ensure_ec2_state=False)
184 184 aws.terminate_ec2_instances(c.ec2resource)
185 185
186 186
187 187 def purge_ec2_resources(hga: HGAutomation, aws_region):
188 188 c = hga.aws_connection(aws_region, ensure_ec2_state=False)
189 189 aws.remove_resources(c)
190 190
191 191
192 192 def run_tests_linux(
193 193 hga: HGAutomation,
194 194 aws_region,
195 195 instance_type,
196 196 python_version,
197 197 test_flags,
198 198 distro,
199 199 filesystem,
200 200 ):
201 201 c = hga.aws_connection(aws_region)
202 202 image = aws.ensure_linux_dev_ami(c, distro=distro)
203 203
204 204 t_start = time.time()
205 205
206 206 ensure_extra_volume = filesystem not in ('default', 'tmpfs')
207 207
208 208 with aws.temporary_linux_dev_instances(
209 209 c, image, instance_type, ensure_extra_volume=ensure_extra_volume
210 210 ) as insts:
211 211
212 212 instance = insts[0]
213 213
214 214 linux.prepare_exec_environment(
215 215 instance.ssh_client, filesystem=filesystem
216 216 )
217 217 linux.synchronize_hg(SOURCE_ROOT, instance, '.')
218 218 t_prepared = time.time()
219 219 linux.run_tests(instance.ssh_client, python_version, test_flags)
220 220 t_done = time.time()
221 221
222 222 t_setup = t_prepared - t_start
223 223 t_all = t_done - t_start
224 224
225 225 print(
226 226 'total time: %.1fs; setup: %.1fs; tests: %.1fs; setup overhead: %.1f%%'
227 227 % (t_all, t_setup, t_done - t_prepared, t_setup / t_all * 100.0)
228 228 )
229 229
230 230
231 231 def run_tests_windows(
232 232 hga: HGAutomation,
233 233 aws_region,
234 234 instance_type,
235 235 python_version,
236 236 arch,
237 237 test_flags,
238 238 base_image_name,
239 239 ):
240 240 c = hga.aws_connection(aws_region)
241 241 image = aws.ensure_windows_dev_ami(c, base_image_name=base_image_name)
242 242
243 243 with aws.temporary_windows_dev_instances(
244 244 c, image, instance_type, disable_antivirus=True
245 245 ) as insts:
246 246 instance = insts[0]
247 247
248 248 windows.synchronize_hg(SOURCE_ROOT, '.', instance)
249 249 windows.run_tests(
250 250 instance.winrm_client, python_version, arch, test_flags
251 251 )
252 252
253 253
254 254 def publish_windows_artifacts(
255 255 hg: HGAutomation,
256 256 aws_region,
257 257 version: str,
258 258 pypi: bool,
259 259 mercurial_scm_org: bool,
260 260 ssh_username: str,
261 261 ):
262 262 windows.publish_artifacts(
263 263 DIST_PATH,
264 264 version,
265 265 pypi=pypi,
266 266 mercurial_scm_org=mercurial_scm_org,
267 267 ssh_username=ssh_username,
268 268 )
269 269
270 270
271 271 def run_try(hga: HGAutomation, aws_region: str, rev: str):
272 272 c = hga.aws_connection(aws_region, ensure_ec2_state=False)
273 273 try_server.trigger_try(c, rev=rev)
274 274
275 275
276 276 def get_parser():
277 277 parser = argparse.ArgumentParser()
278 278
279 279 parser.add_argument(
280 280 '--state-path',
281 281 default='~/.hgautomation',
282 282 help='Path for local state files',
283 283 )
284 284 parser.add_argument(
285 285 '--aws-region',
286 286 help='AWS region to use',
287 287 default='us-west-2',
288 288 )
289 289
290 290 subparsers = parser.add_subparsers()
291 291
292 292 sp = subparsers.add_parser(
293 293 'bootstrap-linux-dev',
294 294 help='Bootstrap Linux development environments',
295 295 )
296 296 sp.add_argument(
297 297 '--distros',
298 298 help='Comma delimited list of distros to bootstrap',
299 299 )
300 300 sp.add_argument(
301 301 '--parallel',
302 302 action='store_true',
303 303 help='Generate AMIs in parallel (not CTRL-c safe)',
304 304 )
305 305 sp.set_defaults(func=bootstrap_linux_dev)
306 306
307 307 sp = subparsers.add_parser(
308 308 'bootstrap-windows-dev',
309 309 help='Bootstrap the Windows development environment',
310 310 )
311 311 sp.add_argument(
312 312 '--base-image-name',
313 313 help='AMI name of base image',
314 314 default=aws.WINDOWS_BASE_IMAGE_NAME,
315 315 )
316 316 sp.set_defaults(func=bootstrap_windows_dev)
317 317
318 318 sp = subparsers.add_parser(
319 319 'build-all-windows-packages',
320 320 help='Build all Windows packages',
321 321 )
322 322 sp.add_argument(
323 323 '--revision',
324 324 help='Mercurial revision to build',
325 325 default='.',
326 326 )
327 327 sp.add_argument(
328 328 '--version',
329 329 help='Mercurial version string to use',
330 330 )
331 331 sp.add_argument(
332 332 '--base-image-name',
333 333 help='AMI name of base image',
334 334 default=aws.WINDOWS_BASE_IMAGE_NAME,
335 335 )
336 336 sp.set_defaults(func=build_all_windows_packages)
337 337
338 338 sp = subparsers.add_parser(
339 339 'build-inno',
340 340 help='Build Inno Setup installer(s)',
341 341 )
342 342 sp.add_argument(
343 343 '--python-version',
344 344 help='Which version of Python to target',
345 345 choices={2, 3},
346 346 type=int,
347 347 nargs='*',
348 348 default=[3],
349 349 )
350 350 sp.add_argument(
351 351 '--arch',
352 352 help='Architecture to build for',
353 353 choices={'x86', 'x64'},
354 354 nargs='*',
355 355 default=['x64'],
356 356 )
357 357 sp.add_argument(
358 358 '--revision',
359 359 help='Mercurial revision to build',
360 360 default='.',
361 361 )
362 362 sp.add_argument(
363 363 '--version',
364 364 help='Mercurial version string to use in installer',
365 365 )
366 366 sp.add_argument(
367 367 '--base-image-name',
368 368 help='AMI name of base image',
369 369 default=aws.WINDOWS_BASE_IMAGE_NAME,
370 370 )
371 371 sp.set_defaults(func=build_inno)
372 372
373 373 sp = subparsers.add_parser(
374 374 'build-windows-wheel',
375 375 help='Build Windows wheel(s)',
376 376 )
377 377 sp.add_argument(
378 378 '--python-version',
379 379 help='Python version to build for',
380 380 choices={'2.7', '3.7', '3.8', '3.9', '3.10'},
381 381 nargs='*',
382 382 default=['3.8'],
383 383 )
384 384 sp.add_argument(
385 385 '--arch',
386 386 help='Architecture to build for',
387 387 choices={'x86', 'x64'},
388 388 nargs='*',
389 389 default=['x64'],
390 390 )
391 391 sp.add_argument(
392 392 '--revision',
393 393 help='Mercurial revision to build',
394 394 default='.',
395 395 )
396 396 sp.add_argument(
397 397 '--base-image-name',
398 398 help='AMI name of base image',
399 399 default=aws.WINDOWS_BASE_IMAGE_NAME,
400 400 )
401 401 sp.set_defaults(func=build_windows_wheel)
402 402
403 403 sp = subparsers.add_parser('build-wix', help='Build WiX installer(s)')
404 404 sp.add_argument(
405 405 '--python-version',
406 406 help='Which version of Python to target',
407 407 choices={2, 3},
408 408 type=int,
409 409 nargs='*',
410 410 default=[3],
411 411 )
412 412 sp.add_argument(
413 413 '--arch',
414 414 help='Architecture to build for',
415 415 choices={'x86', 'x64'},
416 416 nargs='*',
417 417 default=['x64'],
418 418 )
419 419 sp.add_argument(
420 420 '--revision',
421 421 help='Mercurial revision to build',
422 422 default='.',
423 423 )
424 424 sp.add_argument(
425 425 '--version',
426 426 help='Mercurial version string to use in installer',
427 427 )
428 428 sp.add_argument(
429 429 '--base-image-name',
430 430 help='AMI name of base image',
431 431 default=aws.WINDOWS_BASE_IMAGE_NAME,
432 432 )
433 433 sp.set_defaults(func=build_wix)
434 434
435 435 sp = subparsers.add_parser(
436 436 'terminate-ec2-instances',
437 437 help='Terminate all active EC2 instances managed by us',
438 438 )
439 439 sp.set_defaults(func=terminate_ec2_instances)
440 440
441 441 sp = subparsers.add_parser(
442 442 'purge-ec2-resources',
443 443 help='Purge all EC2 resources managed by us',
444 444 )
445 445 sp.set_defaults(func=purge_ec2_resources)
446 446
447 447 sp = subparsers.add_parser(
448 448 'run-tests-linux',
449 449 help='Run tests on Linux',
450 450 )
451 451 sp.add_argument(
452 452 '--distro',
453 453 help='Linux distribution to run tests on',
454 454 choices=linux.DISTROS,
455 455 default='debian10',
456 456 )
457 457 sp.add_argument(
458 458 '--filesystem',
459 459 help='Filesystem type to use',
460 460 choices={'btrfs', 'default', 'ext3', 'ext4', 'jfs', 'tmpfs', 'xfs'},
461 461 default='default',
462 462 )
463 463 sp.add_argument(
464 464 '--instance-type',
465 465 help='EC2 instance type to use',
466 466 default='c5.9xlarge',
467 467 )
468 468 sp.add_argument(
469 469 '--python-version',
470 470 help='Python version to use',
471 471 choices={
472 472 'system2',
473 473 'system3',
474 474 '2.7',
475 475 '3.5',
476 476 '3.6',
477 477 '3.7',
478 478 '3.8',
479 479 'pypy',
480 480 'pypy3.5',
481 481 'pypy3.6',
482 482 },
483 483 default='system2',
484 484 )
485 485 sp.add_argument(
486 486 'test_flags',
487 487 help='Extra command line flags to pass to run-tests.py',
488 488 nargs='*',
489 489 )
490 490 sp.set_defaults(func=run_tests_linux)
491 491
492 492 sp = subparsers.add_parser(
493 493 'run-tests-windows',
494 494 help='Run tests on Windows',
495 495 )
496 496 sp.add_argument(
497 497 '--instance-type',
498 498 help='EC2 instance type to use',
499 default='t3.medium',
499 default='m6i.large',
500 500 )
501 501 sp.add_argument(
502 502 '--python-version',
503 503 help='Python version to use',
504 504 choices={'2.7', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10'},
505 505 default='2.7',
506 506 )
507 507 sp.add_argument(
508 508 '--arch',
509 509 help='Architecture to test',
510 510 choices={'x86', 'x64'},
511 511 default='x64',
512 512 )
513 513 sp.add_argument(
514 514 '--test-flags',
515 515 help='Extra command line flags to pass to run-tests.py',
516 516 )
517 517 sp.add_argument(
518 518 '--base-image-name',
519 519 help='AMI name of base image',
520 520 default=aws.WINDOWS_BASE_IMAGE_NAME,
521 521 )
522 522 sp.set_defaults(func=run_tests_windows)
523 523
524 524 sp = subparsers.add_parser(
525 525 'publish-windows-artifacts',
526 526 help='Publish built Windows artifacts (wheels, installers, etc.)',
527 527 )
528 528 sp.add_argument(
529 529 '--no-pypi',
530 530 dest='pypi',
531 531 action='store_false',
532 532 default=True,
533 533 help='Skip uploading to PyPI',
534 534 )
535 535 sp.add_argument(
536 536 '--no-mercurial-scm-org',
537 537 dest='mercurial_scm_org',
538 538 action='store_false',
539 539 default=True,
540 540 help='Skip uploading to www.mercurial-scm.org',
541 541 )
542 542 sp.add_argument(
543 543 '--ssh-username',
544 544 help='SSH username for mercurial-scm.org',
545 545 )
546 546 sp.add_argument(
547 547 'version',
548 548 help='Mercurial version string to locate local packages',
549 549 )
550 550 sp.set_defaults(func=publish_windows_artifacts)
551 551
552 552 sp = subparsers.add_parser(
553 553 'try', help='Run CI automation against a custom changeset'
554 554 )
555 555 sp.add_argument('-r', '--rev', default='.', help='Revision to run CI on')
556 556 sp.set_defaults(func=run_try)
557 557
558 558 return parser
559 559
560 560
561 561 def main():
562 562 parser = get_parser()
563 563 args = parser.parse_args()
564 564
565 565 local_state_path = pathlib.Path(os.path.expanduser(args.state_path))
566 566 automation = HGAutomation(local_state_path)
567 567
568 568 if not hasattr(args, 'func'):
569 569 parser.print_help()
570 570 return
571 571
572 572 kwargs = dict(vars(args))
573 573 del kwargs['func']
574 574 del kwargs['state_path']
575 575
576 576 args.func(automation, **kwargs)
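For readers unfamiliar with this argparse dispatch style: each subparser binds its handler through `set_defaults(func=...)`, and `main()` converts the parsed namespace into keyword arguments for that handler. Below is a minimal, self-contained sketch of the same pattern; the `greet` command, its arguments, and the placeholder automation object are hypothetical and exist only for illustration.

import argparse

def greet(automation, name, loud):
    # 'automation' stands in for the HGAutomation instance main() passes.
    message = 'hello, %s' % name
    print(message.upper() if loud else message)

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
sp = subparsers.add_parser('greet')
sp.add_argument('name')
sp.add_argument('--loud', action='store_true')
sp.set_defaults(func=greet)

args = parser.parse_args(['greet', 'world', '--loud'])
kwargs = dict(vars(args))
del kwargs['func']  # every remaining key becomes a keyword argument
args.func('automation-placeholder', **kwargs)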
@@ -1,827 +1,837 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::NodeData;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_path_from_bytes;
14 14 use crate::utils::hg_path::HgPath;
15 15 use crate::BadMatch;
16 16 use crate::DirstateStatus;
17 17 use crate::EntryState;
18 18 use crate::HgPathBuf;
19 19 use crate::HgPathCow;
20 20 use crate::PatternFileWarning;
21 21 use crate::StatusError;
22 22 use crate::StatusOptions;
23 23 use micro_timer::timed;
24 24 use rayon::prelude::*;
25 25 use sha1::{Digest, Sha1};
26 26 use std::borrow::Cow;
27 27 use std::io;
28 28 use std::path::Path;
29 29 use std::path::PathBuf;
30 30 use std::sync::Mutex;
31 31 use std::time::SystemTime;
32 32
33 33 /// Returns the status of the working directory compared to its parent
34 34 /// changeset.
35 35 ///
36 36 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 37 /// and variable names) and dirstate tree at the same time. The core of this
38 38 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 39 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 40 /// exists in one of the two trees, depending on information requested by
41 41 /// `options` we may need to traverse the remaining subtree.
42 42 #[timed]
43 43 pub fn status<'tree, 'on_disk: 'tree>(
44 44 dmap: &'tree mut DirstateMap<'on_disk>,
45 45 matcher: &(dyn Matcher + Sync),
46 46 root_dir: PathBuf,
47 47 ignore_files: Vec<PathBuf>,
48 48 options: StatusOptions,
49 49 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
50 50 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
51 51 if options.list_ignored || options.list_unknown {
52 52 let mut hasher = Sha1::new();
53 53 let (ignore_fn, warnings) = get_ignore_function(
54 54 ignore_files,
55 55 &root_dir,
56 56 &mut |pattern_bytes| hasher.update(pattern_bytes),
57 57 )?;
58 58 let new_hash = *hasher.finalize().as_ref();
59 59 let changed = new_hash != dmap.ignore_patterns_hash;
60 60 dmap.ignore_patterns_hash = new_hash;
61 61 (ignore_fn, warnings, Some(changed))
62 62 } else {
63 63 (Box::new(|&_| true), vec![], None)
64 64 };
65 65
66 66 let filesystem_time_at_status_start =
67 67 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
68 68
69 69 // If the repository is under the current directory, prefer using a
70 70 // relative path, so the kernel needs to traverse fewer directories in every
71 71 // call to `read_dir` or `symlink_metadata`.
72 72 // This is effective in the common case where the current directory is the
73 73 // repository root.
74 74
75 75 // TODO: Better yet would be to use libc functions like `openat` and
76 76 // `fstatat` to remove such repeated traversals entirely, but the standard
77 77 // library does not provide APIs based on those.
78 78 // Maybe with a crate like https://crates.io/crates/openat instead?
79 79 let root_dir = if let Some(relative) = std::env::current_dir()
80 80 .ok()
81 81 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
82 82 {
83 83 relative
84 84 } else {
85 85 &root_dir
86 86 };
87 87
88 88 let outcome = DirstateStatus {
89 89 filesystem_time_at_status_start,
90 90 ..Default::default()
91 91 };
92 92 let common = StatusCommon {
93 93 dmap,
94 94 options,
95 95 matcher,
96 96 ignore_fn,
97 97 outcome: Mutex::new(outcome),
98 98 ignore_patterns_have_changed: patterns_changed,
99 99 new_cachable_directories: Default::default(),
100 100 outated_cached_directories: Default::default(),
101 101 filesystem_time_at_status_start,
102 102 };
103 103 let is_at_repo_root = true;
104 104 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
105 105 let has_ignored_ancestor = false;
106 106 let root_cached_mtime = None;
107 107 let root_dir_metadata = None;
108 108 // If the path we have for the repository root is a symlink, do follow it.
109 109 // (As opposed to symlinks within the working directory which are not
110 110 // followed, using `std::fs::symlink_metadata`.)
111 111 common.traverse_fs_directory_and_dirstate(
112 112 has_ignored_ancestor,
113 113 dmap.root.as_ref(),
114 114 hg_path,
115 115 &root_dir,
116 116 root_dir_metadata,
117 117 root_cached_mtime,
118 118 is_at_repo_root,
119 119 )?;
120 120 let mut outcome = common.outcome.into_inner().unwrap();
121 121 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
122 122 let outdated = common.outated_cached_directories.into_inner().unwrap();
123 123
124 124 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
125 125 || !outdated.is_empty()
126 126 || !new_cachable.is_empty();
127 127
128 128 // Remove outdated mtimes before adding new mtimes, in case a given
130 130 // directory is in both
130 130 for path in &outdated {
131 131 let node = dmap.get_or_insert(path)?;
132 132 if let NodeData::CachedDirectory { .. } = &node.data {
133 133 node.data = NodeData::None
134 134 }
135 135 }
136 136 for (path, mtime) in &new_cachable {
137 137 let node = dmap.get_or_insert(path)?;
138 138 match &node.data {
139 139 NodeData::Entry(_) => {} // Don’t overwrite an entry
140 140 NodeData::CachedDirectory { .. } | NodeData::None => {
141 141 node.data = NodeData::CachedDirectory { mtime: *mtime }
142 142 }
143 143 }
144 144 }
145 145
146 146 Ok((outcome, warnings))
147 147 }
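To make the `merge_join_by` traversal mentioned in the doc comment concrete, here is a hedged, stand-alone sketch: two sorted name sequences, standing in for the dirstate children and the `read_dir` output, are merged into Both/Left/Right cases. It assumes only the `itertools` crate, which this file already uses; the file names are made up.

use itertools::EitherOrBoth::{Both, Left, Right};

fn main() {
    let dirstate = ["a.txt", "b.txt", "d.txt"]; // sorted dirstate children
    let filesystem = ["a.txt", "c.txt", "d.txt"]; // sorted read_dir output
    for pair in itertools::merge_join_by(dirstate, filesystem, |d, f| d.cmp(f)) {
        match pair {
            // Path exists in both trees: compare metadata with the entry.
            Both(d, _) => println!("{}: in dirstate and on disk", d),
            // Dirstate only: the file was removed or deleted.
            Left(d) => println!("{}: dirstate only", d),
            // Filesystem only: the path is unknown or ignored.
            Right(f) => println!("{}: filesystem only", f),
        }
    }
}

Both inputs must already be sorted, which is why the real code calls `dirstate_nodes.sorted()` and sorts `fs_entries` before merging.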
148 148
149 149 /// Bag of random things needed by various parts of the algorithm. Reduces the
150 150 /// number of parameters passed to functions.
151 151 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
152 152 dmap: &'tree DirstateMap<'on_disk>,
153 153 options: StatusOptions,
154 154 matcher: &'a (dyn Matcher + Sync),
155 155 ignore_fn: IgnoreFnType<'a>,
156 156 outcome: Mutex<DirstateStatus<'on_disk>>,
157 157 new_cachable_directories:
158 158 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
159 159 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
160 160
161 161 /// Whether ignore files like `.hgignore` have changed since the previous
162 162 /// time a `status()` call wrote their hash to the dirstate. `None` means
163 163 /// we don’t know as this run doesn’t list either ignored or unknown files
164 164 /// and therefore isn’t reading `.hgignore`.
165 165 ignore_patterns_have_changed: Option<bool>,
166 166
167 167 /// The current time at the start of the `status()` algorithm, as measured
168 168 /// and possibly truncated by the filesystem.
169 169 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
170 170 }
171 171
172 172 enum Outcome {
173 173 Modified,
174 174 Added,
175 175 Removed,
176 176 Deleted,
177 177 Clean,
178 178 Ignored,
179 179 Unknown,
180 180 Unsure,
181 181 }
182 182
183 183 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
184 184 fn push_outcome(
185 185 &self,
186 186 which: Outcome,
187 187 dirstate_node: &NodeRef<'tree, 'on_disk>,
188 188 ) -> Result<(), DirstateV2ParseError> {
189 189 let path = dirstate_node
190 190 .full_path_borrowed(self.dmap.on_disk)?
191 191 .detach_from_tree();
192 192 let copy_source = if self.options.list_copies {
193 193 dirstate_node
194 194 .copy_source_borrowed(self.dmap.on_disk)?
195 195 .map(|source| source.detach_from_tree())
196 196 } else {
197 197 None
198 198 };
199 199 self.push_outcome_common(which, path, copy_source);
200 200 Ok(())
201 201 }
202 202
203 203 fn push_outcome_without_copy_source(
204 204 &self,
205 205 which: Outcome,
206 206 path: &BorrowedPath<'_, 'on_disk>,
207 207 ) {
208 208 self.push_outcome_common(which, path.detach_from_tree(), None)
209 209 }
210 210
211 211 fn push_outcome_common(
212 212 &self,
213 213 which: Outcome,
214 214 path: HgPathCow<'on_disk>,
215 215 copy_source: Option<HgPathCow<'on_disk>>,
216 216 ) {
217 217 let mut outcome = self.outcome.lock().unwrap();
218 218 let vec = match which {
219 219 Outcome::Modified => &mut outcome.modified,
220 220 Outcome::Added => &mut outcome.added,
221 221 Outcome::Removed => &mut outcome.removed,
222 222 Outcome::Deleted => &mut outcome.deleted,
223 223 Outcome::Clean => &mut outcome.clean,
224 224 Outcome::Ignored => &mut outcome.ignored,
225 225 Outcome::Unknown => &mut outcome.unknown,
226 226 Outcome::Unsure => &mut outcome.unsure,
227 227 };
228 228 vec.push(StatusPath { path, copy_source });
229 229 }
230 230
231 231 fn read_dir(
232 232 &self,
233 233 hg_path: &HgPath,
234 234 fs_path: &Path,
235 235 is_at_repo_root: bool,
236 236 ) -> Result<Vec<DirEntry>, ()> {
237 237 DirEntry::read_dir(fs_path, is_at_repo_root)
238 238 .map_err(|error| self.io_error(error, hg_path))
239 239 }
240 240
241 241 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
242 242 let errno = error.raw_os_error().expect("expected real OS error");
243 243 self.outcome
244 244 .lock()
245 245 .unwrap()
246 246 .bad
247 247 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
248 248 }
249 249
250 250 fn check_for_outdated_directory_cache(
251 251 &self,
252 252 dirstate_node: &NodeRef<'tree, 'on_disk>,
253 253 ) -> Result<(), DirstateV2ParseError> {
254 254 if self.ignore_patterns_have_changed == Some(true)
255 255 && dirstate_node.cached_directory_mtime()?.is_some()
256 256 {
257 257 self.outated_cached_directories.lock().unwrap().push(
258 258 dirstate_node
259 259 .full_path_borrowed(self.dmap.on_disk)?
260 260 .detach_from_tree(),
261 261 )
262 262 }
263 263 Ok(())
264 264 }
265 265
266 266 /// If this returns true, we can get accurate results by only using
267 267 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
268 268 /// need to call `read_dir`.
269 269 fn can_skip_fs_readdir(
270 270 &self,
271 271 directory_metadata: Option<&std::fs::Metadata>,
272 272 cached_directory_mtime: Option<TruncatedTimestamp>,
273 273 ) -> bool {
274 274 if !self.options.list_unknown && !self.options.list_ignored {
275 275 // All states that we care about listing have corresponding
276 276 // dirstate entries.
277 277 // This happens for example with `hg status -mard`.
278 278 return true;
279 279 }
280 280 if !self.options.list_ignored
281 281 && self.ignore_patterns_have_changed == Some(false)
282 282 {
283 283 if let Some(cached_mtime) = cached_directory_mtime {
284 284 // The dirstate contains a cached mtime for this directory, set
285 285 // by a previous run of the `status` algorithm which found this
286 286 // directory eligible for `read_dir` caching.
287 287 if let Some(meta) = directory_metadata {
288 288 if cached_mtime
289 289 .likely_equal_to_mtime_of(meta)
290 290 .unwrap_or(false)
291 291 {
292 292 // The mtime of that directory has not changed
293 293 // since then, which means that the results of
294 294 // `read_dir` should also be unchanged.
295 295 return true;
296 296 }
297 297 }
298 298 }
299 299 }
300 300 false
301 301 }
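The fast path above reduces to one question: does the directory's current mtime still match the value a previous run recorded? A simplified stand-alone sketch of that check, using whole seconds in place of the internal `TruncatedTimestamp` type (the "cached" value here is faked locally):

use std::io;
use std::path::Path;
use std::time::UNIX_EPOCH;

// Whole-second mtime, standing in for TruncatedTimestamp.
fn mtime_secs(path: &Path) -> io::Result<u64> {
    let mtime = std::fs::symlink_metadata(path)?.modified()?;
    Ok(mtime.duration_since(UNIX_EPOCH).unwrap_or_default().as_secs())
}

fn main() -> io::Result<()> {
    let dir = Path::new(".");
    let cached = mtime_secs(dir)?; // pretend this came from the dirstate
    // ... later, during another status run ...
    if mtime_secs(dir)? == cached {
        println!("unchanged mtime: previous read_dir results still apply");
    } else {
        println!("directory changed: read_dir must be called again");
    }
    Ok(())
}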
302 302
303 303 /// Returns whether all child entries of the filesystem directory have a
304 304 /// corresponding dirstate node or are ignored.
305 305 fn traverse_fs_directory_and_dirstate(
306 306 &self,
307 307 has_ignored_ancestor: bool,
308 308 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
309 309 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
310 310 directory_fs_path: &Path,
311 311 directory_metadata: Option<&std::fs::Metadata>,
312 312 cached_directory_mtime: Option<TruncatedTimestamp>,
313 313 is_at_repo_root: bool,
314 314 ) -> Result<bool, DirstateV2ParseError> {
315 315 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
316 316 {
317 317 dirstate_nodes
318 318 .par_iter()
319 319 .map(|dirstate_node| {
320 320 let fs_path = directory_fs_path.join(get_path_from_bytes(
321 321 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
322 322 ));
323 323 match std::fs::symlink_metadata(&fs_path) {
324 324 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
325 325 &fs_path,
326 326 &fs_metadata,
327 327 dirstate_node,
328 328 has_ignored_ancestor,
329 329 ),
330 330 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
331 331 self.traverse_dirstate_only(dirstate_node)
332 332 }
333 333 Err(error) => {
334 334 let hg_path =
335 335 dirstate_node.full_path(self.dmap.on_disk)?;
336 336 Ok(self.io_error(error, hg_path))
337 337 }
338 338 }
339 339 })
340 340 .collect::<Result<_, _>>()?;
341 341
342 342 // We don’t know, so conservatively say this isn’t the case
343 343 let children_all_have_dirstate_node_or_are_ignored = false;
344 344
345 345 return Ok(children_all_have_dirstate_node_or_are_ignored);
346 346 }
347 347
348 348 let mut fs_entries = if let Ok(entries) = self.read_dir(
349 349 directory_hg_path,
350 350 directory_fs_path,
351 351 is_at_repo_root,
352 352 ) {
353 353 entries
354 354 } else {
355 355 // Treat an unreadable directory (typically because of insufficient
356 356 // permissions) like an empty directory. `self.read_dir` has
357 357 // already called `self.io_error` so a warning will be emitted.
358 358 Vec::new()
359 359 };
360 360
361 361 // `merge_join_by` requires both its input iterators to be sorted:
362 362
363 363 let dirstate_nodes = dirstate_nodes.sorted();
364 364 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
365 365 // https://github.com/rust-lang/rust/issues/34162
366 366 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
367 367
368 368 // Propagate here any error that would happen inside the comparison
369 369 // callback below
370 370 for dirstate_node in &dirstate_nodes {
371 371 dirstate_node.base_name(self.dmap.on_disk)?;
372 372 }
373 373 itertools::merge_join_by(
374 374 dirstate_nodes,
375 375 &fs_entries,
376 376 |dirstate_node, fs_entry| {
377 377 // This `unwrap` never panics because we already propagated
378 378 // those errors above
379 379 dirstate_node
380 380 .base_name(self.dmap.on_disk)
381 381 .unwrap()
382 382 .cmp(&fs_entry.base_name)
383 383 },
384 384 )
385 385 .par_bridge()
386 386 .map(|pair| {
387 387 use itertools::EitherOrBoth::*;
388 388 let has_dirstate_node_or_is_ignored;
389 389 match pair {
390 390 Both(dirstate_node, fs_entry) => {
391 391 self.traverse_fs_and_dirstate(
392 392 &fs_entry.full_path,
393 393 &fs_entry.metadata,
394 394 dirstate_node,
395 395 has_ignored_ancestor,
396 396 )?;
397 397 has_dirstate_node_or_is_ignored = true
398 398 }
399 399 Left(dirstate_node) => {
400 400 self.traverse_dirstate_only(dirstate_node)?;
401 401 has_dirstate_node_or_is_ignored = true;
402 402 }
403 403 Right(fs_entry) => {
404 404 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
405 405 has_ignored_ancestor,
406 406 directory_hg_path,
407 407 fs_entry,
408 408 )
409 409 }
410 410 }
411 411 Ok(has_dirstate_node_or_is_ignored)
412 412 })
413 413 .try_reduce(|| true, |a, b| Ok(a && b))
414 414 }
415 415
416 416 fn traverse_fs_and_dirstate(
417 417 &self,
418 418 fs_path: &Path,
419 419 fs_metadata: &std::fs::Metadata,
420 420 dirstate_node: NodeRef<'tree, 'on_disk>,
421 421 has_ignored_ancestor: bool,
422 422 ) -> Result<(), DirstateV2ParseError> {
423 423 self.check_for_outdated_directory_cache(&dirstate_node)?;
424 424 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
425 425 let file_type = fs_metadata.file_type();
426 426 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
427 427 if !file_or_symlink {
428 428 // If we previously had a file here, it was removed (with
429 429 // `hg rm` or similar) or deleted before it could be
430 430 // replaced by a directory or something else.
431 431 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
432 432 }
433 433 if file_type.is_dir() {
434 434 if self.options.collect_traversed_dirs {
435 435 self.outcome
436 436 .lock()
437 437 .unwrap()
438 438 .traversed
439 439 .push(hg_path.detach_from_tree())
440 440 }
441 441 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
442 442 let is_at_repo_root = false;
443 443 let children_all_have_dirstate_node_or_are_ignored = self
444 444 .traverse_fs_directory_and_dirstate(
445 445 is_ignored,
446 446 dirstate_node.children(self.dmap.on_disk)?,
447 447 hg_path,
448 448 fs_path,
449 449 Some(fs_metadata),
450 450 dirstate_node.cached_directory_mtime()?,
451 451 is_at_repo_root,
452 452 )?;
453 453 self.maybe_save_directory_mtime(
454 454 children_all_have_dirstate_node_or_are_ignored,
455 455 fs_metadata,
456 456 dirstate_node,
457 457 )?
458 458 } else {
459 459 if file_or_symlink && self.matcher.matches(hg_path) {
460 460 if let Some(state) = dirstate_node.state()? {
461 461 match state {
462 462 EntryState::Added => {
463 463 self.push_outcome(Outcome::Added, &dirstate_node)?
464 464 }
465 465 EntryState::Removed => self
466 466 .push_outcome(Outcome::Removed, &dirstate_node)?,
467 467 EntryState::Merged => self
468 468 .push_outcome(Outcome::Modified, &dirstate_node)?,
469 469 EntryState::Normal => self
470 470 .handle_normal_file(&dirstate_node, fs_metadata)?,
471 471 }
472 472 } else {
473 473 // `node.entry.is_none()` indicates a "directory"
474 474 // node, but the filesystem has a file
475 475 self.mark_unknown_or_ignored(
476 476 has_ignored_ancestor,
477 477 hg_path,
478 478 );
479 479 }
480 480 }
481 481
482 482 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
483 483 {
484 484 self.traverse_dirstate_only(child_node)?
485 485 }
486 486 }
487 487 Ok(())
488 488 }
489 489
490 490 fn maybe_save_directory_mtime(
491 491 &self,
492 492 children_all_have_dirstate_node_or_are_ignored: bool,
493 493 directory_metadata: &std::fs::Metadata,
494 494 dirstate_node: NodeRef<'tree, 'on_disk>,
495 495 ) -> Result<(), DirstateV2ParseError> {
496 496 if !children_all_have_dirstate_node_or_are_ignored {
497 497 return Ok(());
498 498 }
499 499 // All filesystem directory entries from `read_dir` have a
500 500 // corresponding node in the dirstate, so we can reconstitute the
501 501 // names of those entries without calling `read_dir` again.
502 502
503 503 // TODO: use let-else here and below when available:
504 504 // https://github.com/rust-lang/rust/issues/87335
505 505 let status_start = if let Some(status_start) =
506 506 &self.filesystem_time_at_status_start
507 507 {
508 508 status_start
509 509 } else {
510 510 return Ok(());
511 511 };
512 512
513 513 // Although the Rust standard library’s `SystemTime` type
514 514 // has nanosecond precision, the modified time reported for a
515 515 // directory (or file) may have lower
516 516 // resolution depending on the filesystem (for example ext3
517 517 // only stores integer seconds), kernel (see
518 518 // https://stackoverflow.com/a/14393315/1162888), etc.
519 519 let directory_mtime = if let Ok(option) =
520 520 TruncatedTimestamp::for_reliable_mtime_of(
521 521 directory_metadata,
522 522 status_start,
523 523 ) {
524 524 if let Some(directory_mtime) = option {
525 525 directory_mtime
526 526 } else {
527 527 // The directory was modified too recently,
528 528 // don’t cache its `read_dir` results.
529 529 //
530 530 // 1. A change to this directory (direct child was
531 531 //    added or removed) causes its mtime to be set
532 532 //    (possibly truncated) to `directory_mtime`
533 533 // 2. This `status` algorithm calls `read_dir`
534 534 // 3. Another change is made to the same directory so
535 535 //    that calling `read_dir` again would give
536 536 //    different results, but soon enough after 1. that
537 537 //    the mtime stays the same
538 538 //
539 539 // On a system where the time resolution is poor, this
540 540 // scenario is not unlikely if all three steps are caused
541 541 // by the same script.
542 542 return Ok(());
543 543 }
544 544 } else {
545 545 // OS/libc does not support mtime?
546 546 return Ok(());
547 547 };
548 548 // We’ve observed (through `status_start`) that time has
549 549 // “progressed” since `directory_mtime`, so any further
550 550 // change to this directory is extremely likely to cause a
551 551 // different mtime.
552 552 //
553 553 // Having the same mtime again is not entirely impossible
554 554 // since the system clock is not monotonic. It could jump
555 555 // backward to some point before `directory_mtime`, then a
556 556 // directory change could potentially happen during exactly
557 557 // the wrong tick.
558 558 //
559 559 // We deem this scenario (unlike the previous one) to be
560 560 // unlikely enough in practice.
561 561
562 562 let is_up_to_date =
563 563 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
564 564 cached.likely_equal(directory_mtime)
565 565 } else {
566 566 false
567 567 };
568 568 if !is_up_to_date {
569 569 let hg_path = dirstate_node
570 570 .full_path_borrowed(self.dmap.on_disk)?
571 571 .detach_from_tree();
572 572 self.new_cachable_directories
573 573 .lock()
574 574 .unwrap()
575 575 .push((hg_path, directory_mtime))
576 576 }
577 577 Ok(())
578 578 }
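The long comments above boil down to a guard: only cache a directory mtime that is strictly older than the observed start-of-status time, since anything newer could still be modified within the same clock tick without changing its timestamp. A hedged sketch with plain `SystemTime` (the real code goes through `TruncatedTimestamp::for_reliable_mtime_of`):

use std::time::{Duration, SystemTime};

// Return Some(mtime) only when caching is safe: the mtime is strictly
// in the past relative to the status start time, so a later change
// cannot reuse the same timestamp (barring clock jumps).
fn reliable_mtime(mtime: SystemTime, status_start: SystemTime) -> Option<SystemTime> {
    if mtime < status_start {
        Some(mtime)
    } else {
        None // modified too recently: don't cache read_dir results
    }
}

fn main() {
    let start = SystemTime::now();
    let older = start - Duration::from_secs(5);
    assert!(reliable_mtime(older, start).is_some());
    assert!(reliable_mtime(start, start).is_none()); // same tick: unsafe
    println!("guard behaves as described");
}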
579 579
580 580 /// A file with `EntryState::Normal` in the dirstate was found in the
581 581 /// filesystem
582 582 fn handle_normal_file(
583 583 &self,
584 584 dirstate_node: &NodeRef<'tree, 'on_disk>,
585 585 fs_metadata: &std::fs::Metadata,
586 586 ) -> Result<(), DirstateV2ParseError> {
587 587 // Keep the low 31 bits
588 588 fn truncate_u64(value: u64) -> i32 {
589 589 (value & 0x7FFF_FFFF) as i32
590 590 }
591 591
592 592 let entry = dirstate_node
593 593 .entry()?
594 594 .expect("handle_normal_file called with entry-less node");
595 595 let mode_changed =
596 596 || self.options.check_exec && entry.mode_changed(fs_metadata);
597 597 let size = entry.size();
598 598 let size_changed = size != truncate_u64(fs_metadata.len());
599 599 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
600 600 // issue6456: Size returned may be larger due to encryption
601 601 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
602 602 self.push_outcome(Outcome::Unsure, dirstate_node)?
603 603 } else if dirstate_node.has_copy_source()
604 604 || entry.is_from_other_parent()
605 605 || (size >= 0 && (size_changed || mode_changed()))
606 606 {
607 607 self.push_outcome(Outcome::Modified, dirstate_node)?
608 608 } else {
609 609 let mtime_looks_clean;
610 610 if let Some(dirstate_mtime) = entry.truncated_mtime() {
611 611 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
612 612 .expect("OS/libc does not support mtime?");
613 613 // There might be a change in the future if, for example,
614 614 // the internal clock drifts while the process runs, but
615 615 // this is a case where the issues the user would face
616 616 // would be a lot worse and there is nothing we
617 617 // can really do.
618 618 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
619 619 } else {
620 620 // No mtime in the dirstate entry
621 621 mtime_looks_clean = false
622 622 };
623 623 if !mtime_looks_clean {
624 624 self.push_outcome(Outcome::Unsure, dirstate_node)?
625 625 } else if self.options.list_clean {
626 626 self.push_outcome(Outcome::Clean, dirstate_node)?
627 627 }
628 628 }
629 629 Ok(())
630 630 }
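A note on `truncate_u64`: the dirstate's size field is a signed 32-bit integer and negative sizes act as special markers, hence the `size >= 0` checks above, so on-disk lengths are compared modulo 2^31. A quick illustration of the consequence:

// Keep the low 31 bits, as handle_normal_file does.
fn truncate_u64(value: u64) -> i32 {
    (value & 0x7FFF_FFFF) as i32
}

fn main() {
    assert_eq!(truncate_u64(1_000), 1_000);
    // A file of 2^31 + 1000 bytes truncates to the same value as a
    // 1000-byte file, so the size check alone cannot tell them apart.
    assert_eq!(truncate_u64((1 << 31) + 1_000), 1_000);
}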
631 631
632 632 /// A node in the dirstate tree has no corresponding filesystem entry
633 633 fn traverse_dirstate_only(
634 634 &self,
635 635 dirstate_node: NodeRef<'tree, 'on_disk>,
636 636 ) -> Result<(), DirstateV2ParseError> {
637 637 self.check_for_outdated_directory_cache(&dirstate_node)?;
638 638 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
639 639 dirstate_node
640 640 .children(self.dmap.on_disk)?
641 641 .par_iter()
642 642 .map(|child_node| self.traverse_dirstate_only(child_node))
643 643 .collect()
644 644 }
645 645
646 646 /// A node in the dirstate tree has no corresponding *file* on the
647 647 /// filesystem
648 648 ///
649 649 /// Does nothing on a "directory" node
650 650 fn mark_removed_or_deleted_if_file(
651 651 &self,
652 652 dirstate_node: &NodeRef<'tree, 'on_disk>,
653 653 ) -> Result<(), DirstateV2ParseError> {
654 654 if let Some(state) = dirstate_node.state()? {
655 655 let path = dirstate_node.full_path(self.dmap.on_disk)?;
656 656 if self.matcher.matches(path) {
657 657 if let EntryState::Removed = state {
658 658 self.push_outcome(Outcome::Removed, dirstate_node)?
659 659 } else {
660 660 self.push_outcome(Outcome::Deleted, &dirstate_node)?
661 661 }
662 662 }
663 663 }
664 664 Ok(())
665 665 }
666 666
667 667 /// Something in the filesystem has no corresponding dirstate node
668 668 ///
669 669 /// Returns whether that path is ignored
670 670 fn traverse_fs_only(
671 671 &self,
672 672 has_ignored_ancestor: bool,
673 673 directory_hg_path: &HgPath,
674 674 fs_entry: &DirEntry,
675 675 ) -> bool {
676 676 let hg_path = directory_hg_path.join(&fs_entry.base_name);
677 677 let file_type = fs_entry.metadata.file_type();
678 678 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
679 679 if file_type.is_dir() {
680 680 let is_ignored =
681 681 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
682 682 let traverse_children = if is_ignored {
683 683 // Descendants of an ignored directory are all ignored
684 684 self.options.list_ignored
685 685 } else {
686 686 // Descendants of an unknown directory may be either unknown or
687 687 // ignored
688 688 self.options.list_unknown || self.options.list_ignored
689 689 };
690 690 if traverse_children {
691 691 let is_at_repo_root = false;
692 692 if let Ok(children_fs_entries) = self.read_dir(
693 693 &hg_path,
694 694 &fs_entry.full_path,
695 695 is_at_repo_root,
696 696 ) {
697 697 children_fs_entries.par_iter().for_each(|child_fs_entry| {
698 698 self.traverse_fs_only(
699 699 is_ignored,
700 700 &hg_path,
701 701 child_fs_entry,
702 702 );
703 703 })
704 704 }
705 705 }
706 706 if self.options.collect_traversed_dirs {
707 707 self.outcome.lock().unwrap().traversed.push(hg_path.into())
708 708 }
709 709 is_ignored
710 710 } else {
711 711 if file_or_symlink {
712 712 if self.matcher.matches(&hg_path) {
713 713 self.mark_unknown_or_ignored(
714 714 has_ignored_ancestor,
715 715 &BorrowedPath::InMemory(&hg_path),
716 716 )
717 717 } else {
718 718 // We haven’t computed whether this path is ignored. It
719 719 // might not be, and a future run of status might have a
720 720 // different matcher that matches it. So treat it as not
721 721 // ignored. That is, inhibit readdir caching of the parent
722 722 // directory.
723 723 false
724 724 }
725 725 } else {
726 726 // This is neither a directory, a plain file, nor a symlink.
727 727 // Treat it like an ignored file.
728 728 true
729 729 }
730 730 }
731 731 }
732 732
733 733 /// Returns whether that path is ignored
734 734 fn mark_unknown_or_ignored(
735 735 &self,
736 736 has_ignored_ancestor: bool,
737 737 hg_path: &BorrowedPath<'_, 'on_disk>,
738 738 ) -> bool {
739 739 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
740 740 if is_ignored {
741 741 if self.options.list_ignored {
742 742 self.push_outcome_without_copy_source(
743 743 Outcome::Ignored,
744 744 hg_path,
745 745 )
746 746 }
747 747 } else {
748 748 if self.options.list_unknown {
749 749 self.push_outcome_without_copy_source(
750 750 Outcome::Unknown,
751 751 hg_path,
752 752 )
753 753 }
754 754 }
755 755 is_ignored
756 756 }
757 757 }
758 758
759 759 struct DirEntry {
760 760 base_name: HgPathBuf,
761 761 full_path: PathBuf,
762 762 metadata: std::fs::Metadata,
763 763 }
764 764
765 765 impl DirEntry {
766 766 /// Returns **unsorted** entries in the given directory, with name and
767 767 /// metadata.
768 768 ///
769 769 /// If a `.hg` sub-directory is encountered:
770 770 ///
771 771 /// * At the repository root, ignore that sub-directory
772 772 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
773 773 /// list instead.
774 774 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
775 775 // `read_dir` returns a "not found" error for the empty path
776 776 let at_cwd = path == Path::new("");
777 777 let read_dir_path = if at_cwd { Path::new(".") } else { path };
778 778 let mut results = Vec::new();
779 779 for entry in read_dir_path.read_dir()? {
780 780 let entry = entry?;
781 let metadata = entry.metadata()?;
781 let metadata = match entry.metadata() {
782 Ok(v) => v,
783 Err(e) => {
784 // race with file deletion?
785 if e.kind() == std::io::ErrorKind::NotFound {
786 continue;
787 } else {
788 return Err(e);
789 }
790 }
791 };
782 792 let file_name = entry.file_name();
783 793 // FIXME don't do this when cached
784 794 if file_name == ".hg" {
785 795 if is_at_repo_root {
786 796 // Skip the repo’s own .hg (might be a symlink)
787 797 continue;
788 798 } else if metadata.is_dir() {
789 799 // A .hg sub-directory at another location means a subrepo,
790 800 // skip it entirely.
791 801 return Ok(Vec::new());
792 802 }
793 803 }
794 804 let full_path = if at_cwd {
795 805 file_name.clone().into()
796 806 } else {
797 807 entry.path()
798 808 };
799 809 let base_name = get_bytes_from_os_string(file_name).into();
800 810 results.push(DirEntry {
801 811 base_name,
802 812 full_path,
803 813 metadata,
804 814 })
805 815 }
806 816 Ok(results)
807 817 }
808 818 }
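The hunk above, where the bare `entry.metadata()?` call becomes a match, is the behavioral change in this file: an entry can be deleted between `read_dir` listing it and `metadata()` being called, and that race should skip the entry rather than fail the whole status. The same pattern in isolation, as a stand-alone sketch:

use std::io;

fn main() -> io::Result<()> {
    for entry in std::fs::read_dir(".")? {
        let entry = entry?;
        let metadata = match entry.metadata() {
            Ok(v) => v,
            // Entry vanished between read_dir and metadata: skip it.
            Err(e) if e.kind() == io::ErrorKind::NotFound => continue,
            Err(e) => return Err(e),
        };
        println!("{:?}: {} bytes", entry.file_name(), metadata.len());
    }
    Ok(())
}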
809 819
810 820 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
811 821 /// of the given repository.
812 822 ///
813 823 /// This is similar to `SystemTime::now()`, with the result truncated to the
814 824 /// same time resolution as other files’ modification times. Using `.hg`
815 825 /// instead of the system’s default temporary directory (such as `/tmp`) makes
816 826 /// it more likely the temporary file is in the same disk partition as contents
817 827 /// of the working directory, which can matter since different filesystems may
818 828 /// store timestamps with different resolutions.
819 829 ///
820 830 /// This may fail, typically if we lack write permissions. In that case we
821 831 /// should continue the `status()` algorithm anyway and consider the current
822 832 /// date/time to be unknown.
823 833 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
824 834 tempfile::tempfile_in(repo_root.join(".hg"))?
825 835 .metadata()?
826 836 .modified()
827 837 }
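A possible caller of `filesystem_now`, sketched under the assumption of a writable `.hg` directory and the `tempfile` crate (already a dependency here); the repository path is hypothetical and the function body is repeated to keep the sketch self-contained:

use std::path::Path;
use std::time::SystemTime;

fn filesystem_now(repo_root: &Path) -> std::io::Result<SystemTime> {
    tempfile::tempfile_in(repo_root.join(".hg"))?
        .metadata()?
        .modified()
}

fn main() -> std::io::Result<()> {
    let repo_root = Path::new("/path/to/repo"); // hypothetical
    let fs_now = filesystem_now(repo_root)?;
    let wall_now = SystemTime::now();
    // On a filesystem with 1 s resolution (e.g. ext3) these can differ
    // by up to a second, which is exactly what the truncation captures.
    println!("filesystem: {:?}\nwall clock: {:?}", fs_now, wall_now);
    Ok(())
}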