##// END OF EJS Templates
automation: only iterate over our AMIs...
Gregory Szorc -
r42461:730edbd8 default
parent child Browse files
Show More
@@ -1,879 +1,879 b''
1 1 # aws.py - Automation code for Amazon Web Services
2 2 #
3 3 # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # no-check-code because Python 3 native.
9 9
10 10 import contextlib
11 11 import copy
12 12 import hashlib
13 13 import json
14 14 import os
15 15 import pathlib
16 16 import subprocess
17 17 import time
18 18
19 19 import boto3
20 20 import botocore.exceptions
21 21
22 22 from .winrm import (
23 23 run_powershell,
24 24 wait_for_winrm,
25 25 )
26 26
27 27
# Directory containing this module; the source checkout root is three more
# levels up (four levels above this file in total).
_MODULE_DIR = pathlib.Path(os.path.abspath(__file__)).parent
SOURCE_ROOT = _MODULE_DIR.parent.parent.parent

# PowerShell script whose lines are appended to the Windows AMI bootstrap
# commands.
INSTALL_WINDOWS_DEPENDENCIES = SOURCE_ROOT.joinpath(
    'contrib', 'install-windows-dependencies.ps1')
32 32
33 33
# Unprefixed names of the EC2 key pairs that ensure_key_pairs() creates when
# they are missing remotely.
KEY_PAIRS = {'automation'}
37 37
38 38
def _tcp_ingress(from_port, to_port, description):
    """Build a TCP ingress permission open to every IPv4 address."""
    return {
        'FromPort': from_port,
        'ToPort': to_port,
        'IpProtocol': 'tcp',
        'IpRanges': [
            {
                'CidrIp': '0.0.0.0/0',
                'Description': description,
            },
        ],
    }


# Security groups to maintain, keyed by unprefixed group name. ``ingress``
# is passed verbatim as ``IpPermissions`` when a group is created.
SECURITY_GROUPS = {
    'windows-dev-1': {
        'description': 'Mercurial Windows instances that perform build automation',
        'ingress': [
            _tcp_ingress(22, 22, 'SSH from entire Internet'),
            _tcp_ingress(3389, 3389, 'RDP from entire Internet'),
            _tcp_ingress(
                5985, 5986,
                'PowerShell Remoting (Windows Remote Management)'),
        ],
    },
}
80 80
81 81
# IAM roles to maintain, keyed by unprefixed role name. Every ARN listed in
# ``policy_arns`` is attached to the role when it is created.
IAM_ROLES = {
    'ephemeral-ec2-role-1': {
        'description': 'Mercurial temporary EC2 instances',
        'policy_arns': [
            'arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM',
        ],
    },
}
90 90
91 91
# Trust policy (JSON) that lets the EC2 service assume a role via
# sts:AssumeRole. It is passed as ``AssumeRolePolicyDocument`` when IAM roles
# are created.
ASSUME_ROLE_POLICY_DOCUMENT = '''
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
'''.strip()
106 106
107 107
# IAM instance profiles to maintain, keyed by unprefixed profile name.
# ``roles`` lists the unprefixed IAM role names each profile must contain.
IAM_INSTANCE_PROFILES = {
    'ephemeral-ec2-1': {
        'roles': ['ephemeral-ec2-role-1'],
    },
}
115 115
116 116
# User Data for Windows EC2 instance. Mainly used to set the password
# and configure WinRM.
# Inspired by the User Data script used by Packer
# (from https://www.packer.io/intro/getting-started/build-image.html).
#
# The template contains a single ``%s`` placeholder which is filled with the
# Administrator password before the text is sent as instance UserData.
# NOTE: the unformatted template is part of the Windows AMI fingerprint, so
# any edit to this text causes the AMI to be rebuilt.
WINDOWS_USER_DATA = r'''
<powershell>

# TODO enable this once we figure out what is failing.
#$ErrorActionPreference = "stop"

# Set administrator password
net user Administrator "%s"
wmic useraccount where "name='Administrator'" set PasswordExpires=FALSE

# First, make sure WinRM can't be connected to
netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new enable=yes action=block

# Delete any existing WinRM listeners
winrm delete winrm/config/listener?Address=*+Transport=HTTP 2>$Null
winrm delete winrm/config/listener?Address=*+Transport=HTTPS 2>$Null

# Create a new WinRM listener and configure
winrm create winrm/config/listener?Address=*+Transport=HTTP
winrm set winrm/config/winrs '@{MaxMemoryPerShellMB="0"}'
winrm set winrm/config '@{MaxTimeoutms="7200000"}'
winrm set winrm/config/service '@{AllowUnencrypted="true"}'
winrm set winrm/config/service '@{MaxConcurrentOperationsPerUser="12000"}'
winrm set winrm/config/service/auth '@{Basic="true"}'
winrm set winrm/config/client/auth '@{Basic="true"}'

# Configure UAC to allow privilege elevation in remote shells
$Key = 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System'
$Setting = 'LocalAccountTokenFilterPolicy'
Set-ItemProperty -Path $Key -Name $Setting -Value 1 -Force

# Configure and restart the WinRM Service; Enable the required firewall exception
Stop-Service -Name WinRM
Set-Service -Name WinRM -StartupType Automatic
netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new action=allow localip=any remoteip=any
Start-Service -Name WinRM

# Disable firewall on private network interfaces so prompts don't appear.
Set-NetFirewallProfile -Name private -Enabled false
</powershell>
'''.lstrip()
162 162
163 163
# PowerShell executed through SSM (split into one command per line) on the
# first boot of the temporary instance used to build the Windows AMI.
# NOTE: this text is part of the AMI fingerprint, so any edit here (even a
# wording fix in a Write-Output message) causes the AMI to be rebuilt.
WINDOWS_BOOTSTRAP_POWERSHELL = '''
Write-Output "installing PowerShell dependencies"
Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force
Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
Install-Module -Name OpenSSHUtils -RequiredVersion 0.0.2.0

Write-Output "installing OpenSSL server"
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
# Various tools will attempt to use older versions of .NET. So we enable
# the feature that provides them so it doesn't have to be auto-enabled
# later.
Write-Output "enabling .NET Framework feature"
Install-WindowsFeature -Name Net-Framework-Core
'''
178 178
179 179
class AWSConnection:
    """Manages the state of a connection with AWS.

    Constructing an instance opens a boto3 session for ``region``, creates
    EC2 and IAM clients/resources from it, and then reconciles the key
    pairs, security groups and IAM state this module defines.
    """

    def __init__(self, automation, region: str):
        self.automation = automation
        self.local_state_path = automation.state_path

        self.prefix = 'hg-'

        session = boto3.session.Session(region_name=region)
        self.session = session
        self.ec2client = session.client('ec2')
        self.ec2resource = session.resource('ec2')
        self.iamclient = session.client('iam')
        self.iamresource = session.resource('iam')

        # Reconcile remote state in the same order every time: key pairs,
        # then security groups, then IAM roles and instance profiles.
        ensure_key_pairs(automation.state_path, self.ec2resource)

        self.security_groups = ensure_security_groups(self.ec2resource)
        ensure_iam_state(self.iamresource)

    def key_pair_path_private(self, name):
        """Path to a key pair private key file."""
        filename = 'keypair-%s' % name
        return self.local_state_path / 'keys' / filename

    def key_pair_path_public(self, name):
        """Path to a key pair public key file."""
        filename = 'keypair-%s.pub' % name
        return self.local_state_path / 'keys' / filename
206 206
207 207
def rsa_key_fingerprint(p: pathlib.Path):
    """Compute the fingerprint of an RSA private key.

    ``openssl`` converts the key at ``p`` to unencrypted PKCS#8 DER. The
    SHA-1 digest of those bytes is returned as lowercase hex byte pairs
    joined by ``:``.

    Raises ``subprocess.CalledProcessError`` if ``openssl`` exits non-zero.
    """

    # TODO use rsa package.
    command = [
        'openssl', 'pkcs8',
        '-in', str(p),
        '-nocrypt',
        '-topk8',
        '-outform', 'DER',
    ]
    der = subprocess.run(command, capture_output=True, check=True).stdout

    digest = hashlib.sha1(der).hexdigest()
    pairs = (digest[i:i + 2] for i in range(0, len(digest), 2))
    return ':'.join(pairs)
220 220
221 221
def ensure_key_pairs(state_path: pathlib.Path, ec2resource, prefix='hg-'):
    """Reconcile EC2 key pairs with the key files stored locally.

    Remote key pairs whose name starts with ``prefix`` are compared against
    ``keypair-<name>`` / ``keypair-<name>.pub`` files in ``state_path/keys``:

    * a key present on only one side is deleted from that side;
    * a key whose fingerprints disagree is deleted from both sides;
    * every name in ``KEY_PAIRS`` that is then missing remotely is created
      in EC2 and its private/public halves are written locally.

    ``state_path`` must already exist; only the ``keys`` subdirectory is
    created here.
    """
    remote_existing = {}

    for kpi in ec2resource.key_pairs.all():
        if kpi.name.startswith(prefix):
            remote_existing[kpi.name[len(prefix):]] = kpi.key_fingerprint

    # Validate that we have these keys locally.
    key_path = state_path / 'keys'
    key_path.mkdir(exist_ok=True, mode=0o700)

    def unlink_if_present(path):
        # A half-written key pair may lack one of its files; that must not
        # abort reconciliation.
        with contextlib.suppress(FileNotFoundError):
            path.unlink()

    def remove_remote(name):
        print('deleting key pair %s' % name)
        key = ec2resource.KeyPair(name)
        key.delete()

    def remove_local(name):
        pub_full = key_path / ('keypair-%s.pub' % name)
        priv_full = key_path / ('keypair-%s' % name)

        print('removing %s' % pub_full)
        unlink_if_present(pub_full)
        print('removing %s' % priv_full)
        unlink_if_present(priv_full)

    local_existing = {}

    for f in sorted(os.listdir(key_path)):
        if not f.startswith('keypair-') or not f.endswith('.pub'):
            continue

        name = f[len('keypair-'):-len('.pub')]

        pub_full = key_path / f
        priv_full = key_path / ('keypair-%s' % name)

        with open(pub_full, 'r', encoding='ascii') as fh:
            data = fh.read()

        if not data.startswith('ssh-rsa '):
            print('unexpected format for key pair file: %s; removing' %
                  pub_full)
            pub_full.unlink()
            unlink_if_present(priv_full)
            continue

        if not priv_full.exists():
            # Without the private half the pair is useless and cannot be
            # fingerprinted. Drop it so the key is treated as missing locally.
            print('private key missing for key pair file: %s; removing' %
                  pub_full)
            pub_full.unlink()
            continue

        local_existing[name] = rsa_key_fingerprint(priv_full)

    for name in sorted(set(remote_existing) | set(local_existing)):
        if name not in local_existing:
            actual = '%s%s' % (prefix, name)
            print('remote key %s does not exist locally' % name)
            remove_remote(actual)
            del remote_existing[name]

        elif name not in remote_existing:
            print('local key %s does not exist remotely' % name)
            remove_local(name)
            del local_existing[name]

        elif remote_existing[name] != local_existing[name]:
            print('key fingerprint mismatch for %s; '
                  'removing from local and remote' % name)
            remove_local(name)
            remove_remote('%s%s' % (prefix, name))
            del local_existing[name]
            del remote_existing[name]

    missing = KEY_PAIRS - set(remote_existing)

    # Create the private key file with owner-only permissions from the start
    # so the key material is never readable by other users, even briefly.
    # O_BINARY (Windows only) prevents a second newline translation below
    # the text layer.
    open_flags = (os.O_WRONLY | os.O_CREAT | os.O_TRUNC |
                  getattr(os, 'O_BINARY', 0))

    for name in sorted(missing):
        actual = '%s%s' % (prefix, name)
        print('creating key pair %s' % actual)

        priv_full = key_path / ('keypair-%s' % name)
        pub_full = key_path / ('keypair-%s.pub' % name)

        kp = ec2resource.create_key_pair(KeyName=actual)

        fd = os.open(str(priv_full), open_flags, 0o600)
        with os.fdopen(fd, 'w', encoding='ascii') as fh:
            fh.write(kp.key_material)
            fh.write('\n')

        # The creation mode is ignored when the file already existed, so
        # tighten it explicitly as well.
        priv_full.chmod(0o0600)

        # SSH public key can be extracted via `ssh-keygen`.
        with pub_full.open('w', encoding='ascii') as fh:
            subprocess.run(
                ['ssh-keygen', '-y', '-f', str(priv_full)],
                stdout=fh,
                check=True)

        pub_full.chmod(0o0600)
315 315
316 316
def delete_instance_profile(profile):
    """Remove every role from an IAM instance profile, then delete it."""
    profile_name = profile.name

    for attached in profile.roles:
        role_name = attached.name
        print('removing role %s from instance profile %s' % (role_name,
                                                             profile_name))
        profile.remove_role(RoleName=role_name)

    print('deleting instance profile %s' % profile_name)
    profile.delete()
325 325
326 326
def ensure_iam_state(iamresource, prefix='hg-'):
    """Ensure IAM state is in sync with our canonical definition.

    Only instance profiles and roles whose name starts with ``prefix`` are
    considered. Those not listed in ``IAM_INSTANCE_PROFILES`` / ``IAM_ROLES``
    are deleted, missing ones are created (roles get their ``policy_arns``
    attached), and finally each profile's role membership is made to match
    its definition.
    """

    remote_profiles = {}

    for profile in iamresource.instance_profiles.all():
        if profile.name.startswith(prefix):
            remote_profiles[profile.name[len(prefix):]] = profile

    for name in sorted(set(remote_profiles) - set(IAM_INSTANCE_PROFILES)):
        delete_instance_profile(remote_profiles[name])
        del remote_profiles[name]

    remote_roles = {}

    for role in iamresource.roles.all():
        if role.name.startswith(prefix):
            remote_roles[role.name[len(prefix):]] = role

    for name in sorted(set(remote_roles) - set(IAM_ROLES)):
        role = remote_roles[name]

        # IAM refuses to delete a role that still has managed policies
        # attached, and we attach policies to every role we create.
        for policy in role.attached_policies.all():
            print('detaching policy %s from %s' % (policy.arn, role.name))
            role.detach_policy(PolicyArn=policy.arn)

        print('removing role %s' % role.name)
        role.delete()
        del remote_roles[name]

    # We've purged remote state that doesn't belong. Create missing
    # instance profiles and roles.
    for name in sorted(set(IAM_INSTANCE_PROFILES) - set(remote_profiles)):
        actual = '%s%s' % (prefix, name)
        print('creating IAM instance profile %s' % actual)

        profile = iamresource.create_instance_profile(
            InstanceProfileName=actual)
        remote_profiles[name] = profile

    for name in sorted(set(IAM_ROLES) - set(remote_roles)):
        entry = IAM_ROLES[name]

        actual = '%s%s' % (prefix, name)
        print('creating IAM role %s' % actual)

        role = iamresource.create_role(
            RoleName=actual,
            Description=entry['description'],
            AssumeRolePolicyDocument=ASSUME_ROLE_POLICY_DOCUMENT,
        )

        remote_roles[name] = role

        for arn in entry['policy_arns']:
            print('attaching policy %s to %s' % (arn, role.name))
            role.attach_policy(PolicyArn=arn)

    # Now reconcile state of profiles.
    for name, meta in sorted(IAM_INSTANCE_PROFILES.items()):
        profile = remote_profiles[name]
        wanted = {'%s%s' % (prefix, role) for role in meta['roles']}
        have = {role.name for role in profile.roles}

        for role in sorted(have - wanted):
            print('removing role %s from %s' % (role, profile.name))
            profile.remove_role(RoleName=role)

        for role in sorted(wanted - have):
            print('adding role %s to %s' % (role, profile.name))
            profile.add_role(RoleName=role)
394 394
395 395
def find_windows_server_2019_image(ec2resource):
    """Find the Amazon published Windows Server 2019 base image.

    Returns the first image matching the filters below and raises
    ``Exception`` when nothing matches.
    """

    criteria = [
        ('owner-alias', 'amazon'),
        ('state', 'available'),
        ('image-type', 'machine'),
        ('name', 'Windows_Server-2019-English-Full-Base-2019.02.13'),
    ]
    filters = [{'Name': key, 'Values': [value]} for key, value in criteria]

    # A private sentinel tells "no results" apart from any real result.
    nothing = object()
    first = next(iter(ec2resource.images.filter(Filters=filters)), nothing)

    if first is nothing:
        raise Exception('unable to find Windows Server 2019 image')

    return first
423 423
424 424
def ensure_security_groups(ec2resource, prefix='hg-'):
    """Ensure all necessary Mercurial security groups are present.

    All security groups are prefixed with ``hg-`` by default. Any security
    groups having this prefix but aren't in our list are deleted.

    Returns a dict mapping each unprefixed name in ``SECURITY_GROUPS`` to
    its security group resource. Groups that already exist are returned
    as-is and new ones are created with the configured ingress rules.
    """
    remote = {
        candidate.group_name[len(prefix):]: candidate
        for candidate in ec2resource.security_groups.all()
        if candidate.group_name.startswith(prefix)
    }

    # Prefixed groups we no longer define are stale.
    for stale_name in sorted(set(remote) - set(SECURITY_GROUPS)):
        stale = remote[stale_name]
        print('removing legacy security group: %s' % stale.group_name)
        stale.delete()

    resolved = {}

    for name, definition in sorted(SECURITY_GROUPS.items()):
        if name not in remote:
            full_name = '%s%s' % (prefix, name)
            print('adding security group %s' % full_name)

            created = ec2resource.create_security_group(
                Description=definition['description'],
                GroupName=full_name,
            )
            created.authorize_ingress(IpPermissions=definition['ingress'])

            resolved[name] = created
        else:
            resolved[name] = remote[name]

    return resolved
466 466
467 467
def terminate_ec2_instances(ec2resource, prefix='hg-'):
    """Terminate all EC2 instances managed by us.

    An instance is ours when its ``Name`` tag starts with ``prefix``.
    Instances already in the ``terminated`` state are skipped. All
    terminations are requested first and then each one is waited on.
    """
    pending = []

    for candidate in ec2resource.instances.all():
        if candidate.state['Name'] != 'terminated':
            for tag in candidate.tags or []:
                if tag['Key'] != 'Name' or not tag['Value'].startswith(prefix):
                    continue

                print('terminating %s' % candidate.id)
                candidate.terminate()
                pending.append(candidate)

    for terminated in pending:
        terminated.wait_until_terminated()
484 484
485 485
def remove_resources(c, prefix='hg-'):
    """Purge all of our resources in this EC2 region.

    ``c`` supplies the ``ec2resource`` and ``iamresource`` to operate on.
    Everything whose name starts with ``prefix`` is removed, in this order:
    EC2 instances, AMIs (with their snapshots), security groups, IAM
    instance profiles and IAM roles.
    """
    ec2resource = c.ec2resource
    iamresource = c.iamresource

    terminate_ec2_instances(ec2resource, prefix=prefix)

    # Only look at AMIs owned by this account. Enumerating every image
    # visible to the account is needlessly expensive.
    for image in ec2resource.images.filter(Owners=['self']):
        if image.name.startswith(prefix):
            remove_ami(ec2resource, image)

    for group in ec2resource.security_groups.all():
        if group.group_name.startswith(prefix):
            print('removing security group %s' % group.group_name)
            group.delete()

    for profile in iamresource.instance_profiles.all():
        if profile.name.startswith(prefix):
            delete_instance_profile(profile)

    for role in iamresource.roles.all():
        if role.name.startswith(prefix):
            # IAM refuses to delete a role that still has managed policies
            # attached, so detach them first.
            for policy in role.attached_policies.all():
                print('detaching policy %s from %s' % (policy.arn,
                                                       role.name))
                role.detach_policy(PolicyArn=policy.arn)

            print('removing role %s' % role.name)
            role.delete()
510 510
511 511
def wait_for_ip_addresses(instances):
    """Wait for the public IP addresses of an iterable of instances.

    Each instance is reloaded every 2 seconds until it reports a public IP
    address. There is no timeout.
    """
    for instance in instances:
        while not instance.public_ip_address:
            time.sleep(2)
            instance.reload()

        print('public IP address for %s: %s' % (
            instance.id, instance.public_ip_address))
524 524
525 525
def remove_ami(ec2resource, image):
    """Remove an AMI and its underlying snapshots.

    Snapshot references are collected from the image's EBS block device
    mappings before the image is deregistered, then each is deleted.
    """
    ebs_snapshots = [
        ec2resource.Snapshot(mapping['Ebs']['SnapshotId'])
        for mapping in image.block_device_mappings
        if 'Ebs' in mapping
    ]

    print('deregistering %s' % image.id)
    image.deregister()

    for snap in ebs_snapshots:
        print('deleting snapshot %s' % snap.id)
        snap.delete()
540 540
541 541
def wait_for_ssm(ssmclient, instances):
    """Wait for SSM to come online for a list of instances.

    SSM is polled every 2 seconds until it reports as many entries as
    there are instances. There is no timeout.
    """
    instance_ids = [i.id for i in instances]
    query = [{'Key': 'InstanceIds', 'Values': instance_ids}]

    while True:
        response = ssmclient.describe_instance_information(Filters=query)

        online = len(response['InstanceInformationList'])
        expected = len(instances)

        print('%d/%d instances available in SSM' % (online, expected))

        if online == expected:
            break

        time.sleep(2)
563 563
564 564
def run_ssm_command(ssmclient, instances, document_name, parameters):
    """Run an SSM document on EC2 instances and wait for it to succeed.

    The command is sent to every instance at once with CloudWatch output
    enabled. Each instance's invocation is then polled until it reports
    ``Success``. Any status other than ``Success``, ``Pending``,
    ``InProgress`` or ``Delayed`` raises ``Exception``.
    """

    sent = ssmclient.send_command(
        InstanceIds=[i.id for i in instances],
        DocumentName=document_name,
        Parameters=parameters,
        CloudWatchOutputConfig={
            'CloudWatchOutputEnabled': True,
        },
    )

    command_id = sent['Command']['CommandId']
    still_running = ('Pending', 'InProgress', 'Delayed')

    for instance in instances:
        while True:
            try:
                invocation = ssmclient.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=instance.id,
                )
            except botocore.exceptions.ClientError as e:
                # The invocation may not be visible right after sending;
                # anything else is a real error.
                if e.response['Error']['Code'] != 'InvocationDoesNotExist':
                    raise

                print('could not find SSM command invocation; waiting')
                time.sleep(1)
                continue

            status = invocation['Status']

            if status == 'Success':
                break

            if status not in still_running:
                raise Exception('command failed on %s: %s' % (
                    instance.id, status))

            time.sleep(2)
601 601
602 602
@contextlib.contextmanager
def temporary_ec2_instances(ec2resource, config):
    """Create temporary EC2 instances.

    This is a proxy to ``ec2resource.create_instances(**config)`` that takes
    care of managing the lifecycle of the instances.

    The context manager evaluates to the list of created instances. They may
    not be ready for work immediately: it is up to the caller to wait for
    each instance to start responding.

    When the context manager exits, normally or through an exception,
    termination of every created instance is requested. Termination is not
    waited on.
    """

    created = []
    created_ids = []

    try:
        created = ec2resource.create_instances(**config)

        created_ids = [instance.id for instance in created]
        print('started instances: %s' % ' '.join(created_ids))

        yield created
    finally:
        if created_ids:
            print('terminating instances: %s' % ' '.join(created_ids))
            for instance in created:
                instance.terminate()
            print('terminated %d instances' % len(created_ids))
633 633
634 634
@contextlib.contextmanager
def create_temp_windows_ec2_instances(c: AWSConnection, config):
    """Create temporary Windows EC2 instances.

    This is a higher-level wrapper around ``temporary_ec2_instances()`` that
    configures the Windows instance for Windows Remote Management. The
    emitted instances will have a ``winrm_client`` attribute holding the
    client returned by ``wait_for_winrm()`` for that instance.

    ``config`` is not modified; a deep copy is extended with the instance
    profile, a ``Name`` tag and the user data. Raises ``ValueError`` if
    ``config`` already contains ``IamInstanceProfile`` or ``UserData``.
    """
    for reserved in ('IamInstanceProfile', 'UserData'):
        if reserved in config:
            raise ValueError('%s cannot be provided in config' % reserved)

    password = c.automation.default_password()

    launch_config = copy.deepcopy(config)
    launch_config['IamInstanceProfile'] = {
        'Name': 'hg-ephemeral-ec2-1',
    }

    tag_specs = launch_config.setdefault('TagSpecifications', [])
    tag_specs.append({
        'ResourceType': 'instance',
        'Tags': [{'Key': 'Name', 'Value': 'hg-temp-windows'}],
    })

    launch_config['UserData'] = WINDOWS_USER_DATA % password

    with temporary_ec2_instances(c.ec2resource, launch_config) as instances:
        wait_for_ip_addresses(instances)

        print('waiting for Windows Remote Management service...')

        for instance in instances:
            instance.winrm_client = wait_for_winrm(
                instance.public_ip_address, 'Administrator', password)
            print('established WinRM connection to %s' % instance.id)

        yield instances
672 672
673 673
def ensure_windows_dev_ami(c: AWSConnection, prefix='hg-'):
    """Ensure Windows Development AMI is available and up-to-date.

    If necessary, a modern AMI will be built by starting a temporary EC2
    instance and bootstrapping it.

    Obsolete AMIs will be deleted so there is only a single AMI having the
    desired name.

    An existing AMI is considered up-to-date when its ``HGIMAGEFINGERPRINT``
    tag equals the SHA-256 of the instance config, user data and bootstrap
    commands computed below. Only AMIs owned by this account are examined.

    Returns an ``ec2.Image`` of either an existing AMI or a newly-built
    one.
    """
    ec2client = c.ec2client
    ec2resource = c.ec2resource
    ssmclient = c.session.client('ssm')

    name = '%s%s' % (prefix, 'windows-dev')

    config = {
        'BlockDeviceMappings': [
            {
                'DeviceName': '/dev/sda1',
                'Ebs': {
                    'DeleteOnTermination': True,
                    'VolumeSize': 32,
                    'VolumeType': 'gp2',
                },
            }
        ],
        'ImageId': find_windows_server_2019_image(ec2resource).id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': 't3.medium',
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
    }

    # NOTE: these strings feed the fingerprint below; changing any of them
    # (even a message) forces the AMI to be rebuilt.
    commands = [
        # Need to start the service so sshd_config is generated.
        'Start-Service sshd',
        'Write-Output "modifying sshd_config"',
        r'$content = Get-Content C:\ProgramData\ssh\sshd_config',
        '$content = $content -replace "Match Group administrators","" -replace "AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys",""',
        r'$content | Set-Content C:\ProgramData\ssh\sshd_config',
        'Import-Module OpenSSHUtils',
        r'Repair-SshdConfigPermission C:\ProgramData\ssh\sshd_config -Confirm:$false',
        'Restart-Service sshd',
        'Write-Output "installing OpenSSL client"',
        'Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0',
        'Set-Service -Name sshd -StartupType "Automatic"',
        'Write-Output "OpenSSH server running"',
    ]

    with INSTALL_WINDOWS_DEPENDENCIES.open('r', encoding='utf-8') as fh:
        commands.extend(l.rstrip() for l in fh)

    # Disable Windows Defender when bootstrapping because it just slows
    # things down.
    commands.insert(0, 'Set-MpPreference -DisableRealtimeMonitoring $true')
    commands.append('Set-MpPreference -DisableRealtimeMonitoring $false')

    # Compute a deterministic fingerprint to determine whether image needs
    # to be regenerated.
    fingerprint = {
        'instance_config': config,
        'user_data': WINDOWS_USER_DATA,
        'initial_bootstrap': WINDOWS_BOOTSTRAP_POWERSHELL,
        'bootstrap_commands': commands,
    }

    fingerprint = json.dumps(fingerprint, sort_keys=True)
    fingerprint = hashlib.sha256(fingerprint.encode('utf-8')).hexdigest()

    # Find existing AMIs with this name and delete the ones that are invalid.
    # Store a reference to a good image so it can be returned once the
    # image state is reconciled.
    #
    # Restrict the search to AMIs we own: a same-named image shared by
    # another account is not ours to reconcile or deregister.
    images = ec2resource.images.filter(
        Owners=['self'],
        Filters=[{'Name': 'name', 'Values': [name]}])

    existing_image = None

    for image in images:
        if image.tags is None:
            print('image %s for %s lacks required tags; removing' % (
                image.id, image.name))
            remove_ami(ec2resource, image)
        else:
            tags = {t['Key']: t['Value'] for t in image.tags}

            if tags.get('HGIMAGEFINGERPRINT') == fingerprint:
                existing_image = image
            else:
                print('image %s for %s has wrong fingerprint; removing' % (
                    image.id, image.name))
                remove_ami(ec2resource, image)

    if existing_image:
        return existing_image

    print('no suitable Windows development image found; creating one...')

    with create_temp_windows_ec2_instances(c, config) as instances:
        assert len(instances) == 1
        instance = instances[0]

        wait_for_ssm(ssmclient, [instance])

        # On first boot, install various Windows updates.
        # We would ideally use PowerShell Remoting for this. However, there are
        # trust issues that make it difficult to invoke Windows Update
        # remotely. So we use SSM, which has a mechanism for running Windows
        # Update.
        print('installing Windows features...')
        run_ssm_command(
            ssmclient,
            [instance],
            'AWS-RunPowerShellScript',
            {
                'commands': WINDOWS_BOOTSTRAP_POWERSHELL.split('\n'),
            },
        )

        # Reboot so all updates are fully applied.
        print('rebooting instance %s' % instance.id)
        ec2client.reboot_instances(InstanceIds=[instance.id])

        # Pause before reconnecting; the reboot request returns before the
        # instance has actually gone down.
        time.sleep(15)

        print('waiting for Windows Remote Management to come back...')
        client = wait_for_winrm(instance.public_ip_address, 'Administrator',
                                c.automation.default_password())
        print('established WinRM connection to %s' % instance.id)
        instance.winrm_client = client

        print('bootstrapping instance...')
        run_powershell(instance.winrm_client, '\n'.join(commands))

        print('bootstrap completed; stopping %s to create image' % instance.id)
        instance.stop()

        ec2client.get_waiter('instance_stopped').wait(
            InstanceIds=[instance.id],
            WaiterConfig={
                'Delay': 5,
            })
        print('%s is stopped' % instance.id)

        image = instance.create_image(
            Name=name,
            Description='Mercurial Windows development environment',
        )

        # Tag the image with the fingerprint so later runs can tell whether
        # it is still current.
        image.create_tags(Tags=[
            {
                'Key': 'HGIMAGEFINGERPRINT',
                'Value': fingerprint,
            },
        ])

        print('waiting for image %s' % image.id)

        ec2client.get_waiter('image_available').wait(
            ImageIds=[image.id],
        )

        print('image %s available as %s' % (image.id, image.name))

        return image
843 843
844 844
@contextlib.contextmanager
def temporary_windows_dev_instances(c: AWSConnection, image, instance_type,
                                    prefix='hg-', disable_antivirus=False):
    """Create a temporary Windows development EC2 instance.

    A single instance of ``instance_type`` is launched from ``image``. When
    ``disable_antivirus`` is true, Windows Defender real-time monitoring is
    switched off on it over WinRM before the instances are handed to the
    caller.

    Context manager resolves to the list of ``EC2.Instance`` that were created.
    """
    root_volume = {
        'DeviceName': '/dev/sda1',
        'Ebs': {
            'DeleteOnTermination': True,
            'VolumeSize': 32,
            'VolumeType': 'gp2',
        },
    }
    security_group = c.security_groups['windows-dev-1']

    launch_config = {
        'BlockDeviceMappings': [root_volume],
        'ImageId': image.id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': instance_type,
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [security_group.id],
    }

    with create_temp_windows_ec2_instances(c, launch_config) as launched:
        if disable_antivirus:
            for machine in launched:
                run_powershell(
                    machine.winrm_client,
                    'Set-MpPreference -DisableRealtimeMonitoring $true')

        yield launched
General Comments 0
You need to be logged in to leave comments. Login now