##// END OF EJS Templates
automation: use newer Windows base image...
Gregory Szorc -
r42869:92a99822 stable
parent child Browse files
Show More
@@ -1,1205 +1,1205 b''
1 1 # aws.py - Automation code for Amazon Web Services
2 2 #
3 3 # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # no-check-code because Python 3 native.
9 9
10 10 import contextlib
11 11 import copy
12 12 import hashlib
13 13 import json
14 14 import os
15 15 import pathlib
16 16 import subprocess
17 17 import time
18 18
19 19 import boto3
20 20 import botocore.exceptions
21 21
22 22 from .linux import (
23 23 BOOTSTRAP_DEBIAN,
24 24 )
25 25 from .ssh import (
26 26 exec_command as ssh_exec_command,
27 27 wait_for_ssh,
28 28 )
29 29 from .winrm import (
30 30 run_powershell,
31 31 wait_for_winrm,
32 32 )
33 33
34 34
# Directory four levels above this file's absolute path.
# NOTE(review): presumably the root of the Mercurial checkout - confirm.
SOURCE_ROOT = pathlib.Path(os.path.abspath(__file__)).parent.parent.parent.parent

# Path to ``contrib/install-windows-dependencies.ps1`` under SOURCE_ROOT.
INSTALL_WINDOWS_DEPENDENCIES = (SOURCE_ROOT / 'contrib' /
                                'install-windows-dependencies.ps1')
39 39
40 40
# Instance type name prefixes that are treated as already providing a second
# storage volume. ``temporary_linux_dev_instances()`` only adds an extra EBS
# volume when the requested instance type does not start with one of these.
INSTANCE_TYPES_WITH_STORAGE = {
    'c5d',
    'd2',
    'h1',
    'i3',
    'm5ad',
    'm5d',
    'r5d',
    'r5ad',
    'x1',
    'z1d',
}
53 53
54 54
# AWS account IDs used as the ``owner-id`` filter when looking up the
# upstream Debian and Ubuntu base images.
DEBIAN_ACCOUNT_ID = '379101102735'
UBUNTU_ACCOUNT_ID = '099720109477'


# Unprefixed names of the EC2 key pairs that ``ensure_key_pairs()`` creates
# when they do not exist remotely.
KEY_PAIRS = {
    'automation',
}
62 62
63 63
# Security groups managed by ``ensure_security_groups()``, keyed by their
# unprefixed name. ``description`` is passed as the group description and
# ``ingress`` is passed verbatim as ``IpPermissions`` when a missing group is
# created.
# NOTE(review): every rule below admits traffic from 0.0.0.0/0.
SECURITY_GROUPS = {
    'linux-dev-1': {
        'description': 'Mercurial Linux instances that perform build/test automation',
        'ingress': [
            {
                'FromPort': 22,
                'ToPort': 22,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'SSH from entire Internet',
                    },
                ],
            },
        ],
    },
    'windows-dev-1': {
        'description': 'Mercurial Windows instances that perform build automation',
        'ingress': [
            {
                'FromPort': 22,
                'ToPort': 22,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'SSH from entire Internet',
                    },
                ],
            },
            {
                'FromPort': 3389,
                'ToPort': 3389,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'RDP from entire Internet',
                    },
                ],

            },
            {
                'FromPort': 5985,
                'ToPort': 5986,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'PowerShell Remoting (Windows Remote Management)',
                    },
                ],
            }
        ],
    },
}
121 121
122 122
# IAM roles managed by ``ensure_iam_state()``, keyed by unprefixed name.
# ``policy_arns`` lists the managed policies attached when the role is
# created.
IAM_ROLES = {
    'ephemeral-ec2-role-1': {
        'description': 'Mercurial temporary EC2 instances',
        'policy_arns': [
            'arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM',
        ],
    },
}


# Trust policy passed as ``AssumeRolePolicyDocument`` when creating the roles
# above; it allows the EC2 service to assume the role.
ASSUME_ROLE_POLICY_DOCUMENT = '''
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
'''.strip()


# IAM instance profiles managed by ``ensure_iam_state()``, keyed by
# unprefixed name. ``roles`` lists the unprefixed role names each profile
# should contain.
IAM_INSTANCE_PROFILES = {
    'ephemeral-ec2-1': {
        'roles': [
            'ephemeral-ec2-role-1',
        ],
    }
}
156 156
157 157
# User Data for Windows EC2 instance. Mainly used to set the password
# and configure WinRM.
# Inspired by the User Data script used by Packer
# (from https://www.packer.io/intro/getting-started/build-image.html).
#
# The single ``%s`` placeholder is filled in with the Administrator password
# via ``WINDOWS_USER_DATA % password``; because of that, any literal percent
# sign added to the script would have to be written as ``%%``.
WINDOWS_USER_DATA = r'''
<powershell>

# TODO enable this once we figure out what is failing.
#$ErrorActionPreference = "stop"

# Set administrator password
net user Administrator "%s"
wmic useraccount where "name='Administrator'" set PasswordExpires=FALSE

# First, make sure WinRM can't be connected to
netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new enable=yes action=block

# Delete any existing WinRM listeners
winrm delete winrm/config/listener?Address=*+Transport=HTTP 2>$Null
winrm delete winrm/config/listener?Address=*+Transport=HTTPS 2>$Null

# Create a new WinRM listener and configure
winrm create winrm/config/listener?Address=*+Transport=HTTP
winrm set winrm/config/winrs '@{MaxMemoryPerShellMB="0"}'
winrm set winrm/config '@{MaxTimeoutms="7200000"}'
winrm set winrm/config/service '@{AllowUnencrypted="true"}'
winrm set winrm/config/service '@{MaxConcurrentOperationsPerUser="12000"}'
winrm set winrm/config/service/auth '@{Basic="true"}'
winrm set winrm/config/client/auth '@{Basic="true"}'

# Configure UAC to allow privilege elevation in remote shells
$Key = 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System'
$Setting = 'LocalAccountTokenFilterPolicy'
Set-ItemProperty -Path $Key -Name $Setting -Value 1 -Force

# Configure and restart the WinRM Service; Enable the required firewall exception
Stop-Service -Name WinRM
Set-Service -Name WinRM -StartupType Automatic
netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new action=allow localip=any remoteip=any
Start-Service -Name WinRM

# Disable firewall on private network interfaces so prompts don't appear.
Set-NetFirewallProfile -Name private -Enabled false
</powershell>
'''.lstrip()
203 203
204 204
# PowerShell commands that install the OpenSSHUtils module, the OpenSSH
# server capability and the .NET Framework core feature.
# NOTE(review): the progress message below says "OpenSSL server" although the
# command that follows installs the OpenSSH server capability. The text is
# left untouched here because it is runtime output - confirm before changing.
WINDOWS_BOOTSTRAP_POWERSHELL = '''
Write-Output "installing PowerShell dependencies"
Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force
Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
Install-Module -Name OpenSSHUtils -RequiredVersion 0.0.2.0

Write-Output "installing OpenSSL server"
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
# Various tools will attempt to use older versions of .NET. So we enable
# the feature that provides them so it doesn't have to be auto-enabled
# later.
Write-Output "enabling .NET Framework feature"
Install-WindowsFeature -Name Net-Framework-Core
'''
219 219
220 220
class AWSConnection:
    """Bundle of the boto3 session, clients and resources for one region.

    Unless ``ensure_ec2_state`` is false, constructing an instance also
    brings the remote key pairs, security groups and IAM state in line with
    the definitions in this module.
    """

    def __init__(self, automation, region: str, ensure_ec2_state: bool=True):
        self.automation = automation
        self.local_state_path = automation.state_path

        self.prefix = 'hg-'

        session = boto3.session.Session(region_name=region)
        self.session = session
        self.ec2client = session.client('ec2')
        self.ec2resource = session.resource('ec2')
        self.iamclient = session.client('iam')
        self.iamresource = session.resource('iam')
        self.security_groups = {}

        if not ensure_ec2_state:
            return

        ensure_key_pairs(automation.state_path, self.ec2resource)
        self.security_groups = ensure_security_groups(self.ec2resource)
        ensure_iam_state(self.iamclient, self.iamresource)

    def key_pair_path_private(self, name):
        """Return the local path of the private key file for ``name``."""
        return self.local_state_path.joinpath('keys', 'keypair-%s' % name)

    def key_pair_path_public(self, name):
        """Return the local path of the public key file for ``name``."""
        return self.local_state_path.joinpath('keys', 'keypair-%s.pub' % name)
248 248
249 249
def rsa_key_fingerprint(p: pathlib.Path):
    """Return the fingerprint of the RSA private key stored at ``p``.

    The key is converted to unencrypted PKCS#8 DER by the ``openssl``
    command line tool; the fingerprint is the SHA-1 digest of that output,
    written as colon-separated pairs of hex digits.

    Raises ``subprocess.CalledProcessError`` if ``openssl`` exits non-zero.
    """

    # TODO use rsa package.
    command = [
        'openssl', 'pkcs8', '-in', str(p), '-nocrypt', '-topk8',
        '-outform', 'DER',
    ]
    der = subprocess.run(command, capture_output=True, check=True).stdout

    digest = hashlib.sha1(der).hexdigest()
    return ':'.join(digest[i:i + 2] for i in range(0, len(digest), 2))
262 262
263 263
def ensure_key_pairs(state_path: pathlib.Path, ec2resource, prefix='hg-'):
    """Reconcile local key pair files with the key pairs registered in EC2.

    Local keys live in ``state_path / 'keys'`` as ``keypair-<name>`` (private)
    and ``keypair-<name>.pub`` (public). Remote key pairs are the ones whose
    name starts with ``prefix``.

    A key pair that exists on only one side, or whose fingerprints differ,
    is removed from wherever it exists. Afterwards every name in
    ``KEY_PAIRS`` that is not registered remotely is created and its key
    material is written locally.

    Local files that are already missing are tolerated when removing, and a
    public key file without its private key is discarded instead of aborting
    the reconciliation.
    """
    remote_existing = {}

    for kpi in ec2resource.key_pairs.all():
        if kpi.name.startswith(prefix):
            remote_existing[kpi.name[len(prefix):]] = kpi.key_fingerprint

    # Validate that we have these keys locally.
    key_path = state_path / 'keys'
    key_path.mkdir(exist_ok=True, mode=0o700)

    def unlink_if_exists(path):
        # One half of a pair may already be gone (for example after an
        # interrupted run); that must not abort the whole reconciliation.
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    def remove_remote(name):
        print('deleting key pair %s' % name)
        key = ec2resource.KeyPair(name)
        key.delete()

    def remove_local(name):
        pub_full = key_path / ('keypair-%s.pub' % name)
        priv_full = key_path / ('keypair-%s' % name)

        print('removing %s' % pub_full)
        unlink_if_exists(pub_full)
        print('removing %s' % priv_full)
        unlink_if_exists(priv_full)

    local_existing = {}

    for f in sorted(os.listdir(key_path)):
        if not f.startswith('keypair-') or not f.endswith('.pub'):
            continue

        name = f[len('keypair-'):-len('.pub')]

        pub_full = key_path / f
        priv_full = key_path / ('keypair-%s' % name)

        with open(pub_full, 'r', encoding='ascii') as fh:
            data = fh.read()

        if not data.startswith('ssh-rsa '):
            print('unexpected format for key pair file: %s; removing' %
                  pub_full)
            unlink_if_exists(pub_full)
            unlink_if_exists(priv_full)
            continue

        if not priv_full.exists():
            # Without the private key no fingerprint can be computed and the
            # pair is unusable; drop the orphaned public key so the pair is
            # treated as missing locally.
            print('private key missing for key pair file: %s; removing' %
                  pub_full)
            unlink_if_exists(pub_full)
            continue

        local_existing[name] = rsa_key_fingerprint(priv_full)

    for name in sorted(set(remote_existing) | set(local_existing)):
        if name not in local_existing:
            actual = '%s%s' % (prefix, name)
            print('remote key %s does not exist locally' % name)
            remove_remote(actual)
            del remote_existing[name]

        elif name not in remote_existing:
            print('local key %s does not exist remotely' % name)
            remove_local(name)
            del local_existing[name]

        elif remote_existing[name] != local_existing[name]:
            print('key fingerprint mismatch for %s; '
                  'removing from local and remote' % name)
            remove_local(name)
            remove_remote('%s%s' % (prefix, name))
            del local_existing[name]
            del remote_existing[name]

    missing = KEY_PAIRS - set(remote_existing)

    for name in sorted(missing):
        actual = '%s%s' % (prefix, name)
        print('creating key pair %s' % actual)

        priv_full = key_path / ('keypair-%s' % name)
        pub_full = key_path / ('keypair-%s.pub' % name)

        kp = ec2resource.create_key_pair(KeyName=actual)

        with priv_full.open('w', encoding='ascii') as fh:
            fh.write(kp.key_material)
            fh.write('\n')

        priv_full.chmod(0o0600)

        # SSH public key can be extracted via `ssh-keygen`.
        with pub_full.open('w', encoding='ascii') as fh:
            subprocess.run(
                ['ssh-keygen', '-y', '-f', str(priv_full)],
                stdout=fh,
                check=True)

        pub_full.chmod(0o0600)
357 357
358 358
def delete_instance_profile(profile):
    """Remove every role from an IAM instance profile, then delete it."""
    profile_name = profile.name

    for member in profile.roles:
        message = 'removing role %s from instance profile %s'
        print(message % (member.name, profile_name))
        profile.remove_role(RoleName=member.name)

    print('deleting instance profile %s' % profile_name)
    profile.delete()
367 367
368 368
def ensure_iam_state(iamclient, iamresource, prefix='hg-'):
    """Ensure IAM state is in sync with our canonical definition.

    Only instance profiles and roles whose name starts with ``prefix`` are
    considered. Those not listed in ``IAM_INSTANCE_PROFILES`` / ``IAM_ROLES``
    are deleted, missing ones are created, and finally the role membership
    of every defined instance profile is made to match its definition.

    Managed policies are detached from a stale role before it is deleted,
    because IAM rejects deleting a role that still has policies attached.
    """

    remote_profiles = {}

    for profile in iamresource.instance_profiles.all():
        if profile.name.startswith(prefix):
            remote_profiles[profile.name[len(prefix):]] = profile

    for name in sorted(set(remote_profiles) - set(IAM_INSTANCE_PROFILES)):
        delete_instance_profile(remote_profiles[name])
        del remote_profiles[name]

    remote_roles = {}

    for role in iamresource.roles.all():
        if role.name.startswith(prefix):
            remote_roles[role.name[len(prefix):]] = role

    for name in sorted(set(remote_roles) - set(IAM_ROLES)):
        role = remote_roles[name]

        # Roles created by this module always get managed policies attached,
        # so they have to be detached before the role can be deleted.
        for policy in role.attached_policies.all():
            print('detaching policy %s from %s' % (policy.arn, role.name))
            role.detach_policy(PolicyArn=policy.arn)

        print('removing role %s' % role.name)
        role.delete()
        del remote_roles[name]

    # We've purged remote state that doesn't belong. Create missing
    # instance profiles and roles.
    for name in sorted(set(IAM_INSTANCE_PROFILES) - set(remote_profiles)):
        actual = '%s%s' % (prefix, name)
        print('creating IAM instance profile %s' % actual)

        profile = iamresource.create_instance_profile(
            InstanceProfileName=actual)
        remote_profiles[name] = profile

        waiter = iamclient.get_waiter('instance_profile_exists')
        waiter.wait(InstanceProfileName=actual)
        print('IAM instance profile %s is available' % actual)

    for name in sorted(set(IAM_ROLES) - set(remote_roles)):
        entry = IAM_ROLES[name]

        actual = '%s%s' % (prefix, name)
        print('creating IAM role %s' % actual)

        role = iamresource.create_role(
            RoleName=actual,
            Description=entry['description'],
            AssumeRolePolicyDocument=ASSUME_ROLE_POLICY_DOCUMENT,
        )

        waiter = iamclient.get_waiter('role_exists')
        waiter.wait(RoleName=actual)
        print('IAM role %s is available' % actual)

        remote_roles[name] = role

        for arn in entry['policy_arns']:
            print('attaching policy %s to %s' % (arn, role.name))
            role.attach_policy(PolicyArn=arn)

    # Now reconcile state of profiles.
    for name, meta in sorted(IAM_INSTANCE_PROFILES.items()):
        profile = remote_profiles[name]
        wanted = {'%s%s' % (prefix, role) for role in meta['roles']}
        have = {role.name for role in profile.roles}

        for role in sorted(have - wanted):
            print('removing role %s from %s' % (role, profile.name))
            profile.remove_role(RoleName=role)

        for role in sorted(wanted - have):
            print('adding role %s to %s' % (role, profile.name))
            profile.add_role(RoleName=role)
444 444
445 445
def find_image(ec2resource, owner_id, name):
    """Look up an available machine image by owner ID and exact name.

    Returns the first image yielded by the query. Raises ``Exception`` when
    the query yields nothing.
    """

    criteria = (
        ('owner-id', owner_id),
        ('state', 'available'),
        ('image-type', 'machine'),
        ('name', name),
    )
    filters = [{'Name': key, 'Values': [value]} for key, value in criteria]

    nothing = object()
    first = next(iter(ec2resource.images.filter(Filters=filters)), nothing)

    if first is nothing:
        raise Exception('unable to find image for %s' % name)

    return first
473 473
474 474
def ensure_security_groups(ec2resource, prefix='hg-'):
    """Make sure every security group in ``SECURITY_GROUPS`` exists.

    Groups are named ``prefix`` + key (``hg-`` by default). Remote groups
    carrying the prefix that are not defined in ``SECURITY_GROUPS`` are
    deleted; defined groups that are missing are created with their ingress
    rules. Groups that already exist are returned as found.

    Returns a dict mapping unprefixed names to security group resources.
    """
    existing = {
        group.group_name[len(prefix):]: group
        for group in ec2resource.security_groups.all()
        if group.group_name.startswith(prefix)
    }

    for stale_name in sorted(set(existing) - set(SECURITY_GROUPS)):
        stale = existing[stale_name]
        print('removing legacy security group: %s' % stale.group_name)
        stale.delete()

    security_groups = {}

    for name, definition in sorted(SECURITY_GROUPS.items()):
        if name not in existing:
            actual = '%s%s' % (prefix, name)
            print('adding security group %s' % actual)

            created = ec2resource.create_security_group(
                Description=definition['description'],
                GroupName=actual,
            )
            created.authorize_ingress(IpPermissions=definition['ingress'])

            security_groups[name] = created
        else:
            security_groups[name] = existing[name]

    return security_groups
516 516
517 517
def terminate_ec2_instances(ec2resource, prefix='hg-'):
    """Terminate every instance whose ``Name`` tag starts with ``prefix``.

    Instances already in the ``terminated`` state are skipped. Termination
    is requested for all matches first; only then does the function block
    until each of them has terminated.
    """
    pending = []

    for instance in ec2resource.instances.all():
        if instance.state['Name'] == 'terminated':
            continue

        ours = (
            tag for tag in (instance.tags or [])
            if tag['Key'] == 'Name' and tag['Value'].startswith(prefix)
        )

        for _ in ours:
            print('terminating %s' % instance.id)
            instance.terminate()
            pending.append(instance)

    for instance in pending:
        instance.wait_until_terminated()
534 534
535 535
def remove_resources(c, prefix='hg-'):
    """Delete everything in this region whose name starts with ``prefix``.

    In order: EC2 instances, AMIs owned by this account, security groups,
    IAM instance profiles and IAM roles (after detaching their managed
    policies).
    """
    ec2 = c.ec2resource
    iam = c.iamresource

    terminate_ec2_instances(ec2, prefix=prefix)

    for image in ec2.images.filter(Owners=['self']):
        if not image.name.startswith(prefix):
            continue
        remove_ami(ec2, image)

    for group in ec2.security_groups.all():
        if not group.group_name.startswith(prefix):
            continue
        print('removing security group %s' % group.group_name)
        group.delete()

    for profile in iam.instance_profiles.all():
        if not profile.name.startswith(prefix):
            continue
        delete_instance_profile(profile)

    for role in iam.roles.all():
        if not role.name.startswith(prefix):
            continue

        for policy in role.attached_policies.all():
            print('detaching policy %s from %s' % (policy.arn, role.name))
            role.detach_policy(PolicyArn=policy.arn)

        print('removing role %s' % role.name)
        role.delete()
564 564
565 565
def wait_for_ip_addresses(instances):
    """Block until every instance in ``instances`` has a public IP address.

    Each instance is polled in turn: while it has no public IP address the
    function sleeps two seconds and reloads it. There is no timeout.
    """
    for instance in instances:
        while not instance.public_ip_address:
            time.sleep(2)
            instance.reload()

        address = instance.public_ip_address
        print('public IP address for %s: %s' % (instance.id, address))
578 578
579 579
def remove_ami(ec2resource, image):
    """Deregister an AMI and delete the snapshots behind its EBS devices.

    The snapshots are resolved before the image is deregistered and deleted
    afterwards.
    """
    snapshots = [
        ec2resource.Snapshot(mapping['Ebs']['SnapshotId'])
        for mapping in image.block_device_mappings
        if 'Ebs' in mapping
    ]

    print('deregistering %s' % image.id)
    image.deregister()

    for snap in snapshots:
        print('deleting snapshot %s' % snap.id)
        snap.delete()
594 594
595 595
def wait_for_ssm(ssmclient, instances):
    """Block until SSM reports information for all of ``instances``.

    ``instances`` is a sized collection of objects with an ``id`` attribute.
    SSM is polled every two seconds until the number of entries it returns
    equals the number of instances. There is no timeout.
    """
    while True:
        id_filter = {
            'Key': 'InstanceIds',
            'Values': [instance.id for instance in instances],
        }
        response = ssmclient.describe_instance_information(
            Filters=[id_filter])

        available = len(response['InstanceInformationList'])
        wanted = len(instances)

        print('%d/%d instances available in SSM' % (available, wanted))

        if available != wanted:
            time.sleep(2)
            continue

        return
617 617
618 618
def run_ssm_command(ssmclient, instances, document_name, parameters):
    """Send an SSM document to ``instances`` and wait for it to succeed.

    The command is sent once for all instances with CloudWatch output
    enabled. Each instance's invocation is then polled until its status is
    ``Success``. ``Pending``, ``InProgress`` and ``Delayed`` cause another
    poll after two seconds; any other status raises ``Exception``. An
    invocation that is not yet known to SSM is retried after one second.
    """

    sent = ssmclient.send_command(
        InstanceIds=[instance.id for instance in instances],
        DocumentName=document_name,
        Parameters=parameters,
        CloudWatchOutputConfig={
            'CloudWatchOutputEnabled': True,
        },
    )

    command_id = sent['Command']['CommandId']
    still_running = ('Pending', 'InProgress', 'Delayed')

    for instance in instances:
        finished = False

        while not finished:
            try:
                invocation = ssmclient.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=instance.id,
                )
            except botocore.exceptions.ClientError as e:
                if e.response['Error']['Code'] != 'InvocationDoesNotExist':
                    raise

                print('could not find SSM command invocation; waiting')
                time.sleep(1)
                continue

            status = invocation['Status']

            if status == 'Success':
                finished = True
            elif status in still_running:
                time.sleep(2)
            else:
                raise Exception('command failed on %s: %s' % (
                    instance.id, status))
655 655
656 656
@contextlib.contextmanager
def temporary_ec2_instances(ec2resource, config):
    """Run EC2 instances for the duration of a ``with`` block.

    This is a proxy to ``ec2resource.create_instances(**config)`` that takes
    care of the lifecycle of the instances it creates.

    The context manager evaluates to the list of created instance
    resources. They may not be ready for work yet: waiting for an instance
    to start responding is up to the caller.

    On exit, termination is requested for every created instance. The
    function does not wait for the instances to reach the terminated state.
    """

    ids = None

    try:
        created = ec2resource.create_instances(**config)

        ids = [instance.id for instance in created]
        print('started instances: %s' % ' '.join(ids))

        yield created
    finally:
        # ``ids`` stays empty when creation failed or returned nothing, in
        # which case there is nothing to clean up.
        if ids:
            print('terminating instances: %s' % ' '.join(ids))

            for instance in created:
                instance.terminate()

            print('terminated %d instances' % len(ids))
687 687
688 688
@contextlib.contextmanager
def create_temp_windows_ec2_instances(c: AWSConnection, config):
    """Create temporary Windows EC2 instances reachable over WinRM.

    This is a higher-level wrapper around ``temporary_ec2_instances()``.
    The caller's ``config`` is copied and extended with the
    ``hg-ephemeral-ec2-1`` instance profile, an ``hg-temp-windows`` ``Name``
    tag and User Data that sets the Administrator password and configures
    Windows Remote Management.

    The emitted instances have a ``winrm_client`` attribute holding the
    client returned by ``wait_for_winrm()`` for that instance.

    Raises ``ValueError`` if ``config`` already contains
    ``IamInstanceProfile`` or ``UserData``.
    """
    for reserved in ('IamInstanceProfile', 'UserData'):
        if reserved in config:
            raise ValueError('%s cannot be provided in config' % reserved)

    password = c.automation.default_password()

    # Work on a copy so the caller's dict is never modified.
    config = copy.deepcopy(config)
    config['IamInstanceProfile'] = {
        'Name': 'hg-ephemeral-ec2-1',
    }

    name_tag = {'Key': 'Name', 'Value': 'hg-temp-windows'}
    config.setdefault('TagSpecifications', []).append({
        'ResourceType': 'instance',
        'Tags': [name_tag],
    })
    config['UserData'] = WINDOWS_USER_DATA % password

    with temporary_ec2_instances(c.ec2resource, config) as instances:
        wait_for_ip_addresses(instances)

        print('waiting for Windows Remote Management service...')

        for instance in instances:
            instance_client = wait_for_winrm(
                instance.public_ip_address, 'Administrator', password)
            print('established WinRM connection to %s' % instance.id)
            instance.winrm_client = instance_client

        yield instances
726 726
727 727
def resolve_fingerprint(fingerprint):
    """Return the SHA-256 hex digest of ``fingerprint`` serialized as JSON.

    Keys are sorted during serialization so that equal data always produces
    the same digest.
    """
    serialized = json.dumps(fingerprint, sort_keys=True).encode('utf-8')
    return hashlib.sha256(serialized).hexdigest()
731 731
732 732
def find_and_reconcile_image(ec2resource, name, fingerprint):
    """Attempt to find an existing EC2 AMI with a name and fingerprint.

    If an image with the specified fingerprint is found, it is returned.
    Otherwise None is returned.

    Existing images for the specified name that don't have the specified
    fingerprint or are missing required metadata are deleted.
    """
    # Walk all AMIs with this name, removing the invalid ones and keeping a
    # reference to a good image so it can be returned once the image state
    # is reconciled.
    candidates = ec2resource.images.filter(
        Filters=[{'Name': 'name', 'Values': [name]}])

    existing_image = None

    for image in candidates:
        if image.tags is None:
            problem = 'lacks required tags'
        else:
            tags = {t['Key']: t['Value'] for t in image.tags}

            if tags.get('HGIMAGEFINGERPRINT') == fingerprint:
                existing_image = image
                continue

            problem = 'has wrong fingerprint'

        print('image %s for %s %s; removing' % (
            image.id, image.name, problem))
        remove_ami(ec2resource, image)

    return existing_image
766 766
767 767
def create_ami_from_instance(ec2client, instance, name, description,
                             fingerprint):
    """Stop an instance and create an AMI from it.

    The new image is tagged with ``HGIMAGEFINGERPRINT`` set to
    ``fingerprint``, and the function blocks until the image is available.

    Returns the image resource produced by ``instance.create_image()``.
    """
    instance.stop()

    stopped = ec2client.get_waiter('instance_stopped')
    stopped.wait(
        InstanceIds=[instance.id],
        WaiterConfig={
            'Delay': 5,
        })
    print('%s is stopped' % instance.id)

    image = instance.create_image(Name=name, Description=description)

    fingerprint_tag = {
        'Key': 'HGIMAGEFINGERPRINT',
        'Value': fingerprint,
    }
    image.create_tags(Tags=[fingerprint_tag])

    print('waiting for image %s' % image.id)

    available = ec2client.get_waiter('image_available')
    available.wait(ImageIds=[image.id])

    print('image %s available as %s' % (image.id, image.name))

    return image
804 804
805 805
def ensure_linux_dev_ami(c: AWSConnection, distro='debian9', prefix='hg-'):
    """Ensures a Linux development AMI is available and up-to-date.

    The AMI is named ``<prefix>linux-dev-<distro>``. A fingerprint is
    computed from the instance configuration, the bootstrap script and both
    requirements files; an existing image carrying that fingerprint is
    reused. Otherwise a temporary instance is started from the upstream base
    image, bootstrapped over SSH and turned into a new AMI.

    Returns an ``ec2.Image`` of either an existing AMI or a newly-built one.

    Raises ``ValueError`` for an unsupported ``distro`` and ``Exception`` if
    the bootstrap script exits non-zero.
    """
    ec2client = c.ec2client
    ec2resource = c.ec2resource

    name = '%s%s-%s' % (prefix, 'linux-dev', distro)

    # (distro, owner account, base image name, SSH login user)
    base_images = (
        ('debian9', DEBIAN_ACCOUNT_ID,
         'debian-stretch-hvm-x86_64-gp2-2019-02-19-26620', 'admin'),
        ('ubuntu18.04', UBUNTU_ACCOUNT_ID,
         'ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20190403',
         'ubuntu'),
        ('ubuntu18.10', UBUNTU_ACCOUNT_ID,
         'ubuntu/images/hvm-ssd/ubuntu-cosmic-18.10-amd64-server-20190402',
         'ubuntu'),
        ('ubuntu19.04', UBUNTU_ACCOUNT_ID,
         'ubuntu/images/hvm-ssd/ubuntu-disco-19.04-amd64-server-20190417',
         'ubuntu'),
    )

    for known, owner_id, base_name, ssh_username in base_images:
        if distro == known:
            image = find_image(ec2resource, owner_id, base_name)
            break
    else:
        raise ValueError('unsupported Linux distro: %s' % distro)

    root_volume = {
        'DeviceName': image.block_device_mappings[0]['DeviceName'],
        'Ebs': {
            'DeleteOnTermination': True,
            'VolumeSize': 8,
            'VolumeType': 'gp2',
        },
    }

    config = {
        'BlockDeviceMappings': [root_volume],
        'EbsOptimized': True,
        'ImageId': image.id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        # 8 VCPUs for compiling Python.
        'InstanceType': 't3.2xlarge',
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['linux-dev-1'].id],
    }

    requirements_dir = pathlib.Path(__file__).parent.parent
    requirements2 = (requirements_dir / 'linux-requirements-py2.txt'
                     ).read_text(encoding='utf-8')
    requirements3 = (requirements_dir / 'linux-requirements-py3.txt'
                     ).read_text(encoding='utf-8')

    # Compute a deterministic fingerprint to determine whether image needs to
    # be regenerated.
    fingerprint = resolve_fingerprint({
        'instance_config': config,
        'bootstrap_script': BOOTSTRAP_DEBIAN,
        'requirements_py2': requirements2,
        'requirements_py3': requirements3,
    })

    existing_image = find_and_reconcile_image(ec2resource, name, fingerprint)

    if existing_image:
        return existing_image

    print('no suitable %s image found; creating one...' % name)

    with temporary_ec2_instances(ec2resource, config) as instances:
        wait_for_ip_addresses(instances)

        instance = instances[0]

        client = wait_for_ssh(
            instance.public_ip_address, 22,
            username=ssh_username,
            key_filename=str(c.key_pair_path_private('automation')))

        home = '/home/%s' % ssh_username

        uploads = (
            ('bootstrap', BOOTSTRAP_DEBIAN),
            ('requirements-py2.txt', requirements2),
            ('requirements-py3.txt', requirements3),
        )

        with client:
            print('connecting to SSH server')
            sftp = client.open_sftp()

            print('uploading bootstrap files')
            for remote_name, content in uploads:
                with sftp.open('%s/%s' % (home, remote_name), 'wb') as fh:
                    fh.write(content)
                    fh.chmod(0o0700)

            print('executing bootstrap')
            chan, stdin, stdout = ssh_exec_command(client,
                                                   '%s/bootstrap' % home)
            stdin.close()

            for line in stdout:
                print(line, end='')

            exit_status = chan.recv_exit_status()
            if exit_status:
                raise Exception('non-0 exit from bootstrap: %d' % exit_status)

            print('bootstrap completed; stopping %s to create %s' % (
                instance.id, name))

        # The image has to be created while the temporary instance still
        # exists, i.e. before the surrounding context manager terminates it.
        return create_ami_from_instance(ec2client, instance, name,
                                        'Mercurial Linux development environment',
                                        fingerprint)
941 941
942 942
@contextlib.contextmanager
def temporary_linux_dev_instances(c: AWSConnection, image, instance_type,
                                  prefix='hg-', ensure_extra_volume=False):
    """Create temporary Linux development EC2 instances.

    Context manager resolves to a list of ``ec2.Instance`` that were created
    and are running.

    ``ensure_extra_volume`` can be set to ``True`` to require that instances
    have a 2nd storage volume available other than the primary AMI volume.
    For instance types with instance storage, this does nothing special.
    But for instance types without instance storage, an additional EBS volume
    will be added to the instance.

    Instances have an ``ssh_client`` attribute containing the client returned
    by ``wait_for_ssh()`` for that instance; the clients are closed when the
    context manager exits.

    Instances have an ``ssh_private_key_path`` attribute containing the
    str path to the SSH private key to connect to the instance.

    Raises ``ValueError`` if an extra volume is needed but the primary
    device name of ``image`` is not one this function knows how to pair.
    """

    def gp2_volume(device_name):
        # 8 GB gp2 EBS volume that is deleted along with the instance.
        return {
            'DeviceName': device_name,
            'Ebs': {
                'DeleteOnTermination': True,
                'VolumeSize': 8,
                'VolumeType': 'gp2',
            },
        }

    main_device = image.block_device_mappings[0]['DeviceName']
    block_device_mappings = [gp2_volume(main_device)]

    # INSTANCE_TYPES_WITH_STORAGE is not an exhaustive list of instance types
    # having instance storage, so a type missing from it gets an extra EBS
    # volume.
    if (ensure_extra_volume
        and not instance_type.startswith(tuple(INSTANCE_TYPES_WITH_STORAGE))):
        second_device_for = {
            'xvda': 'xvdb',
            '/dev/sda1': '/dev/sdb',
        }

        if main_device not in second_device_for:
            raise ValueError('unhandled primary EBS device name: %s' %
                             main_device)

        block_device_mappings.append(
            gp2_volume(second_device_for[main_device]))

    config = {
        'BlockDeviceMappings': block_device_mappings,
        'EbsOptimized': True,
        'ImageId': image.id,
        'InstanceInitiatedShutdownBehavior': 'terminate',
        'InstanceType': instance_type,
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['linux-dev-1'].id],
    }

    with temporary_ec2_instances(c.ec2resource, config) as instances:
        wait_for_ip_addresses(instances)

        ssh_private_key_path = str(c.key_pair_path_private('automation'))

        for instance in instances:
            instance.ssh_client = wait_for_ssh(
                instance.public_ip_address, 22,
                username='hg',
                key_filename=ssh_private_key_path)
            instance.ssh_private_key_path = ssh_private_key_path

        try:
            yield instances
        finally:
            for instance in instances:
                instance.ssh_client.close()
1029 1029
1030 1030
def ensure_windows_dev_ami(c: AWSConnection, prefix='hg-'):
    """Ensure Windows Development AMI is available and up-to-date.

    If necessary, a modern AMI will be built by starting a temporary EC2
    instance and bootstrapping it.

    Obsolete AMIs will be deleted so there is only a single AMI having the
    desired name.

    ``prefix`` is prepended to ``windows-dev`` to form the AMI name and to
    ``automation`` to form the EC2 key pair name.

    Returns an ``ec2.Image`` of either an existing AMI or a newly-built
    one.
    """
    ec2client = c.ec2client
    ec2resource = c.ec2resource
    ssmclient = c.session.client('ssm')

    name = '%s%s' % (prefix, 'windows-dev')

    # Base image the development AMI is derived from.
    # NOTE(review): 801119661308 is presumably the account ID owning the
    # official Windows Server images -- confirm against find_image().
    image = find_image(ec2resource,
                       '801119661308',
                       'Windows_Server-2019-English-Full-Base-2019.07.12')

    config = {
        'BlockDeviceMappings': [
            {
                'DeviceName': '/dev/sda1',
                'Ebs': {
                    'DeleteOnTermination': True,
                    'VolumeSize': 32,
                    'VolumeType': 'gp2',
                },
            }
        ],
        'ImageId': image.id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': 't3.medium',
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
    }

    # PowerShell commands executed over WinRM once the instance has been
    # updated and rebooted. These strings are part of the fingerprint
    # computed below, so editing any of them (including the "OpenSSL"
    # wording in the progress message, which refers to the OpenSSH client)
    # changes the fingerprint.
    commands = [
        # Need to start the service so sshd_config is generated.
        'Start-Service sshd',
        'Write-Output "modifying sshd_config"',
        r'$content = Get-Content C:\ProgramData\ssh\sshd_config',
        '$content = $content -replace "Match Group administrators","" -replace "AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys",""',
        r'$content | Set-Content C:\ProgramData\ssh\sshd_config',
        'Import-Module OpenSSHUtils',
        r'Repair-SshdConfigPermission C:\ProgramData\ssh\sshd_config -Confirm:$false',
        'Restart-Service sshd',
        'Write-Output "installing OpenSSL client"',
        'Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0',
        'Set-Service -Name sshd -StartupType "Automatic"',
        'Write-Output "OpenSSH server running"',
    ]

    # Append the dependency installation script, one command per line.
    with INSTALL_WINDOWS_DEPENDENCIES.open('r', encoding='utf-8') as fh:
        commands.extend(line.rstrip() for line in fh)

    # Disable Windows Defender when bootstrapping because it just slows
    # things down.
    commands.insert(0, 'Set-MpPreference -DisableRealtimeMonitoring $true')
    commands.append('Set-MpPreference -DisableRealtimeMonitoring $false')

    # Compute a deterministic fingerprint to determine whether image needs
    # to be regenerated.
    fingerprint = resolve_fingerprint({
        'instance_config': config,
        'user_data': WINDOWS_USER_DATA,
        'initial_bootstrap': WINDOWS_BOOTSTRAP_POWERSHELL,
        'bootstrap_commands': commands,
    })

    existing_image = find_and_reconcile_image(ec2resource, name, fingerprint)

    if existing_image:
        return existing_image

    print('no suitable Windows development image found; creating one...')

    with create_temp_windows_ec2_instances(c, config) as instances:
        assert len(instances) == 1
        instance = instances[0]

        wait_for_ssm(ssmclient, [instance])

        # On first boot, install various Windows updates.
        # We would ideally use PowerShell Remoting for this. However, there are
        # trust issues that make it difficult to invoke Windows Update
        # remotely. So we use SSM, which has a mechanism for running Windows
        # Update.
        print('installing Windows features...')
        run_ssm_command(
            ssmclient,
            [instance],
            'AWS-RunPowerShellScript',
            {
                'commands': WINDOWS_BOOTSTRAP_POWERSHELL.split('\n'),
            },
        )

        # Reboot so all updates are fully applied.
        #
        # We don't use instance.reboot() here because it is asynchronous and
        # we don't know when exactly the instance has rebooted. It could take
        # a while to stop and we may start trying to interact with the instance
        # before it has rebooted.
        print('rebooting instance %s' % instance.id)
        instance.stop()
        ec2client.get_waiter('instance_stopped').wait(
            InstanceIds=[instance.id],
            WaiterConfig={
                'Delay': 5,
            })

        instance.start()
        wait_for_ip_addresses([instance])

        # There is a race condition here between the User Data PS script running
        # and us connecting to WinRM. This can manifest as
        # "AuthorizationManager check failed" failures during run_powershell().
        # TODO figure out a workaround.

        print('waiting for Windows Remote Management to come back...')
        client = wait_for_winrm(instance.public_ip_address, 'Administrator',
                                c.automation.default_password())
        print('established WinRM connection to %s' % instance.id)
        instance.winrm_client = client

        print('bootstrapping instance...')
        run_powershell(instance.winrm_client, '\n'.join(commands))

        print('bootstrap completed; stopping %s to create image' % instance.id)
        return create_ami_from_instance(ec2client, instance, name,
                                        'Mercurial Windows development environment',
                                        fingerprint)
1169 1169
1170 1170
@contextlib.contextmanager
def temporary_windows_dev_instances(c: AWSConnection, image, instance_type,
                                    prefix='hg-', disable_antivirus=False):
    """Launch short-lived Windows development EC2 instances.

    The context manager yields the list of ``EC2.Instance`` objects produced
    by ``create_temp_windows_ec2_instances()`` for an instance configuration
    built from ``image``, ``instance_type`` and ``prefix``.

    When ``disable_antivirus`` is true, realtime antivirus monitoring is
    switched off via ``Set-MpPreference`` on every instance before the list
    is yielded.
    """
    # Single 32 GB gp2 volume that is deleted when the instance terminates.
    root_volume = {
        'DeviceName': '/dev/sda1',
        'Ebs': {
            'DeleteOnTermination': True,
            'VolumeSize': 32,
            'VolumeType': 'gp2',
        },
    }

    image_id = image.id
    key_name = '%sautomation' % prefix
    security_group_id = c.security_groups['windows-dev-1'].id

    config = {
        'BlockDeviceMappings': [root_volume],
        'ImageId': image_id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': instance_type,
        'KeyName': key_name,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [security_group_id],
    }

    with create_temp_windows_ec2_instances(c, config) as launched:
        if disable_antivirus:
            disable_monitoring = (
                'Set-MpPreference -DisableRealtimeMonitoring $true')

            for machine in launched:
                run_powershell(machine.winrm_client, disable_monitoring)

        yield launched
General Comments 0
You need to be logged in to leave comments. Login now