##// END OF EJS Templates
automation: extract strings to constants...
Gregory Szorc -
r42870:8804aa6c stable
parent child Browse files
Show More
@@ -1,1205 +1,1207 b''
1 1 # aws.py - Automation code for Amazon Web Services
2 2 #
3 3 # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # no-check-code because Python 3 native.
9 9
10 10 import contextlib
11 11 import copy
12 12 import hashlib
13 13 import json
14 14 import os
15 15 import pathlib
16 16 import subprocess
17 17 import time
18 18
19 19 import boto3
20 20 import botocore.exceptions
21 21
22 22 from .linux import (
23 23 BOOTSTRAP_DEBIAN,
24 24 )
25 25 from .ssh import (
26 26 exec_command as ssh_exec_command,
27 27 wait_for_ssh,
28 28 )
29 29 from .winrm import (
30 30 run_powershell,
31 31 wait_for_winrm,
32 32 )
33 33
34 34
# Directory four levels above this file.
SOURCE_ROOT = pathlib.Path(os.path.abspath(__file__)).parent.parent.parent.parent

# Path of ``contrib/install-windows-dependencies.ps1`` below SOURCE_ROOT.
INSTALL_WINDOWS_DEPENDENCIES = SOURCE_ROOT.joinpath(
    'contrib', 'install-windows-dependencies.ps1')
39 39
40 40
# Instance type name prefixes treated as having instance storage. Matched
# with ``str.startswith()``; not an exhaustive list.
INSTANCE_TYPES_WITH_STORAGE = {
    'c5d', 'd2', 'h1', 'i3', 'm5ad',
    'm5d', 'r5d', 'r5ad', 'x1', 'z1d',
}


# AWS account IDs of AMI publishers, used as the ``owner-id`` when looking
# up base images.
AMAZON_ACCOUNT_ID = '801119661308'
DEBIAN_ACCOUNT_ID = '379101102735'
UBUNTU_ACCOUNT_ID = '099720109477'


# Name of the Windows base AMI.
WINDOWS_BASE_IMAGE_NAME = 'Windows_Server-2019-English-Full-Base-2019.07.12'


# Unprefixed names of the EC2 key pairs that must exist.
KEY_PAIRS = {'automation'}
62 66
63 67
def _tcp_ingress_from_anywhere(from_port, to_port, description):
    """Build an ingress permission opening a TCP port range to 0.0.0.0/0."""
    return {
        'FromPort': from_port,
        'ToPort': to_port,
        'IpProtocol': 'tcp',
        'IpRanges': [
            {
                'CidrIp': '0.0.0.0/0',
                'Description': description,
            },
        ],
    }


# Security groups to maintain, keyed by unprefixed group name. ``ingress``
# is passed verbatim as ``IpPermissions`` when the group is created.
SECURITY_GROUPS = {
    'linux-dev-1': {
        'description': 'Mercurial Linux instances that perform build/test automation',
        'ingress': [
            _tcp_ingress_from_anywhere(22, 22, 'SSH from entire Internet'),
        ],
    },
    'windows-dev-1': {
        'description': 'Mercurial Windows instances that perform build automation',
        'ingress': [
            _tcp_ingress_from_anywhere(22, 22, 'SSH from entire Internet'),
            _tcp_ingress_from_anywhere(3389, 3389, 'RDP from entire Internet'),
            _tcp_ingress_from_anywhere(
                5985, 5986,
                'PowerShell Remoting (Windows Remote Management)'),
        ],
    },
}
121 125
122 126
# IAM roles to maintain, keyed by unprefixed role name. ``policy_arns`` are
# attached when the role is created.
IAM_ROLES = {
    'ephemeral-ec2-role-1': {
        'description': 'Mercurial temporary EC2 instances',
        'policy_arns': ['arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM'],
    },
}


# Trust policy supplied as ``AssumeRolePolicyDocument`` when creating roles;
# it allows the EC2 service to assume the role.
ASSUME_ROLE_POLICY_DOCUMENT = '''
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
'''.strip()


# IAM instance profiles to maintain, keyed by unprefixed profile name, with
# the unprefixed names of the roles each should contain.
IAM_INSTANCE_PROFILES = {
    'ephemeral-ec2-1': {
        'roles': ['ephemeral-ec2-role-1'],
    },
}
156 160
157 161
# EC2 User Data for Windows instances: sets the Administrator password and
# (re)configures WinRM.
#
# The single ``%s`` placeholder receives the Administrator password via
# %-formatting, so any literal percent sign added to the script must be
# doubled.
#
# Inspired by the User Data script used by Packer
# (from https://www.packer.io/intro/getting-started/build-image.html).
WINDOWS_USER_DATA = r'''
<powershell>

# TODO enable this once we figure out what is failing.
#$ErrorActionPreference = "stop"

# Set administrator password
net user Administrator "%s"
wmic useraccount where "name='Administrator'" set PasswordExpires=FALSE

# First, make sure WinRM can't be connected to
netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new enable=yes action=block

# Delete any existing WinRM listeners
winrm delete winrm/config/listener?Address=*+Transport=HTTP 2>$Null
winrm delete winrm/config/listener?Address=*+Transport=HTTPS 2>$Null

# Create a new WinRM listener and configure
winrm create winrm/config/listener?Address=*+Transport=HTTP
winrm set winrm/config/winrs '@{MaxMemoryPerShellMB="0"}'
winrm set winrm/config '@{MaxTimeoutms="7200000"}'
winrm set winrm/config/service '@{AllowUnencrypted="true"}'
winrm set winrm/config/service '@{MaxConcurrentOperationsPerUser="12000"}'
winrm set winrm/config/service/auth '@{Basic="true"}'
winrm set winrm/config/client/auth '@{Basic="true"}'

# Configure UAC to allow privilege elevation in remote shells
$Key = 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System'
$Setting = 'LocalAccountTokenFilterPolicy'
Set-ItemProperty -Path $Key -Name $Setting -Value 1 -Force

# Configure and restart the WinRM Service; Enable the required firewall exception
Stop-Service -Name WinRM
Set-Service -Name WinRM -StartupType Automatic
netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new action=allow localip=any remoteip=any
Start-Service -Name WinRM

# Disable firewall on private network interfaces so prompts don't appear.
Set-NetFirewallProfile -Name private -Enabled false
</powershell>
'''.lstrip()
203 207
204 208
# PowerShell snippet for Windows instances. It installs the NuGet package
# provider and the OpenSSHUtils module, adds the OpenSSH.Server capability
# and enables the .NET Framework core feature.
WINDOWS_BOOTSTRAP_POWERSHELL = '''
Write-Output "installing PowerShell dependencies"
Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force
Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
Install-Module -Name OpenSSHUtils -RequiredVersion 0.0.2.0

Write-Output "installing OpenSSL server"
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
# Various tools will attempt to use older versions of .NET. So we enable
# the feature that provides them so it doesn't have to be auto-enabled
# later.
Write-Output "enabling .NET Framework feature"
Install-WindowsFeature -Name Net-Framework-Core
'''
219 223
220 224
class AWSConnection:
    """Manages the state of a connection with AWS."""

    def __init__(self, automation, region: str, ensure_ec2_state: bool=True):
        """Open EC2 and IAM clients/resources for ``region``.

        ``automation`` must expose ``state_path``; it is kept on the
        instance. Unless ``ensure_ec2_state`` is false, key pairs, security
        groups and IAM state are reconciled with this module's definitions
        before the constructor returns.
        """
        self.automation = automation
        self.local_state_path = automation.state_path

        self.prefix = 'hg-'

        session = boto3.session.Session(region_name=region)
        self.session = session
        self.ec2client = session.client('ec2')
        self.ec2resource = session.resource('ec2')
        self.iamclient = session.client('iam')
        self.iamresource = session.resource('iam')
        # Stays empty when state reconciliation is skipped.
        self.security_groups = {}

        if not ensure_ec2_state:
            return

        ensure_key_pairs(automation.state_path, self.ec2resource)
        self.security_groups = ensure_security_groups(self.ec2resource)
        ensure_iam_state(self.iamclient, self.iamresource)

    def _key_file(self, filename):
        """Path of ``filename`` inside the local ``keys`` directory."""
        return self.local_state_path / 'keys' / filename

    def key_pair_path_private(self, name):
        """Path to a key pair private key file."""
        return self._key_file('keypair-%s' % name)

    def key_pair_path_public(self, name):
        """Path to a key pair public key file."""
        return self._key_file('keypair-%s.pub' % name)
248 252
249 253
def rsa_key_fingerprint(p: pathlib.Path):
    """Compute the fingerprint of an RSA private key.

    The key at ``p`` is converted to unencrypted PKCS#8 DER by the
    ``openssl`` executable. The result is the SHA-1 of that output as
    colon-separated hex byte pairs.

    Raises ``subprocess.CalledProcessError`` if ``openssl`` exits non-zero.
    """

    # TODO use rsa package.
    command = [
        'openssl', 'pkcs8',
        '-in', str(p),
        '-nocrypt', '-topk8',
        '-outform', 'DER',
    ]
    der = subprocess.run(command, capture_output=True, check=True).stdout

    digest = hashlib.sha1(der).hexdigest()
    return ':'.join(digest[i:i + 2] for i in range(0, len(digest), 2))
262 266
263 267
def ensure_key_pairs(state_path: pathlib.Path, ec2resource, prefix='hg-'):
    """Reconcile EC2 key pairs with the key files in ``state_path / 'keys'``.

    Remote key pairs whose name starts with ``prefix`` are compared with
    local ``keypair-<name>`` / ``keypair-<name>.pub`` files:

    * a key that exists on only one side is deleted from that side;
    * a key whose local and remote fingerprints differ is deleted from both.

    Afterwards every name in ``KEY_PAIRS`` that is missing remotely is
    created in EC2 and its private/public material is written locally.
    """
    remote_existing = {}

    for kpi in ec2resource.key_pairs.all():
        if kpi.name.startswith(prefix):
            remote_existing[kpi.name[len(prefix):]] = kpi.key_fingerprint

    # Validate that we have these keys locally.
    key_path = state_path / 'keys'
    key_path.mkdir(exist_ok=True, mode=0o700)

    def remove_remote(name):
        print('deleting key pair %s' % name)
        key = ec2resource.KeyPair(name)
        key.delete()

    def remove_local(name):
        pub_full = key_path / ('keypair-%s.pub' % name)
        priv_full = key_path / ('keypair-%s' % name)

        print('removing %s' % pub_full)
        pub_full.unlink()
        print('removing %s' % priv_full)
        priv_full.unlink()

    local_existing = {}

    # Local keys are discovered through their ``.pub`` file.
    for f in sorted(os.listdir(key_path)):
        if not f.startswith('keypair-') or not f.endswith('.pub'):
            continue

        name = f[len('keypair-'):-len('.pub')]

        pub_full = key_path / f
        priv_full = key_path / ('keypair-%s' % name)

        with open(pub_full, 'r', encoding='ascii') as fh:
            data = fh.read()

        if not data.startswith('ssh-rsa '):
            print('unexpected format for key pair file: %s; removing' %
                  pub_full)
            pub_full.unlink()
            priv_full.unlink()
            continue

        local_existing[name] = rsa_key_fingerprint(priv_full)

    for name in sorted(set(remote_existing) | set(local_existing)):
        if name not in local_existing:
            actual = '%s%s' % (prefix, name)
            print('remote key %s does not exist locally' % name)
            remove_remote(actual)
            del remote_existing[name]

        elif name not in remote_existing:
            print('local key %s does not exist remotely' % name)
            remove_local(name)
            del local_existing[name]

        elif remote_existing[name] != local_existing[name]:
            print('key fingerprint mismatch for %s; '
                  'removing from local and remote' % name)
            remove_local(name)
            remove_remote('%s%s' % (prefix, name))
            del local_existing[name]
            del remote_existing[name]

    missing = KEY_PAIRS - set(remote_existing)

    for name in sorted(missing):
        actual = '%s%s' % (prefix, name)
        print('creating key pair %s' % actual)

        priv_full = key_path / ('keypair-%s' % name)
        pub_full = key_path / ('keypair-%s.pub' % name)

        kp = ec2resource.create_key_pair(KeyName=actual)

        # Create the private key file with owner-only permissions from the
        # start, so the key material is never readable by other users under
        # a permissive umask.
        fd = os.open(str(priv_full),
                     os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                     0o600)
        with os.fdopen(fd, 'w', encoding='ascii') as fh:
            fh.write(kp.key_material)
            fh.write('\n')

        # The creation mode is ignored when the file already existed, so
        # still enforce it explicitly.
        priv_full.chmod(0o0600)

        # SSH public key can be extracted via `ssh-keygen`.
        with pub_full.open('w', encoding='ascii') as fh:
            subprocess.run(
                ['ssh-keygen', '-y', '-f', str(priv_full)],
                stdout=fh,
                check=True)

        pub_full.chmod(0o0600)
357 361
358 362
def delete_instance_profile(profile):
    """Remove every role from an IAM instance profile, then delete it."""
    profile_name = profile.name

    for attached in profile.roles:
        print('removing role %s from instance profile %s'
              % (attached.name, profile_name))
        profile.remove_role(RoleName=attached.name)

    print('deleting instance profile %s' % profile_name)
    profile.delete()
367 371
368 372
def _delete_iam_role(role):
    """Detach all managed policies from an IAM role, then delete the role."""
    # IAM refuses to delete a role that still has managed policies attached.
    for policy in role.attached_policies.all():
        print('detaching policy %s from %s' % (policy.arn, role.name))
        role.detach_policy(PolicyArn=policy.arn)

    print('removing role %s' % role.name)
    role.delete()


def ensure_iam_state(iamclient, iamresource, prefix='hg-'):
    """Ensure IAM state is in sync with our canonical definition.

    Instance profiles and roles whose names start with ``prefix`` but are
    not declared in ``IAM_INSTANCE_PROFILES`` / ``IAM_ROLES`` are deleted.
    Declared ones that are missing are created, and new roles get their
    ``policy_arns`` attached. Finally each declared profile's role
    membership is adjusted to match its definition.
    """

    remote_profiles = {}

    for profile in iamresource.instance_profiles.all():
        if profile.name.startswith(prefix):
            remote_profiles[profile.name[len(prefix):]] = profile

    for name in sorted(set(remote_profiles) - set(IAM_INSTANCE_PROFILES)):
        delete_instance_profile(remote_profiles[name])
        del remote_profiles[name]

    remote_roles = {}

    for role in iamresource.roles.all():
        if role.name.startswith(prefix):
            remote_roles[role.name[len(prefix):]] = role

    for name in sorted(set(remote_roles) - set(IAM_ROLES)):
        _delete_iam_role(remote_roles[name])
        del remote_roles[name]

    # We've purged remote state that doesn't belong. Create missing
    # instance profiles and roles.
    for name in sorted(set(IAM_INSTANCE_PROFILES) - set(remote_profiles)):
        actual = '%s%s' % (prefix, name)
        print('creating IAM instance profile %s' % actual)

        profile = iamresource.create_instance_profile(
            InstanceProfileName=actual)
        remote_profiles[name] = profile

        waiter = iamclient.get_waiter('instance_profile_exists')
        waiter.wait(InstanceProfileName=actual)
        print('IAM instance profile %s is available' % actual)

    for name in sorted(set(IAM_ROLES) - set(remote_roles)):
        entry = IAM_ROLES[name]

        actual = '%s%s' % (prefix, name)
        print('creating IAM role %s' % actual)

        role = iamresource.create_role(
            RoleName=actual,
            Description=entry['description'],
            AssumeRolePolicyDocument=ASSUME_ROLE_POLICY_DOCUMENT,
        )

        waiter = iamclient.get_waiter('role_exists')
        waiter.wait(RoleName=actual)
        print('IAM role %s is available' % actual)

        remote_roles[name] = role

        for arn in entry['policy_arns']:
            print('attaching policy %s to %s' % (arn, role.name))
            role.attach_policy(PolicyArn=arn)

    # Now reconcile state of profiles.
    for name, meta in sorted(IAM_INSTANCE_PROFILES.items()):
        profile = remote_profiles[name]
        wanted = {'%s%s' % (prefix, role) for role in meta['roles']}
        have = {role.name for role in profile.roles}

        for role in sorted(have - wanted):
            print('removing role %s from %s' % (role, profile.name))
            profile.remove_role(RoleName=role)

        for role in sorted(wanted - have):
            print('adding role %s to %s' % (role, profile.name))
            profile.add_role(RoleName=role)
444 448
445 449
def find_image(ec2resource, owner_id, name):
    """Find an AMI by its owner ID and name.

    Only available machine images are considered and the first match is
    returned. Raises ``Exception`` when nothing matches.
    """
    criteria = (
        ('owner-id', owner_id),
        ('state', 'available'),
        ('image-type', 'machine'),
        ('name', name),
    )
    filters = [{'Name': key, 'Values': [value]} for key, value in criteria]

    missing = object()
    match = next(iter(ec2resource.images.filter(Filters=filters)), missing)

    if match is missing:
        raise Exception('unable to find image for %s' % name)

    return match
473 477
474 478
def ensure_security_groups(ec2resource, prefix='hg-'):
    """Ensure all necessary Mercurial security groups are present.

    All security groups are prefixed with ``hg-`` by default. Any security
    groups having this prefix but aren't in our list are deleted.

    Returns a dict mapping each unprefixed name in ``SECURITY_GROUPS`` to
    its security group resource. Groups that already exist are returned
    as-is; their rules are not re-applied.
    """
    existing = {
        group.group_name[len(prefix):]: group
        for group in ec2resource.security_groups.all()
        if group.group_name.startswith(prefix)
    }

    for stale_name in sorted(set(existing) - set(SECURITY_GROUPS)):
        stale = existing[stale_name]
        print('removing legacy security group: %s' % stale.group_name)
        stale.delete()

    security_groups = {}

    for name in sorted(SECURITY_GROUPS):
        if name in existing:
            security_groups[name] = existing[name]
            continue

        definition = SECURITY_GROUPS[name]
        actual = '%s%s' % (prefix, name)
        print('adding security group %s' % actual)

        created = ec2resource.create_security_group(
            Description=definition['description'],
            GroupName=actual,
        )
        created.authorize_ingress(IpPermissions=definition['ingress'])

        security_groups[name] = created

    return security_groups
516 520
517 521
def terminate_ec2_instances(ec2resource, prefix='hg-'):
    """Terminate all EC2 instances managed by us.

    An instance is ours when its ``Name`` tag starts with ``prefix``.
    Termination is requested for every match first; only then does the
    function wait for each of them to finish terminating.
    """
    pending = []

    for candidate in ec2resource.instances.all():
        if candidate.state['Name'] == 'terminated':
            continue

        for tag in candidate.tags or []:
            if tag['Key'] != 'Name' or not tag['Value'].startswith(prefix):
                continue

            print('terminating %s' % candidate.id)
            candidate.terminate()
            pending.append(candidate)

    for terminating in pending:
        terminating.wait_until_terminated()
534 538
535 539
def remove_resources(c, prefix='hg-'):
    """Purge all of our resources in this EC2 region.

    In order: terminates our instances, removes our own AMIs, then deletes
    security groups, IAM instance profiles and IAM roles whose names start
    with ``prefix``.
    """
    ec2 = c.ec2resource
    iam = c.iamresource

    terminate_ec2_instances(ec2, prefix=prefix)

    for image in ec2.images.filter(Owners=['self']):
        if not image.name.startswith(prefix):
            continue
        remove_ami(ec2, image)

    for group in ec2.security_groups.all():
        if not group.group_name.startswith(prefix):
            continue
        print('removing security group %s' % group.group_name)
        group.delete()

    for profile in iam.instance_profiles.all():
        if not profile.name.startswith(prefix):
            continue
        delete_instance_profile(profile)

    for role in iam.roles.all():
        if not role.name.startswith(prefix):
            continue

        # Attached policies are detached before the role is deleted.
        for policy in role.attached_policies.all():
            print('detaching policy %s from %s' % (policy.arn, role.name))
            role.detach_policy(PolicyArn=policy.arn)

        print('removing role %s' % role.name)
        role.delete()
564 568
565 569
def wait_for_ip_addresses(instances):
    """Wait for the public IP addresses of an iterable of instances.

    Each instance is reloaded every 2 seconds until it reports a public IP
    address. There is no timeout.
    """
    for instance in instances:
        while not instance.public_ip_address:
            time.sleep(2)
            instance.reload()

        print('public IP address for %s: %s'
              % (instance.id, instance.public_ip_address))
578 582
579 583
def remove_ami(ec2resource, image):
    """Remove an AMI and its underlying snapshots.

    Snapshot references are collected before the image is deregistered and
    the snapshots are deleted afterwards.
    """
    backing_snapshots = [
        ec2resource.Snapshot(mapping['Ebs']['SnapshotId'])
        for mapping in image.block_device_mappings
        if 'Ebs' in mapping
    ]

    print('deregistering %s' % image.id)
    image.deregister()

    for snap in backing_snapshots:
        print('deleting snapshot %s' % snap.id)
        snap.delete()
594 598
595 599
def wait_for_ssm(ssmclient, instances):
    """Wait for SSM to come online for an iterable of instances.

    ``instances`` may be any iterable of objects with an ``id`` attribute.
    SSM is polled every 2 seconds until it reports as many instances as
    were requested. There is no timeout.
    """
    # Materialize once: the IDs are reused on every poll and their count is
    # needed, neither of which works with a one-shot iterator.
    instance_ids = [i.id for i in instances]
    wanted = len(instance_ids)

    while True:
        res = ssmclient.describe_instance_information(
            Filters=[
                {
                    'Key': 'InstanceIds',
                    'Values': instance_ids,
                },
            ],
        )

        available = len(res['InstanceInformationList'])

        print('%d/%d instances available in SSM' % (available, wanted))

        if available == wanted:
            return

        time.sleep(2)
617 621
618 622
def run_ssm_command(ssmclient, instances, document_name, parameters):
    """Run an SSM document on instances and wait for it to succeed.

    ``instances`` may be any iterable of objects with an ``id`` attribute.
    The command is sent to all of them at once, then each invocation is
    polled until it reports ``Success``.

    Raises ``Exception`` if an invocation ends in any status other than
    ``Success``. Client errors other than ``InvocationDoesNotExist`` are
    re-raised.
    """
    # Materialize once: the instances are needed both to send the command
    # and to poll each invocation. A one-shot iterator would be exhausted by
    # the first use and the wait loop silently skipped.
    instances = list(instances)

    res = ssmclient.send_command(
        InstanceIds=[i.id for i in instances],
        DocumentName=document_name,
        Parameters=parameters,
        CloudWatchOutputConfig={
            'CloudWatchOutputEnabled': True,
        },
    )

    command_id = res['Command']['CommandId']

    for instance in instances:
        while True:
            try:
                res = ssmclient.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=instance.id,
                )
            except botocore.exceptions.ClientError as e:
                if e.response['Error']['Code'] != 'InvocationDoesNotExist':
                    raise

                # The invocation is not visible yet; retry.
                print('could not find SSM command invocation; waiting')
                time.sleep(1)
                continue

            status = res['Status']

            if status == 'Success':
                break

            if status not in ('Pending', 'InProgress', 'Delayed'):
                raise Exception('command failed on %s: %s' % (
                    instance.id, status))

            time.sleep(2)
655 659
656 660
@contextlib.contextmanager
def temporary_ec2_instances(ec2resource, config):
    """Create temporary EC2 instances.

    This is a proxy to ``ec2resource.create_instances(**config)`` that takes
    care of managing the lifecycle of the instances.

    When the context manager exits, the instances are terminated.

    The context manager evaluates to the list of instances that were
    created. The instances may not be available for work immediately: it is
    up to the caller to wait for the instance to start responding.
    """
    started_ids = []

    try:
        created = ec2resource.create_instances(**config)

        started_ids = [instance.id for instance in created]
        print('started instances: %s' % ' '.join(started_ids))

        yield created
    finally:
        # Nothing to clean up when creation failed or produced no instances.
        if started_ids:
            print('terminating instances: %s' % ' '.join(started_ids))
            for instance in created:
                instance.terminate()
            print('terminated %d instances' % len(started_ids))
687 691
688 692
@contextlib.contextmanager
def create_temp_windows_ec2_instances(c: AWSConnection, config):
    """Create temporary Windows EC2 instances.

    This is a higher-level wrapper around ``temporary_ec2_instances()`` that
    configures the Windows instance for Windows Remote Management. The emitted
    instances will have a ``winrm_client`` attribute containing a
    ``pypsrp.client.Client`` instance bound to the instance.

    ``config`` is not modified; a deep copy is extended with the instance
    profile, a ``Name`` tag and the user data. Raises ``ValueError`` if
    ``config`` already contains ``IamInstanceProfile`` or ``UserData``.
    """
    for reserved in ('IamInstanceProfile', 'UserData'):
        if reserved in config:
            raise ValueError('%s cannot be provided in config' % reserved)

    password = c.automation.default_password()

    launch_config = copy.deepcopy(config)
    launch_config['IamInstanceProfile'] = {'Name': 'hg-ephemeral-ec2-1'}

    name_tag = {'Key': 'Name', 'Value': 'hg-temp-windows'}
    launch_config.setdefault('TagSpecifications', []).append({
        'ResourceType': 'instance',
        'Tags': [name_tag],
    })

    # The user data script sets the Administrator password used below.
    launch_config['UserData'] = WINDOWS_USER_DATA % password

    with temporary_ec2_instances(c.ec2resource, launch_config) as instances:
        wait_for_ip_addresses(instances)

        print('waiting for Windows Remote Management service...')

        for instance in instances:
            instance.winrm_client = wait_for_winrm(
                instance.public_ip_address, 'Administrator', password)
            print('established WinRM connection to %s' % instance.id)

        yield instances
726 730
727 731
def resolve_fingerprint(fingerprint):
    """Return the SHA-256 hex digest of a JSON-serializable value.

    The value is serialized with sorted keys so that equal mappings hash
    identically regardless of key order.
    """
    canonical = json.dumps(fingerprint, sort_keys=True)
    digest = hashlib.sha256(canonical.encode('utf-8'))
    return digest.hexdigest()
731 735
732 736
def find_and_reconcile_image(ec2resource, name, fingerprint):
    """Attempt to find an existing EC2 AMI with a name and fingerprint.

    If an image with the specified fingerprint is found, it is returned.
    Otherwise None is returned.

    Existing images for the specified name that don't have the specified
    fingerprint or are missing required metadata are deleted.
    """
    # Walk every AMI with this name, deleting the invalid ones. Remember a
    # good image so it can be returned once the image state is reconciled.
    candidates = ec2resource.images.filter(
        Filters=[{'Name': 'name', 'Values': [name]}])

    match = None

    for image in candidates:
        if image.tags is None:
            print('image %s for %s lacks required tags; removing' % (
                image.id, image.name))
            remove_ami(ec2resource, image)
            continue

        tag_values = {tag['Key']: tag['Value'] for tag in image.tags}

        if tag_values.get('HGIMAGEFINGERPRINT') == fingerprint:
            match = image
            continue

        print('image %s for %s has wrong fingerprint; removing' % (
            image.id, image.name))
        remove_ami(ec2resource, image)

    return match
766 770
767 771
def create_ami_from_instance(ec2client, instance, name, description,
                             fingerprint):
    """Create an AMI from a running instance.

    The instance is stopped first. The new image is tagged with
    ``HGIMAGEFINGERPRINT=<fingerprint>`` and the call blocks until the image
    is available.

    Returns the ``ec2resource.Image`` representing the created AMI.
    """
    instance.stop()

    stopped_waiter = ec2client.get_waiter('instance_stopped')
    # Poll every 5 seconds.
    stopped_waiter.wait(
        InstanceIds=[instance.id],
        WaiterConfig={'Delay': 5},
    )
    print('%s is stopped' % instance.id)

    image = instance.create_image(Name=name, Description=description)

    fingerprint_tag = {'Key': 'HGIMAGEFINGERPRINT', 'Value': fingerprint}
    image.create_tags(Tags=[fingerprint_tag])

    print('waiting for image %s' % image.id)

    available_waiter = ec2client.get_waiter('image_available')
    available_waiter.wait(ImageIds=[image.id])

    print('image %s available as %s' % (image.id, image.name))

    return image
804 808
805 809
def ensure_linux_dev_ami(c: AWSConnection, distro='debian9', prefix='hg-'):
    """Ensures a Linux development AMI is available and up-to-date.

    An existing AMI is reused when its fingerprint matches the instance
    configuration, bootstrap script and requirements files. Otherwise a
    temporary instance is started from the distro's base image, bootstrapped
    over SSH and turned into a new AMI.

    Returns an ``ec2.Image`` of either an existing AMI or a newly-built one.
    Raises ``ValueError`` for an unsupported ``distro`` and ``Exception`` if
    the bootstrap script exits non-zero.
    """
    ec2client = c.ec2client
    ec2resource = c.ec2resource

    name = '%s%s-%s' % (prefix, 'linux-dev', distro)

    # distro -> (AMI owner account ID, base AMI name, SSH login user).
    base_images = {
        'debian9': (
            DEBIAN_ACCOUNT_ID,
            'debian-stretch-hvm-x86_64-gp2-2019-02-19-26620',
            'admin',
        ),
        'ubuntu18.04': (
            UBUNTU_ACCOUNT_ID,
            'ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20190403',
            'ubuntu',
        ),
        'ubuntu18.10': (
            UBUNTU_ACCOUNT_ID,
            'ubuntu/images/hvm-ssd/ubuntu-cosmic-18.10-amd64-server-20190402',
            'ubuntu',
        ),
        'ubuntu19.04': (
            UBUNTU_ACCOUNT_ID,
            'ubuntu/images/hvm-ssd/ubuntu-disco-19.04-amd64-server-20190417',
            'ubuntu',
        ),
    }

    if distro not in base_images:
        raise ValueError('unsupported Linux distro: %s' % distro)

    owner_id, base_image_name, ssh_username = base_images[distro]
    image = find_image(ec2resource, owner_id, base_image_name)

    config = {
        'BlockDeviceMappings': [
            {
                'DeviceName': image.block_device_mappings[0]['DeviceName'],
                'Ebs': {
                    'DeleteOnTermination': True,
                    'VolumeSize': 8,
                    'VolumeType': 'gp2',
                },
            },
        ],
        'EbsOptimized': True,
        'ImageId': image.id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        # 8 VCPUs for compiling Python.
        'InstanceType': 't3.2xlarge',
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['linux-dev-1'].id],
    }

    automation_dir = pathlib.Path(__file__).parent.parent
    requirements2 = (automation_dir / 'linux-requirements-py2.txt').read_text(
        encoding='utf-8')
    requirements3 = (automation_dir / 'linux-requirements-py3.txt').read_text(
        encoding='utf-8')

    # Compute a deterministic fingerprint to determine whether image needs to
    # be regenerated.
    fingerprint = resolve_fingerprint({
        'instance_config': config,
        'bootstrap_script': BOOTSTRAP_DEBIAN,
        'requirements_py2': requirements2,
        'requirements_py3': requirements3,
    })

    existing_image = find_and_reconcile_image(ec2resource, name, fingerprint)

    if existing_image:
        return existing_image

    print('no suitable %s image found; creating one...' % name)

    with temporary_ec2_instances(ec2resource, config) as instances:
        wait_for_ip_addresses(instances)

        instance = instances[0]

        client = wait_for_ssh(
            instance.public_ip_address, 22,
            username=ssh_username,
            key_filename=str(c.key_pair_path_private('automation')))

        home = '/home/%s' % ssh_username

        # (file name in the remote home directory, content to upload)
        uploads = (
            ('bootstrap', BOOTSTRAP_DEBIAN),
            ('requirements-py2.txt', requirements2),
            ('requirements-py3.txt', requirements3),
        )

        with client:
            print('connecting to SSH server')
            sftp = client.open_sftp()

            print('uploading bootstrap files')
            for remote_name, content in uploads:
                with sftp.open('%s/%s' % (home, remote_name), 'wb') as fh:
                    fh.write(content)
                    fh.chmod(0o0700)

            print('executing bootstrap')
            chan, stdin, stdout = ssh_exec_command(client,
                                                   '%s/bootstrap' % home)
            stdin.close()

            # Relay the bootstrap output as it arrives.
            for line in stdout:
                print(line, end='')

            exit_status = chan.recv_exit_status()
            if exit_status:
                raise Exception('non-0 exit from bootstrap: %d' % exit_status)

            print('bootstrap completed; stopping %s to create %s' % (
                instance.id, name))

        # The AMI has to be created while the temporary instance still
        # exists, so this stays inside the ``temporary_ec2_instances`` block.
        return create_ami_from_instance(ec2client, instance, name,
                                        'Mercurial Linux development environment',
                                        fingerprint)
941 945
942 946
@contextlib.contextmanager
def temporary_linux_dev_instances(c: AWSConnection, image, instance_type,
                                  prefix='hg-', ensure_extra_volume=False):
    """Create temporary Linux development EC2 instances.

    Context manager resolves to a list of ``ec2.Instance`` that were created
    and are running.

    ``ensure_extra_volume`` can be set to ``True`` to require that instances
    have a 2nd storage volume available other than the primary AMI volume.
    For instance types with instance storage, this does nothing special.
    But for instance types without instance storage, an additional EBS volume
    will be added to the instance.

    Instances have an ``ssh_client`` attribute containing a paramiko SSHClient
    instance bound to the instance.

    Instances have an ``ssh_private_key_path`` attribute containing the
    str path to the SSH private key to connect to the instance.

    Raises ``ValueError`` if an extra volume is needed but the image's
    primary device name is not one this function knows how to extend.
    """

    def gp2_volume(device_name):
        # 8 GB gp2 EBS volume that is deleted together with the instance.
        return {
            'DeviceName': device_name,
            'Ebs': {
                'DeleteOnTermination': True,
                'VolumeSize': 8,
                'VolumeType': 'gp2',
            },
        }

    block_device_mappings = [
        gp2_volume(image.block_device_mappings[0]['DeviceName']),
    ]

    # INSTANCE_TYPES_WITH_STORAGE is not an exhaustive list of instance types
    # having instance storage, so an unlisted type that does have instance
    # storage still gets the extra EBS volume.
    needs_extra_volume = (
        ensure_extra_volume
        and not instance_type.startswith(tuple(INSTANCE_TYPES_WITH_STORAGE)))

    if needs_extra_volume:
        main_device = block_device_mappings[0]['DeviceName']

        # Primary device name -> name to use for the second volume.
        second_device_names = {
            'xvda': 'xvdb',
            '/dev/sda1': '/dev/sdb',
        }

        if main_device not in second_device_names:
            raise ValueError('unhandled primary EBS device name: %s' %
                             main_device)

        block_device_mappings.append(
            gp2_volume(second_device_names[main_device]))

    config = {
        'BlockDeviceMappings': block_device_mappings,
        'EbsOptimized': True,
        'ImageId': image.id,
        'InstanceInitiatedShutdownBehavior': 'terminate',
        'InstanceType': instance_type,
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['linux-dev-1'].id],
    }

    with temporary_ec2_instances(c.ec2resource, config) as instances:
        wait_for_ip_addresses(instances)

        ssh_private_key_path = str(c.key_pair_path_private('automation'))

        for instance in instances:
            instance.ssh_client = wait_for_ssh(
                instance.public_ip_address, 22,
                username='hg',
                key_filename=ssh_private_key_path)
            instance.ssh_private_key_path = ssh_private_key_path

        try:
            yield instances
        finally:
            # Close the SSH connections before the instances are terminated.
            for instance in instances:
                instance.ssh_client.close()
1029 1033
1030 1034
def ensure_windows_dev_ami(c: AWSConnection, prefix='hg-'):
    """Return the Windows development AMI, building it first if required.

    A fingerprint is derived from the instance configuration and the
    bootstrap scripts. If an image with the expected name and fingerprint
    already exists, it is returned as-is. Otherwise a temporary EC2 instance
    is launched from the base Windows image, bootstrapped, and turned into
    a new AMI.

    Obsolete AMIs will be deleted so there is only a single AMI having the
    desired name.

    Returns an ``ec2.Image`` of either an existing AMI or a newly-built
    one.
    """
    ec2client = c.ec2client
    ec2resource = c.ec2resource
    ssmclient = c.session.client('ssm')

    name = '%s%s' % (prefix, 'windows-dev')

    base_image = find_image(ec2resource, AMAZON_ACCOUNT_ID,
                            WINDOWS_BASE_IMAGE_NAME)

    root_volume = {
        'DeviceName': '/dev/sda1',
        'Ebs': {
            'DeleteOnTermination': True,
            'VolumeSize': 32,
            'VolumeType': 'gp2',
        },
    }

    config = {
        'BlockDeviceMappings': [root_volume],
        'ImageId': base_image.id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': 't3.medium',
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
    }

    # NOTE: these strings are passed to resolve_fingerprint() below, so they
    # are kept verbatim (including the "OpenSSL" wording in one message).
    sshd_commands = [
        # Need to start the service so sshd_config is generated.
        'Start-Service sshd',
        'Write-Output "modifying sshd_config"',
        r'$content = Get-Content C:\ProgramData\ssh\sshd_config',
        '$content = $content -replace "Match Group administrators","" -replace "AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys",""',
        r'$content | Set-Content C:\ProgramData\ssh\sshd_config',
        'Import-Module OpenSSHUtils',
        r'Repair-SshdConfigPermission C:\ProgramData\ssh\sshd_config -Confirm:$false',
        'Restart-Service sshd',
        'Write-Output "installing OpenSSL client"',
        'Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0',
        'Set-Service -Name sshd -StartupType "Automatic"',
        'Write-Output "OpenSSH server running"',
    ]

    with INSTALL_WINDOWS_DEPENDENCIES.open('r', encoding='utf-8') as fh:
        dependency_commands = [line.rstrip() for line in fh]

    # Windows Defender is disabled for the duration of the bootstrap because
    # it just slows things down; it is re-enabled as the final command.
    commands = (
        ['Set-MpPreference -DisableRealtimeMonitoring $true']
        + sshd_commands
        + dependency_commands
        + ['Set-MpPreference -DisableRealtimeMonitoring $false']
    )

    # The fingerprint is deterministic and decides whether the image needs
    # to be regenerated.
    fingerprint = resolve_fingerprint({
        'instance_config': config,
        'user_data': WINDOWS_USER_DATA,
        'initial_bootstrap': WINDOWS_BOOTSTRAP_POWERSHELL,
        'bootstrap_commands': commands,
    })

    existing_image = find_and_reconcile_image(ec2resource, name, fingerprint)
    if existing_image:
        return existing_image

    print('no suitable Windows development image found; creating one...')

    with create_temp_windows_ec2_instances(c, config) as instances:
        assert len(instances) == 1
        instance = instances[0]

        wait_for_ssm(ssmclient, [instance])

        # First boot: install various Windows updates. PowerShell Remoting
        # would be the ideal transport, but trust issues make it difficult
        # to invoke Windows Update remotely. SSM has a mechanism for running
        # Windows Update, so it is used instead.
        print('installing Windows features...')
        ssm_parameters = {
            'commands': WINDOWS_BOOTSTRAP_POWERSHELL.split('\n'),
        }
        run_ssm_command(ssmclient, [instance], 'AWS-RunPowerShellScript',
                        ssm_parameters)

        # Reboot so all updates are fully applied.
        #
        # instance.reboot() is avoided because it is asynchronous and gives
        # no indication of when the reboot actually happened; we could start
        # talking to the instance before it has gone down. An explicit
        # stop / wait / start sequence removes that ambiguity.
        print('rebooting instance %s' % instance.id)
        instance.stop()
        stopped_waiter = ec2client.get_waiter('instance_stopped')
        stopped_waiter.wait(
            InstanceIds=[instance.id],
            WaiterConfig={'Delay': 5},
        )

        instance.start()
        wait_for_ip_addresses([instance])

        # There is a race condition here between the User Data PS script
        # running and us connecting to WinRM. This can manifest as
        # "AuthorizationManager check failed" failures during
        # run_powershell().
        # TODO figure out a workaround.

        print('waiting for Windows Remote Management to come back...')
        winrm_client = wait_for_winrm(instance.public_ip_address,
                                      'Administrator',
                                      c.automation.default_password())
        print('established WinRM connection to %s' % instance.id)
        instance.winrm_client = winrm_client

        print('bootstrapping instance...')
        run_powershell(winrm_client, '\n'.join(commands))

        print('bootstrap completed; stopping %s to create image' % instance.id)
        return create_ami_from_instance(
            ec2client, instance, name,
            'Mercurial Windows development environment',
            fingerprint)
1169 1171
1170 1172
@contextlib.contextmanager
def temporary_windows_dev_instances(c: AWSConnection, image, instance_type,
                                    prefix='hg-', disable_antivirus=False):
    """Launch temporary Windows development EC2 instances from ``image``.

    Context manager resolves to the list of ``EC2.Instance`` that were
    created.

    When ``disable_antivirus`` is true, real-time monitoring is turned off
    on each instance via PowerShell before the instances are handed to the
    caller.
    """
    root_volume = {
        'DeviceName': '/dev/sda1',
        'Ebs': {
            'DeleteOnTermination': True,
            'VolumeSize': 32,
            'VolumeType': 'gp2',
        },
    }

    config = {
        'BlockDeviceMappings': [root_volume],
        'ImageId': image.id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': instance_type,
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
    }

    with create_temp_windows_ec2_instances(c, config) as instances:
        if disable_antivirus:
            disable_monitoring = (
                'Set-MpPreference -DisableRealtimeMonitoring $true')

            for instance in instances:
                run_powershell(instance.winrm_client, disable_monitoring)

        yield instances
General Comments 0
You need to be logged in to leave comments. Login now