##// END OF EJS Templates
automation: shore up rebooting behavior...
Gregory Szorc -
r42466:e570106b default
parent child Browse files
Show More
@@ -1,892 +1,908 b''
1 # aws.py - Automation code for Amazon Web Services
1 # aws.py - Automation code for Amazon Web Services
2 #
2 #
3 # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # no-check-code because Python 3 native.
8 # no-check-code because Python 3 native.
9
9
10 import contextlib
10 import contextlib
11 import copy
11 import copy
12 import hashlib
12 import hashlib
13 import json
13 import json
14 import os
14 import os
15 import pathlib
15 import pathlib
16 import subprocess
16 import subprocess
17 import time
17 import time
18
18
19 import boto3
19 import boto3
20 import botocore.exceptions
20 import botocore.exceptions
21
21
22 from .winrm import (
22 from .winrm import (
23 run_powershell,
23 run_powershell,
24 wait_for_winrm,
24 wait_for_winrm,
25 )
25 )
26
26
27
27
# Directory four levels above this file; other repository paths are resolved
# relative to it.
SOURCE_ROOT = pathlib.Path(os.path.abspath(__file__)).parent.parent.parent.parent

# Location of the ``install-windows-dependencies.ps1`` script under
# ``contrib/``.
INSTALL_WINDOWS_DEPENDENCIES = SOURCE_ROOT.joinpath(
    'contrib', 'install-windows-dependencies.ps1')
32
32
33
33
# Names (without the resource prefix) of the EC2 key pairs that
# ensure_key_pairs() creates when they are missing.
KEY_PAIRS = {'automation'}
37
37
38
38
# Security groups to maintain, keyed by name without the resource prefix.
# ``description`` is used when creating the group and ``ingress`` is passed
# as the ``IpPermissions`` argument of ``authorize_ingress``.
SECURITY_GROUPS = {
    'windows-dev-1': {
        'description': 'Mercurial Windows instances that perform build automation',
        'ingress': [
            # SSH.
            {
                'FromPort': 22,
                'ToPort': 22,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'SSH from entire Internet',
                    },
                ],
            },
            # Remote Desktop.
            {
                'FromPort': 3389,
                'ToPort': 3389,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'RDP from entire Internet',
                    },
                ],
            },
            # WinRM (ports 5985 through 5986).
            {
                'FromPort': 5985,
                'ToPort': 5986,
                'IpProtocol': 'tcp',
                'IpRanges': [
                    {
                        'CidrIp': '0.0.0.0/0',
                        'Description': 'PowerShell Remoting (Windows Remote Management)',
                    },
                ],
            },
        ],
    },
}
80
80
81
81
# IAM roles to maintain, keyed by name without the resource prefix. Each
# entry gives the role description and the managed policy ARNs attached when
# the role is created.
IAM_ROLES = {
    'ephemeral-ec2-role-1': {
        'description': 'Mercurial temporary EC2 instances',
        'policy_arns': [
            'arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM',
        ],
    },
}
90
90
91
91
# JSON trust policy supplied as ``AssumeRolePolicyDocument`` when roles are
# created; it allows the ``ec2.amazonaws.com`` service to call
# ``sts:AssumeRole``.
ASSUME_ROLE_POLICY_DOCUMENT = '''
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "ec2.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
'''.strip()
106
106
107
107
# IAM instance profiles to maintain, keyed by name without the resource
# prefix. ``roles`` lists the (unprefixed) role names each profile must hold.
IAM_INSTANCE_PROFILES = {
    'ephemeral-ec2-1': {
        'roles': ['ephemeral-ec2-role-1'],
    },
}
115
115
116
116
# EC2 User Data template for Windows instances. The single ``%s`` placeholder
# receives the Administrator password; the rest of the script configures
# WinRM (listener, basic auth, firewall rule) and relaxes UAC for remote
# shells.
# Modelled on the User Data script used by Packer
# (from https://www.packer.io/intro/getting-started/build-image.html).
WINDOWS_USER_DATA = r'''
<powershell>

# TODO enable this once we figure out what is failing.
#$ErrorActionPreference = "stop"

# Set administrator password
net user Administrator "%s"
wmic useraccount where "name='Administrator'" set PasswordExpires=FALSE

# First, make sure WinRM can't be connected to
netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new enable=yes action=block

# Delete any existing WinRM listeners
winrm delete winrm/config/listener?Address=*+Transport=HTTP 2>$Null
winrm delete winrm/config/listener?Address=*+Transport=HTTPS 2>$Null

# Create a new WinRM listener and configure
winrm create winrm/config/listener?Address=*+Transport=HTTP
winrm set winrm/config/winrs '@{MaxMemoryPerShellMB="0"}'
winrm set winrm/config '@{MaxTimeoutms="7200000"}'
winrm set winrm/config/service '@{AllowUnencrypted="true"}'
winrm set winrm/config/service '@{MaxConcurrentOperationsPerUser="12000"}'
winrm set winrm/config/service/auth '@{Basic="true"}'
winrm set winrm/config/client/auth '@{Basic="true"}'

# Configure UAC to allow privilege elevation in remote shells
$Key = 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System'
$Setting = 'LocalAccountTokenFilterPolicy'
Set-ItemProperty -Path $Key -Name $Setting -Value 1 -Force

# Configure and restart the WinRM Service; Enable the required firewall exception
Stop-Service -Name WinRM
Set-Service -Name WinRM -StartupType Automatic
netsh advfirewall firewall set rule name="Windows Remote Management (HTTP-In)" new action=allow localip=any remoteip=any
Start-Service -Name WinRM

# Disable firewall on private network interfaces so prompts don't appear.
Set-NetFirewallProfile -Name private -Enabled false
</powershell>
'''.lstrip()
162
162
163
163
# PowerShell script that installs the NuGet package provider, the
# OpenSSHUtils module, the OpenSSH server capability and the .NET Framework
# core feature.
#
# The progress message for the OpenSSH step previously read "OpenSSL", which
# did not match the ``OpenSSH.Server`` capability actually being installed.
WINDOWS_BOOTSTRAP_POWERSHELL = '''
Write-Output "installing PowerShell dependencies"
Install-PackageProvider -Name NuGet -MinimumVersion 2.8.5.201 -Force
Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
Install-Module -Name OpenSSHUtils -RequiredVersion 0.0.2.0

Write-Output "installing OpenSSH server"
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
# Various tools will attempt to use older versions of .NET. So we enable
# the feature that provides them so it doesn't have to be auto-enabled
# later.
Write-Output "enabling .NET Framework feature"
Install-WindowsFeature -Name Net-Framework-Core
'''
178
178
179
179
class AWSConnection:
    """Bundle of the boto3 session, clients and resources used to reach AWS."""

    def __init__(self, automation, region: str, ensure_ec2_state: bool=True):
        """Open a boto3 session in ``region`` and optionally sync remote state.

        ``automation`` must expose ``state_path``, the directory holding
        local state such as key pair files. When ``ensure_ec2_state`` is
        true, key pairs, security groups and IAM state are reconciled with
        the definitions in this module.
        """
        self.automation = automation
        self.local_state_path = automation.state_path

        self.prefix = 'hg-'

        session = boto3.session.Session(region_name=region)
        self.session = session
        self.ec2client = session.client('ec2')
        self.ec2resource = session.resource('ec2')
        self.iamclient = session.client('iam')
        self.iamresource = session.resource('iam')
        self.security_groups = {}

        if not ensure_ec2_state:
            return

        ensure_key_pairs(automation.state_path, self.ec2resource)
        self.security_groups = ensure_security_groups(self.ec2resource)
        ensure_iam_state(self.iamclient, self.iamresource)

    def key_pair_path_private(self, name):
        """Return the path of the private key file for key pair ``name``."""
        keys_dir = self.local_state_path / 'keys'
        return keys_dir / ('keypair-%s' % name)

    def key_pair_path_public(self, name):
        """Return the path of the public key file for key pair ``name``."""
        keys_dir = self.local_state_path / 'keys'
        return keys_dir / ('keypair-%s.pub' % name)
207
207
208
208
def rsa_key_fingerprint(p: pathlib.Path):
    """Return the fingerprint of the RSA private key stored at ``p``.

    The key is converted to unencrypted PKCS#8 DER with the ``openssl``
    command line tool; the fingerprint is the SHA-1 digest of that output
    written as colon-separated two-character hex groups.

    Raises ``subprocess.CalledProcessError`` if ``openssl`` exits non-zero.
    """
    # TODO use rsa package.
    command = [
        'openssl', 'pkcs8',
        '-in', str(p),
        '-nocrypt',
        '-topk8',
        '-outform', 'DER',
    ]
    completed = subprocess.run(command, capture_output=True, check=True)

    digest = hashlib.sha1(completed.stdout).hexdigest()
    # The hex digest has an even length, so stepping by two covers it exactly.
    return ':'.join(digest[i:i + 2] for i in range(0, len(digest), 2))
221
221
222
222
def ensure_key_pairs(state_path: pathlib.Path, ec2resource, prefix='hg-'):
    """Reconcile local key pair files with the key pairs registered in EC2.

    Remote key pairs whose name starts with ``prefix`` are compared, by
    fingerprint, with the ``keypair-<name>`` / ``keypair-<name>.pub`` files
    under ``state_path / 'keys'``:

    * a key present on only one side is deleted from that side;
    * a key present on both sides with differing fingerprints is deleted
      from both;
    * every name in ``KEY_PAIRS`` that is then missing remotely is created
      in EC2 and its private/public halves are written locally.

    Raises ``subprocess.CalledProcessError`` if ``openssl`` (via
    ``rsa_key_fingerprint``) or ``ssh-keygen`` fails.
    """
    remote_existing = {}

    for kpi in ec2resource.key_pairs.all():
        if kpi.name.startswith(prefix):
            remote_existing[kpi.name[len(prefix):]] = kpi.key_fingerprint

    # Validate that we have these keys locally.
    key_path = state_path / 'keys'
    key_path.mkdir(exist_ok=True, mode=0o700)

    def remove_remote(name):
        print('deleting key pair %s' % name)
        key = ec2resource.KeyPair(name)
        key.delete()

    def remove_local(name):
        pub_full = key_path / ('keypair-%s.pub' % name)
        priv_full = key_path / ('keypair-%s' % name)

        print('removing %s' % pub_full)
        pub_full.unlink()
        print('removing %s' % priv_full)
        priv_full.unlink()

    local_existing = {}

    for f in sorted(os.listdir(key_path)):
        if not f.startswith('keypair-') or not f.endswith('.pub'):
            continue

        name = f[len('keypair-'):-len('.pub')]

        pub_full = key_path / f
        priv_full = key_path / ('keypair-%s' % name)

        with open(pub_full, 'r', encoding='ascii') as fh:
            data = fh.read()

        if not data.startswith('ssh-rsa '):
            print('unexpected format for key pair file: %s; removing' %
                  pub_full)
            pub_full.unlink()
            # The private half may already be gone; a missing file must not
            # abort the cleanup of a malformed public key.
            if priv_full.exists():
                priv_full.unlink()
            continue

        local_existing[name] = rsa_key_fingerprint(priv_full)

    for name in sorted(set(remote_existing) | set(local_existing)):
        if name not in local_existing:
            actual = '%s%s' % (prefix, name)
            print('remote key %s does not exist locally' % name)
            remove_remote(actual)
            del remote_existing[name]

        elif name not in remote_existing:
            print('local key %s does not exist remotely' % name)
            remove_local(name)
            del local_existing[name]

        elif remote_existing[name] != local_existing[name]:
            print('key fingerprint mismatch for %s; '
                  'removing from local and remote' % name)
            remove_local(name)
            remove_remote('%s%s' % (prefix, name))
            del local_existing[name]
            del remote_existing[name]

    missing = KEY_PAIRS - set(remote_existing)

    for name in sorted(missing):
        actual = '%s%s' % (prefix, name)
        print('creating key pair %s' % actual)

        priv_full = key_path / ('keypair-%s' % name)
        pub_full = key_path / ('keypair-%s.pub' % name)

        kp = ec2resource.create_key_pair(KeyName=actual)

        # Create the private key file with owner-only permissions from the
        # start, so the key material is never briefly readable by other
        # users between the write and the chmod below.
        fd = os.open(priv_full, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w', encoding='ascii') as fh:
            fh.write(kp.key_material)
            fh.write('\n')

        # Still needed when the file already existed with wider permissions,
        # since the mode passed to os.open() only applies on creation.
        priv_full.chmod(0o0600)

        # SSH public key can be extracted via `ssh-keygen`.
        with pub_full.open('w', encoding='ascii') as fh:
            subprocess.run(
                ['ssh-keygen', '-y', '-f', str(priv_full)],
                stdout=fh,
                check=True)

        pub_full.chmod(0o0600)
316
316
317
317
def delete_instance_profile(profile):
    """Remove every role from an IAM instance profile, then delete it.

    ``profile`` must expose ``name``, ``roles``, ``remove_role()`` and
    ``delete()``.
    """
    for attached in profile.roles:
        message = 'removing role %s from instance profile %s' % (
            attached.name, profile.name)
        print(message)
        profile.remove_role(RoleName=attached.name)

    print('deleting instance profile %s' % profile.name)
    profile.delete()
326
326
327
327
def ensure_iam_state(iamclient, iamresource, prefix='hg-'):
    """Ensure IAM state is in sync with our canonical definition.

    Only instance profiles and roles whose name starts with ``prefix`` are
    considered. Those not listed in ``IAM_INSTANCE_PROFILES`` / ``IAM_ROLES``
    are deleted, missing ones are created (new roles get the policies named
    in ``policy_arns`` attached), and finally each profile's role membership
    is adjusted to match its ``roles`` list.
    """

    remote_profiles = {}

    for profile in iamresource.instance_profiles.all():
        if profile.name.startswith(prefix):
            remote_profiles[profile.name[len(prefix):]] = profile

    for name in sorted(set(remote_profiles) - set(IAM_INSTANCE_PROFILES)):
        delete_instance_profile(remote_profiles[name])
        del remote_profiles[name]

    remote_roles = {}

    for role in iamresource.roles.all():
        if role.name.startswith(prefix):
            remote_roles[role.name[len(prefix):]] = role

    for name in sorted(set(remote_roles) - set(IAM_ROLES)):
        role = remote_roles[name]

        # IAM refuses to delete a role that still has managed policies
        # attached, so detach them first (as remove_resources() does).
        for p in role.attached_policies.all():
            print('detaching policy %s from %s' % (p.arn, role.name))
            role.detach_policy(PolicyArn=p.arn)

        print('removing role %s' % role.name)
        role.delete()
        del remote_roles[name]

    # We've purged remote state that doesn't belong. Create missing
    # instance profiles and roles.
    for name in sorted(set(IAM_INSTANCE_PROFILES) - set(remote_profiles)):
        actual = '%s%s' % (prefix, name)
        print('creating IAM instance profile %s' % actual)

        profile = iamresource.create_instance_profile(
            InstanceProfileName=actual)
        remote_profiles[name] = profile

        # Block until the new profile is visible before continuing.
        waiter = iamclient.get_waiter('instance_profile_exists')
        waiter.wait(InstanceProfileName=actual)
        print('IAM instance profile %s is available' % actual)

    for name in sorted(set(IAM_ROLES) - set(remote_roles)):
        entry = IAM_ROLES[name]

        actual = '%s%s' % (prefix, name)
        print('creating IAM role %s' % actual)

        role = iamresource.create_role(
            RoleName=actual,
            Description=entry['description'],
            AssumeRolePolicyDocument=ASSUME_ROLE_POLICY_DOCUMENT,
        )

        # Block until the new role is visible before attaching policies.
        waiter = iamclient.get_waiter('role_exists')
        waiter.wait(RoleName=actual)
        print('IAM role %s is available' % actual)

        remote_roles[name] = role

        for arn in entry['policy_arns']:
            print('attaching policy %s to %s' % (arn, role.name))
            role.attach_policy(PolicyArn=arn)

    # Now reconcile state of profiles.
    for name, meta in sorted(IAM_INSTANCE_PROFILES.items()):
        profile = remote_profiles[name]
        wanted = {'%s%s' % (prefix, role) for role in meta['roles']}
        have = {role.name for role in profile.roles}

        for role in sorted(have - wanted):
            print('removing role %s from %s' % (role, profile.name))
            profile.remove_role(RoleName=role)

        for role in sorted(wanted - have):
            print('adding role %s to %s' % (role, profile.name))
            profile.add_role(RoleName=role)
403
403
404
404
def find_windows_server_2019_image(
        ec2resource,
        name='Windows_Server-2019-English-Full-Base-2019.02.13'):
    """Find the Amazon published Windows Server 2019 base image.

    ``name`` is the value used for the ``name`` image filter. It defaults to
    the release this module has always pinned, and can be overridden to
    select a different published image without editing this function.

    Returns the first image yielded by the filtered query. Raises
    ``Exception`` if the query yields nothing.
    """

    images = ec2resource.images.filter(
        Filters=[
            {
                'Name': 'owner-alias',
                'Values': ['amazon'],
            },
            {
                'Name': 'state',
                'Values': ['available'],
            },
            {
                'Name': 'image-type',
                'Values': ['machine'],
            },
            {
                'Name': 'name',
                'Values': [name],
            },
        ])

    # Only the first match is wanted.
    for image in images:
        return image

    raise Exception('unable to find Windows Server 2019 image')
432
432
433
433
def ensure_security_groups(ec2resource, prefix='hg-'):
    """Make the prefixed EC2 security groups match ``SECURITY_GROUPS``.

    Groups whose name starts with ``prefix`` (``hg-`` by default) but which
    are not defined in ``SECURITY_GROUPS`` are deleted. Defined groups that
    do not exist yet are created and given their ingress rules; groups that
    already exist are reused as they are.

    Returns a dict mapping each unprefixed group name to its group resource.
    """
    existing = {
        found.group_name[len(prefix):]: found
        for found in ec2resource.security_groups.all()
        if found.group_name.startswith(prefix)
    }

    for name in sorted(set(existing) - set(SECURITY_GROUPS)):
        legacy = existing[name]
        print('removing legacy security group: %s' % legacy.group_name)
        legacy.delete()

    security_groups = {}

    for name, spec in sorted(SECURITY_GROUPS.items()):
        if name in existing:
            security_groups[name] = existing[name]
        else:
            actual = '%s%s' % (prefix, name)
            print('adding security group %s' % actual)

            created = ec2resource.create_security_group(
                Description=spec['description'],
                GroupName=actual,
            )
            created.authorize_ingress(IpPermissions=spec['ingress'])

            security_groups[name] = created

    return security_groups
475
475
476
476
def terminate_ec2_instances(ec2resource, prefix='hg-'):
    """Terminate every instance whose ``Name`` tag starts with ``prefix``.

    Instances already in the ``terminated`` state are ignored. Termination
    is requested for all matching instances first; the function then waits
    for each of them to finish terminating.
    """
    pending = []

    for instance in ec2resource.instances.all():
        if instance.state['Name'] != 'terminated':
            # ``tags`` may be None for untagged instances.
            for tag in instance.tags or []:
                if tag['Key'] != 'Name' or not tag['Value'].startswith(prefix):
                    continue

                print('terminating %s' % instance.id)
                instance.terminate()
                pending.append(instance)

    for instance in pending:
        instance.wait_until_terminated()
493
493
494
494
def remove_resources(c, prefix='hg-'):
    """Delete every resource of ours, identified by ``prefix``.

    ``c`` must expose ``ec2resource`` and ``iamresource``. In order, this
    terminates matching instances, removes our own matching AMIs, then
    deletes matching security groups, instance profiles and roles (detaching
    each role's attached policies before deleting it).
    """
    ec2resource = c.ec2resource
    iamresource = c.iamresource

    terminate_ec2_instances(ec2resource, prefix=prefix)

    for image in ec2resource.images.filter(Owners=['self']):
        if not image.name.startswith(prefix):
            continue
        remove_ami(ec2resource, image)

    for group in ec2resource.security_groups.all():
        if not group.group_name.startswith(prefix):
            continue
        print('removing security group %s' % group.group_name)
        group.delete()

    for profile in iamresource.instance_profiles.all():
        if not profile.name.startswith(prefix):
            continue
        delete_instance_profile(profile)

    for role in iamresource.roles.all():
        if not role.name.startswith(prefix):
            continue

        for policy in role.attached_policies.all():
            print('detaching policy %s from %s' % (policy.arn, role.name))
            role.detach_policy(PolicyArn=policy.arn)

        print('removing role %s' % role.name)
        role.delete()
523
523
524
524
def wait_for_ip_addresses(instances):
    """Block until every instance in ``instances`` has a public IP address.

    Each instance is polled by sleeping two seconds and calling its
    ``reload()`` until ``public_ip_address`` is set; the address is then
    printed.
    """
    for instance in instances:
        while not instance.public_ip_address:
            time.sleep(2)
            instance.reload()

        print('public IP address for %s: %s' % (
            instance.id, instance.public_ip_address))
537
537
538
538
def remove_ami(ec2resource, image):
    """Deregister an AMI, then delete the EBS snapshots it referenced.

    The snapshot resources are looked up from the image's block device
    mappings before the image is deregistered.
    """
    snapshots = [
        ec2resource.Snapshot(mapping['Ebs']['SnapshotId'])
        for mapping in image.block_device_mappings
        if 'Ebs' in mapping
    ]

    print('deregistering %s' % image.id)
    image.deregister()

    for snap in snapshots:
        print('deleting snapshot %s' % snap.id)
        snap.delete()
553
553
554
554
def wait_for_ssm(ssmclient, instances):
    """Wait for SSM to come online for an iterable of instances.

    ``instances`` is an iterable of objects exposing an ``id`` attribute
    (not bare instance ID strings). The IDs are collected once up front, so
    one-shot iterables such as generators are supported.

    ``ssmclient.describe_instance_information()`` is polled every 2 seconds
    until the number of entries in ``InstanceInformationList`` equals the
    number of requested instances. Progress is printed on each poll. There
    is no timeout.

    If ``instances`` is empty there is nothing to wait for and no query is
    issued.
    """
    instance_ids = [i.id for i in instances]
    wanted = len(instance_ids)

    if not wanted:
        # Avoid sending a filter with an empty value list.
        return

    while True:
        res = ssmclient.describe_instance_information(
            Filters=[
                {
                    'Key': 'InstanceIds',
                    'Values': instance_ids,
                },
            ],
        )

        available = len(res['InstanceInformationList'])

        print('%d/%d instances available in SSM' % (available, wanted))

        if available == wanted:
            return

        time.sleep(2)
576
576
577
577
def run_ssm_command(ssmclient, instances, document_name, parameters):
    """Run an SSM document on EC2 instances and wait for it to complete.

    ``instances`` is an iterable of objects exposing an ``id`` attribute.
    ``document_name`` and ``parameters`` are passed through to
    ``ssmclient.send_command()`` as ``DocumentName`` and ``Parameters``.
    CloudWatch output is enabled for the command.

    After sending, each instance's invocation is polled via
    ``get_command_invocation()`` until it reports ``Success``. An
    ``InvocationDoesNotExist`` error is retried after 1 second, and the
    ``Pending``, ``InProgress`` and ``Delayed`` statuses are re-polled
    after 2 seconds. There is no timeout.

    Raises ``Exception`` if an invocation ends in any other status. Other
    ``botocore.exceptions.ClientError`` errors are propagated unchanged.
    """
    # Materialize once: the instances are needed both to send the command
    # and to poll each invocation. A one-shot iterable would otherwise be
    # exhausted by the first use and the wait loop silently skipped.
    instances = list(instances)

    res = ssmclient.send_command(
        InstanceIds=[i.id for i in instances],
        DocumentName=document_name,
        Parameters=parameters,
        CloudWatchOutputConfig={
            'CloudWatchOutputEnabled': True,
        },
    )

    command_id = res['Command']['CommandId']

    for instance in instances:
        while True:
            try:
                res = ssmclient.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=instance.id,
                )
            except botocore.exceptions.ClientError as e:
                if e.response['Error']['Code'] != 'InvocationDoesNotExist':
                    raise

                # The invocation is not visible yet; try again shortly.
                print('could not find SSM command invocation; waiting')
                time.sleep(1)
                continue

            status = res['Status']

            if status == 'Success':
                break

            if status in ('Pending', 'InProgress', 'Delayed'):
                time.sleep(2)
                continue

            raise Exception('command failed on %s: %s' % (
                instance.id, status))
614
614
615
615
@contextlib.contextmanager
def temporary_ec2_instances(ec2resource, config):
    """Context manager providing short-lived EC2 instances.

    Instances are launched with ``ec2resource.create_instances(**config)``
    and ``terminate()`` is called on each of them when the context manager
    exits, whether normally or because of an exception.

    The value bound by ``with ... as`` is whatever ``create_instances()``
    returned. The instances may not be usable right away: callers are
    responsible for waiting until they respond.
    """
    launched = None
    instance_ids = None

    try:
        launched = ec2resource.create_instances(**config)

        instance_ids = [inst.id for inst in launched]
        print('started instances: %s' % ' '.join(instance_ids))

        yield launched
    finally:
        # ``instance_ids`` stays unset if the launch itself failed, in which
        # case there is nothing to clean up.
        if instance_ids:
            print('terminating instances: %s' % ' '.join(instance_ids))
            for inst in launched:
                inst.terminate()
            print('terminated %d instances' % len(instance_ids))
646
646
647
647
@contextlib.contextmanager
def create_temp_windows_ec2_instances(c: AWSConnection, config):
    """Context manager providing temporary Windows EC2 instances.

    This is a higher-level wrapper around ``temporary_ec2_instances()``.
    It works on a deep copy of ``config``, to which it adds the
    ``hg-ephemeral-ec2-1`` instance profile, a ``Name`` tag of
    ``hg-temp-windows`` and user data rendered from ``WINDOWS_USER_DATA``
    with the automation's default password. It then waits for each
    instance to obtain a public IP address and to accept a Windows Remote
    Management connection as ``Administrator``.

    Each emitted instance gets a ``winrm_client`` attribute holding the
    value returned by ``wait_for_winrm()``.

    Raises ``ValueError`` if ``config`` already contains
    ``IamInstanceProfile`` or ``UserData``, since both are set here.
    """
    if 'IamInstanceProfile' in config:
        raise ValueError('IamInstanceProfile cannot be provided in config')
    if 'UserData' in config:
        raise ValueError('UserData cannot be provided in config')

    admin_password = c.automation.default_password()

    # Work on a copy so the caller's config is left untouched.
    launch_config = copy.deepcopy(config)
    launch_config['IamInstanceProfile'] = {'Name': 'hg-ephemeral-ec2-1'}

    name_tag_spec = {
        'ResourceType': 'instance',
        'Tags': [{'Key': 'Name', 'Value': 'hg-temp-windows'}],
    }
    launch_config.setdefault('TagSpecifications', []).append(name_tag_spec)

    launch_config['UserData'] = WINDOWS_USER_DATA % admin_password

    with temporary_ec2_instances(c.ec2resource, launch_config) as instances:
        wait_for_ip_addresses(instances)

        print('waiting for Windows Remote Management service...')

        for inst in instances:
            winrm = wait_for_winrm(
                inst.public_ip_address, 'Administrator', admin_password)
            print('established WinRM connection to %s' % inst.id)
            inst.winrm_client = winrm

        yield instances
685
685
686
686
def ensure_windows_dev_ami(c: AWSConnection, prefix='hg-'):
    """Ensure the Windows development AMI exists and is up to date.

    A fingerprint is derived from the instance configuration, the user
    data template, the initial bootstrap script and the bootstrap commands.
    Existing AMIs named ``<prefix>windows-dev`` that have no tags, or whose
    ``HGIMAGEFINGERPRINT`` tag differs from that fingerprint, are removed.

    If an AMI with a matching fingerprint remains, it is returned.
    Otherwise a temporary EC2 instance is started and bootstrapped, an
    image is created from it and tagged with the fingerprint, and that new
    image is returned once it becomes available.

    Returns an ``ec2.Image`` of either an existing AMI or a newly-built
    one.
    """
    ec2client = c.ec2client
    ec2resource = c.ec2resource
    ssmclient = c.session.client('ssm')

    name = '%s%s' % (prefix, 'windows-dev')

    root_volume = {
        'DeviceName': '/dev/sda1',
        'Ebs': {
            'DeleteOnTermination': True,
            'VolumeSize': 32,
            'VolumeType': 'gp2',
        },
    }

    config = {
        'BlockDeviceMappings': [root_volume],
        'ImageId': find_windows_server_2019_image(ec2resource).id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': 't3.medium',
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
    }

    # NOTE(review): the "installing OpenSSL client" message below presumably
    # means OpenSSH. It is reproduced verbatim because the command text
    # feeds into the image fingerprint computed further down.
    ssh_commands = [
        # Need to start the service so sshd_config is generated.
        'Start-Service sshd',
        'Write-Output "modifying sshd_config"',
        r'$content = Get-Content C:\ProgramData\ssh\sshd_config',
        '$content = $content -replace "Match Group administrators","" -replace "AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys",""',
        r'$content | Set-Content C:\ProgramData\ssh\sshd_config',
        'Import-Module OpenSSHUtils',
        r'Repair-SshdConfigPermission C:\ProgramData\ssh\sshd_config -Confirm:$false',
        'Restart-Service sshd',
        'Write-Output "installing OpenSSL client"',
        'Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0',
        'Set-Service -Name sshd -StartupType "Automatic"',
        'Write-Output "OpenSSH server running"',
    ]

    with INSTALL_WINDOWS_DEPENDENCIES.open('r', encoding='utf-8') as fh:
        dependency_commands = [line.rstrip() for line in fh]

    # Disable Windows Defender when bootstrapping because it just slows
    # things down. It is re-enabled as the very last command.
    commands = (
        ['Set-MpPreference -DisableRealtimeMonitoring $true']
        + ssh_commands
        + dependency_commands
        + ['Set-MpPreference -DisableRealtimeMonitoring $false']
    )

    # Compute a deterministic fingerprint to determine whether image needs
    # to be regenerated.
    fingerprint_inputs = {
        'instance_config': config,
        'user_data': WINDOWS_USER_DATA,
        'initial_bootstrap': WINDOWS_BOOTSTRAP_POWERSHELL,
        'bootstrap_commands': commands,
    }
    serialized = json.dumps(fingerprint_inputs, sort_keys=True)
    fingerprint = hashlib.sha256(serialized.encode('utf-8')).hexdigest()

    # Find existing AMIs with this name and delete the ones that are invalid.
    # Store a reference to a good image so it can be returned once the
    # image state is reconciled.
    images = ec2resource.images.filter(
        Filters=[{'Name': 'name', 'Values': [name]}])

    existing_image = None

    for image in images:
        if image.tags is None:
            print('image %s for %s lacks required tags; removing' % (
                image.id, image.name))
            remove_ami(ec2resource, image)
            continue

        tags = {t['Key']: t['Value'] for t in image.tags}

        if tags.get('HGIMAGEFINGERPRINT') == fingerprint:
            existing_image = image
            continue

        print('image %s for %s has wrong fingerprint; removing' % (
            image.id, image.name))
        remove_ami(ec2resource, image)

    if existing_image:
        return existing_image

    print('no suitable Windows development image found; creating one...')

    with create_temp_windows_ec2_instances(c, config) as instances:
        assert len(instances) == 1
        instance = instances[0]

        wait_for_ssm(ssmclient, [instance])

        # On first boot, install various Windows updates.
        # We would ideally use PowerShell Remoting for this. However, there are
        # trust issues that make it difficult to invoke Windows Update
        # remotely. So we use SSM, which has a mechanism for running Windows
        # Update.
        print('installing Windows features...')
        run_ssm_command(
            ssmclient,
            [instance],
            'AWS-RunPowerShellScript',
            {
                'commands': WINDOWS_BOOTSTRAP_POWERSHELL.split('\n'),
            },
        )

        # Reboot so all updates are fully applied.
        #
        # We don't use instance.reboot() here because it is asynchronous and
        # we don't know when exactly the instance has rebooted. It could take
        # a while to stop and we may start trying to interact with the instance
        # before it has rebooted.
        print('rebooting instance %s' % instance.id)
        instance.stop()
        ec2client.get_waiter('instance_stopped').wait(
            InstanceIds=[instance.id],
            WaiterConfig={
                'Delay': 5,
            })

        instance.start()
        wait_for_ip_addresses([instance])

        # There is a race condition here between the User Data PS script running
        # and us connecting to WinRM. This can manifest as
        # "AuthorizationManager check failed" failures during run_powershell().
        # TODO figure out a workaround.

        print('waiting for Windows Remote Management to come back...')
        client = wait_for_winrm(instance.public_ip_address, 'Administrator',
                                c.automation.default_password())
        print('established WinRM connection to %s' % instance.id)
        instance.winrm_client = client

        print('bootstrapping instance...')
        run_powershell(instance.winrm_client, '\n'.join(commands))

        print('bootstrap completed; stopping %s to create image' % instance.id)
        instance.stop()

        ec2client.get_waiter('instance_stopped').wait(
            InstanceIds=[instance.id],
            WaiterConfig={
                'Delay': 5,
            })
        print('%s is stopped' % instance.id)

        image = instance.create_image(
            Name=name,
            Description='Mercurial Windows development environment',
        )

        # Record the fingerprint so later runs can recognize this image.
        image.create_tags(Tags=[
            {
                'Key': 'HGIMAGEFINGERPRINT',
                'Value': fingerprint,
            },
        ])

        print('waiting for image %s' % image.id)

        ec2client.get_waiter('image_available').wait(
            ImageIds=[image.id],
        )

        print('image %s available as %s' % (image.id, image.name))

        return image
856
872
857
873
@contextlib.contextmanager
def temporary_windows_dev_instances(c: AWSConnection, image, instance_type,
                                    prefix='hg-', disable_antivirus=False):
    """Context manager providing a temporary Windows development instance.

    A single instance of type ``instance_type`` is launched from ``image``
    through ``create_temp_windows_ec2_instances()``, using the
    ``<prefix>automation`` key pair and the ``windows-dev-1`` security
    group. When ``disable_antivirus`` is true, Windows Defender realtime
    monitoring is switched off on each instance via PowerShell before the
    instances are handed to the caller.

    Context manager resolves to the list of ``EC2.Instance`` that were
    created.
    """
    root_volume = {
        'DeviceName': '/dev/sda1',
        'Ebs': {
            'DeleteOnTermination': True,
            'VolumeSize': 32,
            'VolumeType': 'gp2',
        },
    }

    config = {
        'BlockDeviceMappings': [root_volume],
        'ImageId': image.id,
        'InstanceInitiatedShutdownBehavior': 'stop',
        'InstanceType': instance_type,
        'KeyName': '%sautomation' % prefix,
        'MaxCount': 1,
        'MinCount': 1,
        'SecurityGroupIds': [c.security_groups['windows-dev-1'].id],
    }

    with create_temp_windows_ec2_instances(c, config) as instances:
        if disable_antivirus:
            for inst in instances:
                run_powershell(
                    inst.winrm_client,
                    'Set-MpPreference -DisableRealtimeMonitoring $true')

        yield instances
General Comments 0
You need to be logged in to leave comments. Login now