diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b9642b0 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,55 @@ +--- +minimum_pre_commit_version: 2.4.0 +repos: + # ----- Formatting ----------------------------------------------------------------------------> + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: trailing-whitespace # Trims trailing whitespace. + args: [--markdown-linebreak-ext=md] + - id: mixed-line-ending # Replaces or checks mixed line ending. + args: [--fix=lf] + - id: end-of-file-fixer # Makes sure files end in a newline and only a newline. + - id: check-merge-conflict # Check for files that contain merge conflict strings. + - id: check-ast # Simply check whether files parse as valid python. + + - repo: https://github.com/asottile/pyupgrade + rev: v2.23.3 + hooks: + - id: pyupgrade + name: Rewrite Code to be Py3.7+ + args: [ + --py37-plus + ] + + - repo: https://github.com/asottile/reorder_python_imports + rev: v2.6.0 + hooks: + - id: reorder-python-imports + args: [ + --py37-plus, + ] + + - repo: https://github.com/psf/black + rev: 21.7b0 + hooks: + - id: black + args: [-l 100, -S] + + - repo: https://github.com/asottile/blacken-docs + rev: v1.10.0 + hooks: + - id: blacken-docs + args: [--skip-errors] + files: ^docs/.*\.md$ + additional_dependencies: [black==21.7b0] + # <---- Formatting ----------------------------------------------------------------------------- + + # ----- Security ------------------------------------------------------------------------------> + - repo: https://github.com/PyCQA/bandit + rev: "1.7.0" + hooks: + - id: bandit + name: Run bandit against the code base + args: [--silent, -lll] + # <---- Security ------------------------------------------------------------------------------- diff --git a/EksCreationEngine.py b/EksCreationEngine.py index ea35bf1..fb0819d 100644 --- a/EksCreationEngine.py +++ b/EksCreationEngine.py @@ -1,39 +1,37 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. import base64 +import json +import re +import subprocess import sys +import time +from datetime import datetime + import boto3 import botocore.exceptions -import json -from datetime import datetime -import time -import subprocess -import re + from plugins.ECEDatadog import DatadogSetup from plugins.ECEFalco import FalcoSetup cache = list() -class ClusterManager(): +class ClusterManager: def get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_architecture): ''' This function either receives an AMI ID from main.py or receives the default value of 'SSM' which is matched against the arguments @@ -47,22 +45,30 @@ def get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_arch # AMD64 if ami_architecture == 'amd64': # /aws/service/canonical/ubuntu/eks/20.04/1.21/stable/current/amd64/hvm/ebs-gp2/ami-id - publicParameter = str(f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id') + publicParameter = str( + f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id' + ) # ARM64 else: # /aws/service/canonical/ubuntu/eks/20.04/1.21/stable/current/arm64/hvm/ebs-gp2/ami-id - publicParameter = str(f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id') + publicParameter = str( + f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id' + ) # Amazon Linux 2 # Public Params search in the console is fucky, check here: https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html else: # AMD64 if ami_architecture == 'amd64': # /aws/service/eks/optimized-ami/1.21/amazon-linux-2/recommended/image_id - publicParameter = str(f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2/recommended/image_id') + publicParameter = str( + f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2/recommended/image_id' + ) # ARM64 else: # /aws/service/eks/optimized-ami/1.21/amazon-linux-2-arm64/recommended/image_id - publicParameter = str(f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2-arm64/recommended/image_id') + publicParameter = str( + f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2-arm64/recommended/image_id' + ) # retrieve the AMI ID and return it try: @@ -98,12 +104,10 @@ def create_cluster_svc_role(cluster_role_name): 'Statement': [ { 'Effect': 'Allow', - 'Principal': { - 'Service': 'eks.amazonaws.com' - }, - 'Action': 'sts:AssumeRole' + 'Principal': {'Service': 'eks.amazonaws.com'}, + 'Action': 'sts:AssumeRole', } - ] + ], } try: @@ -114,45 +118,29 @@ def create_cluster_svc_role(cluster_role_name): Description='Allows access to other AWS service resources that are required to operate clusters managed by EKS', MaxSessionDuration=3600, Tags=[ - { - 'Key': 'Name', - 'Value': cluster_role_name - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': cluster_role_name}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], ) # Attach required Cluster Policy (AWS Managed) or get following error # botocore.errorfactory.InvalidParameterException: An error occurred (InvalidParameterException) when calling the CreateCluster operation: The provided role doesn't have the Amazon EKS Managed Policies associated with it. Please ensure the following policies [arn:aws:iam::aws:policy/AmazonEKSClusterPolicy] are attached waiter = iam.get_waiter('role_exists') - waiter.wait( - RoleName=cluster_role_name, - WaiterConfig={ - 'Delay': 3, - 'MaxAttempts': 20 - } - ) + waiter.wait(RoleName=cluster_role_name, WaiterConfig={'Delay': 3, 'MaxAttempts': 20}) iam.attach_role_policy( RoleName=cluster_role_name, - PolicyArn='arn:aws:iam::aws:policy/AmazonEKSClusterPolicy' + PolicyArn='arn:aws:iam::aws:policy/AmazonEKSClusterPolicy', ) roleArn = str(r['Role']['Arn']) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead if error.response['Error']['Code'] == 'EntityAlreadyExists': - print(f'The supplied role name of {cluster_role_name} already exists, attempting to use it') + print( + f'The supplied role name of {cluster_role_name} already exists, attempting to use it' + ) roleArn = f'arn:aws:iam::{acctId}:role/{cluster_role_name}' else: print(f'Error encountered: {error}') @@ -191,14 +179,11 @@ def create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name): 's3:GetObjectAcl', 's3:GetObject', 's3:GetBucketAcl', - 's3:GetBucketLocation' + 's3:GetBucketLocation', ], - 'Resource': [ - f'arn:aws:s3:::{bucket_name}/*', - f'arn:aws:s3:::{bucket_name}' - ] + 'Resource': [f'arn:aws:s3:::{bucket_name}/*', f'arn:aws:s3:::{bucket_name}'], } - ] + ], } try: @@ -208,30 +193,20 @@ def create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name): PolicyDocument=json.dumps(iamPolicyDoc), Description='Allows access to specific S3 buckets for node groups managed by EKS - Created by Lightspin ECE', Tags=[ - { - 'Key': 'Name', - 'Value': policyName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': policyName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], ) policyArn = str(r['Policy']['Arn']) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead # we will assume it has the right permissions after all if error.response['Error']['Code'] == 'EntityAlreadyExists': - print(f'The supplied role policy name of {policyName} already exists, attempting to use it') + print( + f'The supplied role policy name of {policyName} already exists, attempting to use it' + ) policyArn = f'arn:aws:iam::{acctId}:policy/{policyName}' else: print(f'Error encountered: {error}') @@ -266,12 +241,14 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes 'arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy', 'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly', 'arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy', - 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore' + 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore', ] # Grab S3 Node Group policy from other Function & add to List if MDE is enabled if mde_on_nodes == 'True': - s3PolicyArn = ClusterManager.create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name) + s3PolicyArn = ClusterManager.create_managed_nodegroup_s3_policy( + bucket_name, nodegroup_role_name + ) nodegroupAwsManagedPolicies.append(s3PolicyArn) # Trust Policy for EKS NodeGroup Role trusts EC2 @@ -280,12 +257,10 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes 'Statement': [ { 'Effect': 'Allow', - 'Principal': { - 'Service': 'ec2.amazonaws.com' - }, - 'Action': 'sts:AssumeRole' + 'Principal': {'Service': 'ec2.amazonaws.com'}, + 'Action': 'sts:AssumeRole', } - ] + ], } try: @@ -296,34 +271,16 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes Description='Allows access to other AWS service resources that are required to operate node groups managed by EKS', MaxSessionDuration=3600, Tags=[ - { - 'Key': 'Name', - 'Value': roleName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': roleName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], ) roleArn = str(r['Role']['Arn']) waiter = iam.get_waiter('role_exists') - waiter.wait( - RoleName=roleName, - WaiterConfig={ - 'Delay': 3, - 'MaxAttempts': 20 - } - ) + waiter.wait(RoleName=roleName, WaiterConfig={'Delay': 3, 'MaxAttempts': 20}) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead @@ -341,10 +298,7 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes # Loop through List of policies and attach Policies to Role, handle errors if already attached try: for policy in nodegroupAwsManagedPolicies: - iam.attach_role_policy( - RoleName=roleName, - PolicyArn=policy - ) + iam.attach_role_policy(RoleName=roleName, PolicyArn=policy) except Exception as e: print(f'Error encountered: {e}') RollbackManager.rollback_from_cache(cache=cache) @@ -385,7 +339,7 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): for p in additional_ports: if int(p) not in defaultPortSet: defaultPortSet.append(int(p)) - + # remove the list, it's not needed anymore del additional_ports @@ -420,41 +374,26 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): { 'ResourceType': 'security-group', 'Tags': [ - { - 'Key': 'Name', - 'Value': sgName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - }, + {'Key': 'Name', 'Value': sgName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, # This tag is required per AWS Docs # One, and only one, of the security groups associated to your nodes should have the following tag applied: For more information about tagging, see Working with tags using the console. kubernetes.io/cluster/cluster-name: owned - { - 'Key': f'kubernetes.io/cluster/{cluster_name}', - 'Value': 'owned' - } - ] + {'Key': f'kubernetes.io/cluster/{cluster_name}', 'Value': 'owned'}, + ], } - ] + ], ) secGroupId = str(r['GroupId']) - sgCache = { - 'ClusterSecurityGroupId': secGroupId - } + sgCache = {'ClusterSecurityGroupId': secGroupId} cache.append(sgCache) print(f'Added {sgName} ID {secGroupId} to Cache') - print(f'Authorizing ingress for Ports {defaultPortSet} for CIDRS {allVpcCidrs} for {sgName}') + print( + f'Authorizing ingress for Ports {defaultPortSet} for CIDRS {allVpcCidrs} for {sgName}' + ) # Now start adding Inbound Rules per CIDR and per Port # Add conditional logic for port 53 (DNS) to create both TCP and UDP Rules @@ -471,9 +410,9 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'IpRanges': [ { 'CidrIp': cidr, - 'Description': f'Allow tcp {port} to {cidr}' + 'Description': f'Allow tcp {port} to {cidr}', } - ] + ], }, { 'FromPort': int(port), @@ -482,34 +421,22 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'IpRanges': [ { 'CidrIp': cidr, - 'Description': f'Allow udp {port} to {cidr}' + 'Description': f'Allow udp {port} to {cidr}', } - ] - } + ], + }, ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}{cidr}{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}{cidr}{port}'}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) else: ec2.authorize_security_group_ingress( @@ -522,34 +449,22 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'IpRanges': [ { 'CidrIp': cidr, - 'Description': f'Allow tcp {port} to {cidr}' + 'Description': f'Allow tcp {port} to {cidr}', } - ] + ], } ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}{cidr}{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}{cidr}{port}'}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) # Adding inbound rules per Port for the Security Group itself (talk to self for Node-Cluster Comms) @@ -565,9 +480,9 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'UserIdGroupPairs': [ { 'Description': f'Allow tcp {port} to {secGroupId}', - 'GroupId': secGroupId + 'GroupId': secGroupId, } - ] + ], }, { 'FromPort': int(port), @@ -576,34 +491,22 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'UserIdGroupPairs': [ { 'Description': f'Allow udp {port} to {secGroupId}', - 'GroupId': secGroupId + 'GroupId': secGroupId, } - ] - } + ], + }, ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}{secGroupId}{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}{secGroupId}{port}'}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) else: ec2.authorize_security_group_ingress( @@ -612,38 +515,26 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): { 'FromPort': int(port), 'ToPort': int(port), - 'IpProtocol': 'tcp', + 'IpProtocol': 'tcp', 'UserIdGroupPairs': [ { 'Description': f'Allow tcp {port} to {secGroupId}', - 'GroupId': secGroupId + 'GroupId': secGroupId, } - ] + ], } ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}{secGroupId}{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}{secGroupId}{port}'}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) # Adding TCP 443 (HTTPS) from the internet which is required for patching and agent communications @@ -655,36 +546,21 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'ToPort': 443, 'IpProtocol': 'tcp', 'IpRanges': [ - { - 'CidrIp': '0.0.0.0/0', - 'Description': f'Allow tcp 443 to Internet' - } - ] + {'CidrIp': '0.0.0.0/0', 'Description': f'Allow tcp 443 to Internet'} + ], } ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}Internet{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}Internet{port}'}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') @@ -719,23 +595,11 @@ def encryption_key_factory(cluster_name): KeySpec='SYMMETRIC_DEFAULT', Origin='AWS_KMS', Tags=[ - { - 'TagKey': 'Name', - 'TagValue': f'{cluster_name}-EKS-CMK' - }, - { - 'TagKey': 'CreatedBy', - 'TagValue': createdBy - }, - { - 'TagKey': 'CreatedAt', - 'TagValue': createdAt - }, - { - 'TagKey': 'CreatedWith', - 'TagValue': 'Lightspin ECE' - } - ] + {'TagKey': 'Name', 'TagValue': f'{cluster_name}-EKS-CMK'}, + {'TagKey': 'CreatedBy', 'TagValue': createdBy}, + {'TagKey': 'CreatedAt', 'TagValue': createdAt}, + {'TagKey': 'CreatedWith', 'TagValue': 'Lightspin ECE'}, + ], )['KeyMetadata']['Arn'] except KeyError as ke: print(f'Error encountered: {ke}') @@ -748,8 +612,10 @@ def encryption_key_factory(cluster_name): RollbackManager.rollback_from_cache(cache=cache) return kmsKeyArn - - def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports): + + def create_cluster( + cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports + ): ''' This function uses the EKS Boto3 Client to create a cluster, taking inputs from main.py to determing naming & Encryption ''' @@ -764,7 +630,9 @@ def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_i clusterRoleArn = ClusterManager.create_cluster_svc_role(cluster_role_name) # Call `cluster_security_group_factory` to create or re-use an EKS cluster security group that allows minimum necessary comms intra-VPC - securityGroupId = ClusterManager.cluster_security_group_factory(cluster_name, vpc_id, additional_ports) + securityGroupId = ClusterManager.cluster_security_group_factory( + cluster_name, vpc_id, additional_ports + ) # Call `encryption_key_factory` to create a KMS Key ARN. Simple! (We'll add the Key Policy later) kmsKeyArn = ClusterManager.encryption_key_factory(cluster_name) @@ -779,33 +647,30 @@ def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_i 'subnetIds': subnet_ids, 'securityGroupIds': [securityGroupId], 'endpointPublicAccess': False, - 'endpointPrivateAccess': True + 'endpointPrivateAccess': True, }, logging={ 'clusterLogging': [ - { + { # all Logging types are enabled here - 'types': ['api','audit','authenticator','controllerManager','scheduler'], - 'enabled': True + 'types': [ + 'api', + 'audit', + 'authenticator', + 'controllerManager', + 'scheduler', + ], + 'enabled': True, } ] }, - encryptionConfig=[ - { - 'resources': [ - 'secrets' - ], - 'provider': { - 'keyArn': kmsKeyArn - } - } - ], + encryptionConfig=[{'resources': ['secrets'], 'provider': {'keyArn': kmsKeyArn}}], tags={ 'Name': cluster_name, 'CreatedBy': createdBy, 'CreatedAt': createdAt, - 'CreatedWith': 'Lightspin ECE' - } + 'CreatedWith': 'Lightspin ECE', + }, ) # Establish provided EKS Waiter() for cluster to come up @@ -814,13 +679,7 @@ def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_i waiter = eks.get_waiter('cluster_active') - waiter.wait( - name=cluster_name, - WaiterConfig={ - 'Delay': 30, - 'MaxAttempts': 40 - } - ) + waiter.wait(name=cluster_name, WaiterConfig={'Delay': 30, 'MaxAttempts': 40}) finalClusterName = str(r['cluster']['name']) @@ -855,7 +714,9 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os ''' eks = boto3.client('eks') - print(f'Retrieving Certificate Authority and API Server URL information for bootstrap script') + print( + f'Retrieving Certificate Authority and API Server URL information for bootstrap script' + ) # DescribeCluster and pull necessary values to set as env vars within the bootstrap c = eks.describe_cluster(name=cluster_name) @@ -945,7 +806,20 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os return userData - def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name, launch_template_name, kms_key_arn, securityGroupId, ebs_volume_size, instance_type, mde_on_nodes, ami_os, ami_architecture): + def create_launch_template( + cluster_name, + kubernetes_version, + ami_id, + bucket_name, + launch_template_name, + kms_key_arn, + securityGroupId, + ebs_volume_size, + instance_type, + mde_on_nodes, + ami_os, + ami_architecture, + ): ''' This function creates an EC2 Launch Template using encryption and AMI data supplied from main.py and passes it to the `builder` function where final EKS Nodegroup creation takes place @@ -958,16 +832,20 @@ def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name createdAt = str(datetime.utcnow()) # Pull latest AMI ID for EKS-optimized Ubuntu 20.04LTS for specified K8s Version in main.py - amiId = ClusterManager.get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_architecture) + amiId = ClusterManager.get_latest_eks_optimized_ubuntu( + kubernetes_version, ami_id, ami_os, ami_architecture + ) # Retrieve Base64 metadata from bootstrap generation function - this will download and install MDE (MDATP) from files in the S3 bucket specified in main.py if --mde_on_nodes is true. Will use ami_os arguements to create different UserData as well - userData = ClusterManager.generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os) + userData = ClusterManager.generate_nodegroup_bootstrap( + bucket_name, cluster_name, mde_on_nodes, ami_os + ) # For IMDSv2 - keeping this outside for eventual modification of hop limits? metadataOptions = { 'HttpTokens': 'required', 'HttpPutResponseHopLimit': 2, - 'HttpEndpoint': 'enabled' + 'HttpEndpoint': 'enabled', } try: @@ -985,8 +863,8 @@ def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name 'DeleteOnTermination': True, 'KmsKeyId': kms_key_arn, 'VolumeSize': int(ebs_volume_size), - 'VolumeType': 'gp2' - } + 'VolumeType': 'gp2', + }, } ], 'ImageId': amiId, @@ -998,47 +876,23 @@ def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name { 'ResourceType': 'instance', 'Tags': [ - { - 'Key': 'Name', - 'Value': str(f'{launch_template_name}Node') - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': str(f'{launch_template_name}Node')}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], }, { 'ResourceType': 'volume', 'Tags': [ - { - 'Key': 'Name', - 'Value': str(f'{launch_template_name}Node') - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] - } - ] - } + {'Key': 'Name', 'Value': str(f'{launch_template_name}Node')}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], + }, + ], + }, ) launchTemplateId = str(r['LaunchTemplate']['LaunchTemplateId']) @@ -1050,8 +904,32 @@ def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name RollbackManager.rollback_from_cache(cache=cache) return launchTemplateId - - def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_type, cluster_name, cluster_role_name, nodegroup_name, nodegroup_role_name, launch_template_name, vpc_id, subnet_ids, node_count, mde_on_nodes, additional_ports, falco_bool, falco_sidekick_destination_type, falco_sidekick_destination, ami_os, ami_architecture, datadog_api_key, datadog_bool, addtl_auth_principals): + + def builder( + kubernetes_version, + bucket_name, + ebs_volume_size, + ami_id, + instance_type, + cluster_name, + cluster_role_name, + nodegroup_name, + nodegroup_role_name, + launch_template_name, + vpc_id, + subnet_ids, + node_count, + mde_on_nodes, + additional_ports, + falco_bool, + falco_sidekick_destination_type, + falco_sidekick_destination, + ami_os, + ami_architecture, + datadog_api_key, + datadog_bool, + addtl_auth_principals, + ): ''' This function is the 'brain' that controls creation and calls the required functions to build infrastructure and services (EKS, EC2, IAM). This function also stores all required arguments into cache to facilitate rollbacks upon errors @@ -1064,7 +942,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t 'ClusterRoleName': cluster_role_name, 'NodegroupName': nodegroup_name, 'NodegroupRoleName': nodegroup_role_name, - 'LaunchTemplateName': launch_template_name + 'LaunchTemplateName': launch_template_name, } cache.append(cacheDict) @@ -1081,7 +959,14 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t # Create an EKS Cluster by calling `create_cluster` - this will take the longest, and if it fails, then other infrastructure won't be created # the positional selectors are for when you return multiple values, they are bundled in a tuple, and have to be accessed in the order they're provided - callClusterManager = ClusterManager.create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports) + callClusterManager = ClusterManager.create_cluster( + cluster_name, + kubernetes_version, + cluster_role_name, + subnet_ids, + vpc_id, + additional_ports, + ) clusterName = callClusterManager[0] securityGroupId = callClusterManager[1] kms_key_arn = callClusterManager[2] @@ -1089,7 +974,9 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t # Passes the S3 Bucket name to the `create_managed_nodegroup_role` function which in turn passes it to the `create_managed_nodegroup_s3_policy` # function which allows your Nodegroups to pull artifacts from S3 as part of bootstrapping - nodegroupRoleArn = ClusterManager.create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes) + nodegroupRoleArn = ClusterManager.create_managed_nodegroup_role( + bucket_name, nodegroup_role_name, mde_on_nodes + ) # Now we can attach our proper Key Policy to the KMS Key since we now have all Roles ready @@ -1104,7 +991,9 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t seshRoleCheck = seshRoleRegex.search(createdBy) # On match to Regex do stupid stuff >:( if seshRoleCheck: - print(f'Your ARN from STS AssumeRole {createdBy} matches a temporary Session ARN, attempting to find your upstream IAM Role') + print( + f'Your ARN from STS AssumeRole {createdBy} matches a temporary Session ARN, attempting to find your upstream IAM Role' + ) roleNameSplit = createdBy.split('/')[1] createdByRoleArn = f'arn:aws:iam::{acctId}:role/{roleNameSplit}' print(f'Your Role ARN upstream to your session was determined as {createdByRoleArn}') @@ -1119,7 +1008,10 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t slrRole = str(r['Role']['RoleName']) print(f'Created Service-linked Role for Autoscaling called {slrRole}') except Exception as e: - if str(e) == 'An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForAutoScaling has been taken in this account, please try a different suffix.': + if ( + str(e) + == 'An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForAutoScaling has been taken in this account, please try a different suffix.' + ): pass else: print(f'Error encountered: {e}') @@ -1132,7 +1024,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t clusterRoleArn, nodegroupRoleArn, createdByRoleArn, - f'arn:aws:iam::{acctId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling' + f'arn:aws:iam::{acctId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling', ] # Check if additional AuthZ IAM Principals are even provided. If so, add them to the list if they're not there already @@ -1142,18 +1034,16 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t kmsAuthZPrincipals.append(arn) keyPolicyJson = { - 'Version':'2012-10-17', - 'Id':'ecekeypolicy', + 'Version': '2012-10-17', + 'Id': 'ecekeypolicy', 'Statement': [ # full key usage by whoever creates the key { 'Sid': 'Key Creator Admin', 'Effect': 'Allow', - 'Principal': { - 'AWS': createdByRoleArn - }, - 'Action':'kms:*', - 'Resource':'*' + 'Principal': {'AWS': createdByRoleArn}, + 'Action': 'kms:*', + 'Resource': '*', }, # This allows usage of the key by the Cluster & Nodegroup and aws-managed service principals # Creator is added throughout as well @@ -1163,38 +1053,28 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t 'Effect': 'Allow', 'Principal': { 'AWS': kmsAuthZPrincipals, - 'Service': [ - 'autoscaling.amazonaws.com', - 'ec2.amazonaws.com' - ] + 'Service': ['autoscaling.amazonaws.com', 'ec2.amazonaws.com'], }, 'Action': [ 'kms:Encrypt', 'kms:Decrypt', 'kms:ReEncrypt*', 'kms:GenerateDataKey*', - 'kms:DescribeKey' + 'kms:DescribeKey', ], - 'Resource': '*' + 'Resource': '*', }, { 'Sid': 'Allow attachment of persistent resources', 'Effect': 'Allow', 'Principal': { 'AWS': kmsAuthZPrincipals, - 'Service': [ - 'autoscaling.amazonaws.com', - 'ec2.amazonaws.com' - ] + 'Service': ['autoscaling.amazonaws.com', 'ec2.amazonaws.com'], }, - 'Action': [ - 'kms:CreateGrant', - 'kms:ListGrants', - 'kms:RevokeGrant' - ], - 'Resource': '*' - } - ] + 'Action': ['kms:CreateGrant', 'kms:ListGrants', 'kms:RevokeGrant'], + 'Resource': '*', + }, + ], } # For whatever reason, role propagation is a bit delayed with registration on the KMS Resource-based resource policy side @@ -1204,9 +1084,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t try: kms.put_key_policy( - KeyId=kms_key_arn, - PolicyName='default', - Policy=json.dumps(keyPolicyJson) + KeyId=kms_key_arn, PolicyName='default', Policy=json.dumps(keyPolicyJson) ) print(f'Key Policy attached to {kms_key_arn}') except KeyError as ke: @@ -1218,9 +1096,22 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) - + # Passes various arguements to the `create_launch_template` which returns a Launch Template ID (of the latest version) to pass to the Nodegroup creation payload - launchTemplateId = ClusterManager.create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name, launch_template_name, kms_key_arn, securityGroupId, ebs_volume_size, instance_type, mde_on_nodes, ami_os, ami_architecture) + launchTemplateId = ClusterManager.create_launch_template( + cluster_name, + kubernetes_version, + ami_id, + bucket_name, + launch_template_name, + kms_key_arn, + securityGroupId, + ebs_volume_size, + instance_type, + mde_on_nodes, + ami_os, + ami_architecture, + ) print(f'Creating Nodegroup {nodegroup_name} for Cluster {clusterName}') @@ -1232,20 +1123,18 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t scalingConfig={ 'minSize': int(node_count), 'maxSize': int(node_count) * 2, - 'desiredSize': int(node_count) + 'desiredSize': int(node_count), }, nodeRole=nodegroupRoleArn, subnets=subnet_ids, - launchTemplate={ - 'id': launchTemplateId - }, + launchTemplate={'id': launchTemplateId}, capacityType='ON_DEMAND', tags={ 'Name': nodegroup_name, 'CreatedBy': createdBy, 'CreatedAt': createdAt, - 'CreatedWith': 'Lightspin ECE' - } + 'CreatedWith': 'Lightspin ECE', + }, ) # Await Nodegroups to come online @@ -1256,10 +1145,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t waiter.wait( clusterName=clusterName, nodegroupName=nodegroup_name, - WaiterConfig={ - 'Delay': 30, - 'MaxAttempts': 80 - } + WaiterConfig={'Delay': 30, 'MaxAttempts': 80}, ) except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') @@ -1276,7 +1162,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t # Setup first time cluster connection with AWS CLI updateKubeconfigCmd = f'aws eks update-kubeconfig --region {awsRegion} --name {clusterName}' - updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, capture_output=True) print(updateKubeconfigProc.stdout.decode('utf-8')) # If additional principals are required to be authorized, attempt to do so @@ -1285,12 +1171,12 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t # Split out the name part of the Role addtlRoleName = str(arn.split('/')[1]) # Create a patch object to add into - newAuthZScript=f'''ROLE=" - rolearn: {arn}\\n username: {addtlRoleName}\\n groups:\\n - system:masters" - kubectl get -n kube-system configmap/aws-auth -o yaml | awk "/mapRoles: \|/{{print;print \\"$ROLE\\";next}}1" > /tmp/aws-auth-patch.yml + newAuthZScript = f'''ROLE=" - rolearn: {arn}\\n username: {addtlRoleName}\\n groups:\\n - system:masters" + kubectl get -n kube-system configmap/aws-auth -o yaml | awk "/mapRoles: \\|/{{print;print \\"$ROLE\\";next}}1" > /tmp/aws-auth-patch.yml kubectl patch configmap/aws-auth -n kube-system --patch "$(cat /tmp/aws-auth-patch.yml)" ''' - newAuthZScriptProc = subprocess.run(newAuthZScript, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + newAuthZScriptProc = subprocess.run(newAuthZScript, shell=True, capture_output=True) print(newAuthZScriptProc.stdout.decode('utf-8')) ''' @@ -1298,26 +1184,27 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t ''' if falco_bool == 'True': FalcoSetup.falco_initialization( - cluster_name=clusterName, + cluster_name=clusterName, falco_mode='Create', - falco_sidekick_destination_type=falco_sidekick_destination_type, + falco_sidekick_destination_type=falco_sidekick_destination_type, falco_sidekick_destination=falco_sidekick_destination, - datadog_api_key=datadog_api_key + datadog_api_key=datadog_api_key, ) ''' Send a call into plugins.ECEDatadog ''' if datadog_bool == 'True': DatadogSetup.initialization( - cluster_name=clusterName, - datadog_mode='Create', - datadog_api_key=datadog_api_key + cluster_name=clusterName, datadog_mode='Create', datadog_api_key=datadog_api_key ) + + ''' This Class handles all update tasks to the Clusters, such as version bumps to latest Kubenertes Versions ''' -class UpdateManager(): - + + +class UpdateManager: def update_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version): ''' This function attempts to update existing Cluster and Nodegroup to a specified Kubernetes Version by invoking @@ -1330,10 +1217,14 @@ def update_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version): try: existingClusterVersion = eks.describe_cluster(name=cluster_name)['cluster']['version'] if existingClusterVersion == kubernetes_version: - print(f'EKS Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting') + print( + f'EKS Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting' + ) sys.exit(2) else: - print(f'EKS Cluster {cluster_name} is viable to update from Kubernetes version {existingClusterVersion} to {kubernetes_version}') + print( + f'EKS Cluster {cluster_name} is viable to update from Kubernetes version {existingClusterVersion} to {kubernetes_version}' + ) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead if error.response['Error']['Code'] == 'ResourceNotFoundException': @@ -1344,22 +1235,32 @@ def update_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version): # Lookup EKS Nodegroup to see if specified K8s version from main.py matches, if so exit try: - existingNodegroupVersion = eks.describe_cluster(name=cluster_name,nodegroupName=nodegroup_name)['nodegroup']['version'] + existingNodegroupVersion = eks.describe_cluster( + name=cluster_name, nodegroupName=nodegroup_name + )['nodegroup']['version'] if existingNodegroupVersion == kubernetes_version: - print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting') + print( + f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting' + ) sys.exit(2) else: - print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is viable to update from Kubernetes version {existingNodegroupVersion} to {kubernetes_version}') + print( + f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is viable to update from Kubernetes version {existingNodegroupVersion} to {kubernetes_version}' + ) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead if error.response['Error']['Code'] == 'ResourceNotFoundException': - print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} does not exist! Aborting') + print( + f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} does not exist! Aborting' + ) sys.exit(2) else: raise error - UpdateManager.update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version) + UpdateManager.update_nodegroup_kubernetes_version( + cluster_name, nodegroup_name, kubernetes_version + ) UpdateManager.update_cluster_kubernetes_version(cluster_name, kubernetes_version) @@ -1367,16 +1268,16 @@ def update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes ''' This function carries out the update and waiter for EKS Nodegroup K8s version bumps ''' - print(f'Updating Kubernetes version for EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}') + print( + f'Updating Kubernetes version for EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}' + ) eks = boto3.client('eks') # Update the Nodegroup K8s version and parse the EKS Update ID for later use # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.update_nodegroup_version r = eks.update_nodegroup_version( - clusterName=cluster_name, - nodegroupName=nodegroup_name, - version=kubernetes_version + clusterName=cluster_name, nodegroupName=nodegroup_name, version=kubernetes_version ) updateId = str(r['update']['id']) @@ -1386,18 +1287,20 @@ def update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes # Break the loop on Success, continue on 'InProgress', and exit code 2 on failures or cancellations while True: d = eks.describe_update( - name=cluster_name, - updateId=updateId, - nodegroupName=nodegroup_name + name=cluster_name, updateId=updateId, nodegroupName=nodegroup_name ) updateStatus = str(d['update']['status']) # if/else logic time if updateStatus == 'Successful': - print(f'Nodegroup {nodegroup_name} in Cluster {cluster_name} has been successfully updated.') + print( + f'Nodegroup {nodegroup_name} in Cluster {cluster_name} has been successfully updated.' + ) break elif updateStatus == 'Failed' or 'Cancelled': errorMessage = str(d['update']['errors']) - print(f'Nodegroup {nodegroup_name} in Cluster {cluster_name} update has been cancelled or has failed!') + print( + f'Nodegroup {nodegroup_name} in Cluster {cluster_name} update has been cancelled or has failed!' + ) print(f'Error message: {errorMessage}') sys.exit(2) else: @@ -1417,10 +1320,7 @@ def update_cluster_kubernetes_version(cluster_name, kubernetes_version): # Update the Nodegroup K8s version and parse the EKS Update ID for later use # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.update_nodegroup_version - r = eks.update_nodegroup_version( - clusterName=cluster_name, - version=kubernetes_version - ) + r = eks.update_nodegroup_version(clusterName=cluster_name, version=kubernetes_version) updateId = str(r['update']['id']) print(f'Monitoring EKS Update ID {updateId} for failure or success state.') @@ -1428,10 +1328,7 @@ def update_cluster_kubernetes_version(cluster_name, kubernetes_version): # Use a `while True` loop and 15 second sleeps to watch the update progress of the cluster # Break the loop on Success, continue on 'InProgress', and exit code 2 on failures or cancellations while True: - d = eks.describe_update( - name=cluster_name, - updateId=updateId - ) + d = eks.describe_update(name=cluster_name, updateId=updateId) updateStatus = str(d['update']['status']) # if/else logic time if updateStatus == 'Successful': @@ -1449,13 +1346,17 @@ def update_cluster_kubernetes_version(cluster_name, kubernetes_version): time.sleep(15) continue + ''' Despite it's name, this Class contains methods to conduct emergency deletions (rollback) from Cache as well as normal deletions from main.py commands this is purely for Create mode, other Classes may have their own self-contained rollback mechanism ''' -class RollbackManager(): - def scheduled_deletion(nodegroup_name, cluster_name, cluster_role_name, nodegroup_role_name, launch_template_name): + +class RollbackManager: + def scheduled_deletion( + nodegroup_name, cluster_name, cluster_role_name, nodegroup_role_name, launch_template_name + ): ''' This function performs a graceful, scheduled deletion of all resources - or attempts to at least ''' @@ -1465,48 +1366,40 @@ def scheduled_deletion(nodegroup_name, cluster_name, cluster_role_name, nodegrou # Retrieve the Security Groups from the Cluster to delete, as they are not provided as arguments and cannot be guessed (ID's and all that...) sgList = [] - for sg in eks.describe_cluster(name=cluster_name)['cluster']['resourcesVpcConfig']['securityGroupIds']: + for sg in eks.describe_cluster(name=cluster_name)['cluster']['resourcesVpcConfig'][ + 'securityGroupIds' + ]: sgList.append(sg) # First, attempt to delete Nodegroup - RollbackManager.delete_nodegroup( - nodegroup_name=nodegroup_name, - cluster_name=cluster_name - ) + RollbackManager.delete_nodegroup(nodegroup_name=nodegroup_name, cluster_name=cluster_name) # Then, try to find the Cluster KMS Key and attempt to delete it try: - kmsKeyArn= eks.describe_cluster(name=cluster_name)['cluster']['encryptionConfig'][0]['provider']['keyArn'] + kmsKeyArn = eks.describe_cluster(name=cluster_name)['cluster']['encryptionConfig'][0][ + 'provider' + ]['keyArn'] except Exception: kmsKeyArn = None - + if kmsKeyArn != None: - RollbackManager.delete_kms_key( - kms_key_arn=kmsKeyArn - ) + RollbackManager.delete_kms_key(kms_key_arn=kmsKeyArn) # Next, attempt to delete Cluster - RollbackManager.delete_cluster( - cluster_name=cluster_name - ) + RollbackManager.delete_cluster(cluster_name=cluster_name) # Next, attempt to delete all related IAM RollbackManager.delete_eks_iam( - cluster_role_name=cluster_role_name, - nodegroup_role_name=nodegroup_role_name + cluster_role_name=cluster_role_name, nodegroup_role_name=nodegroup_role_name ) # Next, attempt to delete the EC2 Launch Template - RollbackManager.delete_launch_template( - launch_template_name=launch_template_name - ) + RollbackManager.delete_launch_template(launch_template_name=launch_template_name) # Finally, loop the retrieved SGs and then delete them for sg in sgList: print(f'Trying to delete EC2 Security Group {sg}') - RollbackManager.delete_security_groups( - cluster_security_group_id=sg - ) + RollbackManager.delete_security_groups(cluster_security_group_id=sg) print(f'Deletion complete. Confirm resource deletion in Console in case of errors') @@ -1528,42 +1421,32 @@ def rollback_from_cache(cache): clusterSgId = str(cache[1]['ClusterSecurityGroupId']) # First, attempt to delete Nodegroup - RollbackManager.delete_nodegroup( - nodegroup_name=nodegroupName, - cluster_name=clusterName - ) + RollbackManager.delete_nodegroup(nodegroup_name=nodegroupName, cluster_name=clusterName) # Then, try to find the Cluster KMS Key and attempt to delete it try: - kmsKeyArn= eks.describe_cluster(name=clusterName)['cluster']['encryptionConfig'][0]['provider']['keyArn'] + kmsKeyArn = eks.describe_cluster(name=clusterName)['cluster']['encryptionConfig'][0][ + 'provider' + ]['keyArn'] except Exception: kmsKeyArn = None if kmsKeyArn != None: - RollbackManager.delete_kms_key( - kms_key_arn=kmsKeyArn - ) + RollbackManager.delete_kms_key(kms_key_arn=kmsKeyArn) # Next, attempt to delete Cluster - RollbackManager.delete_cluster( - cluster_name=clusterName - ) + RollbackManager.delete_cluster(cluster_name=clusterName) # Next, attempt to delete all related IAM RollbackManager.delete_eks_iam( - cluster_role_name=clusterRoleName, - nodegroup_role_name=nodegroupRoleName + cluster_role_name=clusterRoleName, nodegroup_role_name=nodegroupRoleName ) # Next, attempt to delete the EC2 Launch Template - RollbackManager.delete_launch_template( - launch_template_name=launchTemplateName - ) + RollbackManager.delete_launch_template(launch_template_name=launchTemplateName) # Finally, delete the Security Groups - RollbackManager.delete_security_groups( - cluster_security_group_id=clusterSgId - ) + RollbackManager.delete_security_groups(cluster_security_group_id=clusterSgId) print(f'Rollback complete. Confirm resource deletion in Console in case of errors') @@ -1579,10 +1462,7 @@ def delete_nodegroup(cluster_name, nodegroup_name): eks = boto3.client('eks') try: - eks.delete_nodegroup( - clusterName=cluster_name, - nodegroupName=nodegroup_name - ) + eks.delete_nodegroup(clusterName=cluster_name, nodegroupName=nodegroup_name) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') @@ -1595,10 +1475,7 @@ def delete_nodegroup(cluster_name, nodegroup_name): waiter.wait( clusterName=cluster_name, nodegroupName=nodegroup_name, - WaiterConfig={ - 'Delay': 30, - 'MaxAttempts': 40 - } + WaiterConfig={'Delay': 30, 'MaxAttempts': 40}, ) print(f'EKS Nodegroups rolled back.') @@ -1614,9 +1491,7 @@ def delete_cluster(cluster_name): eks = boto3.client('eks') try: - eks.delete_cluster( - name=cluster_name - ) + eks.delete_cluster(name=cluster_name) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') @@ -1626,13 +1501,7 @@ def delete_cluster(cluster_name): waiter = eks.get_waiter('cluster_deleted') - waiter.wait( - name=cluster_name, - WaiterConfig={ - 'Delay': 30, - 'MaxAttempts': 123 - } - ) + waiter.wait(name=cluster_name, WaiterConfig={'Delay': 30, 'MaxAttempts': 123}) print(f'EKS Clusters rolled back.') @@ -1642,7 +1511,9 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name): ''' This function attempts to delete all related IAM entities for EKS (Cluster roles, Nodegroup roles, Nodegroup policies) ''' - print(f'Attempting to delete various IAM entities. IAM Roles {cluster_role_name} and {nodegroup_role_name} and IAM Policy {nodegroup_role_name}Policy.') + print( + f'Attempting to delete various IAM entities. IAM Roles {cluster_role_name} and {nodegroup_role_name} and IAM Policy {nodegroup_role_name}Policy.' + ) iam = boto3.client('iam') sts = boto3.client('sts') @@ -1653,23 +1524,21 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name): # Find and detach all policies from the Cluster Role try: - for policy in iam.list_attached_role_policies(RoleName=cluster_role_name)['AttachedPolicies']: + for policy in iam.list_attached_role_policies(RoleName=cluster_role_name)[ + 'AttachedPolicies' + ]: policyArn = str(policy['PolicyArn']) - iam.detach_role_policy( - RoleName=cluster_role_name, - PolicyArn=policyArn - ) + iam.detach_role_policy(RoleName=cluster_role_name, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') # Detach all Policies from Nodegroup cluster try: - for policy in iam.list_attached_role_policies(RoleName=nodegroup_role_name)['AttachedPolicies']: + for policy in iam.list_attached_role_policies(RoleName=nodegroup_role_name)[ + 'AttachedPolicies' + ]: policyArn = str(policy['PolicyArn']) - iam.detach_role_policy( - RoleName=nodegroup_role_name, - PolicyArn=policyArn - ) + iam.detach_role_policy(RoleName=nodegroup_role_name, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') @@ -1687,7 +1556,6 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name): iam.delete_role(RoleName=nodegroup_role_name) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') - print(f'IAM Roles and Policies rolled back.') @@ -1705,10 +1573,7 @@ def delete_launch_template(launch_template_name): ec2 = boto3.client('ec2') try: - ec2.delete_launch_template( - DryRun=False, - LaunchTemplateName=launch_template_name - ) + ec2.delete_launch_template(DryRun=False, LaunchTemplateName=launch_template_name) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') @@ -1742,13 +1607,10 @@ def delete_kms_key(kms_key_arn): kms = boto3.client('kms') try: - kms.schedule_key_deletion( - KeyId=kms_key_arn, - PendingWindowInDays=7 - ) + kms.schedule_key_deletion(KeyId=kms_key_arn, PendingWindowInDays=7) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') print(f'KMS Key rolled back') - del kms \ No newline at end of file + del kms diff --git a/README.md b/README.md index 5b49537..e72c98b 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ The Amazon Elastic Kubernetes Service (EKS) Creation Engine (ECE) is a Python command-line program created by the Lightspin Office of the CISO to facilitate the creation and enablement of secure EKS Clusters, optionally further assured with continual Kubernetes Security Posture Management (KSPM), Runtime Protection, and Application Performance Monitoring (APM) capabilities. -## What is this :eyes: :eyes: ?? +## What is this :eyes: :eyes: ?? As stated above, the ECE is a Python utility to create a fully functioning EKS Cluster, complete with Nodegroups which are built off of EC2 Launch Templates as it was meant for creating EKS Nodegroups with custom AMIs with custom bootstrapping. @@ -73,10 +73,39 @@ We are happy to take contributions from anywhere that will help expand this proj - Spot provider & Fargate Profile support for Nodegroups, and an option to not use Nodegroups - Create more Plugins for various utilities (e.g., Calico, OPA, NGINX Ingress Controller, etc.) +### Basic Contributing Setup + +1. Fork the repository. +2. Clone your fork and enter the `eks-creation-engine` directory. +3. Get your Python things Python-y. + +```bash +# Add upstream +git remote add upstream https://github.com/lightspin-tech/eks-creation-engine.git + +# Create virtual env +pip3 -m venv .env --prompt ece + +# Enter virtual env +source .env/bin/activate + +# Install ECE reqs +pip3 install -r requirements.txt + +# Install pre-commit +pip3 install pre-commit + +# Ensure pre-commit runs... pre... commit +pre-commit install + +# Init the pre-commit env and run checks +pre-commit run -a +``` + ## Contact Us :telephone_receiver: :telephone_receiver: For more information, contact us at support@lightspin.io. ## License :eight_spoked_asterisk: :eight_spoked_asterisk: -This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE). \ No newline at end of file +This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE). diff --git a/docs/HOWTO.md b/docs/HOWTO.md index 17317a5..1382f33 100644 --- a/docs/HOWTO.md +++ b/docs/HOWTO.md @@ -347,4 +347,4 @@ For more information, contact us at support@lightspin.io. ## License :eight_spoked_asterisk: :eight_spoked_asterisk: -This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE). \ No newline at end of file +This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE). diff --git a/main.py b/main.py index dd00a00..3925de7 100644 --- a/main.py +++ b/main.py @@ -1,49 +1,57 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an -#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import argparse import json -import sys import re +import subprocess +import sys + import boto3 import botocore -import argparse -import subprocess -from art import text2art import termcolor -from clint.textui import colored, puts -from EksCreationEngine import ClusterManager, UpdateManager, RollbackManager +from art import text2art +from clint.textui import colored +from clint.textui import puts + +from EksCreationEngine import ClusterManager +from EksCreationEngine import RollbackManager +from EksCreationEngine import UpdateManager from plugins.ECEDatadog import DatadogSetup from plugins.ECEFalco import FalcoSetup from plugins.ECESecurity import SecurityAssessment + def print_logo(): textArt = text2art("EKS CREATION ENGINE") print(termcolor.colored(textArt, 'red')) puts(colored.red("CREATED BY THE LIGHTSPIN OFFICE OF THE CISO")) - puts(colored.red("For more information about Lightspin reach out to support@lightspin.io or visit us at https://lightspin.io")) + puts( + colored.red( + "For more information about Lightspin reach out to support@lightspin.io or visit us at https://lightspin.io" + ) + ) + def stay_dangerous(): textArt = text2art("STAY DANGEROUS") print(termcolor.colored(textArt, 'red')) puts(colored.red("With Love, the Lightspin Office of the CISO")) + def create_preflight_check(): ''' This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before @@ -57,40 +65,43 @@ def create_preflight_check(): amiId = args.ami_id if amiId != 'SSM': # AMI Regex - amiRegex = re.compile('^(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3})(?:\s*,\s*(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3}))*$') + amiRegex = re.compile( + '^(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\\.){3}[0-9]{1,3})(?:\\s*,\\s*(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\\.){3}[0-9]{1,3}))*$' + ) # Attempt to match amiRegexCheck = amiRegex.search(amiId) if not amiRegexCheck: - print(f'Improperly AMI ID provided, does not match regex, check value and submit request again') + print( + f'Improperly AMI ID provided, does not match regex, check value and submit request again' + ) sys.exit(2) # Check if an EKS Cluster exists for provided name try: - eks.describe_cluster( - name=clusterName - ) + eks.describe_cluster(name=clusterName) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster doesnt exist - which is what we want if error.response['Error']['Code'] == 'ResourceNotFoundException': pass else: - print(f'An EKS Cluster with the name {clusterName} already exists. Please specify another name and try again') + print( + f'An EKS Cluster with the name {clusterName} already exists. Please specify another name and try again' + ) sys.exit(2) - + # Check if an EKS Nodegroup exists for provided name try: - eks.describe_nodegroup( - clusterName=clusterName, - nodegroupName=nodegroupName - ) + eks.describe_nodegroup(clusterName=clusterName, nodegroupName=nodegroupName) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster/nodegroup doesnt exist - which is what we want if error.response['Error']['Code'] == 'ResourceNotFoundException': pass else: - print(f'An EKS Nodegroup with the name {nodegroupName} already exists. Please specify another name and try again') + print( + f'An EKS Nodegroup with the name {nodegroupName} already exists. Please specify another name and try again' + ) sys.exit(2) - + # Check for a provided VPC if vpcId == None: print(f'VPC ID is required for cluster creation. Please specify a VPC ID and try again.') @@ -106,13 +117,17 @@ def create_preflight_check(): # Ensure a S3 Bucket was provided if MDE installation is true if installMdeOnNodes == 'True': if bucketName == None: - print(f'S3 Bucket name was not provided. Please provide a valid S3 Bucket and try again') + print( + f'S3 Bucket name was not provided. Please provide a valid S3 Bucket and try again' + ) sys.exit(2) # Ensure a Datadog API key is provided if Datadog installation is true if datadogBool == 'True': if datadogApiKey == None: - print(f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.') + print( + f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.' + ) sys.exit(2) # Print out creation specification - in the future this will be a "state file" for the cluster @@ -139,16 +154,11 @@ def create_preflight_check(): 'AmiArhcitecture': amiArchitecture, 'DatadogApiKey': datadogApiKey, 'InstallDatadog?': datadogBool, - 'AdditionalAuthorizedPrincipals': additionalAuthZPrincipals + 'AdditionalAuthorizedPrincipals': additionalAuthZPrincipals, } print(f'The following attributes are set for your EKS Cluster') - print( - json.dumps( - specDict, - indent=4 - ) - ) + print(json.dumps(specDict, indent=4)) # TODO: Save state? del specDict @@ -176,11 +186,12 @@ def create_preflight_check(): ami_architecture=amiArchitecture, datadog_api_key=datadogApiKey, datadog_bool=datadogBool, - addtl_auth_principals=additionalAuthZPrincipals + addtl_auth_principals=additionalAuthZPrincipals, ) stay_dangerous() + def delete_preflight_check(): print_logo() @@ -190,23 +201,23 @@ def delete_preflight_check(): cluster_role_name=clusterRoleName, nodegroup_name=nodegroupName, nodegroup_role_name=nodegroupRoleName, - launch_template_name=launchTemplateName + launch_template_name=launchTemplateName, ) stay_dangerous() + def update_preflight_check(): print_logo() # Call the `update_kubernetes_version` function and attempt to version bump K8s of Clusters & Nodes UpdateManager.update_kubernetes_version( - cluster_name=clusterName, - kubernetes_version=k8sVersion, - nodegroup_name=nodegroupName + cluster_name=clusterName, kubernetes_version=k8sVersion, nodegroup_name=nodegroupName ) stay_dangerous() + def assessment_preflight_check(): ''' This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before @@ -217,13 +228,13 @@ def assessment_preflight_check(): eks = boto3.client('eks') # Check if an EKS Cluster exists for provided name try: - eks.describe_cluster( - name=clusterName - ) + eks.describe_cluster(name=clusterName) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster doesnt exist - which is what we want if error.response['Error']['Code'] == 'ResourceNotFoundException': - print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again') + print( + f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again' + ) sys.exit(2) else: pass @@ -232,21 +243,20 @@ def assessment_preflight_check(): url = 'https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job-eks.yaml' wgetCommand = f'wget {url}' - subProc = subprocess.run(wgetCommand, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + subProc = subprocess.run(wgetCommand, shell=True, capture_output=True) print(subProc.stderr.decode('utf-8')) print(f'Installing Trivy from source script for v0.24') # TODO: Continual updates of Trivy version https://aquasecurity.github.io/trivy trivyCmd = 'curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.24.0' - trivyProc = subprocess.run(trivyCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + trivyProc = subprocess.run(trivyCmd, shell=True, capture_output=True) print(trivyProc.stdout.decode('utf-8')) - SecurityAssessment.start_assessment( - cluster_name=clusterName - ) + SecurityAssessment.start_assessment(cluster_name=clusterName) stay_dangerous() + def setup_falco_preflight_check(): ''' This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before @@ -258,32 +268,36 @@ def setup_falco_preflight_check(): eks = boto3.client('eks') # Check if an EKS Cluster exists for provided name try: - eks.describe_cluster( - name=clusterName - ) + eks.describe_cluster(name=clusterName) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster doesnt exist - which is what we want if error.response['Error']['Code'] == 'ResourceNotFoundException': - print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again') + print( + f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again' + ) sys.exit(2) else: pass - + if mode == 'SetupFalco': if falcoDestType == 'Slack' or falcoDestType == 'Teams': if falcoDest == None: - print(f'No destination was provided for "--falco_sidekick_destination_type", please try again.') + print( + f'No destination was provided for "--falco_sidekick_destination_type", please try again.' + ) sys.exit(2) elif falcoDestType == 'Datadog': if datadogApiKey == None: - print(f'Datadog destination for Falco was specified but a Datadog API was not provided. Please provide a valid API key and try again.') - sys.exit(2) + print( + f'Datadog destination for Falco was specified but a Datadog API was not provided. Please provide a valid API key and try again.' + ) + sys.exit(2) FalcoSetup.falco_initialization( cluster_name=clusterName, falco_mode='Create', falco_sidekick_destination_type=falcoDestType, - falco_sidekick_destination=falcoDest + falco_sidekick_destination=falcoDest, ) stay_dangerous() elif mode == 'RemoveFalco': @@ -292,13 +306,14 @@ def setup_falco_preflight_check(): falco_mode='Delete', falco_sidekick_destination_type=falcoDestType, falco_sidekick_destination=falcoDest, - datadog_api_key=datadogApiKey + datadog_api_key=datadogApiKey, ) stay_dangerous() else: print(f'Somehow, an incompatible mode detected for Falco, please try again.') sys.exit(2) + def setup_datadog_preflight_check(): ''' This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before @@ -309,33 +324,31 @@ def setup_datadog_preflight_check(): eks = boto3.client('eks') # Check if an EKS Cluster exists for provided name try: - eks.describe_cluster( - name=clusterName - ) + eks.describe_cluster(name=clusterName) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster doesnt exist - which is what we want if error.response['Error']['Code'] == 'ResourceNotFoundException': - print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again') + print( + f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again' + ) sys.exit(2) else: pass if mode == 'SetupDatadog': if datadogApiKey == None: - print(f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.') + print( + f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.' + ) sys.exit(2) # Datadoggy time! DatadogSetup.initialization( - cluster_name=clusterName, - datadog_mode='Setup', - datadog_api_key=datadogApiKey + cluster_name=clusterName, datadog_mode='Setup', datadog_api_key=datadogApiKey ) elif mode == 'RemoveDatadog': # Bye Datadoggy time! DatadogSetup.initialization( - cluster_name=clusterName, - datadog_mode='Remove', - datadog_api_key=datadogApiKey + cluster_name=clusterName, datadog_mode='Remove', datadog_api_key=datadogApiKey ) else: print(f'Somehow, an incompatible mode detected for Datadog, please try again.') @@ -343,8 +356,9 @@ def setup_datadog_preflight_check(): stay_dangerous() + if __name__ == "__main__": - # Feed all of the arguments + # Feed all of the arguments ''' >> argparse argument | **kwargs << --profile | profile @@ -380,92 +394,101 @@ def setup_datadog_preflight_check(): '--profile', help='Specify Profile name if multiple profiles are used', required=False, - default=[] + default=[], ) # --mode parser.add_argument( '--mode', help='Create, Destory or Update an existing Cluster. Updates limited to K8s Version bump. Destroy attempts to delete everything that this utility creates. Assessment will attempt to run various K8s security tools. SetupFalco will attempt to install Falco on existing Clusters. RemoveFalco will attempt to rollback SetupFalco deployments. SetupDatadog will attempt to install DataDog on existing Cluster. RemoveDatadog will attempt to rollback SetupDatadog deployments - defaults to Create', required=False, - choices=['Create', 'Destroy', 'Update', 'Assessment', 'SetupFalco', 'RemoveFalco', 'SetupDatadog', 'RemoveDatadog'], - default='Create' + choices=[ + 'Create', + 'Destroy', + 'Update', + 'Assessment', + 'SetupFalco', + 'RemoveFalco', + 'SetupDatadog', + 'RemoveDatadog', + ], + default='Create', ) # --k8s_version parser.add_argument( '--k8s_version', help='Version of K8s to use for EKS - defaults to 1.21 as of 13 JAN 2022 - used for Create and Update', required=False, - default='1.21' + default='1.21', ) # --s3_bucket_name parser.add_argument( '--s3_bucket_name', help='S3 Bucket with required artifacts for EKS to access for bootstrapping if --mde_on_nodes=True - used for Create', required=False, - default=None + default=None, ) # --ebs_volume_size parser.add_argument( '--ebs_volume_size', help='EBS volume size (in GB) for EKS nodegroup EC2 launch template - used for Create', required=False, - default='20' + default='20', ) # --ami parser.add_argument( '--ami_id', help='Custom AMI ID for EKS nodegroup EC2 launch template. Defaults to "SSM" which tells the program to use an SSM-derived image for your K8s version matching --ami_os and --ami_architecture - used for Create', required=False, - default='SSM' + default='SSM', ) # --instance_type parser.add_argument( '--instance_type', help='EC2 Instance type for EKS nodegroup EC2 launch template', required=False, - default='t3.medium' + default='t3.medium', ) # --cluster_name parser.add_argument( '--cluster_name', help='Name for your EKS Cluster - used for Create, Delete and Update', required=False, - default='LightspinECECluster' + default='LightspinECECluster', ) # --cluster_role_name parser.add_argument( '--cluster_role_name', help='Name for your EKS Cluster Service IAM Role', required=False, - default='ClusterServiceRoleForEKS' + default='ClusterServiceRoleForEKS', ) # --nodegroup_name parser.add_argument( '--nodegroup_name', help='Name for your EKS Nodegroup - used for Create, Delete and Update', required=False, - default='LightspinECENodegroup' + default='LightspinECENodegroup', ) # --nodegroup_role_name parser.add_argument( '--nodegroup_role_name', help='Name for your EKS Nodegroup Service IAM Role (also given to policy)', required=False, - default='NodegroupServiceRoleForEKS' + default='NodegroupServiceRoleForEKS', ) # --launch_template_name parser.add_argument( '--launch_template_name', help='Name for your Nodegroup EC2 launch template - used for Create and Delete', required=False, - default='LightspinECECustomEKSAMI' + default='LightspinECECustomEKSAMI', ) # --vpcid parser.add_argument( '--vpcid', help='VPC ID to launch EKS Cluster and Nodegroups into', required=False, - default=None + default=None, ) # --subnets # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python @@ -473,14 +496,14 @@ def setup_datadog_preflight_check(): '--subnets', nargs='+', help='Subnets to launch EKS Cluster and Nodegroups into - provide subnet IDs separated by spaces only', - required=False + required=False, ) # --node_count parser.add_argument( '--node_count', help='Amount of Nodes (EC2 instances) in EKS Nodegroup, will be used for min and desired values with 2 times for max - default 2', required=False, - default='2' + default='2', ) # --mde_on_nodes parser.add_argument( @@ -488,7 +511,7 @@ def setup_datadog_preflight_check(): help='Whether or not to install MDE on EKS Nodes via bootstrap - requires S3 Bucket and install scripts if true - defaults to False', required=False, choices=['True', 'False'], - default='False' + default='False', ) # --additional_ports # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python @@ -496,7 +519,7 @@ def setup_datadog_preflight_check(): '--additional_ports', nargs='+', help='Additional application ports which need to be allowed in EKS Security Groups - 443, 53, 8765, 2801, and 10250 already included', - required=False + required=False, ) # --falco parser.add_argument( @@ -504,7 +527,7 @@ def setup_datadog_preflight_check(): help='For CREATE Mode, this flag specifies if you want to install and configure Falco on your Clusters - defaults to False', required=False, choices=['True', 'False'], - default='False' + default='False', ) # --falco_sidekick_destination_type parser.add_argument( @@ -512,14 +535,14 @@ def setup_datadog_preflight_check(): help='The output location for Falco Sidekick to send Falco alerts to. Defaults to SNS which also creates a new Topic unless a Destination is provided', required=False, choices=['SNS', 'Slack', 'Teams', 'Datadog'], - default='SNS' + default='SNS', ) # --falco_sidekick_destination parser.add_argument( '--falco_sidekick_destination', help='The logical location matching the Sidekick Destination Type to forward Falco alerts. E.g., ARN, Webhook URL, Datadog URL, etc.', required=False, - default=None + default=None, ) # --ami_os parser.add_argument( @@ -527,7 +550,7 @@ def setup_datadog_preflight_check(): help='If using "SSM" for --ami use this argument to specify what OS you want to use (alas = Amazon Linux 2, ubuntu = Ubuntu 20.04) - defaults to ubuntu', required=False, choices=['alas', 'ubuntu'], - default='ubuntu' + default='ubuntu', ) # --ami_architecture parser.add_argument( @@ -535,7 +558,7 @@ def setup_datadog_preflight_check(): help='If using "SSM" for --ami use this argument to specify what architecture you want to use - defaults to amd64', required=False, choices=['amd64', 'arm64'], - default='amd64' + default='amd64', ) # --datadog parser.add_argument( @@ -543,14 +566,14 @@ def setup_datadog_preflight_check(): help='For CREATE Mode, this flag specifies if you want to install and configure Datadog APM on your Clusters - defaults to False', required=False, choices=['True', 'False'], - default='False' + default='False', ) # --datadog_api_key parser.add_argument( '--datadog_api_key', help='Datadog API Key. This is used for setting up Datadog with Create and SetupDatadog Modes as well as Datadog integration for FalcoSidekick', required=False, - default=None + default=None, ) # addtl_auth_principals # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python @@ -558,7 +581,7 @@ def setup_datadog_preflight_check(): '--addtl_auth_principals', nargs='+', help='Additional IAM Role ARNs to authorized as system:masters', - required=False + required=False, ) args = parser.parse_args() @@ -611,4 +634,4 @@ def setup_datadog_preflight_check(): setup_datadog_preflight_check() else: print(f'Somehow you provided an unexpected arguement, exiting!') - sys.exit(2) \ No newline at end of file + sys.exit(2) diff --git a/plugins/ECEDatadog.py b/plugins/ECEDatadog.py index 8caba57..65108e2 100644 --- a/plugins/ECEDatadog.py +++ b/plugins/ECEDatadog.py @@ -1,30 +1,27 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. import subprocess ''' This Class manages deployment of Datadog onto an EKS Cluster and rollbacks / manual deletions ''' -class DatadogSetup(): + +class DatadogSetup: def initialization(cluster_name, datadog_mode, datadog_api_key): ''' This function controls initialization of the DatadogSetup Class. It will control installs, deletions, and rollbacks @@ -44,15 +41,21 @@ def install_datadog(datadog_api_key): # Use subprocess to add Datadog Charts using Helm print(f'Adding Datadog Helm Charts') - datadogHelmChartAddCmd = 'helm repo add datadog https://helm.datadoghq.com && helm repo update' - datadogHelmChartAddSubprocess = subprocess.run(datadogHelmChartAddCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + datadogHelmChartAddCmd = ( + 'helm repo add datadog https://helm.datadoghq.com && helm repo update' + ) + datadogHelmChartAddSubprocess = subprocess.run( + datadogHelmChartAddCmd, shell=True, capture_output=True + ) datadogHelmChartAddMsg = str(datadogHelmChartAddSubprocess.stdout.decode('utf-8')) print(datadogHelmChartAddMsg) # Use subprocess to configure Datadog per initiation arguments from main.py print(f'Installing Datadog') installDatadogCmd = f'helm install datadog-agent --set targetSystem=linux --set datadog.apiKey={datadog_api_key} datadog/datadog' - installDatadogSubprocess = subprocess.run(installDatadogCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + installDatadogSubprocess = subprocess.run( + installDatadogCmd, shell=True, capture_output=True + ) installDatadogMsg = str(installDatadogSubprocess.stdout.decode('utf-8')) print(installDatadogMsg) @@ -63,6 +66,6 @@ def uninstall_datadog(): # Uninstall Datadog from EKS datadogRemoveCmd = 'helm uninstall datadog-agent' - datadogRemoveSubprocess = subprocess.run(datadogRemoveCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + datadogRemoveSubprocess = subprocess.run(datadogRemoveCmd, shell=True, capture_output=True) datadogRemoveMsg = str(datadogRemoveSubprocess.stdout.decode('utf-8')) - print(datadogRemoveMsg) \ No newline at end of file + print(datadogRemoveMsg) diff --git a/plugins/ECEFalco.py b/plugins/ECEFalco.py index c148cda..5b838c2 100644 --- a/plugins/ECEFalco.py +++ b/plugins/ECEFalco.py @@ -1,36 +1,40 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import json +import subprocess import sys +from datetime import datetime + import boto3 import botocore.exceptions -import json -from datetime import datetime -import subprocess ''' This Class manages an end-to-end deployment of Falco and FalcoSidekick to EKS using Helm. This class can be called from ClusterManager (if flag is set) or called independently to setup Falco ''' -class FalcoSetup(): - def falco_initialization(cluster_name, falco_mode, falco_sidekick_destination_type, falco_sidekick_destination, datadog_api_key): + +class FalcoSetup: + def falco_initialization( + cluster_name, + falco_mode, + falco_sidekick_destination_type, + falco_sidekick_destination, + datadog_api_key, + ): ''' This function handles configuration of Falco and FalcoSidekick on a Cluster, whether in-line of an ECE Create or ECE SetupFalco `--mode` from main.py Depending on the destination configuration and mode, this function will either schedule deletion or creation of additional infrastructure and issue Helm @@ -51,8 +55,7 @@ def falco_initialization(cluster_name, falco_mode, falco_sidekick_destination_ty # and add them to the static list above if they are not already there for nodegroup in eks.list_nodegroups(clusterName=cluster_name)['nodegroups']: nodeRoleArn = eks.describe_nodegroup( - clusterName=cluster_name, - nodegroupName=nodegroup + clusterName=cluster_name, nodegroupName=nodegroup )['nodegroup']['nodeRole'] if nodeRoleArn not in roleArns: roleArns.append(nodeRoleArn) @@ -65,37 +68,35 @@ def falco_initialization(cluster_name, falco_mode, falco_sidekick_destination_ty topicArn = FalcoSetup.falco_sidekick_sns_iam_generator( cluster_name=cluster_name, falco_sidekick_destination=falco_sidekick_destination, - role_arns=roleArns + role_arns=roleArns, ) # Install Falco # All commands for FalcoSidekick come from the Helm vars spec in the chart GitHub repo: https://github.com/falcosecurity/charts/tree/master/falcosidekick#configuration falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.aws.sns.topicarn={topicArn}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd - ) + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) elif falco_sidekick_destination_type == 'Slack': - print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Slack Webhook {falco_sidekick_destination}') - + print( + f'Configuring Falco and FalcoSidekick to send runtime alerts to Slack Webhook {falco_sidekick_destination}' + ) + # Install Falco falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.slack.webhookurl={falco_sidekick_destination}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd - ) + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) elif falco_sidekick_destination_type == 'Teams': - print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Teams Webhook {falco_sidekick_destination}') + print( + f'Configuring Falco and FalcoSidekick to send runtime alerts to Teams Webhook {falco_sidekick_destination}' + ) # Install Falco falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.teams.webhookurl={falco_sidekick_destination}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd - ) + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) elif falco_sidekick_destination_type == 'Datadog': - print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Datadog Host {falco_sidekick_destination}') + print( + f'Configuring Falco and FalcoSidekick to send runtime alerts to Datadog Host {falco_sidekick_destination}' + ) # Install Falco falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.datadog.host={falco_sidekick_destination} --set falcosidekick.config.datadog.apikey={datadog_api_key}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd - ) + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) else: print(f'Unsupported destination type provided, exiting') sys.exit(2) @@ -116,8 +117,7 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r # If the value for 'falco_sidekick_destination' is None, that means a SNS topic was not provided and needs to be setup if falco_sidekick_destination == None: topicArn = FalcoSetup.falco_sidekick_sns_creation( - cluster_name=cluster_name, - role_arns=role_arns + cluster_name=cluster_name, role_arns=role_arns ) else: topicArn = falco_sidekick_destination @@ -139,14 +139,10 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r { 'Sid': 'Snssid', 'Effect': 'Allow', - 'Action': [ - 'sns:Publish', - 'sns:GetTopicAttributes', - 'sns:ListTopics' - ], - 'Resource': [topicArn] + 'Action': ['sns:Publish', 'sns:GetTopicAttributes', 'sns:ListTopics'], + 'Resource': [topicArn], } - ] + ], } policyName = f'{cluster_name}FalcoSidekick-SNSPublishPolicy' @@ -158,23 +154,11 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r PolicyDocument=json.dumps(iamPolicyDoc), Description=f'Allows EKS Cluster {cluster_name} and Nodegroups to send Falco alerts to SNS - Created by Lightspin ECE', Tags=[ - { - 'Key': 'Name', - 'Value': policyName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': policyName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], ) policyArn = f'arn:aws:iam::{acctId}:policy/{policyName}' except botocore.exceptions.ClientError as error: @@ -184,10 +168,7 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r for role in roleArns: roleName = role.split('/')[1] try: - iam.attach_role_policy( - RoleName=roleName, - PolicyArn=policyArn - ) + iam.attach_role_policy(RoleName=roleName, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) @@ -229,27 +210,13 @@ def falco_sidekick_sns_creation(cluster_name, role_arns): try: topicArn = sns.create_topic( Name=topicName, - Attributes={ - 'DisplayName': topicName - }, + Attributes={'DisplayName': topicName}, Tags=[ - { - 'Key': 'Name', - 'Value': topicName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': topicName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], )['TopicArn'] except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') @@ -257,40 +224,32 @@ def falco_sidekick_sns_creation(cluster_name, role_arns): # Create a SNS Topic Policy Doc to pass in as an SNS Attribute topicPolicyJson = { - 'Version':'2008-10-17', - 'Id':'ecepolicy', - 'Statement':[ + 'Version': '2008-10-17', + 'Id': 'ecepolicy', + 'Statement': [ { - 'Sid':'ecesid-pub', - 'Effect':'Allow', - 'Principal':{ - 'AWS': roleArns - }, - 'Action':['SNS:Publish'], - 'Resource': topicArn + 'Sid': 'ecesid-pub', + 'Effect': 'Allow', + 'Principal': {'AWS': roleArns}, + 'Action': ['SNS:Publish'], + 'Resource': topicArn, }, { - 'Sid':'ecesid-sub', - 'Effect':'Allow', - 'Principal':{ - 'AWS':'*' - }, - 'Action':['SNS:Subscribe'], + 'Sid': 'ecesid-sub', + 'Effect': 'Allow', + 'Principal': {'AWS': '*'}, + 'Action': ['SNS:Subscribe'], 'Resource': topicArn, - 'Condition':{ - 'StringEquals':{ - 'AWS:SourceOwner': acctId - } - } - } - ] + 'Condition': {'StringEquals': {'AWS:SourceOwner': acctId}}, + }, + ], } try: sns.set_topic_attributes( TopicArn=topicArn, AttributeName='Policy', - AttributeValue=json.dumps(topicPolicyJson) + AttributeValue=json.dumps(topicPolicyJson), ) except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') @@ -314,15 +273,19 @@ def install_falco(falco_install_command): # Use subprocess to add Falco Charts using Helm print(f'Adding Falco Helm Charts') - falcoHelmChartAddCmd = 'helm repo add falcosecurity https://falcosecurity.github.io/charts && helm repo update' - falcoHelmChartAddSubprocess = subprocess.run(falcoHelmChartAddCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + falcoHelmChartAddCmd = ( + 'helm repo add falcosecurity https://falcosecurity.github.io/charts && helm repo update' + ) + falcoHelmChartAddSubprocess = subprocess.run( + falcoHelmChartAddCmd, shell=True, capture_output=True + ) falcoHelmChartAddMsg = str(falcoHelmChartAddSubprocess.stdout.decode('utf-8')) print(falcoHelmChartAddMsg) # Use subprocess to configure Falco and FalcoSidekick per initiation arguments from main.py print(f'Installing Falco and FalcoSidekick') installFalcoCmd = falco_install_command - installFalcoSubprocess = subprocess.run(installFalcoCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + installFalcoSubprocess = subprocess.run(installFalcoCmd, shell=True, capture_output=True) installFalcoMsg = str(installFalcoSubprocess.stdout.decode('utf-8')) print(installFalcoMsg) @@ -349,17 +312,12 @@ def falco_setup_rollback(cluster_name): # If an IAM Policy for SNS was created, attempt to detach it before deletion try: rolesAttachedToPolicy = iam.list_entities_for_policy( - PolicyArn=policyArn, - EntityFilter='Role', - PolicyUsageFilter='PermissionsPolicy' + PolicyArn=policyArn, EntityFilter='Role', PolicyUsageFilter='PermissionsPolicy' )['PolicyRoles'] if rolesAttachedToPolicy: for role in rolesAttachedToPolicy: roleName = str(role['RoleName']) - iam.detach_role_policy( - RoleName=roleName, - PolicyArn=policyArn - ) + iam.detach_role_policy(RoleName=roleName, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: print(error) except KeyError as ke: @@ -367,9 +325,7 @@ def falco_setup_rollback(cluster_name): # If an IAM Policy for SNS was created, attempt to delete it try: - iam.delete_policy( - PolicyArn=policyArn - ) + iam.delete_policy(PolicyArn=policyArn) print(f'Falco SNS Policy {policyArn} deleted') except botocore.exceptions.ClientError as error: print(error) @@ -383,9 +339,9 @@ def falco_setup_rollback(cluster_name): # Uninstall Falco from EKS falcoRemoveCmd = 'helm uninstall falco' - falcoRemoveSubprocess = subprocess.run(falcoRemoveCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + falcoRemoveSubprocess = subprocess.run(falcoRemoveCmd, shell=True, capture_output=True) falcoRemoveMsg = str(falcoRemoveSubprocess.stdout.decode('utf-8')) print(falcoRemoveMsg) print(f'Falco rollback complete.') - sys.exit(2) \ No newline at end of file + sys.exit(2) diff --git a/plugins/ECESecurity.py b/plugins/ECESecurity.py index 6958254..40f2b54 100644 --- a/plugins/ECESecurity.py +++ b/plugins/ECESecurity.py @@ -1,34 +1,32 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - -import boto3 +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. import json -import time -import subprocess import re +import subprocess +import time + +import boto3 ''' This Class manages various security assessment functions - such as running and saving Kube-bench CIS benchmarking and Trivy container scanning ''' -class SecurityAssessment(): + +class SecurityAssessment: def start_assessment(cluster_name): ''' This function serves as the 'brain' of the security assessment. It will modify the Kubeconfig and attempt to run the other assessments @@ -40,8 +38,10 @@ def start_assessment(cluster_name): session = boto3.session.Session() awsRegion = session.region_name - updateKubeconfigCmd = f'aws eks update-kubeconfig --region {awsRegion} --name {cluster_name}' - updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + updateKubeconfigCmd = ( + f'aws eks update-kubeconfig --region {awsRegion} --name {cluster_name}' + ) + updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, capture_output=True) print(updateKubeconfigProc.stdout.decode('utf-8')) trivySarif = SecurityAssessment.run_trivy() @@ -52,7 +52,7 @@ def start_assessment(cluster_name): sarifBase = { '$schema': 'https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json', 'version': '2.1.0', - 'runs': [] + 'runs': [], } for runs in trivySarif: @@ -64,7 +64,9 @@ def start_assessment(cluster_name): with open('./ECE_SecurityAssessment.sarif', 'w') as jsonfile: json.dump(sarifBase, jsonfile, indent=4, default=str) - print(f'Assessments completed and SARIF document created successfully as "ECE_SecurityAssessment.sarif".') + print( + f'Assessments completed and SARIF document created successfully as "ECE_SecurityAssessment.sarif".' + ) def run_trivy(): ''' @@ -75,12 +77,12 @@ def run_trivy(): trivyFindings = [] print(f'Running Trivy') - + # Retrieve a list of all running Containers and create a unique list of them to pass to Trivy for scanning print(f'Retrieving list of all running Containers from your EKS Cluster') command = 'kubectl get pods --all-namespaces -o json | jq --raw-output ".items[].spec.containers[].image"' - sub = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sub = subprocess.run(command, shell=True, capture_output=True) # pull list of container URIs from kubectl strList = str(sub.stdout.decode('utf-8')) # split by newline, as that is how it is retruned @@ -92,7 +94,7 @@ def run_trivy(): pass else: uniqueContainers.append(i) - + totalUniques = str(len(uniqueContainers)) if totalUniques == '1': print(f'Trivy will scan {totalUniques} unique container image') @@ -106,7 +108,7 @@ def run_trivy(): for c in uniqueContainers: # passing '--quiet' will ensure the setup text from Trivy scanning does not make it into the JSON and corrupt it trivyScanCmd = f'trivy --quiet image --format sarif {c}' - trivyScanSubprocess = subprocess.run(trivyScanCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + trivyScanSubprocess = subprocess.run(trivyScanCmd, shell=True, capture_output=True) trivyStdout = str(trivyScanSubprocess.stdout.decode('utf-8')) # load JSON object from stdout jsonItem = json.loads(trivyStdout) @@ -139,7 +141,7 @@ def run_kube_bench(cluster_name): # Schedule the Job onto your EKS Cluster command = 'kubectl apply -f job-eks.yaml' - runJobSubproc = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + runJobSubproc = subprocess.run(command, shell=True, capture_output=True) print(runJobSubproc.stdout.decode('utf-8')) time.sleep(1.5) @@ -149,7 +151,7 @@ def run_kube_bench(cluster_name): # Really bad Regex hack to exit the `while True` loop - fuzzy match the stdout message completionRegex = re.compile('job.batch/kube-bench condition met') while True: - jobWaitSubproc = subprocess.run(jobWaitCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + jobWaitSubproc = subprocess.run(jobWaitCmd, shell=True, capture_output=True) jobWaitMessage = str(jobWaitSubproc.stdout.decode('utf-8')) completionRegexCheck = completionRegex.search(jobWaitMessage) if completionRegexCheck: @@ -162,13 +164,15 @@ def run_kube_bench(cluster_name): # `getPodCmd` used Kubectl to get pod names in all namespaces (-A). cut -d/ -f2 command is to split by the '/' and get the name # grep is used to ensure the right pod name is pulled as it always ends with a random 5 character hex (ex. kube-bench-z6r4b) getPodCmd = 'kubectl get pods -o name -A | cut -d/ -f2 | grep kube-bench' - getPodSubproc = subprocess.run(getPodCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + getPodSubproc = subprocess.run(getPodCmd, shell=True, capture_output=True) # decoding adds newline or blank spaces - attempt to trim them - kubebenchPodName = str(getPodSubproc.stdout.decode('utf-8')).replace('\n', '').replace(' ', '') + kubebenchPodName = ( + str(getPodSubproc.stdout.decode('utf-8')).replace('\n', '').replace(' ', '') + ) # Pull logs from Job - this is the actual results of the job getLogsCmd = f'kubectl logs {kubebenchPodName}' - getLogsSubproc = subprocess.run(getLogsCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + getLogsSubproc = subprocess.run(getLogsCmd, shell=True, capture_output=True) getLogsStdout = str(getLogsSubproc.stdout.decode('utf-8')) # Split the block of text from STDOUT by newline delimiters to create a new list splitter = getLogsStdout.split('\n') @@ -176,7 +180,7 @@ def run_kube_bench(cluster_name): # Use regex to match the Kube-Bench findings, they always start with a '[' which contains info such as '[PASS]'. We then match anything with 2 periods # as Kube-bench outputs 'headers' such as 3 or 3.1 - we want results such as '[PASS] 3.1.3 Ensure that the kubelet configuration file has permissions set to 644 or more restrictive (Manual)' # this is a horrible way to do it....but it works - kubeBenchResultRegex = re.compile('^\[.*\..*\..*') + kubeBenchResultRegex = re.compile(r'^\[.*\..*\..*') for line in splitter: kubeBenchRegexCheck = kubeBenchResultRegex.search(line) if kubeBenchRegexCheck: @@ -192,38 +196,34 @@ def run_kube_bench(cluster_name): # Create a new dict of the findings that will match a SARIF JSON 'run' # https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md run = { - 'tool':{ - 'driver':{ - 'name':'Kube-bench', + 'tool': { + 'driver': { + 'name': 'Kube-bench', 'semanticVersion': '0.6.6', 'informationUri': 'https://github.com/aquasecurity/kube-bench', 'organization': 'Aqua Security', 'fullDescription': { 'text': 'kube-bench is a tool that checks whether Kubernetes is deployed securely by running the checks documented in the CIS Kubernetes Benchmark.' - } + }, } }, - 'results':[ + 'results': [ { 'ruleId': splitFinding[1], - 'message':{ - 'text': findingStatus - }, - 'locations':[ + 'message': {'text': findingStatus}, + 'locations': [ { - 'physicalLocation':{ - 'artifactLocation':{ - 'uri': clusterEndpoint, - 'description': { - 'text': cluster_name - } + 'physicalLocation': { + 'artifactLocation': { + 'uri': clusterEndpoint, + 'description': {'text': cluster_name}, } } } - ] + ], } ], - 'columnKind':'utf16CodeUnits' + 'columnKind': 'utf16CodeUnits', } findings.append(run) else: @@ -233,10 +233,12 @@ def run_kube_bench(cluster_name): # Delete the job from the EKS Cluster deleteKubebenchJobCmd = 'kubectl delete -f job-eks.yaml' - deleteKubebenchJobSubproc = subprocess.run(deleteKubebenchJobCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + deleteKubebenchJobSubproc = subprocess.run( + deleteKubebenchJobCmd, shell=True, capture_output=True + ) deleteKubebenchJobStdout = str(deleteKubebenchJobSubproc.stdout.decode('utf-8')) print(f'{deleteKubebenchJobStdout}') print(f'Completed Kube-bench assessment of EKS Cluster {cluster_name}') - return findings \ No newline at end of file + return findings diff --git a/plugins/__init__.py b/plugins/__init__.py index 8e8bdd2..da288a2 100644 --- a/plugins/__init__.py +++ b/plugins/__init__.py @@ -1,19 +1,16 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an -#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. \ No newline at end of file +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/requirements.txt b/requirements.txt index 2e1b37f..53bd91a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -awscli -boto3 -art -termcolor -clint -urllib3 \ No newline at end of file +art>=5.4,<5.5 +awscli>=1.22.65,<1.23.0 +boto3>=1.21.10,<1.22.0 +clint>=0.5.1,<0.6.0 +termcolor>=1.1.0,<1.2.0 +urllib3>=1.26.8,<1.27.0