From 931499b7cf9d4e03f2dcd4449650986d31886362 Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Mon, 3 Oct 2016 08:20:38 -0700 Subject: Try to make boiler plate for cert expiry checking --- library/openshift_cert_expiry.py | 428 +++++++++++++++++++++ .../byo/openshift-cluster/check-cert-expiry.yaml | 35 ++ .../openshift-cluster/check-cert-expiry.yaml | 37 ++ utils/Makefile | 12 +- utils/test-requirements.txt | 1 + 5 files changed, 510 insertions(+), 3 deletions(-) create mode 100644 library/openshift_cert_expiry.py create mode 100644 playbooks/byo/openshift-cluster/check-cert-expiry.yaml create mode 100644 playbooks/common/openshift-cluster/check-cert-expiry.yaml diff --git a/library/openshift_cert_expiry.py b/library/openshift_cert_expiry.py new file mode 100644 index 000000000..cd8662f67 --- /dev/null +++ b/library/openshift_cert_expiry.py @@ -0,0 +1,428 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# etcd config file +import ConfigParser +# Expiration parsing +import datetime +# File path stuff +import os +# Config file parsing +import yaml +# Certificate loading +import OpenSSL.crypto + + +DOCUMENTATION = ''' +--- +module: openshift_cert_expiry +short_description: Check OpenShift Container Platform (OCP) and Kube certificate expirations on a cluster +description: + - The M(openshift_cert_expiry) module has two basic functions: to flag certificates which will expire in a set window of time from now, and to notify you about certificates which have already expired. + - When the module finishes, a summary of the examination is returned. Each certificate in the summary has a C(health) key with a value of one of the following: + - C(ok) - not expired, and outside of the expiration C(warning_days) window. + - C(warning) - not expired, but will expire between now and the C(warning_days) window. + - C(expired) - an expired certificate. 
+ - Certificate flagging follow this logic: + - If the expiration date is before now then the certificate is classified as C(expired). + - The certificates time to live (expiration date - now) is calculated, if that time window is less than C(warning_days) the certificate is classified as C(warning). + - All other conditions are classified as C(ok). + - The following keys are ALSO present in the certificate summary: + - C(cert_cn) - The common name of the certificate (additional CNs present in SAN extensions are omitted) + - C(days_remaining) - The number of days until the certificate expires. + - C(expiry) - The date the certificate expires on. + - C(path) - The full path to the certificate on the examined host. +version_added: "0.0" +options: + config_base: + description: + - Base path to OCP system settings. + required: false + default: /etc/origin + warning_days: + description: + - Flag certificates which will expire in C(warning_days) days from now. + required: false + default: 30 + show_all: + description: + - Enable this option to show analysis of ALL certificates examined by this module. + - By default only certificates which have expired, or will expire within the C(warning_days) window will be reported. + required: false + default: false + +author: "Tim Bielawa (@tbielawa) " +''' + +EXAMPLES = ''' +# Default invocation, only notify about expired certificates or certificates which will expire within 30 days from now +- openshift_cert_expiry: + +# Expand the warning window to show certificates expiring within a year from now +- openshift_cert_expiry: warning_days=365 + +# Show expired, soon to expire (now + 30 days), and all other certificates examined +- openshift_cert_expiry: show_all=true +''' + + +###################################################################### +# etcd does not begin their config file with an opening [section] as +# required by the Python ConfigParser module. We hack around it by +# slipping one in ourselves prior to parsing. 
+# +# Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583 +class FakeSecHead(object): + def __init__(self, fp): + self.fp = fp + self.sechead = '[ETCD]\n' + + def readline(self): + if self.sechead: + try: + return self.sechead + finally: + self.sechead = None + else: + return self.fp.readline() + +###################################################################### + +def filter_paths(path_list): + # `path_list` - A list of file paths to check. Only files which + # exist will be returned + return filter( + lambda p: os.path.exists(os.path.realpath(p)), + path_list) + +def load_and_handle_cert(cert_string, now, base64decode=False): + """Load a certificate, split off the good parts, and return some +useful data + +Params: + +- `cert_string` (string) - a certificate loaded into a string object +- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against +- `base64decode` (bool) - run .decode('base64') on the input? + +Returns: +A 3-tuple of the form: (certificate_common_name, certificate_expiry_date, certificate_time_remaining) + + """ + if base64decode: + _cert_string = cert_string.decode('base-64') + else: + _cert_string = cert_string + + cert_loaded = OpenSSL.crypto.load_certificate( + OpenSSL.crypto.FILETYPE_PEM, _cert_string) + + # Strip the subject down to just the value of the first name + cert_subject = cert_loaded.get_subject().get_components()[0][1] + + # Grab the expiration date + cert_expiry = cert_loaded.get_notAfter() + cert_expiry_date = datetime.datetime.strptime( + cert_expiry, + # example get_notAfter() => 20180922170439Z + '%Y%m%d%H%M%SZ') + + time_remaining = cert_expiry_date - now + + return (cert_subject, cert_expiry_date, time_remaining) + +def classify_cert(cert_meta, now, time_remaining, expire_window, cert_list): + """Given metadata about a certificate under examination, classify it + into one of three categories, 'ok', 'warning', and 'expired'. 
+ +Params: + +- `cert_meta` dict - A dict with certificate metadata. Required fields + include: 'cert_cn', 'path', 'expiry', 'days_remaining', 'health'. +- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against +- `time_remaining` (datetime.timedelta) - a timedelta for how long until the cert expires +- `expire_window` (datetime.timedelta) - a timedelta for how long the warning window is +- `cert_list` list - A list to shove the classified cert into + +Return: +- `cert_list` - The updated list of classified certificates + """ + expiry_str = str(cert_meta['expiry']) + # Categorization + if cert_meta['expiry'] < now: + # This already expired, must NOTIFY + cert_meta['health'] = 'expired' + elif time_remaining < expire_window: + # WARN about this upcoming expirations + cert_meta['health'] = 'warning' + else: + # Not expired or about to expire + cert_meta['health'] = 'ok' + + cert_meta['expiry'] = expiry_str + cert_list.append(cert_meta) + return cert_list + +def tabulate_summary(certificates, kubeconfigs): + """Calculate the summary text for when the module finishes +running. This includes counds of each classification and what have +you. + +Params: + +- `certificates` (list of dicts) - Processed `expire_check_result` + dicts with filled in `health` keys for system certificates. +- `kubeconfigs` (list of dicts) - Processed `expire_check_result` + dicts with filled in `health` keys for embedded kubeconfig + certificates. + +Return: +- `summary_results` (dict) - Counts of each cert/kubeconfig + classification and total items examined. 
+ """ + summary_results = { + 'system_certificates': len(certificates), + 'kubeconfig_certificates': len(kubeconfigs), + 'total': len(certificates + kubeconfigs), + 'ok': 0, + 'warning': 0, + 'expired': 0 + } + + items = certificates + kubeconfigs + summary_results['expired'] = len([c for c in items if c['health'] == 'expired']) + summary_results['warning'] = len([c for c in items if c['health'] == 'warning']) + summary_results['ok'] = len([c for c in items if c['health'] == 'ok']) + + return summary_results + + +###################################################################### +def main(): + module = AnsibleModule( + argument_spec=dict( + config_base=dict( + required=False, + default="/etc/origin", + type='str'), + warning_days=dict( + required=False, + default=int(30), + type='int'), + show_all=dict( + required=False, + default="False", + type='bool') + ), + supports_check_mode=True, + ) + + # Basic scaffolding for OpenShift spcific certs + openshift_base_config_path = module.params['config_base'] + openshift_master_config_path = os.path.normpath( + os.path.join(openshift_base_config_path, "master/master-config.yaml") + ) + openshift_node_config_path = os.path.normpath( + os.path.join(openshift_base_config_path, "node/node-config.yaml") + ) + openshift_cert_check_paths = [ + openshift_master_config_path, + openshift_node_config_path, + ] + + # Paths for Kubeconfigs. 
Additional kubeconfigs are conditionally checked later in the code + kubeconfig_paths = [ + os.path.normpath( + os.path.join(openshift_base_config_path, "master/admin.kubeconfig") + ), + os.path.normpath( + os.path.join(openshift_base_config_path, "master/openshift-master.kubeconfig") + ), + os.path.normpath( + os.path.join(openshift_base_config_path, "master/openshift-node.kubeconfig") + ), + os.path.normpath( + os.path.join(openshift_base_config_path, "master/openshift-router.kubeconfig") + ), + ] + + # Expiry checking stuff + now = datetime.datetime.now() + # todo, catch exception for invalid input and return a fail_json + warning_days = int(module.params['warning_days']) + expire_window = datetime.timedelta(days=warning_days) + + # Module stuff + # + # The results of our cert checking to return from the task call + check_results = {} + check_results['meta'] = {} + check_results['meta']['warning_days'] = warning_days + check_results['meta']['checked_at_time'] = str(now) + check_results['meta']['warn_after_date'] = str(now + expire_window) + check_results['meta']['show_all'] = str(module.params['show_all']) + # All the analyzed certs accumulate here + certs = [] + + ###################################################################### + # Sure, why not? Let's enable check mode. + if module.check_mode: + check_results['certs'] = [] + module.exit_json( + check_results=check_results, + msg="Checked 0 certificates. Expired/Warning/OK: 0/0/0. 
Warning window: %s days" % module.params['warning_days'], + rc=0, + changed=False + ) + + ###################################################################### + # Check for OpenShift Container Platform specific certs + ###################################################################### + for os_cert in filter_paths(openshift_cert_check_paths): + # Open up that config file and locate the cert and CA + with open(os_cert, 'r') as fp: + cert_meta = {} + cfg = yaml.load(fp) + # cert files are specified in parsed `fp` as relative to the path + # of the original config file. 'master-config.yaml' with certFile + # = 'foo.crt' implies that 'foo.crt' is in the same + # directory. certFile = '../foo.crt' is in the parent directory. + cfg_path = os.path.dirname(fp.name) + cert_meta['certFile'] = os.path.join(cfg_path, cfg['servingInfo']['certFile']) + cert_meta['clientCA'] = os.path.join(cfg_path, cfg['servingInfo']['clientCA']) + + ###################################################################### + # Load the certificate and the CA, parse their expiration dates into + # datetime objects so we can manipulate them later + for _, v in cert_meta.iteritems(): + with open(v, 'r') as fp: + cert = fp.read() + cert_subject, cert_expiry_date, time_remaining = load_and_handle_cert(cert, now) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': fp.name, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, certs) + + ###################################################################### + # /Check for OpenShift Container Platform specific certs + ###################################################################### + + ###################################################################### + # Check service Kubeconfigs + ###################################################################### + kubeconfigs = [] + + # There may be additional kubeconfigs to 
check, but their naming + # is less predictable than the ones we've already assembled. + + try: + # Try to read the standard 'node-config.yaml' file to check if + # this host is a node. + with open(openshift_node_config_path, 'r') as fp: + cfg = yaml.load(fp) + # OK, the config file exists, therefore this is a + # node. Nodes have their own kubeconfig files to + # communicate with the master API. Let's read the relative + # path to that file from the node config. + node_masterKubeConfig = cfg['masterKubeConfig'] + # As before, the path to the 'masterKubeConfig' file is + # relative to `fp` + cfg_path = os.path.dirname(fp.name) + node_kubeconfig = os.path.join(cfg_path, node_masterKubeConfig) + with open(node_kubeconfig, 'r') as fp: + # Read in the nodes kubeconfig file and grab the good stuff + cfg = yaml.load(fp) + c = cfg['users'][0]['user']['client-certificate-data'] + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': fp.name, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) + except Exception: + # This is not a node + pass + + for kube in filter_paths(kubeconfig_paths): + with open(kube, 'r') as fp: + # TODO: Maybe consider catching exceptions here? + cfg = yaml.load(fp) + # Per conversation, "the kubeconfigs you care about: + # admin, router, registry should all be single + # value". Following that advice we only grab the data for + # the user at index 0 in the 'users' list. There should + # not be more than one user. 
+ c = cfg['users'][0]['user']['client-certificate-data'] + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': fp.name, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) + + + ###################################################################### + # /Check service Kubeconfigs + ###################################################################### + res = tabulate_summary(certs, kubeconfigs) + + msg = "Checked {count} certificates and kubeconfigs. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format( + count=res['total'], + exp=res['expired'], + warn=res['warning'], + ok=res['ok'], + window=int(module.params['warning_days']), + ) + + # By default we only return detailed information about expired or + # warning certificates. If show_all is true then we will print all + # the certificates examined. + if not module.params['show_all']: + check_results['certs'] = filter(lambda ctr: ctr['health'] in ['expired', 'warning'], certs) + check_results['kubeconfigs'] = filter(lambda ctr: ctr['health'] in ['expired', 'warning'], kubeconfigs) + else: + check_results['certs'] = certs + check_results['kubeconfigs'] = kubeconfigs + + # Sort the final results to report in order of ascending safety + # time. That is to say, the certificates which will expire sooner + # will be at the front of the list and certificates which will + # expire later are at the end. 
+ check_results['certs'] = sorted(check_results['certs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) + check_results['kubeconfigs'] = sorted(check_results['kubeconfigs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) + # This module will never change anything, but we might want to + # change the return code parameter if there is some catastrophic + # error we noticed earlier + module.exit_json( + check_results=check_results, + summary=res, + msg=msg, + rc=0, + changed=False + ) + +###################################################################### +# import module snippets +from ansible.module_utils.basic import AnsibleModule +if __name__ == '__main__': + main() diff --git a/playbooks/byo/openshift-cluster/check-cert-expiry.yaml b/playbooks/byo/openshift-cluster/check-cert-expiry.yaml new file mode 100644 index 000000000..39efdbd36 --- /dev/null +++ b/playbooks/byo/openshift-cluster/check-cert-expiry.yaml @@ -0,0 +1,35 @@ +--- +# check-cert-expiry.yaml - A utility for cluster ops to scan through +# (critical) certificates for the ongoing operations of a cluster. + +# We do not support all Ansible versions. This is our safety net. +- include: ../../common/openshift-cluster/verify_ansible_version.yml + +- name: Generate the l_oo_all_hosts group + hosts: localhost + connection: local + become: no + gather_facts: no + tasks: + - include_vars: cluster_hosts.yml + - add_host: + name: "{{ item }}" + groups: l_oo_all_hosts + with_items: "{{ g_all_hosts | default([]) }}" + +# This may seem redundant, running `include_vars` again on the list of +# hosts in the group 'l_oo_all_hosts' which we just created. But the +# fact of the matter is that if we don't re-run include_vars on the +# new host group we created, then they will not have access to those +# same group variables they were birthed from. +# +# Go ahead and try to 'debug: var=g_all_hosts' later on (without this +# play) and you'll find that the result is UNDEFINED VARIABLE. 
+- name: Inject cluster hosts variables into l_oo_all_hosts + hosts: l_oo_all_hosts + gather_facts: no + tasks: + - include_vars: cluster_hosts.yml + +# This is where the actual business gets started: +- include: ../../common/openshift-cluster/check-cert-expiry.yaml diff --git a/playbooks/common/openshift-cluster/check-cert-expiry.yaml b/playbooks/common/openshift-cluster/check-cert-expiry.yaml new file mode 100644 index 000000000..e160383af --- /dev/null +++ b/playbooks/common/openshift-cluster/check-cert-expiry.yaml @@ -0,0 +1,37 @@ +# --- +# # This must be evaluated after cluster_hosts.yml has been evaluated on +# # 'localhost' to generate l_oo_all_hosts and then subsequently +# # evaluated again on the generated hosts group (l_oo_all_hosts). +# - include: evaluate_groups.yml +# tags: +# - always + +# # Initialize cluster facts for oo_all_hosts using the openshift_facts +# # role followed by the 'common' role +# - include: initialize_facts.yml +# tags: +# - always + +# # Get the version to install from the first master, then synchronize +# # that variable across all in oo_all_hosts +# - include: initialize_openshift_version.yml +# tags: +# - always + +# # Earlier 'initialize_facts' included the openshift_facts role and +# # used the openshift_facts module with the 'role' parameter set to +# # 'common'. Now we're applying the openshift_facts role AGAIN but just +# # to the subset of oo_all_hosts which require configuring. 
+# - name: Load openshift_facts +# hosts: oo_etcd_to_config:oo_masters_to_config:oo_nodes_to_config +# roles: +# - openshift_facts + +- hosts: all + become: yes + gather_facts: no + tasks: + - name: Check cert expirys on host + openshift_cert_expiry: + warning_days: 1500 + show_all: true diff --git a/utils/Makefile b/utils/Makefile index 59aff92fd..bc708964b 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -31,6 +31,8 @@ ASCII2MAN = a2x -D $(dir $@) -d manpage -f manpage $< MANPAGES := docs/man/man1/atomic-openshift-installer.1 VERSION := 1.3 +PEPEXCLUDES := E501,E121,E124 + sdist: clean python setup.py sdist rm -fR $(SHORTNAME).egg-info @@ -80,7 +82,7 @@ ci-pylint: @echo "#############################################" @echo "# Running PyLint Tests in virtualenv" @echo "#############################################" - . $(NAME)env/bin/activate && python -m pylint --rcfile ../git/.pylintrc src/ooinstall/cli_installer.py src/ooinstall/oo_config.py src/ooinstall/openshift_ansible.py src/ooinstall/variants.py ../callback_plugins/openshift_quick_installer.py + . $(NAME)env/bin/activate && python -m pylint --rcfile ../git/.pylintrc src/ooinstall/cli_installer.py src/ooinstall/oo_config.py src/ooinstall/openshift_ansible.py src/ooinstall/variants.py ../callback_plugins/openshift_quick_installer.py ../library/openshift_cert_expiry.py ci-list-deps: @echo "#############################################" @@ -94,13 +96,17 @@ ci-pyflakes: @echo "#################################################" . $(NAME)env/bin/activate && pyflakes src/ooinstall/*.py . $(NAME)env/bin/activate && pyflakes ../callback_plugins/openshift_quick_installer.py + . $(NAME)env/bin/activate && pyflakes ../library/openshift_cert_expiry.py ci-pep8: @echo "#############################################" @echo "# Running PEP8 Compliance Tests in virtualenv" @echo "#############################################" - . $(NAME)env/bin/activate && pep8 --ignore=E501,E121,E124 src/$(SHORTNAME)/ - . 
$(NAME)env/bin/activate && pep8 --ignore=E501,E121,E124 ../callback_plugins/openshift_quick_installer.py + . $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES) src/$(SHORTNAME)/ + . $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES) ../callback_plugins/openshift_quick_installer.py +# This one excludes E402 because it is an ansible module and the +# boilerplate import statement is expected to be at the bottom + . $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES),E402 ../library/openshift_cert_expiry.py ci: clean virtualenv ci-list-deps ci-pep8 ci-pylint ci-pyflakes ci-unittests : diff --git a/utils/test-requirements.txt b/utils/test-requirements.txt index f2216a177..af91ab6a7 100644 --- a/utils/test-requirements.txt +++ b/utils/test-requirements.txt @@ -9,3 +9,4 @@ flake8 PyYAML click backports.functools_lru_cache +pyOpenSSL -- cgit v1.2.1 From 5f7f6a6023c470337f0d879f55eb619fd63e2dbe Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Wed, 5 Oct 2016 15:38:43 -0700 Subject: Support etcd certs now. Fix lint. Generate HTML report. 
--- library/openshift_cert_expiry.py | 210 ++++++++++++++------- .../openshift-cluster/check-cert-expiry.yaml | 9 +- .../templates/cert-expiry-table.html.j2 | 110 +++++++++++ 3 files changed, 264 insertions(+), 65 deletions(-) create mode 100644 playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 diff --git a/library/openshift_cert_expiry.py b/library/openshift_cert_expiry.py index cd8662f67..4e66de755 100644 --- a/library/openshift_cert_expiry.py +++ b/library/openshift_cert_expiry.py @@ -1,5 +1,8 @@ #!/usr/bin/python # -*- coding: utf-8 -*- +# pylint: disable=line-too-long,invalid-name + +"""For details on this module see DOCUMENTATION (below)""" # etcd config file import ConfigParser @@ -66,18 +69,23 @@ EXAMPLES = ''' ''' -###################################################################### -# etcd does not begin their config file with an opening [section] as -# required by the Python ConfigParser module. We hack around it by -# slipping one in ourselves prior to parsing. +# We only need this for one thing, we don't care if it doesn't have +# that many public methods # -# Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583 +# pylint: disable=too-few-public-methods class FakeSecHead(object): + """etcd does not begin their config file with an opening [section] as +required by the Python ConfigParser module. We hack around it by +slipping one in ourselves prior to parsing. + +Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583 + """ def __init__(self, fp): self.fp = fp self.sechead = '[ETCD]\n' def readline(self): + """Make this look like a file-type object""" if self.sechead: try: return self.sechead @@ -86,14 +94,15 @@ class FakeSecHead(object): else: return self.fp.readline() + ###################################################################### def filter_paths(path_list): - # `path_list` - A list of file paths to check. 
Only files which - # exist will be returned - return filter( - lambda p: os.path.exists(os.path.realpath(p)), - path_list) + """`path_list` - A list of file paths to check. Only files which exist +will be returned + """ + return [p for p in path_list if os.path.exists(os.path.realpath(p))] + def load_and_handle_cert(cert_string, now, base64decode=False): """Load a certificate, split off the good parts, and return some @@ -131,6 +140,7 @@ A 3-tuple of the form: (certificate_common_name, certificate_expiry_date, certif return (cert_subject, cert_expiry_date, time_remaining) + def classify_cert(cert_meta, now, time_remaining, expire_window, cert_list): """Given metadata about a certificate under examination, classify it into one of three categories, 'ok', 'warning', and 'expired'. @@ -163,7 +173,8 @@ Return: cert_list.append(cert_meta) return cert_list -def tabulate_summary(certificates, kubeconfigs): + +def tabulate_summary(certificates, kubeconfigs, etcd_certs): """Calculate the summary text for when the module finishes running. This includes counds of each classification and what have you. @@ -172,24 +183,25 @@ Params: - `certificates` (list of dicts) - Processed `expire_check_result` dicts with filled in `health` keys for system certificates. -- `kubeconfigs` (list of dicts) - Processed `expire_check_result` - dicts with filled in `health` keys for embedded kubeconfig - certificates. - +- `kubeconfigs` - as above for kubeconfigs +- `etcd_certs` - as above for etcd certs Return: -- `summary_results` (dict) - Counts of each cert/kubeconfig - classification and total items examined. + +- `summary_results` (dict) - Counts of each cert type classification + and total items examined. 
""" + items = certificates + kubeconfigs + etcd_certs + summary_results = { 'system_certificates': len(certificates), 'kubeconfig_certificates': len(kubeconfigs), - 'total': len(certificates + kubeconfigs), + 'etcd_certificates': len(etcd_certs), + 'total': len(items), 'ok': 0, 'warning': 0, 'expired': 0 } - items = certificates + kubeconfigs summary_results['expired'] = len([c for c in items if c['health'] == 'expired']) summary_results['warning'] = len([c for c in items if c['health'] == 'warning']) summary_results['ok'] = len([c for c in items if c['health'] == 'ok']) @@ -198,7 +210,15 @@ Return: ###################################################################### +# This is our module MAIN function after all, so there's bound to be a +# lot of code bundled up into one block +# +# pylint: disable=too-many-locals,too-many-locals,too-many-statements def main(): + """This module examines certificates (in various forms) which compose +an OpenShift Container Platform cluster + """ + module = AnsibleModule( argument_spec=dict( config_base=dict( @@ -223,7 +243,7 @@ def main(): os.path.join(openshift_base_config_path, "master/master-config.yaml") ) openshift_node_config_path = os.path.normpath( - os.path.join(openshift_base_config_path, "node/node-config.yaml") + os.path.join(openshift_base_config_path, "node/node-config.yaml") ) openshift_cert_check_paths = [ openshift_master_config_path, @@ -246,6 +266,14 @@ def main(): ), ] + # etcd, where do you hide your certs? 
Used when parsing etcd.conf + etcd_cert_params = [ + "ETCD_CA_FILE", + "ETCD_CERT_FILE", + "ETCD_PEER_CA_FILE", + "ETCD_PEER_CERT_FILE", + ] + # Expiry checking stuff now = datetime.datetime.now() # todo, catch exception for invalid input and return a fail_json @@ -262,15 +290,15 @@ def main(): check_results['meta']['warn_after_date'] = str(now + expire_window) check_results['meta']['show_all'] = str(module.params['show_all']) # All the analyzed certs accumulate here - certs = [] + ocp_certs = [] ###################################################################### # Sure, why not? Let's enable check mode. if module.check_mode: - check_results['certs'] = [] + check_results['ocp_certs'] = [] module.exit_json( check_results=check_results, - msg="Checked 0 certificates. Expired/Warning/OK: 0/0/0. Warning window: %s days" % module.params['warning_days'], + msg="Checked 0 total certificates. Expired/Warning/OK: 0/0/0. Warning window: %s days" % module.params['warning_days'], rc=0, changed=False ) @@ -307,7 +335,7 @@ def main(): 'health': None, } - classify_cert(expire_check_result, now, time_remaining, expire_window, certs) + classify_cert(expire_check_result, now, time_remaining, expire_window, ocp_certs) ###################################################################### # /Check for OpenShift Container Platform specific certs @@ -326,33 +354,36 @@ def main(): # this host is a node. with open(openshift_node_config_path, 'r') as fp: cfg = yaml.load(fp) - # OK, the config file exists, therefore this is a - # node. Nodes have their own kubeconfig files to - # communicate with the master API. Let's read the relative - # path to that file from the node config. - node_masterKubeConfig = cfg['masterKubeConfig'] - # As before, the path to the 'masterKubeConfig' file is - # relative to `fp` - cfg_path = os.path.dirname(fp.name) - node_kubeconfig = os.path.join(cfg_path, node_masterKubeConfig) + + # OK, the config file exists, therefore this is a + # node. 
Nodes have their own kubeconfig files to + # communicate with the master API. Let's read the relative + # path to that file from the node config. + node_masterKubeConfig = cfg['masterKubeConfig'] + # As before, the path to the 'masterKubeConfig' file is + # relative to `fp` + cfg_path = os.path.dirname(fp.name) + node_kubeconfig = os.path.join(cfg_path, node_masterKubeConfig) + with open(node_kubeconfig, 'r') as fp: # Read in the nodes kubeconfig file and grab the good stuff cfg = yaml.load(fp) - c = cfg['users'][0]['user']['client-certificate-data'] - (cert_subject, - cert_expiry_date, - time_remaining) = load_and_handle_cert(c, now, base64decode=True) - - expire_check_result = { - 'cert_cn': cert_subject, - 'path': fp.name, - 'expiry': cert_expiry_date, - 'days_remaining': time_remaining.days, - 'health': None, - } - classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) - except Exception: + c = cfg['users'][0]['user']['client-certificate-data'] + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': fp.name, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) + except IOError: # This is not a node pass @@ -360,15 +391,60 @@ def main(): with open(kube, 'r') as fp: # TODO: Maybe consider catching exceptions here? cfg = yaml.load(fp) - # Per conversation, "the kubeconfigs you care about: - # admin, router, registry should all be single - # value". Following that advice we only grab the data for - # the user at index 0 in the 'users' list. There should - # not be more than one user. - c = cfg['users'][0]['user']['client-certificate-data'] + + # Per conversation, "the kubeconfigs you care about: + # admin, router, registry should all be single + # value". 
Following that advice we only grab the data for + # the user at index 0 in the 'users' list. There should + # not be more than one user. + c = cfg['users'][0]['user']['client-certificate-data'] + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': fp.name, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) + + ###################################################################### + # /Check service Kubeconfigs + ###################################################################### + + ###################################################################### + # Check etcd certs + ###################################################################### + # Some values may be duplicated, make this a set for now so we + # unique them all + etcd_certs_to_check = set([]) + etcd_certs = [] + etcd_cert_params.append('dne') + try: + with open('/etc/etcd/etcd.conf', 'r') as fp: + etcd_config = ConfigParser.ConfigParser() + etcd_config.readfp(FakeSecHead(fp)) + + for param in etcd_cert_params: + try: + etcd_certs_to_check.add(etcd_config.get('ETCD', param)) + except ConfigParser.NoOptionError: + # That parameter does not exist, oh well... 
+ pass + except IOError: + # No etcd to see here, move along + pass + + for etcd_cert in filter_paths(etcd_certs_to_check): + with open(etcd_cert, 'r') as fp: + c = fp.read() (cert_subject, cert_expiry_date, - time_remaining) = load_and_handle_cert(c, now, base64decode=True) + time_remaining) = load_and_handle_cert(c, now) expire_check_result = { 'cert_cn': cert_subject, @@ -378,15 +454,15 @@ def main(): 'health': None, } - classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) - + classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs) ###################################################################### - # /Check service Kubeconfigs + # /Check etcd certs ###################################################################### - res = tabulate_summary(certs, kubeconfigs) - msg = "Checked {count} certificates and kubeconfigs. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format( + res = tabulate_summary(ocp_certs, kubeconfigs, etcd_certs) + + msg = "Checked {count} total certificates. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format( count=res['total'], exp=res['expired'], warn=res['warning'], @@ -398,18 +474,22 @@ def main(): # warning certificates. If show_all is true then we will print all # the certificates examined. 
if not module.params['show_all']: - check_results['certs'] = filter(lambda ctr: ctr['health'] in ['expired', 'warning'], certs) - check_results['kubeconfigs'] = filter(lambda ctr: ctr['health'] in ['expired', 'warning'], kubeconfigs) + check_results['ocp_certs'] = [crt for crt in ocp_certs if crt['health'] in ['expired', 'warning']] + check_results['kubeconfigs'] = [crt for crt in kubeconfigs if crt['health'] in ['expired', 'warning']] + check_results['etcd'] = [crt for crt in etcd_certs if crt['health'] in ['expired', 'warning']] else: - check_results['certs'] = certs + check_results['ocp_certs'] = ocp_certs check_results['kubeconfigs'] = kubeconfigs + check_results['etcd'] = etcd_certs # Sort the final results to report in order of ascending safety # time. That is to say, the certificates which will expire sooner # will be at the front of the list and certificates which will # expire later are at the end. - check_results['certs'] = sorted(check_results['certs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) + check_results['ocp_certs'] = sorted(check_results['ocp_certs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) check_results['kubeconfigs'] = sorted(check_results['kubeconfigs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) + check_results['etcd'] = sorted(check_results['etcd'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) + # This module will never change anything, but we might want to # change the return code parameter if there is some catastrophic # error we noticed earlier @@ -422,7 +502,9 @@ def main(): ) ###################################################################### -# import module snippets +# It's just the way we do things in Ansible. 
So disable this warning +# +# pylint: disable=wrong-import-position,import-error from ansible.module_utils.basic import AnsibleModule if __name__ == '__main__': main() diff --git a/playbooks/common/openshift-cluster/check-cert-expiry.yaml b/playbooks/common/openshift-cluster/check-cert-expiry.yaml index e160383af..b585fd849 100644 --- a/playbooks/common/openshift-cluster/check-cert-expiry.yaml +++ b/playbooks/common/openshift-cluster/check-cert-expiry.yaml @@ -34,4 +34,11 @@ - name: Check cert expirys on host openshift_cert_expiry: warning_days: 1500 - show_all: true + register: check_results + - name: Generate html + become: no + run_once: yes + template: + src: templates/cert-expiry-table.html.j2 + dest: /tmp/cert-table.html + delegate_to: localhost diff --git a/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 b/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 new file mode 100644 index 000000000..da7844c37 --- /dev/null +++ b/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 @@ -0,0 +1,110 @@ + + + + + OCP Certificate Expiry Report + {# For fancy icons #} + + + + + +

OCP Certificate Expiry Report

+ +
+ + {# Each host has a header and table to itself #} + {% for host in play_hosts %} +

{{ host }}

+ +

+ {{ hostvars[host].check_results.msg }} +

+
    +
  • Expirations checked at: {{ hostvars[host].check_results.check_results.meta.checked_at_time }}
  • +
  • Warn after date: {{ hostvars[host].check_results.check_results.meta.warn_after_date }}
  • +
+ + + {# These are hard-coded right now, but should be grabbed dynamically from the registered results #} + {%- for kind in ['ocp_certs', 'etcd', 'kubeconfigs'] -%} + + + + + + + + + + + + + + {# A row for each certificate examined #} + {%- for v in hostvars[host].check_results.check_results[kind] -%} + + {# Let's add some flair and show status visually with fancy icons #} + {% if v.health == 'ok' %} + {% set health_icon = 'glyphicon glyphicon-ok' %} + {% elif v.health == 'warning' %} + {% set health_icon = 'glyphicon glyphicon-alert' %} + {% else %} + {% set health_icon = 'glyphicon glyphicon-remove' %} + {% endif %} + + + + + + + + + + {% endfor %} + {# end row generation per cert of this type #} + {% endfor %} + {# end generation for each kind of cert block #} +

{{ kind }}

 Certificate Common NameHealthDays RemainingExpiration DatePath
{{ v.cert_cn }}{{ v.health }}{{ v.days_remaining }}{{ v.expiry }}{{ v.path }}
+
+ {% endfor %} + {# end section generation for each host #} + +
+

+ Expiration report generated by openshift-ansible +

+

+ Status icons from bootstrap/glyphicon +

+
+ + -- cgit v1.2.1 From 4273b21105dd11f52de354b4777d33e4296ba7e0 Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Thu, 6 Oct 2016 10:01:48 -0700 Subject: Get router/registry certs. Collect common names and subjectAltNames --- library/openshift_cert_expiry.py | 167 ++++++++++++++++++--- .../templates/cert-expiry-table.html.j2 | 66 ++++---- 2 files changed, 186 insertions(+), 47 deletions(-) diff --git a/library/openshift_cert_expiry.py b/library/openshift_cert_expiry.py index 4e66de755..f18ab75d0 100644 --- a/library/openshift_cert_expiry.py +++ b/library/openshift_cert_expiry.py @@ -4,6 +4,8 @@ """For details on this module see DOCUMENTATION (below)""" +# router/registry cert grabbing +import subprocess # etcd config file import ConfigParser # Expiration parsing @@ -15,7 +17,6 @@ import yaml # Certificate loading import OpenSSL.crypto - DOCUMENTATION = ''' --- module: openshift_cert_expiry @@ -126,8 +127,59 @@ A 3-tuple of the form: (certificate_common_name, certificate_expiry_date, certif cert_loaded = OpenSSL.crypto.load_certificate( OpenSSL.crypto.FILETYPE_PEM, _cert_string) + ###################################################################### + # Read just the first name from the cert - DISABLED while testing + # out the 'get all possible names' function (below) + # # Strip the subject down to just the value of the first name - cert_subject = cert_loaded.get_subject().get_components()[0][1] + # cert_subject = cert_loaded.get_subject().get_components()[0][1] + + ###################################################################### + # Read all possible names from the cert + cert_subjects = [] + for name, value in cert_loaded.get_subject().get_components(): + cert_subjects.append('{}:{}'.format(name, value)) + + # To read SANs from a cert we must read the subjectAltName + # extension from the X509 Object. What makes this more difficult + # is that pyOpenSSL does not give extensions as a list, nor does + # it provide a count of all loaded extensions. 
+ # + # Rather, extensions are REQUESTED by index. We must iterate over + # all extensions until we find the one called 'subjectAltName'. If + # we don't find that extension we'll eventually request an + # extension at an index where no extension exists (IndexError is + # raised). When that happens we know that the cert has no SANs so + # we break out of the loop. + i = 0 + checked_all_extensions = False + while not checked_all_extensions: + try: + # Read the extension at index 'i' + ext = cert_loaded.get_extension(i) + except IndexError: + # We tried to read an extension but it isn't there, that + # means we ran out of extensions to check. Abort + san = None + checked_all_extensions = True + else: + # We were able to load the extension at index 'i' + if ext.get_short_name() == 'subjectAltName': + san = ext + checked_all_extensions = True + else: + # Try reading the next extension + i += 1 + + if san is not None: + # The X509Extension object for subjectAltName prints as a + # string with the alt names separated by a comma and a + # space. Split the string by ', ' and then add our new names + # to the list of existing names + cert_subjects.extend(str(san).split(', ')) + + cert_subject = ', '.join(cert_subjects) + ###################################################################### # Grab the expiration date cert_expiry = cert_loaded.get_notAfter() @@ -174,7 +226,7 @@ Return: return cert_list -def tabulate_summary(certificates, kubeconfigs, etcd_certs): +def tabulate_summary(certificates, kubeconfigs, etcd_certs, router_certs, registry_certs): """Calculate the summary text for when the module finishes running. This includes counds of each classification and what have you. @@ -190,12 +242,14 @@ Return: - `summary_results` (dict) - Counts of each cert type classification and total items examined. 
""" - items = certificates + kubeconfigs + etcd_certs + items = certificates + kubeconfigs + etcd_certs + router_certs + registry_certs summary_results = { 'system_certificates': len(certificates), 'kubeconfig_certificates': len(kubeconfigs), 'etcd_certificates': len(etcd_certs), + 'router_certs': len(router_certs), + 'registry_certs': len(registry_certs), 'total': len(items), 'ok': 0, 'warning': 0, @@ -213,7 +267,7 @@ Return: # This is our module MAIN function after all, so there's bound to be a # lot of code bundled up into one block # -# pylint: disable=too-many-locals,too-many-locals,too-many-statements +# pylint: disable=too-many-locals,too-many-locals,too-many-statements,too-many-branches def main(): """This module examines certificates (in various forms) which compose an OpenShift Container Platform cluster @@ -250,21 +304,19 @@ an OpenShift Container Platform cluster openshift_node_config_path, ] - # Paths for Kubeconfigs. Additional kubeconfigs are conditionally checked later in the code - kubeconfig_paths = [ - os.path.normpath( - os.path.join(openshift_base_config_path, "master/admin.kubeconfig") - ), - os.path.normpath( - os.path.join(openshift_base_config_path, "master/openshift-master.kubeconfig") - ), - os.path.normpath( - os.path.join(openshift_base_config_path, "master/openshift-node.kubeconfig") - ), - os.path.normpath( - os.path.join(openshift_base_config_path, "master/openshift-router.kubeconfig") - ), - ] + # Paths for Kubeconfigs. Additional kubeconfigs are conditionally + # checked later in the code + master_kube_configs = ['admin', 'openshift-master', + 'openshift-node', 'openshift-router', + 'openshift-registry'] + + kubeconfig_paths = [] + for m_kube_config in master_kube_configs: + kubeconfig_paths.append( + os.path.normpath( + os.path.join(openshift_base_config_path, "master/%s.kubeconfig" % m_kube_config) + ) + ) # etcd, where do you hide your certs? 
Used when parsing etcd.conf etcd_cert_params = [ @@ -460,7 +512,80 @@ an OpenShift Container Platform cluster # /Check etcd certs ###################################################################### - res = tabulate_summary(ocp_certs, kubeconfigs, etcd_certs) + ###################################################################### + # Check router/registry certs + # + # These are saved as secrets in etcd. That means that we can not + # simply read a file to grab the data. Instead we're going to + # subprocess out to the 'oc get' command. On non-masters this + # command will fail, that is expected so we catch that exception. + ###################################################################### + router_certs = [] + registry_certs = [] + + ###################################################################### + # First the router certs + try: + router_secrets_raw = subprocess.Popen('oc get secret router-certs -o yaml'.split(), + stdout=subprocess.PIPE) + router_ds = yaml.load(router_secrets_raw.communicate()[0]) + router_c = router_ds['data']['tls.crt'] + router_path = router_ds['metadata']['selfLink'] + except TypeError: + # YAML couldn't load the result, this is not a master + pass + else: + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(router_c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': router_path, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, router_certs) + + check_results['router'] = router_certs + + ###################################################################### + # Now for registry + # registry_secrets = subprocess.call('oc get secret registry-certificates -o yaml'.split()) + # out = subprocess.PIPE + try: + registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(), + stdout=subprocess.PIPE) + registry_ds = 
yaml.load(registry_secrets_raw.communicate()[0]) + registry_c = registry_ds['data']['registry.crt'] + registry_path = registry_ds['metadata']['selfLink'] + except TypeError: + # YAML couldn't load the result, this is not a master + pass + else: + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(registry_c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': registry_path, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, registry_certs) + + check_results['registry'] = registry_certs + + ###################################################################### + # /Check router/registry certs + ###################################################################### + + res = tabulate_summary(ocp_certs, kubeconfigs, etcd_certs, router_certs, registry_certs) msg = "Checked {count} total certificates. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format( count=res['total'], diff --git a/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 b/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 index da7844c37..f74d7f1ce 100644 --- a/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 +++ b/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 @@ -3,7 +3,7 @@ OCP Certificate Expiry Report - {# For fancy icons #} + {# For fancy icons and a pleasing font #} -

OCP Certificate Expiry Report

- -
+ {# Each host has a header and table to itself #} {% for host in play_hosts %}

{{ host }}

- {{ hostvars[host].check_results.msg }} + {{ hostvars[host].check_results.msg }}

    -
  • Expirations checked at: {{ hostvars[host].check_results.check_results.meta.checked_at_time }}
  • -
  • Warn after date: {{ hostvars[host].check_results.check_results.meta.warn_after_date }}
  • +
  • Expirations checked at: {{ hostvars[host].check_results.check_results.meta.checked_at_time }}
  • +
  • Warn after date: {{ hostvars[host].check_results.check_results.meta.warn_after_date }}
{# These are hard-coded right now, but should be grabbed dynamically from the registered results #} - {%- for kind in ['ocp_certs', 'etcd', 'kubeconfigs'] -%} + {%- for kind in ['ocp_certs', 'etcd', 'kubeconfigs', 'router', 'registry'] -%} - - + + - {# A row for each certificate examined #} + {# A row for each certificate examined #} {%- for v in hostvars[host].check_results.check_results[kind] -%} - {# Let's add some flair and show status visually with fancy icons #} - {% if v.health == 'ok' %} - {% set health_icon = 'glyphicon glyphicon-ok' %} - {% elif v.health == 'warning' %} - {% set health_icon = 'glyphicon glyphicon-alert' %} - {% else %} - {% set health_icon = 'glyphicon glyphicon-remove' %} - {% endif %} + {# Let's add some flair and show status visually with fancy icons #} + {% if v.health == 'ok' %} + {% set health_icon = 'glyphicon glyphicon-ok' %} + {% elif v.health == 'warning' %} + {% set health_icon = 'glyphicon glyphicon-alert' %} + {% else %} + {% set health_icon = 'glyphicon glyphicon-remove' %} + {% endif %} - - - + + + {% endfor %} - {# end row generation per cert of this type #} + {# end row generation per cert of this type #} {% endfor %} - {# end generation for each kind of cert block #} + {# end generation for each kind of cert block #}

{{ kind }}

 Certificate Common Name Certificate Common/Alt Name(s) Health Days Remaining Expiration Date Path
{{ v.cert_cn }}
{{ v.cert_cn }} {{ v.health }} {{ v.days_remaining }} {{ v.expiry }} {{ v.path }}

{% endfor %} @@ -100,10 +114,10 @@

- Expiration report generated by openshift-ansible + Expiration report generated by openshift-ansible

- Status icons from bootstrap/glyphicon + Status icons from bootstrap/glyphicon

-- cgit v1.2.1 From a22e8daf1f7c93469023239f442f19608ffbc970 Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Fri, 7 Oct 2016 09:55:13 -0700 Subject: Refactor into a role --- library/openshift_cert_expiry.py | 635 --------------------- .../byo/openshift-cluster/check-cert-expiry.yaml | 35 -- .../openshift-cluster/check-cert-expiry.yaml | 44 -- .../templates/cert-expiry-table.html.j2 | 124 ---- roles/openshift_certificate_expiry/README.md | 63 ++ .../openshift_certificate_expiry/defaults/main.yml | 6 + .../library/openshift_cert_expiry.py | 635 +++++++++++++++++++++ roles/openshift_certificate_expiry/meta/main.yml | 15 + roles/openshift_certificate_expiry/tasks/main.yml | 23 + .../templates/cert-expiry-table.html.j2 | 124 ++++ .../templates/save_json_results.j2 | 5 + 11 files changed, 871 insertions(+), 838 deletions(-) delete mode 100644 library/openshift_cert_expiry.py delete mode 100644 playbooks/byo/openshift-cluster/check-cert-expiry.yaml delete mode 100644 playbooks/common/openshift-cluster/check-cert-expiry.yaml delete mode 100644 playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 create mode 100644 roles/openshift_certificate_expiry/README.md create mode 100644 roles/openshift_certificate_expiry/defaults/main.yml create mode 100644 roles/openshift_certificate_expiry/library/openshift_cert_expiry.py create mode 100644 roles/openshift_certificate_expiry/meta/main.yml create mode 100644 roles/openshift_certificate_expiry/tasks/main.yml create mode 100644 roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 create mode 100644 roles/openshift_certificate_expiry/templates/save_json_results.j2 diff --git a/library/openshift_cert_expiry.py b/library/openshift_cert_expiry.py deleted file mode 100644 index f18ab75d0..000000000 --- a/library/openshift_cert_expiry.py +++ /dev/null @@ -1,635 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# pylint: disable=line-too-long,invalid-name - -"""For details on this module see 
DOCUMENTATION (below)""" - -# router/registry cert grabbing -import subprocess -# etcd config file -import ConfigParser -# Expiration parsing -import datetime -# File path stuff -import os -# Config file parsing -import yaml -# Certificate loading -import OpenSSL.crypto - -DOCUMENTATION = ''' ---- -module: openshift_cert_expiry -short_description: Check OpenShift Container Platform (OCP) and Kube certificate expirations on a cluster -description: - - The M(openshift_cert_expiry) module has two basic functions: to flag certificates which will expire in a set window of time from now, and to notify you about certificates which have already expired. - - When the module finishes, a summary of the examination is returned. Each certificate in the summary has a C(health) key with a value of one of the following: - - C(ok) - not expired, and outside of the expiration C(warning_days) window. - - C(warning) - not expired, but will expire between now and the C(warning_days) window. - - C(expired) - an expired certificate. - - Certificate flagging follow this logic: - - If the expiration date is before now then the certificate is classified as C(expired). - - The certificates time to live (expiration date - now) is calculated, if that time window is less than C(warning_days) the certificate is classified as C(warning). - - All other conditions are classified as C(ok). - - The following keys are ALSO present in the certificate summary: - - C(cert_cn) - The common name of the certificate (additional CNs present in SAN extensions are omitted) - - C(days_remaining) - The number of days until the certificate expires. - - C(expiry) - The date the certificate expires on. - - C(path) - The full path to the certificate on the examined host. -version_added: "0.0" -options: - config_base: - description: - - Base path to OCP system settings. - required: false - default: /etc/origin - warning_days: - description: - - Flag certificates which will expire in C(warning_days) days from now. 
- required: false - default: 30 - show_all: - description: - - Enable this option to show analysis of ALL certificates examined by this module. - - By default only certificates which have expired, or will expire within the C(warning_days) window will be reported. - required: false - default: false - -author: "Tim Bielawa (@tbielawa) " -''' - -EXAMPLES = ''' -# Default invocation, only notify about expired certificates or certificates which will expire within 30 days from now -- openshift_cert_expiry: - -# Expand the warning window to show certificates expiring within a year from now -- openshift_cert_expiry: warning_days=365 - -# Show expired, soon to expire (now + 30 days), and all other certificates examined -- openshift_cert_expiry: show_all=true -''' - - -# We only need this for one thing, we don't care if it doesn't have -# that many public methods -# -# pylint: disable=too-few-public-methods -class FakeSecHead(object): - """etcd does not begin their config file with an opening [section] as -required by the Python ConfigParser module. We hack around it by -slipping one in ourselves prior to parsing. - -Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583 - """ - def __init__(self, fp): - self.fp = fp - self.sechead = '[ETCD]\n' - - def readline(self): - """Make this look like a file-type object""" - if self.sechead: - try: - return self.sechead - finally: - self.sechead = None - else: - return self.fp.readline() - - -###################################################################### - -def filter_paths(path_list): - """`path_list` - A list of file paths to check. 
Only files which exist -will be returned - """ - return [p for p in path_list if os.path.exists(os.path.realpath(p))] - - -def load_and_handle_cert(cert_string, now, base64decode=False): - """Load a certificate, split off the good parts, and return some -useful data - -Params: - -- `cert_string` (string) - a certificate loaded into a string object -- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against -- `base64decode` (bool) - run .decode('base64') on the input? - -Returns: -A 3-tuple of the form: (certificate_common_name, certificate_expiry_date, certificate_time_remaining) - - """ - if base64decode: - _cert_string = cert_string.decode('base-64') - else: - _cert_string = cert_string - - cert_loaded = OpenSSL.crypto.load_certificate( - OpenSSL.crypto.FILETYPE_PEM, _cert_string) - - ###################################################################### - # Read just the first name from the cert - DISABLED while testing - # out the 'get all possible names' function (below) - # - # Strip the subject down to just the value of the first name - # cert_subject = cert_loaded.get_subject().get_components()[0][1] - - ###################################################################### - # Read all possible names from the cert - cert_subjects = [] - for name, value in cert_loaded.get_subject().get_components(): - cert_subjects.append('{}:{}'.format(name, value)) - - # To read SANs from a cert we must read the subjectAltName - # extension from the X509 Object. What makes this more difficult - # is that pyOpenSSL does not give extensions as a list, nor does - # it provide a count of all loaded extensions. - # - # Rather, extensions are REQUESTED by index. We must iterate over - # all extensions until we find the one called 'subjectAltName'. If - # we don't find that extension we'll eventually request an - # extension at an index where no extension exists (IndexError is - # raised). 
When that happens we know that the cert has no SANs so - # we break out of the loop. - i = 0 - checked_all_extensions = False - while not checked_all_extensions: - try: - # Read the extension at index 'i' - ext = cert_loaded.get_extension(i) - except IndexError: - # We tried to read an extension but it isn't there, that - # means we ran out of extensions to check. Abort - san = None - checked_all_extensions = True - else: - # We were able to load the extension at index 'i' - if ext.get_short_name() == 'subjectAltName': - san = ext - checked_all_extensions = True - else: - # Try reading the next extension - i += 1 - - if san is not None: - # The X509Extension object for subjectAltName prints as a - # string with the alt names separated by a comma and a - # space. Split the string by ', ' and then add our new names - # to the list of existing names - cert_subjects.extend(str(san).split(', ')) - - cert_subject = ', '.join(cert_subjects) - ###################################################################### - - # Grab the expiration date - cert_expiry = cert_loaded.get_notAfter() - cert_expiry_date = datetime.datetime.strptime( - cert_expiry, - # example get_notAfter() => 20180922170439Z - '%Y%m%d%H%M%SZ') - - time_remaining = cert_expiry_date - now - - return (cert_subject, cert_expiry_date, time_remaining) - - -def classify_cert(cert_meta, now, time_remaining, expire_window, cert_list): - """Given metadata about a certificate under examination, classify it - into one of three categories, 'ok', 'warning', and 'expired'. - -Params: - -- `cert_meta` dict - A dict with certificate metadata. Required fields - include: 'cert_cn', 'path', 'expiry', 'days_remaining', 'health'. 
-- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against -- `time_remaining` (datetime.timedelta) - a timedelta for how long until the cert expires -- `expire_window` (datetime.timedelta) - a timedelta for how long the warning window is -- `cert_list` list - A list to shove the classified cert into - -Return: -- `cert_list` - The updated list of classified certificates - """ - expiry_str = str(cert_meta['expiry']) - # Categorization - if cert_meta['expiry'] < now: - # This already expired, must NOTIFY - cert_meta['health'] = 'expired' - elif time_remaining < expire_window: - # WARN about this upcoming expirations - cert_meta['health'] = 'warning' - else: - # Not expired or about to expire - cert_meta['health'] = 'ok' - - cert_meta['expiry'] = expiry_str - cert_list.append(cert_meta) - return cert_list - - -def tabulate_summary(certificates, kubeconfigs, etcd_certs, router_certs, registry_certs): - """Calculate the summary text for when the module finishes -running. This includes counds of each classification and what have -you. - -Params: - -- `certificates` (list of dicts) - Processed `expire_check_result` - dicts with filled in `health` keys for system certificates. -- `kubeconfigs` - as above for kubeconfigs -- `etcd_certs` - as above for etcd certs -Return: - -- `summary_results` (dict) - Counts of each cert type classification - and total items examined. 
- """ - items = certificates + kubeconfigs + etcd_certs + router_certs + registry_certs - - summary_results = { - 'system_certificates': len(certificates), - 'kubeconfig_certificates': len(kubeconfigs), - 'etcd_certificates': len(etcd_certs), - 'router_certs': len(router_certs), - 'registry_certs': len(registry_certs), - 'total': len(items), - 'ok': 0, - 'warning': 0, - 'expired': 0 - } - - summary_results['expired'] = len([c for c in items if c['health'] == 'expired']) - summary_results['warning'] = len([c for c in items if c['health'] == 'warning']) - summary_results['ok'] = len([c for c in items if c['health'] == 'ok']) - - return summary_results - - -###################################################################### -# This is our module MAIN function after all, so there's bound to be a -# lot of code bundled up into one block -# -# pylint: disable=too-many-locals,too-many-locals,too-many-statements,too-many-branches -def main(): - """This module examines certificates (in various forms) which compose -an OpenShift Container Platform cluster - """ - - module = AnsibleModule( - argument_spec=dict( - config_base=dict( - required=False, - default="/etc/origin", - type='str'), - warning_days=dict( - required=False, - default=int(30), - type='int'), - show_all=dict( - required=False, - default="False", - type='bool') - ), - supports_check_mode=True, - ) - - # Basic scaffolding for OpenShift spcific certs - openshift_base_config_path = module.params['config_base'] - openshift_master_config_path = os.path.normpath( - os.path.join(openshift_base_config_path, "master/master-config.yaml") - ) - openshift_node_config_path = os.path.normpath( - os.path.join(openshift_base_config_path, "node/node-config.yaml") - ) - openshift_cert_check_paths = [ - openshift_master_config_path, - openshift_node_config_path, - ] - - # Paths for Kubeconfigs. 
Additional kubeconfigs are conditionally - # checked later in the code - master_kube_configs = ['admin', 'openshift-master', - 'openshift-node', 'openshift-router', - 'openshift-registry'] - - kubeconfig_paths = [] - for m_kube_config in master_kube_configs: - kubeconfig_paths.append( - os.path.normpath( - os.path.join(openshift_base_config_path, "master/%s.kubeconfig" % m_kube_config) - ) - ) - - # etcd, where do you hide your certs? Used when parsing etcd.conf - etcd_cert_params = [ - "ETCD_CA_FILE", - "ETCD_CERT_FILE", - "ETCD_PEER_CA_FILE", - "ETCD_PEER_CERT_FILE", - ] - - # Expiry checking stuff - now = datetime.datetime.now() - # todo, catch exception for invalid input and return a fail_json - warning_days = int(module.params['warning_days']) - expire_window = datetime.timedelta(days=warning_days) - - # Module stuff - # - # The results of our cert checking to return from the task call - check_results = {} - check_results['meta'] = {} - check_results['meta']['warning_days'] = warning_days - check_results['meta']['checked_at_time'] = str(now) - check_results['meta']['warn_after_date'] = str(now + expire_window) - check_results['meta']['show_all'] = str(module.params['show_all']) - # All the analyzed certs accumulate here - ocp_certs = [] - - ###################################################################### - # Sure, why not? Let's enable check mode. - if module.check_mode: - check_results['ocp_certs'] = [] - module.exit_json( - check_results=check_results, - msg="Checked 0 total certificates. Expired/Warning/OK: 0/0/0. 
Warning window: %s days" % module.params['warning_days'], - rc=0, - changed=False - ) - - ###################################################################### - # Check for OpenShift Container Platform specific certs - ###################################################################### - for os_cert in filter_paths(openshift_cert_check_paths): - # Open up that config file and locate the cert and CA - with open(os_cert, 'r') as fp: - cert_meta = {} - cfg = yaml.load(fp) - # cert files are specified in parsed `fp` as relative to the path - # of the original config file. 'master-config.yaml' with certFile - # = 'foo.crt' implies that 'foo.crt' is in the same - # directory. certFile = '../foo.crt' is in the parent directory. - cfg_path = os.path.dirname(fp.name) - cert_meta['certFile'] = os.path.join(cfg_path, cfg['servingInfo']['certFile']) - cert_meta['clientCA'] = os.path.join(cfg_path, cfg['servingInfo']['clientCA']) - - ###################################################################### - # Load the certificate and the CA, parse their expiration dates into - # datetime objects so we can manipulate them later - for _, v in cert_meta.iteritems(): - with open(v, 'r') as fp: - cert = fp.read() - cert_subject, cert_expiry_date, time_remaining = load_and_handle_cert(cert, now) - - expire_check_result = { - 'cert_cn': cert_subject, - 'path': fp.name, - 'expiry': cert_expiry_date, - 'days_remaining': time_remaining.days, - 'health': None, - } - - classify_cert(expire_check_result, now, time_remaining, expire_window, ocp_certs) - - ###################################################################### - # /Check for OpenShift Container Platform specific certs - ###################################################################### - - ###################################################################### - # Check service Kubeconfigs - ###################################################################### - kubeconfigs = [] - - # There may be additional kubeconfigs 
to check, but their naming - # is less predictable than the ones we've already assembled. - - try: - # Try to read the standard 'node-config.yaml' file to check if - # this host is a node. - with open(openshift_node_config_path, 'r') as fp: - cfg = yaml.load(fp) - - # OK, the config file exists, therefore this is a - # node. Nodes have their own kubeconfig files to - # communicate with the master API. Let's read the relative - # path to that file from the node config. - node_masterKubeConfig = cfg['masterKubeConfig'] - # As before, the path to the 'masterKubeConfig' file is - # relative to `fp` - cfg_path = os.path.dirname(fp.name) - node_kubeconfig = os.path.join(cfg_path, node_masterKubeConfig) - - with open(node_kubeconfig, 'r') as fp: - # Read in the nodes kubeconfig file and grab the good stuff - cfg = yaml.load(fp) - - c = cfg['users'][0]['user']['client-certificate-data'] - (cert_subject, - cert_expiry_date, - time_remaining) = load_and_handle_cert(c, now, base64decode=True) - - expire_check_result = { - 'cert_cn': cert_subject, - 'path': fp.name, - 'expiry': cert_expiry_date, - 'days_remaining': time_remaining.days, - 'health': None, - } - - classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) - except IOError: - # This is not a node - pass - - for kube in filter_paths(kubeconfig_paths): - with open(kube, 'r') as fp: - # TODO: Maybe consider catching exceptions here? - cfg = yaml.load(fp) - - # Per conversation, "the kubeconfigs you care about: - # admin, router, registry should all be single - # value". Following that advice we only grab the data for - # the user at index 0 in the 'users' list. There should - # not be more than one user. 
- c = cfg['users'][0]['user']['client-certificate-data'] - (cert_subject, - cert_expiry_date, - time_remaining) = load_and_handle_cert(c, now, base64decode=True) - - expire_check_result = { - 'cert_cn': cert_subject, - 'path': fp.name, - 'expiry': cert_expiry_date, - 'days_remaining': time_remaining.days, - 'health': None, - } - - classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) - - ###################################################################### - # /Check service Kubeconfigs - ###################################################################### - - ###################################################################### - # Check etcd certs - ###################################################################### - # Some values may be duplicated, make this a set for now so we - # unique them all - etcd_certs_to_check = set([]) - etcd_certs = [] - etcd_cert_params.append('dne') - try: - with open('/etc/etcd/etcd.conf', 'r') as fp: - etcd_config = ConfigParser.ConfigParser() - etcd_config.readfp(FakeSecHead(fp)) - - for param in etcd_cert_params: - try: - etcd_certs_to_check.add(etcd_config.get('ETCD', param)) - except ConfigParser.NoOptionError: - # That parameter does not exist, oh well... 
- pass - except IOError: - # No etcd to see here, move along - pass - - for etcd_cert in filter_paths(etcd_certs_to_check): - with open(etcd_cert, 'r') as fp: - c = fp.read() - (cert_subject, - cert_expiry_date, - time_remaining) = load_and_handle_cert(c, now) - - expire_check_result = { - 'cert_cn': cert_subject, - 'path': fp.name, - 'expiry': cert_expiry_date, - 'days_remaining': time_remaining.days, - 'health': None, - } - - classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs) - - ###################################################################### - # /Check etcd certs - ###################################################################### - - ###################################################################### - # Check router/registry certs - # - # These are saved as secrets in etcd. That means that we can not - # simply read a file to grab the data. Instead we're going to - # subprocess out to the 'oc get' command. On non-masters this - # command will fail, that is expected so we catch that exception. 
- ###################################################################### - router_certs = [] - registry_certs = [] - - ###################################################################### - # First the router certs - try: - router_secrets_raw = subprocess.Popen('oc get secret router-certs -o yaml'.split(), - stdout=subprocess.PIPE) - router_ds = yaml.load(router_secrets_raw.communicate()[0]) - router_c = router_ds['data']['tls.crt'] - router_path = router_ds['metadata']['selfLink'] - except TypeError: - # YAML couldn't load the result, this is not a master - pass - else: - (cert_subject, - cert_expiry_date, - time_remaining) = load_and_handle_cert(router_c, now, base64decode=True) - - expire_check_result = { - 'cert_cn': cert_subject, - 'path': router_path, - 'expiry': cert_expiry_date, - 'days_remaining': time_remaining.days, - 'health': None, - } - - classify_cert(expire_check_result, now, time_remaining, expire_window, router_certs) - - check_results['router'] = router_certs - - ###################################################################### - # Now for registry - # registry_secrets = subprocess.call('oc get secret registry-certificates -o yaml'.split()) - # out = subprocess.PIPE - try: - registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(), - stdout=subprocess.PIPE) - registry_ds = yaml.load(registry_secrets_raw.communicate()[0]) - registry_c = registry_ds['data']['registry.crt'] - registry_path = registry_ds['metadata']['selfLink'] - except TypeError: - # YAML couldn't load the result, this is not a master - pass - else: - (cert_subject, - cert_expiry_date, - time_remaining) = load_and_handle_cert(registry_c, now, base64decode=True) - - expire_check_result = { - 'cert_cn': cert_subject, - 'path': registry_path, - 'expiry': cert_expiry_date, - 'days_remaining': time_remaining.days, - 'health': None, - } - - classify_cert(expire_check_result, now, time_remaining, expire_window, registry_certs) - - 
check_results['registry'] = registry_certs - - ###################################################################### - # /Check router/registry certs - ###################################################################### - - res = tabulate_summary(ocp_certs, kubeconfigs, etcd_certs, router_certs, registry_certs) - - msg = "Checked {count} total certificates. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format( - count=res['total'], - exp=res['expired'], - warn=res['warning'], - ok=res['ok'], - window=int(module.params['warning_days']), - ) - - # By default we only return detailed information about expired or - # warning certificates. If show_all is true then we will print all - # the certificates examined. - if not module.params['show_all']: - check_results['ocp_certs'] = [crt for crt in ocp_certs if crt['health'] in ['expired', 'warning']] - check_results['kubeconfigs'] = [crt for crt in kubeconfigs if crt['health'] in ['expired', 'warning']] - check_results['etcd'] = [crt for crt in etcd_certs if crt['health'] in ['expired', 'warning']] - else: - check_results['ocp_certs'] = ocp_certs - check_results['kubeconfigs'] = kubeconfigs - check_results['etcd'] = etcd_certs - - # Sort the final results to report in order of ascending safety - # time. That is to say, the certificates which will expire sooner - # will be at the front of the list and certificates which will - # expire later are at the end. 
- check_results['ocp_certs'] = sorted(check_results['ocp_certs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) - check_results['kubeconfigs'] = sorted(check_results['kubeconfigs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) - check_results['etcd'] = sorted(check_results['etcd'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) - - # This module will never change anything, but we might want to - # change the return code parameter if there is some catastrophic - # error we noticed earlier - module.exit_json( - check_results=check_results, - summary=res, - msg=msg, - rc=0, - changed=False - ) - -###################################################################### -# It's just the way we do things in Ansible. So disable this warning -# -# pylint: disable=wrong-import-position,import-error -from ansible.module_utils.basic import AnsibleModule -if __name__ == '__main__': - main() diff --git a/playbooks/byo/openshift-cluster/check-cert-expiry.yaml b/playbooks/byo/openshift-cluster/check-cert-expiry.yaml deleted file mode 100644 index 39efdbd36..000000000 --- a/playbooks/byo/openshift-cluster/check-cert-expiry.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# check-cert-expiry.yaml - A utility for cluster ops to scan through -# (critical) certificates for the ongoing operations of a cluster. - -# We do not support all Ansible versions. This is our safety net. -- include: ../../common/openshift-cluster/verify_ansible_version.yml - -- name: Generate the l_oo_all_hosts group - hosts: localhost - connection: local - become: no - gather_facts: no - tasks: - - include_vars: cluster_hosts.yml - - add_host: - name: "{{ item }}" - groups: l_oo_all_hosts - with_items: "{{ g_all_hosts | default([]) }}" - -# This may seem redundant, running `include_vars` again on the list of -# hosts in the group 'l_oo_all_hosts' which we just created. 
But the -# fact of the matter is that if we don't re-run include_vars on the -# new host group we created, then they will not have access to those -# same group variables they were birthed from. -# -# Go ahead and try to 'debug: var=g_all_hosts' later on (without this -# play) and you'll find that the result is UNDEFINED VARIABLE. -- name: Inject cluster hosts variables into l_oo_all_hosts - hosts: l_oo_all_hosts - gather_facts: no - tasks: - - include_vars: cluster_hosts.yml - -# This is where the actual business gets started: -- include: ../../common/openshift-cluster/check-cert-expiry.yaml diff --git a/playbooks/common/openshift-cluster/check-cert-expiry.yaml b/playbooks/common/openshift-cluster/check-cert-expiry.yaml deleted file mode 100644 index b585fd849..000000000 --- a/playbooks/common/openshift-cluster/check-cert-expiry.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# --- -# # This must be evaluated after cluster_hosts.yml has been evaluated on -# # 'localhost' to generate l_oo_all_hosts and then subsequently -# # evaluated again on the generated hosts group (l_oo_all_hosts). -# - include: evaluate_groups.yml -# tags: -# - always - -# # Initialize cluster facts for oo_all_hosts using the openshift_facts -# # role followed by the 'common' role -# - include: initialize_facts.yml -# tags: -# - always - -# # Get the version to install from the first master, then synchronize -# # that variable across all in oo_all_hosts -# - include: initialize_openshift_version.yml -# tags: -# - always - -# # Earlier 'initialize_facts' included the openshift_facts role and -# # used the openshift_facts module with the 'role' parameter set to -# # 'common'. Now we're applying the openshift_facts role AGAIN but just -# # to the subset of oo_all_hosts which require configuring. 
-# - name: Load openshift_facts -# hosts: oo_etcd_to_config:oo_masters_to_config:oo_nodes_to_config -# roles: -# - openshift_facts - -- hosts: all - become: yes - gather_facts: no - tasks: - - name: Check cert expirys on host - openshift_cert_expiry: - warning_days: 1500 - register: check_results - - name: Generate html - become: no - run_once: yes - template: - src: templates/cert-expiry-table.html.j2 - dest: /tmp/cert-table.html - delegate_to: localhost diff --git a/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 b/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 deleted file mode 100644 index f74d7f1ce..000000000 --- a/playbooks/common/openshift-cluster/templates/cert-expiry-table.html.j2 +++ /dev/null @@ -1,124 +0,0 @@ - - - - - OCP Certificate Expiry Report - {# For fancy icons and a pleasing font #} - - - - - - - - {# Each host has a header and table to itself #} - {% for host in play_hosts %} -

{{ host }}

- -

- {{ hostvars[host].check_results.msg }} -

-
    -
  • Expirations checked at: {{ hostvars[host].check_results.check_results.meta.checked_at_time }}
  • -
  • Warn after date: {{ hostvars[host].check_results.check_results.meta.warn_after_date }}
  • -
- - - {# These are hard-coded right now, but should be grabbed dynamically from the registered results #} - {%- for kind in ['ocp_certs', 'etcd', 'kubeconfigs', 'router', 'registry'] -%} - - - - - - - - - - - - - - {# A row for each certificate examined #} - {%- for v in hostvars[host].check_results.check_results[kind] -%} - - {# Let's add some flair and show status visually with fancy icons #} - {% if v.health == 'ok' %} - {% set health_icon = 'glyphicon glyphicon-ok' %} - {% elif v.health == 'warning' %} - {% set health_icon = 'glyphicon glyphicon-alert' %} - {% else %} - {% set health_icon = 'glyphicon glyphicon-remove' %} - {% endif %} - - - - - - - - - - {% endfor %} - {# end row generation per cert of this type #} - {% endfor %} - {# end generation for each kind of cert block #} -

{{ kind }}

 Certificate Common/Alt Name(s)HealthDays RemainingExpiration DatePath
{{ v.cert_cn }}{{ v.health }}{{ v.days_remaining }}{{ v.expiry }}{{ v.path }}
-
- {% endfor %} - {# end section generation for each host #} - -
-

- Expiration report generated by openshift-ansible -

-

- Status icons from bootstrap/glyphicon -

-
- - diff --git a/roles/openshift_certificate_expiry/README.md b/roles/openshift_certificate_expiry/README.md new file mode 100644 index 000000000..75970c7a0 --- /dev/null +++ b/roles/openshift_certificate_expiry/README.md @@ -0,0 +1,63 @@ +OpenShift Certificate Expiration Checker +======================================== + +OpenShift certificate expiration checking. Be warned of certificates +expiring within a configurable window of days, and notified of +certificates which have already expired. Certificates examined +include: + +* Master/Node Service Certificates +* Router/Registry Service Certificates from etcd secrets +* Master/Node/Router/Registry/Admin `kubeconfig`s +* Etcd certificates + + + +Requirements +------------ + +* None + + +Role Variables +-------------- + +From this role: + +| Name | Default value | Description | +|--------------------------|---------------|-------------------------------------------------------------------------------------| +| `config_base` | `/etc/origin` | Base openshift config directory | +| `warning_days` | `30` | Flag certificates which will expire in this many days from now | +| `show_all` | `False` | Include healthy (non-expired and non-warning) certificates in results | +| `generate_report` | `False` | Generate an HTML report of the expiry check results | +| `save_json_results` | `False` | Save expiry check results as a json file | +| `result_dir` | `/tmp` | Directory in which to put check results and generated reports | + + +Dependencies +------------ + +* None + +Example Playbook +---------------- + +``` +- name: Check cert expirys + hosts: all + become: yes + gather_facts: no + roles: + - role: openshift_certificate_expiry +``` + + +License +------- + +Apache License, Version 2.0 + +Author Information +------------------ + +Tim Bielawa (tbielawa@redhat.com) diff --git a/roles/openshift_certificate_expiry/defaults/main.yml b/roles/openshift_certificate_expiry/defaults/main.yml new file mode 100644 index 
000000000..5c077f450 --- /dev/null +++ b/roles/openshift_certificate_expiry/defaults/main.yml @@ -0,0 +1,6 @@ +--- +config_base: "/etc/origin" +warning_days: 30 +show_all: false +generate_report: false +result_dir: "/tmp" diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py new file mode 100644 index 000000000..f18ab75d0 --- /dev/null +++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py @@ -0,0 +1,635 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# pylint: disable=line-too-long,invalid-name + +"""For details on this module see DOCUMENTATION (below)""" + +# router/registry cert grabbing +import subprocess +# etcd config file +import ConfigParser +# Expiration parsing +import datetime +# File path stuff +import os +# Config file parsing +import yaml +# Certificate loading +import OpenSSL.crypto + +DOCUMENTATION = ''' +--- +module: openshift_cert_expiry +short_description: Check OpenShift Container Platform (OCP) and Kube certificate expirations on a cluster +description: + - The M(openshift_cert_expiry) module has two basic functions: to flag certificates which will expire in a set window of time from now, and to notify you about certificates which have already expired. + - When the module finishes, a summary of the examination is returned. Each certificate in the summary has a C(health) key with a value of one of the following: + - C(ok) - not expired, and outside of the expiration C(warning_days) window. + - C(warning) - not expired, but will expire between now and the C(warning_days) window. + - C(expired) - an expired certificate. + - Certificate flagging follow this logic: + - If the expiration date is before now then the certificate is classified as C(expired). + - The certificates time to live (expiration date - now) is calculated, if that time window is less than C(warning_days) the certificate is classified as C(warning). 
+ - All other conditions are classified as C(ok). + - The following keys are ALSO present in the certificate summary: + - C(cert_cn) - The common name of the certificate (additional CNs present in SAN extensions are omitted) + - C(days_remaining) - The number of days until the certificate expires. + - C(expiry) - The date the certificate expires on. + - C(path) - The full path to the certificate on the examined host. +version_added: "0.0" +options: + config_base: + description: + - Base path to OCP system settings. + required: false + default: /etc/origin + warning_days: + description: + - Flag certificates which will expire in C(warning_days) days from now. + required: false + default: 30 + show_all: + description: + - Enable this option to show analysis of ALL certificates examined by this module. + - By default only certificates which have expired, or will expire within the C(warning_days) window will be reported. + required: false + default: false + +author: "Tim Bielawa (@tbielawa) " +''' + +EXAMPLES = ''' +# Default invocation, only notify about expired certificates or certificates which will expire within 30 days from now +- openshift_cert_expiry: + +# Expand the warning window to show certificates expiring within a year from now +- openshift_cert_expiry: warning_days=365 + +# Show expired, soon to expire (now + 30 days), and all other certificates examined +- openshift_cert_expiry: show_all=true +''' + + +# We only need this for one thing, we don't care if it doesn't have +# that many public methods +# +# pylint: disable=too-few-public-methods +class FakeSecHead(object): + """etcd does not begin their config file with an opening [section] as +required by the Python ConfigParser module. We hack around it by +slipping one in ourselves prior to parsing. 
+ +Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583 + """ + def __init__(self, fp): + self.fp = fp + self.sechead = '[ETCD]\n' + + def readline(self): + """Make this look like a file-type object""" + if self.sechead: + try: + return self.sechead + finally: + self.sechead = None + else: + return self.fp.readline() + + +###################################################################### + +def filter_paths(path_list): + """`path_list` - A list of file paths to check. Only files which exist +will be returned + """ + return [p for p in path_list if os.path.exists(os.path.realpath(p))] + + +def load_and_handle_cert(cert_string, now, base64decode=False): + """Load a certificate, split off the good parts, and return some +useful data + +Params: + +- `cert_string` (string) - a certificate loaded into a string object +- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against +- `base64decode` (bool) - run .decode('base64') on the input? 
+ +Returns: +A 3-tuple of the form: (certificate_common_name, certificate_expiry_date, certificate_time_remaining) + + """ + if base64decode: + _cert_string = cert_string.decode('base-64') + else: + _cert_string = cert_string + + cert_loaded = OpenSSL.crypto.load_certificate( + OpenSSL.crypto.FILETYPE_PEM, _cert_string) + + ###################################################################### + # Read just the first name from the cert - DISABLED while testing + # out the 'get all possible names' function (below) + # + # Strip the subject down to just the value of the first name + # cert_subject = cert_loaded.get_subject().get_components()[0][1] + + ###################################################################### + # Read all possible names from the cert + cert_subjects = [] + for name, value in cert_loaded.get_subject().get_components(): + cert_subjects.append('{}:{}'.format(name, value)) + + # To read SANs from a cert we must read the subjectAltName + # extension from the X509 Object. What makes this more difficult + # is that pyOpenSSL does not give extensions as a list, nor does + # it provide a count of all loaded extensions. + # + # Rather, extensions are REQUESTED by index. We must iterate over + # all extensions until we find the one called 'subjectAltName'. If + # we don't find that extension we'll eventually request an + # extension at an index where no extension exists (IndexError is + # raised). When that happens we know that the cert has no SANs so + # we break out of the loop. + i = 0 + checked_all_extensions = False + while not checked_all_extensions: + try: + # Read the extension at index 'i' + ext = cert_loaded.get_extension(i) + except IndexError: + # We tried to read an extension but it isn't there, that + # means we ran out of extensions to check. 
Abort + san = None + checked_all_extensions = True + else: + # We were able to load the extension at index 'i' + if ext.get_short_name() == 'subjectAltName': + san = ext + checked_all_extensions = True + else: + # Try reading the next extension + i += 1 + + if san is not None: + # The X509Extension object for subjectAltName prints as a + # string with the alt names separated by a comma and a + # space. Split the string by ', ' and then add our new names + # to the list of existing names + cert_subjects.extend(str(san).split(', ')) + + cert_subject = ', '.join(cert_subjects) + ###################################################################### + + # Grab the expiration date + cert_expiry = cert_loaded.get_notAfter() + cert_expiry_date = datetime.datetime.strptime( + cert_expiry, + # example get_notAfter() => 20180922170439Z + '%Y%m%d%H%M%SZ') + + time_remaining = cert_expiry_date - now + + return (cert_subject, cert_expiry_date, time_remaining) + + +def classify_cert(cert_meta, now, time_remaining, expire_window, cert_list): + """Given metadata about a certificate under examination, classify it + into one of three categories, 'ok', 'warning', and 'expired'. + +Params: + +- `cert_meta` dict - A dict with certificate metadata. Required fields + include: 'cert_cn', 'path', 'expiry', 'days_remaining', 'health'. 
+- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against +- `time_remaining` (datetime.timedelta) - a timedelta for how long until the cert expires +- `expire_window` (datetime.timedelta) - a timedelta for how long the warning window is +- `cert_list` list - A list to shove the classified cert into + +Return: +- `cert_list` - The updated list of classified certificates + """ + expiry_str = str(cert_meta['expiry']) + # Categorization + if cert_meta['expiry'] < now: + # This already expired, must NOTIFY + cert_meta['health'] = 'expired' + elif time_remaining < expire_window: + # WARN about this upcoming expiration + cert_meta['health'] = 'warning' + else: + # Not expired or about to expire + cert_meta['health'] = 'ok' + + cert_meta['expiry'] = expiry_str + cert_list.append(cert_meta) + return cert_list + + +def tabulate_summary(certificates, kubeconfigs, etcd_certs, router_certs, registry_certs): + """Calculate the summary text for when the module finishes +running. This includes counts of each classification and what have +you. + +Params: + +- `certificates` (list of dicts) - Processed `expire_check_result` + dicts with filled in `health` keys for system certificates. +- `kubeconfigs` - as above for kubeconfigs +- `etcd_certs` - as above for etcd certs +Return: + +- `summary_results` (dict) - Counts of each cert type classification + and total items examined. 
+ """ + items = certificates + kubeconfigs + etcd_certs + router_certs + registry_certs + + summary_results = { + 'system_certificates': len(certificates), + 'kubeconfig_certificates': len(kubeconfigs), + 'etcd_certificates': len(etcd_certs), + 'router_certs': len(router_certs), + 'registry_certs': len(registry_certs), + 'total': len(items), + 'ok': 0, + 'warning': 0, + 'expired': 0 + } + + summary_results['expired'] = len([c for c in items if c['health'] == 'expired']) + summary_results['warning'] = len([c for c in items if c['health'] == 'warning']) + summary_results['ok'] = len([c for c in items if c['health'] == 'ok']) + + return summary_results + + +###################################################################### +# This is our module MAIN function after all, so there's bound to be a +# lot of code bundled up into one block +# +# pylint: disable=too-many-locals,too-many-locals,too-many-statements,too-many-branches +def main(): + """This module examines certificates (in various forms) which compose +an OpenShift Container Platform cluster + """ + + module = AnsibleModule( + argument_spec=dict( + config_base=dict( + required=False, + default="/etc/origin", + type='str'), + warning_days=dict( + required=False, + default=int(30), + type='int'), + show_all=dict( + required=False, + default="False", + type='bool') + ), + supports_check_mode=True, + ) + + # Basic scaffolding for OpenShift specific certs + openshift_base_config_path = module.params['config_base'] + openshift_master_config_path = os.path.normpath( + os.path.join(openshift_base_config_path, "master/master-config.yaml") + ) + openshift_node_config_path = os.path.normpath( + os.path.join(openshift_base_config_path, "node/node-config.yaml") + ) + openshift_cert_check_paths = [ + openshift_master_config_path, + openshift_node_config_path, + ] + + # Paths for Kubeconfigs. 
Additional kubeconfigs are conditionally + # checked later in the code + master_kube_configs = ['admin', 'openshift-master', + 'openshift-node', 'openshift-router', + 'openshift-registry'] + + kubeconfig_paths = [] + for m_kube_config in master_kube_configs: + kubeconfig_paths.append( + os.path.normpath( + os.path.join(openshift_base_config_path, "master/%s.kubeconfig" % m_kube_config) + ) + ) + + # etcd, where do you hide your certs? Used when parsing etcd.conf + etcd_cert_params = [ + "ETCD_CA_FILE", + "ETCD_CERT_FILE", + "ETCD_PEER_CA_FILE", + "ETCD_PEER_CERT_FILE", + ] + + # Expiry checking stuff + now = datetime.datetime.now() + # todo, catch exception for invalid input and return a fail_json + warning_days = int(module.params['warning_days']) + expire_window = datetime.timedelta(days=warning_days) + + # Module stuff + # + # The results of our cert checking to return from the task call + check_results = {} + check_results['meta'] = {} + check_results['meta']['warning_days'] = warning_days + check_results['meta']['checked_at_time'] = str(now) + check_results['meta']['warn_after_date'] = str(now + expire_window) + check_results['meta']['show_all'] = str(module.params['show_all']) + # All the analyzed certs accumulate here + ocp_certs = [] + + ###################################################################### + # Sure, why not? Let's enable check mode. + if module.check_mode: + check_results['ocp_certs'] = [] + module.exit_json( + check_results=check_results, + msg="Checked 0 total certificates. Expired/Warning/OK: 0/0/0. 
Warning window: %s days" % module.params['warning_days'], + rc=0, + changed=False + ) + + ###################################################################### + # Check for OpenShift Container Platform specific certs + ###################################################################### + for os_cert in filter_paths(openshift_cert_check_paths): + # Open up that config file and locate the cert and CA + with open(os_cert, 'r') as fp: + cert_meta = {} + cfg = yaml.load(fp) + # cert files are specified in parsed `fp` as relative to the path + # of the original config file. 'master-config.yaml' with certFile + # = 'foo.crt' implies that 'foo.crt' is in the same + # directory. certFile = '../foo.crt' is in the parent directory. + cfg_path = os.path.dirname(fp.name) + cert_meta['certFile'] = os.path.join(cfg_path, cfg['servingInfo']['certFile']) + cert_meta['clientCA'] = os.path.join(cfg_path, cfg['servingInfo']['clientCA']) + + ###################################################################### + # Load the certificate and the CA, parse their expiration dates into + # datetime objects so we can manipulate them later + for _, v in cert_meta.iteritems(): + with open(v, 'r') as fp: + cert = fp.read() + cert_subject, cert_expiry_date, time_remaining = load_and_handle_cert(cert, now) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': fp.name, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, ocp_certs) + + ###################################################################### + # /Check for OpenShift Container Platform specific certs + ###################################################################### + + ###################################################################### + # Check service Kubeconfigs + ###################################################################### + kubeconfigs = [] + + # There may be additional kubeconfigs 
to check, but their naming + # is less predictable than the ones we've already assembled. + + try: + # Try to read the standard 'node-config.yaml' file to check if + # this host is a node. + with open(openshift_node_config_path, 'r') as fp: + cfg = yaml.load(fp) + + # OK, the config file exists, therefore this is a + # node. Nodes have their own kubeconfig files to + # communicate with the master API. Let's read the relative + # path to that file from the node config. + node_masterKubeConfig = cfg['masterKubeConfig'] + # As before, the path to the 'masterKubeConfig' file is + # relative to `fp` + cfg_path = os.path.dirname(fp.name) + node_kubeconfig = os.path.join(cfg_path, node_masterKubeConfig) + + with open(node_kubeconfig, 'r') as fp: + # Read in the nodes kubeconfig file and grab the good stuff + cfg = yaml.load(fp) + + c = cfg['users'][0]['user']['client-certificate-data'] + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': fp.name, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) + except IOError: + # This is not a node + pass + + for kube in filter_paths(kubeconfig_paths): + with open(kube, 'r') as fp: + # TODO: Maybe consider catching exceptions here? + cfg = yaml.load(fp) + + # Per conversation, "the kubeconfigs you care about: + # admin, router, registry should all be single + # value". Following that advice we only grab the data for + # the user at index 0 in the 'users' list. There should + # not be more than one user. 
+ c = cfg['users'][0]['user']['client-certificate-data'] + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': fp.name, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs) + + ###################################################################### + # /Check service Kubeconfigs + ###################################################################### + + ###################################################################### + # Check etcd certs + ###################################################################### + # Some values may be duplicated, make this a set for now so we + # unique them all + etcd_certs_to_check = set([]) + etcd_certs = [] + etcd_cert_params.append('dne') + try: + with open('/etc/etcd/etcd.conf', 'r') as fp: + etcd_config = ConfigParser.ConfigParser() + etcd_config.readfp(FakeSecHead(fp)) + + for param in etcd_cert_params: + try: + etcd_certs_to_check.add(etcd_config.get('ETCD', param)) + except ConfigParser.NoOptionError: + # That parameter does not exist, oh well... 
+ pass + except IOError: + # No etcd to see here, move along + pass + + for etcd_cert in filter_paths(etcd_certs_to_check): + with open(etcd_cert, 'r') as fp: + c = fp.read() + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(c, now) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': fp.name, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs) + + ###################################################################### + # /Check etcd certs + ###################################################################### + + ###################################################################### + # Check router/registry certs + # + # These are saved as secrets in etcd. That means that we can not + # simply read a file to grab the data. Instead we're going to + # subprocess out to the 'oc get' command. On non-masters this + # command will fail, that is expected so we catch that exception. 
+ ###################################################################### + router_certs = [] + registry_certs = [] + + ###################################################################### + # First the router certs + try: + router_secrets_raw = subprocess.Popen('oc get secret router-certs -o yaml'.split(), + stdout=subprocess.PIPE) + router_ds = yaml.load(router_secrets_raw.communicate()[0]) + router_c = router_ds['data']['tls.crt'] + router_path = router_ds['metadata']['selfLink'] + except TypeError: + # YAML couldn't load the result, this is not a master + pass + else: + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(router_c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': router_path, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, router_certs) + + check_results['router'] = router_certs + + ###################################################################### + # Now for registry + # registry_secrets = subprocess.call('oc get secret registry-certificates -o yaml'.split()) + # out = subprocess.PIPE + try: + registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(), + stdout=subprocess.PIPE) + registry_ds = yaml.load(registry_secrets_raw.communicate()[0]) + registry_c = registry_ds['data']['registry.crt'] + registry_path = registry_ds['metadata']['selfLink'] + except TypeError: + # YAML couldn't load the result, this is not a master + pass + else: + (cert_subject, + cert_expiry_date, + time_remaining) = load_and_handle_cert(registry_c, now, base64decode=True) + + expire_check_result = { + 'cert_cn': cert_subject, + 'path': registry_path, + 'expiry': cert_expiry_date, + 'days_remaining': time_remaining.days, + 'health': None, + } + + classify_cert(expire_check_result, now, time_remaining, expire_window, registry_certs) + + 
check_results['registry'] = registry_certs + + ###################################################################### + # /Check router/registry certs + ###################################################################### + + res = tabulate_summary(ocp_certs, kubeconfigs, etcd_certs, router_certs, registry_certs) + + msg = "Checked {count} total certificates. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format( + count=res['total'], + exp=res['expired'], + warn=res['warning'], + ok=res['ok'], + window=int(module.params['warning_days']), + ) + + # By default we only return detailed information about expired or + # warning certificates. If show_all is true then we will print all + # the certificates examined. + if not module.params['show_all']: + check_results['ocp_certs'] = [crt for crt in ocp_certs if crt['health'] in ['expired', 'warning']] + check_results['kubeconfigs'] = [crt for crt in kubeconfigs if crt['health'] in ['expired', 'warning']] + check_results['etcd'] = [crt for crt in etcd_certs if crt['health'] in ['expired', 'warning']] + else: + check_results['ocp_certs'] = ocp_certs + check_results['kubeconfigs'] = kubeconfigs + check_results['etcd'] = etcd_certs + + # Sort the final results to report in order of ascending safety + # time. That is to say, the certificates which will expire sooner + # will be at the front of the list and certificates which will + # expire later are at the end. 
+ check_results['ocp_certs'] = sorted(check_results['ocp_certs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) + check_results['kubeconfigs'] = sorted(check_results['kubeconfigs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) + check_results['etcd'] = sorted(check_results['etcd'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) + + # This module will never change anything, but we might want to + # change the return code parameter if there is some catastrophic + # error we noticed earlier + module.exit_json( + check_results=check_results, + summary=res, + msg=msg, + rc=0, + changed=False + ) + +###################################################################### +# It's just the way we do things in Ansible. So disable this warning +# +# pylint: disable=wrong-import-position,import-error +from ansible.module_utils.basic import AnsibleModule +if __name__ == '__main__': + main() diff --git a/roles/openshift_certificate_expiry/meta/main.yml b/roles/openshift_certificate_expiry/meta/main.yml new file mode 100644 index 000000000..dd66299ea --- /dev/null +++ b/roles/openshift_certificate_expiry/meta/main.yml @@ -0,0 +1,15 @@ +--- +galaxy_info: + author: Tim Bielawa + description: OpenShift Certificate Expiry Checker + company: Red Hat, Inc. 
+ license: Apache License, Version 2.0 + min_ansible_version: 2.1 + platforms: + - name: EL + versions: + - 7 + categories: + - cloud + - system +dependencies: [] diff --git a/roles/openshift_certificate_expiry/tasks/main.yml b/roles/openshift_certificate_expiry/tasks/main.yml new file mode 100644 index 000000000..def7d1284 --- /dev/null +++ b/roles/openshift_certificate_expiry/tasks/main.yml @@ -0,0 +1,23 @@ +--- +- name: Check cert expirys on host + openshift_cert_expiry: + warning_days: 1500 + register: check_results + +- name: Generate html + become: no + run_once: yes + template: + src: cert-expiry-table.html.j2 + dest: /tmp/cert-table.html + delegate_to: localhost + when: generate_report + +- name: Generate JSON + become: no + run_once: yes + template: + src: save_json_results.j2 + dest: /tmp/cert-expiry-results.json + delegate_to: localhost + when: save_json_results diff --git a/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 b/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 new file mode 100644 index 000000000..f74d7f1ce --- /dev/null +++ b/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 @@ -0,0 +1,124 @@ + + + + + OCP Certificate Expiry Report + {# For fancy icons and a pleasing font #} + + + + + + + + {# Each host has a header and table to itself #} + {% for host in play_hosts %} +

{{ host }}

+ +

+ {{ hostvars[host].check_results.msg }} +

+
    +
  • Expirations checked at: {{ hostvars[host].check_results.check_results.meta.checked_at_time }}
  • +
  • Warn after date: {{ hostvars[host].check_results.check_results.meta.warn_after_date }}
  • +
+ + + {# These are hard-coded right now, but should be grabbed dynamically from the registered results #} + {%- for kind in ['ocp_certs', 'etcd', 'kubeconfigs', 'router', 'registry'] -%} + + + + + + + + + + + + + + {# A row for each certificate examined #} + {%- for v in hostvars[host].check_results.check_results[kind] -%} + + {# Let's add some flair and show status visually with fancy icons #} + {% if v.health == 'ok' %} + {% set health_icon = 'glyphicon glyphicon-ok' %} + {% elif v.health == 'warning' %} + {% set health_icon = 'glyphicon glyphicon-alert' %} + {% else %} + {% set health_icon = 'glyphicon glyphicon-remove' %} + {% endif %} + + + + + + + + + + {% endfor %} + {# end row generation per cert of this type #} + {% endfor %} + {# end generation for each kind of cert block #} +

{{ kind }}

 Certificate Common/Alt Name(s)HealthDays RemainingExpiration DatePath
{{ v.cert_cn }}{{ v.health }}{{ v.days_remaining }}{{ v.expiry }}{{ v.path }}
+
+ {% endfor %} + {# end section generation for each host #} + +
+

+ Expiration report generated by openshift-ansible +

+

+ Status icons from bootstrap/glyphicon +

+
+ + diff --git a/roles/openshift_certificate_expiry/templates/save_json_results.j2 b/roles/openshift_certificate_expiry/templates/save_json_results.j2 new file mode 100644 index 000000000..89602ac2b --- /dev/null +++ b/roles/openshift_certificate_expiry/templates/save_json_results.j2 @@ -0,0 +1,5 @@ +{ +{% for host in play_hosts %} +"{{host}}": {{ hostvars[host].check_results.check_results | to_nice_json(indent=2) }}{% if not loop.last %},{% endif %} +{% endfor %} +} -- cgit v1.2.1 From 3e8279ace30affb18d7070a47a13b521995960d5 Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Fri, 7 Oct 2016 12:12:45 -0700 Subject: Fix playbooks, update readme, update default vars Fix 'make ci' system Add examples --- roles/openshift_certificate_expiry/README.md | 138 +++++++++++++++++++-- .../openshift_certificate_expiry/defaults/main.yml | 8 +- .../library/openshift_cert_expiry.py | 13 +- roles/openshift_certificate_expiry/tasks/main.yml | 16 +-- .../templates/save_json_results.j2 | 3 +- utils/Makefile | 6 +- 6 files changed, 154 insertions(+), 30 deletions(-) diff --git a/roles/openshift_certificate_expiry/README.md b/roles/openshift_certificate_expiry/README.md index 75970c7a0..9b543a335 100644 --- a/roles/openshift_certificate_expiry/README.md +++ b/roles/openshift_certificate_expiry/README.md @@ -22,16 +22,22 @@ Requirements Role Variables -------------- -From this role: +Core variables in this role: -| Name | Default value | Description | -|--------------------------|---------------|-------------------------------------------------------------------------------------| -| `config_base` | `/etc/origin` | Base openshift config directory | -| `warning_days` | `30` | Flag certificates which will expire in this many days from now | -| `show_all` | `False` | Include healthy (non-expired and non-warning) certificates in results | -| `generate_report` | `False` | Generate an HTML report of the expiry check results | -| `save_json_results` | `False` | Save expiry check results as a 
json file | -| `result_dir` | `/tmp` | Directory in which to put check results and generated reports | +| Name | Default value | Description | +|--------------------------|--------------------------------|-----------------------------------------------------------------------| +| `config_base` | `/etc/origin` | Base openshift config directory | +| `warning_days` | `30` | Flag certificates which will expire in this many days from now | +| `show_all` | `no` | Include healthy (non-expired and non-warning) certificates in results | + +Optional report/result saving variables in this role: + +| Name | Default value | Description | +|--------------------------|--------------------------------|-----------------------------------------------------------------------| +| `generate_html_report` | `no` | Generate an HTML report of the expiry check results | +| `html_report_path` | `/tmp/cert-expiry-report.html` | The full path to save the HTML report as | +| `save_json_results` | `no` | Save expiry check results as a json file | +| `json_results_path` | `/tmp/cert-expiry-report.json` | The full path to save the json report as | Dependencies @@ -42,16 +48,128 @@ Dependencies Example Playbook ---------------- +Default behavior: + +```yaml +--- +- name: Check cert expirys + hosts: all + become: yes + gather_facts: no + roles: + - role: openshift_certificate_expiry +``` + +Generate HTML and JSON artifacts in their default paths: + +```yaml +--- +- name: Check cert expirys + hosts: all + become: yes + gather_facts: no + vars: + generate_html_report: yes + save_json_results: yes + roles: + - role: openshift_certificate_expiry ``` + +Change the expiration warning window to 1500 days (good for testing +the module out) + +```yaml +--- - name: Check cert expirys hosts: all become: yes gather_facts: no + vars: + warning_days: 1500 roles: - - role: openshift_certificate_expiry + - role: openshift_certificate_expiry ``` +Example JSON Output +------------------- + +Example is abbreviated to 
save space: + +```json +{ + "192.168.124.148": { + "etcd": [ + { + "cert_cn": "CN:etcd-signer@1474563722", + "days_remaining": 350, + "expiry": "2017-09-22 17:02:25", + "health": "warning", + "path": "/etc/etcd/ca.crt" + }, + ], + "kubeconfigs": [ + { + "cert_cn": "O:system:nodes, CN:system:node:m01.example.com", + "days_remaining": 715, + "expiry": "2018-09-22 17:08:57", + "health": "warning", + "path": "/etc/origin/node/system:node:m01.example.com.kubeconfig" + }, + { + "cert_cn": "O:system:cluster-admins, CN:system:admin", + "days_remaining": 715, + "expiry": "2018-09-22 17:04:40", + "health": "warning", + "path": "/etc/origin/master/admin.kubeconfig" + } + ], + "meta": { + "checked_at_time": "2016-10-07 15:26:47.608192", + "show_all": "True", + "warn_after_date": "2020-11-15 15:26:47.608192", + "warning_days": 1500 + }, + "ocp_certs": [ + { + "cert_cn": "CN:172.30.0.1, DNS:kubernetes, DNS:kubernetes.default, DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, DNS:m01.example.com, DNS:openshift, DNS:openshift.default, DNS:openshift.default.svc, DNS:openshift.default.svc.cluster.local, DNS:172.30.0.1, DNS:192.168.124.148, IP Address:172.30.0.1, IP Address:192.168.124.148", + "days_remaining": 715, + "expiry": "2018-09-22 17:04:39", + "health": "warning", + "path": "/etc/origin/master/master.server.crt" + }, + { + "cert_cn": "CN:openshift-signer@1474563878", + "days_remaining": 1810, + "expiry": "2021-09-21 17:04:38", + "health": "ok", + "path": "/etc/origin/node/ca.crt" + } + ], + "registry": [ + { + "cert_cn": "CN:172.30.101.81, DNS:docker-registry-default.router.default.svc.cluster.local, DNS:docker-registry.default.svc.cluster.local, DNS:172.30.101.81, IP Address:172.30.101.81", + "days_remaining": 728, + "expiry": "2018-10-05 18:54:29", + "health": "warning", + "path": "/api/v1/namespaces/default/secrets/registry-certificates" + } + ], + "router": [ + { + "cert_cn": "CN:router.default.svc, DNS:router.default.svc, 
DNS:router.default.svc.cluster.local", + "days_remaining": 715, + "expiry": "2018-09-22 17:48:23", + "health": "warning", + "path": "/api/v1/namespaces/default/secrets/router-certs" + } + ] + } +} +``` + + + License ------- diff --git a/roles/openshift_certificate_expiry/defaults/main.yml b/roles/openshift_certificate_expiry/defaults/main.yml index 5c077f450..c05617a75 100644 --- a/roles/openshift_certificate_expiry/defaults/main.yml +++ b/roles/openshift_certificate_expiry/defaults/main.yml @@ -1,6 +1,8 @@ --- config_base: "/etc/origin" warning_days: 30 -show_all: false -generate_report: false -result_dir: "/tmp" +show_all: no +generate_html_report: no +html_report_path: "/tmp/cert-expiry-report.html" +save_json_results: no +json_results_path: "/tmp/cert-expiry-report.json" diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py index f18ab75d0..3b934d019 100644 --- a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py +++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py @@ -281,11 +281,11 @@ an OpenShift Container Platform cluster type='str'), warning_days=dict( required=False, - default=int(30), + default=30, type='int'), show_all=dict( required=False, - default="False", + default=False, type='bool') ), supports_check_mode=True, @@ -549,8 +549,6 @@ an OpenShift Container Platform cluster classify_cert(expire_check_result, now, time_remaining, expire_window, router_certs) - check_results['router'] = router_certs - ###################################################################### # Now for registry # registry_secrets = subprocess.call('oc get secret registry-certificates -o yaml'.split()) @@ -579,8 +577,6 @@ an OpenShift Container Platform cluster classify_cert(expire_check_result, now, time_remaining, expire_window, registry_certs) - check_results['registry'] = registry_certs - 
###################################################################### # /Check router/registry certs ###################################################################### @@ -602,10 +598,15 @@ an OpenShift Container Platform cluster check_results['ocp_certs'] = [crt for crt in ocp_certs if crt['health'] in ['expired', 'warning']] check_results['kubeconfigs'] = [crt for crt in kubeconfigs if crt['health'] in ['expired', 'warning']] check_results['etcd'] = [crt for crt in etcd_certs if crt['health'] in ['expired', 'warning']] + check_results['registry'] = [crt for crt in registry_certs if crt['health'] in ['expired', 'warning']] + check_results['router'] = [crt for crt in router_certs if crt['health'] in ['expired', 'warning']] else: check_results['ocp_certs'] = ocp_certs check_results['kubeconfigs'] = kubeconfigs check_results['etcd'] = etcd_certs + check_results['registry'] = registry_certs + check_results['router'] = router_certs + # Sort the final results to report in order of ascending safety # time. 
That is to say, the certificates which will expire sooner diff --git a/roles/openshift_certificate_expiry/tasks/main.yml b/roles/openshift_certificate_expiry/tasks/main.yml index def7d1284..88bc02efe 100644 --- a/roles/openshift_certificate_expiry/tasks/main.yml +++ b/roles/openshift_certificate_expiry/tasks/main.yml @@ -1,23 +1,25 @@ --- - name: Check cert expirys on host openshift_cert_expiry: - warning_days: 1500 + warning_days: "{{ warning_days|int }}" + config_base: "{{ config_base }}" + show_all: "{{ show_all|bool }}" register: check_results -- name: Generate html +- name: Generate expiration report HTML become: no run_once: yes template: src: cert-expiry-table.html.j2 - dest: /tmp/cert-table.html + dest: "{{ html_report_path }}" delegate_to: localhost - when: generate_report + when: "{{ generate_html_report|bool }}" -- name: Generate JSON +- name: Generate expiration results JSON become: no run_once: yes template: src: save_json_results.j2 - dest: /tmp/cert-expiry-results.json + dest: "{{ json_results_path }}" delegate_to: localhost - when: save_json_results + when: "{{ save_json_results|bool }}" diff --git a/roles/openshift_certificate_expiry/templates/save_json_results.j2 b/roles/openshift_certificate_expiry/templates/save_json_results.j2 index 89602ac2b..fe2800548 100644 --- a/roles/openshift_certificate_expiry/templates/save_json_results.j2 +++ b/roles/openshift_certificate_expiry/templates/save_json_results.j2 @@ -1,5 +1,6 @@ { {% for host in play_hosts %} -"{{host}}": {{ hostvars[host].check_results.check_results | to_nice_json(indent=2) }}{% if not loop.last %},{% endif %} + "{{host}}": {{ hostvars[host].check_results.check_results | to_nice_json(indent=4) }}{% if not loop.last %}, +{% endif %} {% endfor %} } diff --git a/utils/Makefile b/utils/Makefile index bc708964b..62f08f74b 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -82,7 +82,7 @@ ci-pylint: @echo "#############################################" @echo "# Running PyLint Tests in 
virtualenv" @echo "#############################################" - . $(NAME)env/bin/activate && python -m pylint --rcfile ../git/.pylintrc src/ooinstall/cli_installer.py src/ooinstall/oo_config.py src/ooinstall/openshift_ansible.py src/ooinstall/variants.py ../callback_plugins/openshift_quick_installer.py ../library/openshift_cert_expiry.py + . $(NAME)env/bin/activate && python -m pylint --rcfile ../git/.pylintrc src/ooinstall/cli_installer.py src/ooinstall/oo_config.py src/ooinstall/openshift_ansible.py src/ooinstall/variants.py ../callback_plugins/openshift_quick_installer.py ../roles/openshift_certificate_expiry/library/openshift_cert_expiry.py ci-list-deps: @echo "#############################################" @@ -96,7 +96,7 @@ ci-pyflakes: @echo "#################################################" . $(NAME)env/bin/activate && pyflakes src/ooinstall/*.py . $(NAME)env/bin/activate && pyflakes ../callback_plugins/openshift_quick_installer.py - . $(NAME)env/bin/activate && pyflakes ../library/openshift_cert_expiry.py + . $(NAME)env/bin/activate && pyflakes ../roles/openshift_certificate_expiry/library/openshift_cert_expiry.py ci-pep8: @echo "#############################################" @@ -106,7 +106,7 @@ ci-pep8: . $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES) ../callback_plugins/openshift_quick_installer.py # This one excludes E402 because it is an ansible module and the # boilerplate import statement is expected to be at the bottom - . $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES),E402 ../library/openshift_cert_expiry.py + . 
$(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES),E402 ../roles/openshift_certificate_expiry/library/openshift_cert_expiry.py ci: clean virtualenv ci-list-deps ci-pep8 ci-pylint ci-pyflakes ci-unittests : -- cgit v1.2.1 From 83bdf2827aa2540872518b814b79c4753f66f1f6 Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Mon, 10 Oct 2016 07:46:03 -0700 Subject: Clean up lint and other little things (polish++) * Fix code lint * Prefix role variables with the role name per best practices * Add words to README --- roles/openshift_certificate_expiry/README.md | 223 ++++++++++++--------- .../openshift_certificate_expiry/defaults/main.yml | 14 +- .../library/openshift_cert_expiry.py | 9 +- roles/openshift_certificate_expiry/tasks/main.yml | 14 +- 4 files changed, 149 insertions(+), 111 deletions(-) diff --git a/roles/openshift_certificate_expiry/README.md b/roles/openshift_certificate_expiry/README.md index 9b543a335..f296c905e 100644 --- a/roles/openshift_certificate_expiry/README.md +++ b/roles/openshift_certificate_expiry/README.md @@ -11,12 +11,14 @@ include: * Master/Node/Router/Registry/Admin `kubeconfig`s * Etcd certificates +This role pairs well with the redeploy certificates playbook: +* [Redeploying Certificates Documentation](https://docs.openshift.com/container-platform/latest/install_config/redeploying_certificates.html) -Requirements ------------- +Just like the redeploying certificates playbook, this role is intended +to be used with an inventory that is representative of the +cluster. For best results run `ansible-playbook` with the `-v` option. 
-* None Role Variables @@ -24,26 +26,21 @@ Role Variables Core variables in this role: -| Name | Default value | Description | -|--------------------------|--------------------------------|-----------------------------------------------------------------------| -| `config_base` | `/etc/origin` | Base openshift config directory | -| `warning_days` | `30` | Flag certificates which will expire in this many days from now | -| `show_all` | `no` | Include healthy (non-expired and non-warning) certificates in results | +| Name | Default value | Description | +|-------------------------------------------------------|--------------------------------|-----------------------------------------------------------------------| +| `openshift_certificate_expiry_config_base` | `/etc/origin` | Base openshift config directory | +| `openshift_certificate_expiry_warning_days` | `30` | Flag certificates which will expire in this many days from now | +| `openshift_certificate_expiry_show_all` | `no` | Include healthy (non-expired and non-warning) certificates in results | Optional report/result saving variables in this role: -| Name | Default value | Description | -|--------------------------|--------------------------------|-----------------------------------------------------------------------| -| `generate_html_report` | `no` | Generate an HTML report of the expiry check results | -| `html_report_path` | `/tmp/cert-expiry-report.html` | The full path to save the HTML report as | -| `save_json_results` | `no` | Save expiry check results as a json file | -| `json_results_path` | `/tmp/cert-expiry-report.json` | The full path to save the json report as | - - -Dependencies ------------- +| Name | Default value | Description | +|-------------------------------------------------------|--------------------------------|-----------------------------------------------------------------------| +| `openshift_certificate_expiry_generate_html_report` | `no` | Generate an HTML report of the expiry 
check results | +| `openshift_certificate_expiry_html_report_path` | `/tmp/cert-expiry-report.html` | The full path to save the HTML report as | +| `openshift_certificate_expiry_save_json_results` | `no` | Save expiry check results as a json file | +| `openshift_certificate_expiry_json_results_path` | `/tmp/cert-expiry-report.json` | The full path to save the json report as | -* None Example Playbook ---------------- @@ -69,14 +66,14 @@ Generate HTML and JSON artifacts in their default paths: become: yes gather_facts: no vars: - generate_html_report: yes - save_json_results: yes + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_save_json_results: yes roles: - role: openshift_certificate_expiry ``` Change the expiration warning window to 1500 days (good for testing -the module out) +the module out): ```yaml --- @@ -85,90 +82,134 @@ the module out) become: yes gather_facts: no vars: - warning_days: 1500 + openshift_certificate_expiry_warning_days: 1500 roles: - role: openshift_certificate_expiry ``` +Change the expiration warning window to 1500 days (good for testing +the module out) and save the results as a JSON file: -Example JSON Output -------------------- +```yaml +--- +- name: Check cert expirys + hosts: all + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_warning_days: 1500 + openshift_certificate_expiry_save_json_results: yes + roles: + - role: openshift_certificate_expiry +``` -Example is abbreviated to save space: + +JSON Output +----------- + +There are two top-level keys in the saved JSON results, `data` and +`summary`. + +The `data` key is a hash where the keys are the names of each host +examined and the values are the check results for each respective +host. + +The `summary` key is a hash that summarizes the number of certificates +expiring within the configured warning window and the number of +already expired certificates. 
+ +The example below is abbreviated to save space: ```json { - "192.168.124.148": { - "etcd": [ - { - "cert_cn": "CN:etcd-signer@1474563722", - "days_remaining": 350, - "expiry": "2017-09-22 17:02:25", - "health": "warning", - "path": "/etc/etcd/ca.crt" + "data": { + "192.168.124.148": { + "etcd": [ + { + "cert_cn": "CN:etcd-signer@1474563722", + "days_remaining": 350, + "expiry": "2017-09-22 17:02:25", + "health": "warning", + "path": "/etc/etcd/ca.crt" + }, + ], + "kubeconfigs": [ + { + "cert_cn": "O:system:nodes, CN:system:node:m01.example.com", + "days_remaining": 715, + "expiry": "2018-09-22 17:08:57", + "health": "warning", + "path": "/etc/origin/node/system:node:m01.example.com.kubeconfig" + }, + { + "cert_cn": "O:system:cluster-admins, CN:system:admin", + "days_remaining": 715, + "expiry": "2018-09-22 17:04:40", + "health": "warning", + "path": "/etc/origin/master/admin.kubeconfig" + } + ], + "meta": { + "checked_at_time": "2016-10-07 15:26:47.608192", + "show_all": "True", + "warn_before_date": "2020-11-15 15:26:47.608192", + "warning_days": 1500 }, - ], - "kubeconfigs": [ - { - "cert_cn": "O:system:nodes, CN:system:node:m01.example.com", - "days_remaining": 715, - "expiry": "2018-09-22 17:08:57", - "health": "warning", - "path": "/etc/origin/node/system:node:m01.example.com.kubeconfig" - }, - { - "cert_cn": "O:system:cluster-admins, CN:system:admin", - "days_remaining": 715, - "expiry": "2018-09-22 17:04:40", - "health": "warning", - "path": "/etc/origin/master/admin.kubeconfig" - } - ], - "meta": { - "checked_at_time": "2016-10-07 15:26:47.608192", - "show_all": "True", - "warn_after_date": "2020-11-15 15:26:47.608192", - "warning_days": 1500 - }, - "ocp_certs": [ - { - "cert_cn": "CN:172.30.0.1, DNS:kubernetes, DNS:kubernetes.default, DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, DNS:m01.example.com, DNS:openshift, DNS:openshift.default, DNS:openshift.default.svc, DNS:openshift.default.svc.cluster.local, DNS:172.30.0.1, 
DNS:192.168.124.148, IP Address:172.30.0.1, IP Address:192.168.124.148", - "days_remaining": 715, - "expiry": "2018-09-22 17:04:39", - "health": "warning", - "path": "/etc/origin/master/master.server.crt" - }, - { - "cert_cn": "CN:openshift-signer@1474563878", - "days_remaining": 1810, - "expiry": "2021-09-21 17:04:38", - "health": "ok", - "path": "/etc/origin/node/ca.crt" - } - ], - "registry": [ - { - "cert_cn": "CN:172.30.101.81, DNS:docker-registry-default.router.default.svc.cluster.local, DNS:docker-registry.default.svc.cluster.local, DNS:172.30.101.81, IP Address:172.30.101.81", - "days_remaining": 728, - "expiry": "2018-10-05 18:54:29", - "health": "warning", - "path": "/api/v1/namespaces/default/secrets/registry-certificates" - } - ], - "router": [ - { - "cert_cn": "CN:router.default.svc, DNS:router.default.svc, DNS:router.default.svc.cluster.local", - "days_remaining": 715, - "expiry": "2018-09-22 17:48:23", - "health": "warning", - "path": "/api/v1/namespaces/default/secrets/router-certs" - } - ] + "ocp_certs": [ + { + "cert_cn": "CN:172.30.0.1, DNS:kubernetes, DNS:kubernetes.default, DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, DNS:m01.example.com, DNS:openshift, DNS:openshift.default, DNS:openshift.default.svc, DNS:openshift.default.svc.cluster.local, DNS:172.30.0.1, DNS:192.168.124.148, IP Address:172.30.0.1, IP Address:192.168.124.148", + "days_remaining": 715, + "expiry": "2018-09-22 17:04:39", + "health": "warning", + "path": "/etc/origin/master/master.server.crt" + }, + { + "cert_cn": "CN:openshift-signer@1474563878", + "days_remaining": 1810, + "expiry": "2021-09-21 17:04:38", + "health": "ok", + "path": "/etc/origin/node/ca.crt" + } + ], + "registry": [ + { + "cert_cn": "CN:172.30.101.81, DNS:docker-registry-default.router.default.svc.cluster.local, DNS:docker-registry.default.svc.cluster.local, DNS:172.30.101.81, IP Address:172.30.101.81", + "days_remaining": 728, + "expiry": "2018-10-05 18:54:29", + "health": "warning", 
+ "path": "/api/v1/namespaces/default/secrets/registry-certificates" + } + ], + "router": [ + { + "cert_cn": "CN:router.default.svc, DNS:router.default.svc, DNS:router.default.svc.cluster.local", + "days_remaining": 715, + "expiry": "2018-09-22 17:48:23", + "health": "warning", + "path": "/api/v1/namespaces/default/secrets/router-certs" + } + ] + } + }, + "summary": { + "warning": 6, + "expired": 0 } } ``` +Requirements +------------ + +* None + + +Dependencies +------------ + +* None + License ------- diff --git a/roles/openshift_certificate_expiry/defaults/main.yml b/roles/openshift_certificate_expiry/defaults/main.yml index c05617a75..6d7b19298 100644 --- a/roles/openshift_certificate_expiry/defaults/main.yml +++ b/roles/openshift_certificate_expiry/defaults/main.yml @@ -1,8 +1,8 @@ --- -config_base: "/etc/origin" -warning_days: 30 -show_all: no -generate_html_report: no -html_report_path: "/tmp/cert-expiry-report.html" -save_json_results: no -json_results_path: "/tmp/cert-expiry-report.json" +openshift_certificate_expiry_config_base: "/etc/origin" +openshift_certificate_expiry_warning_days: 30 +openshift_certificate_expiry_show_all: no +openshift_certificate_expiry_generate_html_report: no +openshift_certificate_expiry_html_report_path: "/tmp/cert-expiry-report.html" +openshift_certificate_expiry_save_json_results: no +openshift_certificate_expiry_json_results_path: "/tmp/cert-expiry-report.json" diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py index 3b934d019..6e27a7ebd 100644 --- a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py +++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py @@ -97,7 +97,6 @@ Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583 ###################################################################### - def filter_paths(path_list): """`path_list` - A list of file paths to check. 
Only files which exist will be returned @@ -339,7 +338,7 @@ an OpenShift Container Platform cluster check_results['meta'] = {} check_results['meta']['warning_days'] = warning_days check_results['meta']['checked_at_time'] = str(now) - check_results['meta']['warn_after_date'] = str(now + expire_window) + check_results['meta']['warn_before_date'] = str(now + expire_window) check_results['meta']['show_all'] = str(module.params['show_all']) # All the analyzed certs accumulate here ocp_certs = [] @@ -551,8 +550,6 @@ an OpenShift Container Platform cluster ###################################################################### # Now for registry - # registry_secrets = subprocess.call('oc get secret registry-certificates -o yaml'.split()) - # out = subprocess.PIPE try: registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(), stdout=subprocess.PIPE) @@ -607,11 +604,11 @@ an OpenShift Container Platform cluster check_results['registry'] = registry_certs check_results['router'] = router_certs - # Sort the final results to report in order of ascending safety # time. That is to say, the certificates which will expire sooner # will be at the front of the list and certificates which will - # expire later are at the end. + # expire later are at the end. Router and registry certs should be + # limited to just 1 result, so don't bother sorting those. 
check_results['ocp_certs'] = sorted(check_results['ocp_certs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) check_results['kubeconfigs'] = sorted(check_results['kubeconfigs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) check_results['etcd'] = sorted(check_results['etcd'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining'])) diff --git a/roles/openshift_certificate_expiry/tasks/main.yml b/roles/openshift_certificate_expiry/tasks/main.yml index 88bc02efe..b78209055 100644 --- a/roles/openshift_certificate_expiry/tasks/main.yml +++ b/roles/openshift_certificate_expiry/tasks/main.yml @@ -1,9 +1,9 @@ --- - name: Check cert expirys on host openshift_cert_expiry: - warning_days: "{{ warning_days|int }}" - config_base: "{{ config_base }}" - show_all: "{{ show_all|bool }}" + warning_days: "{{ openshift_certificate_expiry_warning_days|int }}" + config_base: "{{ openshift_certificate_expiry_config_base }}" + show_all: "{{ openshift_certificate_expiry_show_all|bool }}" register: check_results - name: Generate expiration report HTML @@ -11,15 +11,15 @@ run_once: yes template: src: cert-expiry-table.html.j2 - dest: "{{ html_report_path }}" + dest: "{{ openshift_certificate_expiry_html_report_path }}" delegate_to: localhost - when: "{{ generate_html_report|bool }}" + when: "{{ openshift_certificate_expiry_generate_html_report|bool }}" - name: Generate expiration results JSON become: no run_once: yes template: src: save_json_results.j2 - dest: "{{ json_results_path }}" + dest: "{{ openshift_certificate_expiry_json_results_path }}" delegate_to: localhost - when: "{{ save_json_results|bool }}" + when: "{{ openshift_certificate_expiry_save_json_results|bool }}" -- cgit v1.2.1 From 62d9ece28e270a2088511b0d37fa85de3b81bcce Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Mon, 10 Oct 2016 09:32:11 -0700 Subject: The JSON result saving template now includes a summary of expired/warned certs for easier parsing. 
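--- .../templates/save_json_results.j2 | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/roles/openshift_certificate_expiry/templates/save_json_results.j2 b/roles/openshift_certificate_expiry/templates/save_json_results.j2 index fe2800548..9b165f26c 100644 --- a/roles/openshift_certificate_expiry/templates/save_json_results.j2 +++ b/roles/openshift_certificate_expiry/templates/save_json_results.j2 @@ -1,6 +1,20 @@ { + "data": { {% for host in play_hosts %} - "{{host}}": {{ hostvars[host].check_results.check_results | to_nice_json(indent=4) }}{% if not loop.last %}, +{# Pretty print the check results for each host #} + "{{host}}": {{ hostvars[host].check_results.check_results | to_nice_json(indent=6) }}{% if not loop.last %}, +{% else %} + {% endif %} {% endfor %} + }, +{# extract the nested warning/expired value from the hostvars object +using items in the playhosts list as a map. Wrap those results up into +a list and then add them all together #} +{%- set warned = play_hosts|map('extract', hostvars, ['check_results', 'summary', 'warning'])|list|sum %} +{%- set expired = play_hosts|map('extract', hostvars, ['check_results', 'summary', 'expired'])|list|sum %} + "summary": { + "warning": {{ warned }}, + "expired": {{ expired }} + } } -- cgit v1.2.1 From 314da6ffb37c88ca424b927979bae3219251ed5b Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Mon, 10 Oct 2016 10:54:53 -0700 Subject: Add JSON result CLI parsing notes to the README --- roles/openshift_certificate_expiry/README.md | 36 ++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/roles/openshift_certificate_expiry/README.md b/roles/openshift_certificate_expiry/README.md index f296c905e..d44438332 100644 --- a/roles/openshift_certificate_expiry/README.md +++ b/roles/openshift_certificate_expiry/README.md @@ -50,7 +50,7 @@ Default behavior: ```yaml --- - name: Check cert expirys - hosts: all + hosts: nodes:masters:etcd become: yes gather_facts: no roles: @@ -62,7 +62,7 @@ Generate HTML and JSON artifacts in their default paths: ```yaml --- - name: Check cert expirys - hosts: all + hosts: nodes:masters:etcd become: yes gather_facts: no vars: @@ -78,7 +78,7 @@ the module out): ```yaml --- - name: Check cert expirys - hosts: all + hosts: nodes:masters:etcd become: yes gather_facts: no vars: @@ -93,7 +93,7 @@ the module out) and save the results as a JSON file: ```yaml --- - name: Check cert expirys - hosts: all + hosts: nodes:masters:etcd become: yes gather_facts: no vars: @@ -198,6 +198,34 @@ The example below is abbreviated to save space: } ``` +The `summary` from the json data can be easily checked for +warnings/expirations using a variety of command-line tools. + +For example, using `grep` we can look for the word `summary` and print +out the 2 lines **after** the match (`-A2`): + +``` +$ grep -A2 summary /tmp/cert-expiry-report.json + "summary": { + "warning": 16, + "expired": 0 +``` + +If available, the [jq](https://stedolan.github.io/jq/) tool can also +be used to pick out specific values. Example 1 and 2 below show how to +select just one value, either `warning` or `expired`. Example 3 shows +how to select both values at once: + +``` +$ jq '.summary.warning' /tmp/cert-expiry-report.json +16 +$ jq '.summary.expired' /tmp/cert-expiry-report.json +0 +$ jq '.summary.warning,.summary.expired' /tmp/cert-expiry-report.json +16 +0 +``` + Requirements ------------ -- cgit v1.2.1 From 9075e50ca05037039fc0bfb0742aaf5f5f4ecafb Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Thu, 13 Oct 2016 13:51:49 -0700 Subject: Make the json template filter-driven.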
no roles: @@ -62,7 +62,7 @@ Generate HTML and JSON artifacts in their default paths: ```yaml --- - name: Check cert expirys - hosts: all + hosts: nodes:masters:etcd become: yes gather_facts: no vars: @@ -78,7 +78,7 @@ the module out): ```yaml --- - name: Check cert expirys - hosts: all + hosts: nodes:masters:etcd become: yes gather_facts: no vars: @@ -93,7 +93,7 @@ the module out) and save the results as a JSON file: ```yaml --- - name: Check cert expirys - hosts: all + hosts: nodes:masters:etcd become: yes gather_facts: no vars: @@ -198,6 +198,34 @@ The example below is abbreviated to save space: } ``` +The `summary` from the json data can be easily checked for +warnings/expirations using a variety of command-line tools. + +For exampe, using `grep` we can look for the word `summary` and print +out the 2 lines **after** the match (`-A2`): + +``` +$ grep -A2 summary /tmp/cert-expiry-report.json + "summary": { + "warning": 16, + "expired": 0 +``` + +If available, the [jq](https://stedolan.github.io/jq/) tool can also +be used to pick out specific values. Example 1 and 2 below show how to +select just one value, either `warning` or `expired`. Example 3 shows +how to select both values at once: + +``` +$ jq '.summary.warning' /tmp/cert-expiry-report.json +16 +$ jq '.summary.expired' /tmp/cert-expiry-report.json +0 +$ jq '.summary.warning,.summary.expired' /tmp/cert-expiry-report.json +16 +0 +``` + Requirements ------------ -- cgit v1.2.1 From 9075e50ca05037039fc0bfb0742aaf5f5f4ecafb Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Thu, 13 Oct 2016 13:51:49 -0700 Subject: Make the json template filter-driven. 
--- .../filter_plugins/oo_cert_expiry.py | 88 ++++++++++++++++++++++ .../library/openshift_cert_expiry.py | 18 ++--- roles/openshift_certificate_expiry/meta/main.yml | 1 + roles/openshift_certificate_expiry/tasks/main.yml | 7 +- .../templates/cert-expiry-table.html.j2 | 2 +- .../templates/save_json_results.j2 | 21 +----- 6 files changed, 105 insertions(+), 32 deletions(-) create mode 100644 roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py diff --git a/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py new file mode 100644 index 000000000..2e2430ee6 --- /dev/null +++ b/roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py @@ -0,0 +1,88 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# vim: expandtab:tabstop=4:shiftwidth=4 +""" +Custom filters for use in openshift-ansible +""" + +from ansible import errors +from collections import Mapping +from distutils.util import strtobool +from distutils.version import LooseVersion +from operator import itemgetter +import OpenSSL.crypto +import os +import pdb +import pkg_resources +import re +import json +import yaml +from ansible.parsing.yaml.dumper import AnsibleDumper +from urlparse import urlparse + +try: + # ansible-2.2 + # ansible.utils.unicode.to_unicode is deprecated in ansible-2.2, + # ansible.module_utils._text.to_text should be used instead. + from ansible.module_utils._text import to_text +except ImportError: + # ansible-2.1 + from ansible.utils.unicode import to_unicode as to_text + +# Disabling too-many-public-methods, since filter methods are necessarily +# public +# pylint: disable=too-many-public-methods +class FilterModule(object): + """ Custom ansible filters """ + + @staticmethod + def oo_cert_expiry_results_to_json(hostvars, play_hosts): + """Takes results (`hostvars`) from the openshift_cert_expiry role +check and serializes them into proper machine-readable JSON +output. 
This filter parameter **MUST** be the playbook `hostvars` +variable. The `play_hosts` parameter is so we know what to loop over +when we're extracting the values. + +Returns: + +Results are collected into two top-level keys under the `json_results` +dict: + +* `json_results.data` [dict] - Each individual host check result, keys are hostnames +* `json_results.summary` [dict] - Summary of number of `warning` and `expired` +certificates + +Example playbook usage: + + - name: Generate expiration results JSON + become: no + run_once: yes + delegate_to: localhost + when: "{{ openshift_certificate_expiry_save_json_results|bool }}" + copy: + content: "{{ hostvars|oo_cert_expiry_results_to_json(play_hosts) }}" + dest: "{{ openshift_certificate_expiry_json_results_path }}" + + """ + json_result = { + 'data': {}, + 'summary': {}, + } + + for host in play_hosts: + json_result['data'][host] = hostvars[host]['check_results']['check_results'] + + total_warnings = sum([hostvars[h]['check_results']['summary']['warning'] for h in play_hosts]) + total_expired = sum([hostvars[h]['check_results']['summary']['expired'] for h in play_hosts]) + + json_result['summary']['warning'] = total_warnings + json_result['summary']['expired'] = total_expired + + return json_result + + + def filters(self): + """ returns a mapping of filters to methods """ + return { + "oo_cert_expiry_results_to_json": self.oo_cert_expiry_results_to_json, + } diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py index 6e27a7ebd..1d0507537 100644 --- a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py +++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py @@ -36,7 +36,7 @@ description: - C(days_remaining) - The number of days until the certificate expires. - C(expiry) - The date the certificate expires on. - C(path) - The full path to the certificate on the examined host.
-version_added: "0.0" +version_added: "1.0" options: config_base: description: @@ -126,13 +126,6 @@ A 3-tuple of the form: (certificate_common_name, certificate_expiry_date, certif cert_loaded = OpenSSL.crypto.load_certificate( OpenSSL.crypto.FILETYPE_PEM, _cert_string) - ###################################################################### - # Read just the first name from the cert - DISABLED while testing - # out the 'get all possible names' function (below) - # - # Strip the subject down to just the value of the first name - # cert_subject = cert_loaded.get_subject().get_components()[0][1] - ###################################################################### # Read all possible names from the cert cert_subjects = [] @@ -227,7 +220,7 @@ Return: def tabulate_summary(certificates, kubeconfigs, etcd_certs, router_certs, registry_certs): """Calculate the summary text for when the module finishes -running. This includes counds of each classification and what have +running. This includes counts of each classification and what have you. Params: @@ -236,6 +229,7 @@ Params: dicts with filled in `health` keys for system certificates. - `kubeconfigs` - as above for kubeconfigs - `etcd_certs` - as above for etcd certs + Return: - `summary_results` (dict) - Counts of each cert type classification @@ -290,7 +284,7 @@ an OpenShift Container Platform cluster supports_check_mode=True, ) - # Basic scaffolding for OpenShift spcific certs + # Basic scaffolding for OpenShift specific certs openshift_base_config_path = module.params['config_base'] openshift_master_config_path = os.path.normpath( os.path.join(openshift_base_config_path, "master/master-config.yaml") @@ -317,6 +311,10 @@ an OpenShift Container Platform cluster ) ) + # Validate some paths we have the ability to do ahead of time + openshift_cert_check_paths = filter_paths(openshift_cert_check_paths) + kubeconfig_paths = filter_paths(kubeconfig_paths) + # etcd, where do you hide your certs? 
Used when parsing etcd.conf etcd_cert_params = [ "ETCD_CA_FILE", diff --git a/roles/openshift_certificate_expiry/meta/main.yml b/roles/openshift_certificate_expiry/meta/main.yml index dd66299ea..c13b29ba5 100644 --- a/roles/openshift_certificate_expiry/meta/main.yml +++ b/roles/openshift_certificate_expiry/meta/main.yml @@ -5,6 +5,7 @@ galaxy_info: company: Red Hat, Inc. license: Apache License, Version 2.0 min_ansible_version: 2.1 + version: 1.0 platforms: - name: EL versions: diff --git a/roles/openshift_certificate_expiry/tasks/main.yml b/roles/openshift_certificate_expiry/tasks/main.yml index b78209055..139d5de6e 100644 --- a/roles/openshift_certificate_expiry/tasks/main.yml +++ b/roles/openshift_certificate_expiry/tasks/main.yml @@ -15,7 +15,12 @@ delegate_to: localhost when: "{{ openshift_certificate_expiry_generate_html_report|bool }}" -- name: Generate expiration results JSON +- name: Generate the result JSON string + run_once: yes + set_fact: json_result_string="{{ hostvars|oo_cert_expiry_results_to_json(play_hosts) }}" + when: "{{ openshift_certificate_expiry_save_json_results|bool }}" + +- name: Generate results JSON file become: no run_once: yes template: diff --git a/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 b/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 index f74d7f1ce..b05110336 100644 --- a/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 +++ b/roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2 @@ -64,7 +64,7 @@

  • Expirations checked at: {{ hostvars[host].check_results.check_results.meta.checked_at_time }}
  • -
  • Warn after date: {{ hostvars[host].check_results.check_results.meta.warn_after_date }}
  • +
  • Warn after date: {{ hostvars[host].check_results.check_results.meta.warn_before_date }}
diff --git a/roles/openshift_certificate_expiry/templates/save_json_results.j2 b/roles/openshift_certificate_expiry/templates/save_json_results.j2 index 9b165f26c..c1173d9ea 100644 --- a/roles/openshift_certificate_expiry/templates/save_json_results.j2 +++ b/roles/openshift_certificate_expiry/templates/save_json_results.j2 @@ -1,20 +1 @@ -{ - "data": { -{% for host in play_hosts %} -{# Pretty print the check results for each host #} - "{{host}}": {{ hostvars[host].check_results.check_results | to_nice_json(indent=6) }}{% if not loop.last %}, -{% else %} - -{% endif %} -{% endfor %} - }, -{# extract the nested warning/expired value from the hostvars object -using items in the playhosts list as a map. Wrap those results up into -a list and then add them all together #} -{%- set warned = play_hosts|map('extract', hostvars, ['check_results', 'summary', 'warning'])|list|sum %} -{%- set expired = play_hosts|map('extract', hostvars, ['check_results', 'summary', 'expired'])|list|sum %} - "summary": { - "warning": {{ warned }}, - "expired": {{ expired }} - } -} +{{ json_result_string | to_nice_json(indent=2)}} -- cgit v1.2.1 From b5f46cdfd4e046ba4d895db5ec0f4d00a28e17f1 Mon Sep 17 00:00:00 2001 From: Tim Bielawa Date: Thu, 20 Oct 2016 08:10:51 -0700 Subject: Don't freak out if the oc command doesn't exist. --- roles/openshift_certificate_expiry/library/openshift_cert_expiry.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py index 1d0507537..2cdb87dc1 100644 --- a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py +++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py @@ -531,6 +531,9 @@ an OpenShift Container Platform cluster except TypeError: # YAML couldn't load the result, this is not a master pass + except OSError: + # The OC command doesn't exist here. Move along. 
+ pass else: (cert_subject, cert_expiry_date, @@ -557,6 +560,9 @@ an OpenShift Container Platform cluster except TypeError: # YAML couldn't load the result, this is not a master pass + except OSError: + # The OC command doesn't exist here. Move along. + pass else: (cert_subject, cert_expiry_date, -- cgit v1.2.1