summaryrefslogtreecommitdiffstats
path: root/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_certificate_expiry/library/openshift_cert_expiry.py')
-rw-r--r--roles/openshift_certificate_expiry/library/openshift_cert_expiry.py635
1 files changed, 635 insertions, 0 deletions
diff --git a/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
new file mode 100644
index 000000000..f18ab75d0
--- /dev/null
+++ b/roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
@@ -0,0 +1,635 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# pylint: disable=line-too-long,invalid-name
+
+"""For details on this module see DOCUMENTATION (below)"""
+
+# router/registry cert grabbing
+import subprocess
+# etcd config file
+import ConfigParser
+# Expiration parsing
+import datetime
+# File path stuff
+import os
+# Config file parsing
+import yaml
+# Certificate loading
+import OpenSSL.crypto
+
+DOCUMENTATION = '''
+---
+module: openshift_cert_expiry
+short_description: Check OpenShift Container Platform (OCP) and Kube certificate expirations on a cluster
+description:
+ - The M(openshift_cert_expiry) module has two basic functions: to flag certificates which will expire in a set window of time from now, and to notify you about certificates which have already expired.
+ - When the module finishes, a summary of the examination is returned. Each certificate in the summary has a C(health) key with a value of one of the following:
+ - C(ok) - not expired, and outside of the expiration C(warning_days) window.
+ - C(warning) - not expired, but will expire between now and the C(warning_days) window.
+ - C(expired) - an expired certificate.
+ - Certificate flagging follow this logic:
+ - If the expiration date is before now then the certificate is classified as C(expired).
+ - The certificates time to live (expiration date - now) is calculated, if that time window is less than C(warning_days) the certificate is classified as C(warning).
+ - All other conditions are classified as C(ok).
+ - The following keys are ALSO present in the certificate summary:
+ - C(cert_cn) - The common name of the certificate (additional CNs present in SAN extensions are omitted)
+ - C(days_remaining) - The number of days until the certificate expires.
+ - C(expiry) - The date the certificate expires on.
+ - C(path) - The full path to the certificate on the examined host.
+version_added: "0.0"
+options:
+ config_base:
+ description:
+ - Base path to OCP system settings.
+ required: false
+ default: /etc/origin
+ warning_days:
+ description:
+ - Flag certificates which will expire in C(warning_days) days from now.
+ required: false
+ default: 30
+ show_all:
+ description:
+ - Enable this option to show analysis of ALL certificates examined by this module.
+ - By default only certificates which have expired, or will expire within the C(warning_days) window will be reported.
+ required: false
+ default: false
+
+author: "Tim Bielawa (@tbielawa) <tbielawa@redhat.com>"
+'''
+
+EXAMPLES = '''
+# Default invocation, only notify about expired certificates or certificates which will expire within 30 days from now
+- openshift_cert_expiry:
+
+# Expand the warning window to show certificates expiring within a year from now
+- openshift_cert_expiry: warning_days=365
+
+# Show expired, soon to expire (now + 30 days), and all other certificates examined
+- openshift_cert_expiry: show_all=true
+'''
+
+
+# We only need this for one thing, we don't care if it doesn't have
+# that many public methods
+#
+# pylint: disable=too-few-public-methods
+class FakeSecHead(object):
+ """etcd does not begin their config file with an opening [section] as
+required by the Python ConfigParser module. We hack around it by
+slipping one in ourselves prior to parsing.
+
+Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583
+ """
+ def __init__(self, fp):
+ self.fp = fp
+ self.sechead = '[ETCD]\n'
+
+ def readline(self):
+ """Make this look like a file-type object"""
+ if self.sechead:
+ try:
+ return self.sechead
+ finally:
+ self.sechead = None
+ else:
+ return self.fp.readline()
+
+
+######################################################################
+
+def filter_paths(path_list):
+ """`path_list` - A list of file paths to check. Only files which exist
+will be returned
+ """
+ return [p for p in path_list if os.path.exists(os.path.realpath(p))]
+
+
+def load_and_handle_cert(cert_string, now, base64decode=False):
+ """Load a certificate, split off the good parts, and return some
+useful data
+
+Params:
+
+- `cert_string` (string) - a certificate loaded into a string object
+- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against
+- `base64decode` (bool) - run .decode('base64') on the input?
+
+Returns:
+A 3-tuple of the form: (certificate_common_name, certificate_expiry_date, certificate_time_remaining)
+
+ """
+ if base64decode:
+ _cert_string = cert_string.decode('base-64')
+ else:
+ _cert_string = cert_string
+
+ cert_loaded = OpenSSL.crypto.load_certificate(
+ OpenSSL.crypto.FILETYPE_PEM, _cert_string)
+
+ ######################################################################
+ # Read just the first name from the cert - DISABLED while testing
+ # out the 'get all possible names' function (below)
+ #
+ # Strip the subject down to just the value of the first name
+ # cert_subject = cert_loaded.get_subject().get_components()[0][1]
+
+ ######################################################################
+ # Read all possible names from the cert
+ cert_subjects = []
+ for name, value in cert_loaded.get_subject().get_components():
+ cert_subjects.append('{}:{}'.format(name, value))
+
+ # To read SANs from a cert we must read the subjectAltName
+ # extension from the X509 Object. What makes this more difficult
+ # is that pyOpenSSL does not give extensions as a list, nor does
+ # it provide a count of all loaded extensions.
+ #
+ # Rather, extensions are REQUESTED by index. We must iterate over
+ # all extensions until we find the one called 'subjectAltName'. If
+ # we don't find that extension we'll eventually request an
+ # extension at an index where no extension exists (IndexError is
+ # raised). When that happens we know that the cert has no SANs so
+ # we break out of the loop.
+ i = 0
+ checked_all_extensions = False
+ while not checked_all_extensions:
+ try:
+ # Read the extension at index 'i'
+ ext = cert_loaded.get_extension(i)
+ except IndexError:
+ # We tried to read an extension but it isn't there, that
+ # means we ran out of extensions to check. Abort
+ san = None
+ checked_all_extensions = True
+ else:
+ # We were able to load the extension at index 'i'
+ if ext.get_short_name() == 'subjectAltName':
+ san = ext
+ checked_all_extensions = True
+ else:
+ # Try reading the next extension
+ i += 1
+
+ if san is not None:
+ # The X509Extension object for subjectAltName prints as a
+ # string with the alt names separated by a comma and a
+ # space. Split the string by ', ' and then add our new names
+ # to the list of existing names
+ cert_subjects.extend(str(san).split(', '))
+
+ cert_subject = ', '.join(cert_subjects)
+ ######################################################################
+
+ # Grab the expiration date
+ cert_expiry = cert_loaded.get_notAfter()
+ cert_expiry_date = datetime.datetime.strptime(
+ cert_expiry,
+ # example get_notAfter() => 20180922170439Z
+ '%Y%m%d%H%M%SZ')
+
+ time_remaining = cert_expiry_date - now
+
+ return (cert_subject, cert_expiry_date, time_remaining)
+
+
+def classify_cert(cert_meta, now, time_remaining, expire_window, cert_list):
+ """Given metadata about a certificate under examination, classify it
+ into one of three categories, 'ok', 'warning', and 'expired'.
+
+Params:
+
+- `cert_meta` dict - A dict with certificate metadata. Required fields
+ include: 'cert_cn', 'path', 'expiry', 'days_remaining', 'health'.
+- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against
+- `time_remaining` (datetime.timedelta) - a timedelta for how long until the cert expires
+- `expire_window` (datetime.timedelta) - a timedelta for how long the warning window is
+- `cert_list` list - A list to shove the classified cert into
+
+Return:
+- `cert_list` - The updated list of classified certificates
+ """
+ expiry_str = str(cert_meta['expiry'])
+ # Categorization
+ if cert_meta['expiry'] < now:
+ # This already expired, must NOTIFY
+ cert_meta['health'] = 'expired'
+ elif time_remaining < expire_window:
+ # WARN about this upcoming expirations
+ cert_meta['health'] = 'warning'
+ else:
+ # Not expired or about to expire
+ cert_meta['health'] = 'ok'
+
+ cert_meta['expiry'] = expiry_str
+ cert_list.append(cert_meta)
+ return cert_list
+
+
+def tabulate_summary(certificates, kubeconfigs, etcd_certs, router_certs, registry_certs):
+ """Calculate the summary text for when the module finishes
+running. This includes counds of each classification and what have
+you.
+
+Params:
+
+- `certificates` (list of dicts) - Processed `expire_check_result`
+ dicts with filled in `health` keys for system certificates.
+- `kubeconfigs` - as above for kubeconfigs
+- `etcd_certs` - as above for etcd certs
+Return:
+
+- `summary_results` (dict) - Counts of each cert type classification
+ and total items examined.
+ """
+ items = certificates + kubeconfigs + etcd_certs + router_certs + registry_certs
+
+ summary_results = {
+ 'system_certificates': len(certificates),
+ 'kubeconfig_certificates': len(kubeconfigs),
+ 'etcd_certificates': len(etcd_certs),
+ 'router_certs': len(router_certs),
+ 'registry_certs': len(registry_certs),
+ 'total': len(items),
+ 'ok': 0,
+ 'warning': 0,
+ 'expired': 0
+ }
+
+ summary_results['expired'] = len([c for c in items if c['health'] == 'expired'])
+ summary_results['warning'] = len([c for c in items if c['health'] == 'warning'])
+ summary_results['ok'] = len([c for c in items if c['health'] == 'ok'])
+
+ return summary_results
+
+
+######################################################################
+# This is our module MAIN function after all, so there's bound to be a
+# lot of code bundled up into one block
+#
+# pylint: disable=too-many-locals,too-many-locals,too-many-statements,too-many-branches
+def main():
+ """This module examines certificates (in various forms) which compose
+an OpenShift Container Platform cluster
+ """
+
+ module = AnsibleModule(
+ argument_spec=dict(
+ config_base=dict(
+ required=False,
+ default="/etc/origin",
+ type='str'),
+ warning_days=dict(
+ required=False,
+ default=int(30),
+ type='int'),
+ show_all=dict(
+ required=False,
+ default="False",
+ type='bool')
+ ),
+ supports_check_mode=True,
+ )
+
+ # Basic scaffolding for OpenShift spcific certs
+ openshift_base_config_path = module.params['config_base']
+ openshift_master_config_path = os.path.normpath(
+ os.path.join(openshift_base_config_path, "master/master-config.yaml")
+ )
+ openshift_node_config_path = os.path.normpath(
+ os.path.join(openshift_base_config_path, "node/node-config.yaml")
+ )
+ openshift_cert_check_paths = [
+ openshift_master_config_path,
+ openshift_node_config_path,
+ ]
+
+ # Paths for Kubeconfigs. Additional kubeconfigs are conditionally
+ # checked later in the code
+ master_kube_configs = ['admin', 'openshift-master',
+ 'openshift-node', 'openshift-router',
+ 'openshift-registry']
+
+ kubeconfig_paths = []
+ for m_kube_config in master_kube_configs:
+ kubeconfig_paths.append(
+ os.path.normpath(
+ os.path.join(openshift_base_config_path, "master/%s.kubeconfig" % m_kube_config)
+ )
+ )
+
+ # etcd, where do you hide your certs? Used when parsing etcd.conf
+ etcd_cert_params = [
+ "ETCD_CA_FILE",
+ "ETCD_CERT_FILE",
+ "ETCD_PEER_CA_FILE",
+ "ETCD_PEER_CERT_FILE",
+ ]
+
+ # Expiry checking stuff
+ now = datetime.datetime.now()
+ # todo, catch exception for invalid input and return a fail_json
+ warning_days = int(module.params['warning_days'])
+ expire_window = datetime.timedelta(days=warning_days)
+
+ # Module stuff
+ #
+ # The results of our cert checking to return from the task call
+ check_results = {}
+ check_results['meta'] = {}
+ check_results['meta']['warning_days'] = warning_days
+ check_results['meta']['checked_at_time'] = str(now)
+ check_results['meta']['warn_after_date'] = str(now + expire_window)
+ check_results['meta']['show_all'] = str(module.params['show_all'])
+ # All the analyzed certs accumulate here
+ ocp_certs = []
+
+ ######################################################################
+ # Sure, why not? Let's enable check mode.
+ if module.check_mode:
+ check_results['ocp_certs'] = []
+ module.exit_json(
+ check_results=check_results,
+ msg="Checked 0 total certificates. Expired/Warning/OK: 0/0/0. Warning window: %s days" % module.params['warning_days'],
+ rc=0,
+ changed=False
+ )
+
+ ######################################################################
+ # Check for OpenShift Container Platform specific certs
+ ######################################################################
+ for os_cert in filter_paths(openshift_cert_check_paths):
+ # Open up that config file and locate the cert and CA
+ with open(os_cert, 'r') as fp:
+ cert_meta = {}
+ cfg = yaml.load(fp)
+ # cert files are specified in parsed `fp` as relative to the path
+ # of the original config file. 'master-config.yaml' with certFile
+ # = 'foo.crt' implies that 'foo.crt' is in the same
+ # directory. certFile = '../foo.crt' is in the parent directory.
+ cfg_path = os.path.dirname(fp.name)
+ cert_meta['certFile'] = os.path.join(cfg_path, cfg['servingInfo']['certFile'])
+ cert_meta['clientCA'] = os.path.join(cfg_path, cfg['servingInfo']['clientCA'])
+
+ ######################################################################
+ # Load the certificate and the CA, parse their expiration dates into
+ # datetime objects so we can manipulate them later
+ for _, v in cert_meta.iteritems():
+ with open(v, 'r') as fp:
+ cert = fp.read()
+ cert_subject, cert_expiry_date, time_remaining = load_and_handle_cert(cert, now)
+
+ expire_check_result = {
+ 'cert_cn': cert_subject,
+ 'path': fp.name,
+ 'expiry': cert_expiry_date,
+ 'days_remaining': time_remaining.days,
+ 'health': None,
+ }
+
+ classify_cert(expire_check_result, now, time_remaining, expire_window, ocp_certs)
+
+ ######################################################################
+ # /Check for OpenShift Container Platform specific certs
+ ######################################################################
+
+ ######################################################################
+ # Check service Kubeconfigs
+ ######################################################################
+ kubeconfigs = []
+
+ # There may be additional kubeconfigs to check, but their naming
+ # is less predictable than the ones we've already assembled.
+
+ try:
+ # Try to read the standard 'node-config.yaml' file to check if
+ # this host is a node.
+ with open(openshift_node_config_path, 'r') as fp:
+ cfg = yaml.load(fp)
+
+ # OK, the config file exists, therefore this is a
+ # node. Nodes have their own kubeconfig files to
+ # communicate with the master API. Let's read the relative
+ # path to that file from the node config.
+ node_masterKubeConfig = cfg['masterKubeConfig']
+ # As before, the path to the 'masterKubeConfig' file is
+ # relative to `fp`
+ cfg_path = os.path.dirname(fp.name)
+ node_kubeconfig = os.path.join(cfg_path, node_masterKubeConfig)
+
+ with open(node_kubeconfig, 'r') as fp:
+ # Read in the nodes kubeconfig file and grab the good stuff
+ cfg = yaml.load(fp)
+
+ c = cfg['users'][0]['user']['client-certificate-data']
+ (cert_subject,
+ cert_expiry_date,
+ time_remaining) = load_and_handle_cert(c, now, base64decode=True)
+
+ expire_check_result = {
+ 'cert_cn': cert_subject,
+ 'path': fp.name,
+ 'expiry': cert_expiry_date,
+ 'days_remaining': time_remaining.days,
+ 'health': None,
+ }
+
+ classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs)
+ except IOError:
+ # This is not a node
+ pass
+
+ for kube in filter_paths(kubeconfig_paths):
+ with open(kube, 'r') as fp:
+ # TODO: Maybe consider catching exceptions here?
+ cfg = yaml.load(fp)
+
+ # Per conversation, "the kubeconfigs you care about:
+ # admin, router, registry should all be single
+ # value". Following that advice we only grab the data for
+ # the user at index 0 in the 'users' list. There should
+ # not be more than one user.
+ c = cfg['users'][0]['user']['client-certificate-data']
+ (cert_subject,
+ cert_expiry_date,
+ time_remaining) = load_and_handle_cert(c, now, base64decode=True)
+
+ expire_check_result = {
+ 'cert_cn': cert_subject,
+ 'path': fp.name,
+ 'expiry': cert_expiry_date,
+ 'days_remaining': time_remaining.days,
+ 'health': None,
+ }
+
+ classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs)
+
+ ######################################################################
+ # /Check service Kubeconfigs
+ ######################################################################
+
+ ######################################################################
+ # Check etcd certs
+ ######################################################################
+ # Some values may be duplicated, make this a set for now so we
+ # unique them all
+ etcd_certs_to_check = set([])
+ etcd_certs = []
+ etcd_cert_params.append('dne')
+ try:
+ with open('/etc/etcd/etcd.conf', 'r') as fp:
+ etcd_config = ConfigParser.ConfigParser()
+ etcd_config.readfp(FakeSecHead(fp))
+
+ for param in etcd_cert_params:
+ try:
+ etcd_certs_to_check.add(etcd_config.get('ETCD', param))
+ except ConfigParser.NoOptionError:
+ # That parameter does not exist, oh well...
+ pass
+ except IOError:
+ # No etcd to see here, move along
+ pass
+
+ for etcd_cert in filter_paths(etcd_certs_to_check):
+ with open(etcd_cert, 'r') as fp:
+ c = fp.read()
+ (cert_subject,
+ cert_expiry_date,
+ time_remaining) = load_and_handle_cert(c, now)
+
+ expire_check_result = {
+ 'cert_cn': cert_subject,
+ 'path': fp.name,
+ 'expiry': cert_expiry_date,
+ 'days_remaining': time_remaining.days,
+ 'health': None,
+ }
+
+ classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)
+
+ ######################################################################
+ # /Check etcd certs
+ ######################################################################
+
+ ######################################################################
+ # Check router/registry certs
+ #
+ # These are saved as secrets in etcd. That means that we can not
+ # simply read a file to grab the data. Instead we're going to
+ # subprocess out to the 'oc get' command. On non-masters this
+ # command will fail, that is expected so we catch that exception.
+ ######################################################################
+ router_certs = []
+ registry_certs = []
+
+ ######################################################################
+ # First the router certs
+ try:
+ router_secrets_raw = subprocess.Popen('oc get secret router-certs -o yaml'.split(),
+ stdout=subprocess.PIPE)
+ router_ds = yaml.load(router_secrets_raw.communicate()[0])
+ router_c = router_ds['data']['tls.crt']
+ router_path = router_ds['metadata']['selfLink']
+ except TypeError:
+ # YAML couldn't load the result, this is not a master
+ pass
+ else:
+ (cert_subject,
+ cert_expiry_date,
+ time_remaining) = load_and_handle_cert(router_c, now, base64decode=True)
+
+ expire_check_result = {
+ 'cert_cn': cert_subject,
+ 'path': router_path,
+ 'expiry': cert_expiry_date,
+ 'days_remaining': time_remaining.days,
+ 'health': None,
+ }
+
+ classify_cert(expire_check_result, now, time_remaining, expire_window, router_certs)
+
+ check_results['router'] = router_certs
+
+ ######################################################################
+ # Now for registry
+ # registry_secrets = subprocess.call('oc get secret registry-certificates -o yaml'.split())
+ # out = subprocess.PIPE
+ try:
+ registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(),
+ stdout=subprocess.PIPE)
+ registry_ds = yaml.load(registry_secrets_raw.communicate()[0])
+ registry_c = registry_ds['data']['registry.crt']
+ registry_path = registry_ds['metadata']['selfLink']
+ except TypeError:
+ # YAML couldn't load the result, this is not a master
+ pass
+ else:
+ (cert_subject,
+ cert_expiry_date,
+ time_remaining) = load_and_handle_cert(registry_c, now, base64decode=True)
+
+ expire_check_result = {
+ 'cert_cn': cert_subject,
+ 'path': registry_path,
+ 'expiry': cert_expiry_date,
+ 'days_remaining': time_remaining.days,
+ 'health': None,
+ }
+
+ classify_cert(expire_check_result, now, time_remaining, expire_window, registry_certs)
+
+ check_results['registry'] = registry_certs
+
+ ######################################################################
+ # /Check router/registry certs
+ ######################################################################
+
+ res = tabulate_summary(ocp_certs, kubeconfigs, etcd_certs, router_certs, registry_certs)
+
+ msg = "Checked {count} total certificates. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format(
+ count=res['total'],
+ exp=res['expired'],
+ warn=res['warning'],
+ ok=res['ok'],
+ window=int(module.params['warning_days']),
+ )
+
+ # By default we only return detailed information about expired or
+ # warning certificates. If show_all is true then we will print all
+ # the certificates examined.
+ if not module.params['show_all']:
+ check_results['ocp_certs'] = [crt for crt in ocp_certs if crt['health'] in ['expired', 'warning']]
+ check_results['kubeconfigs'] = [crt for crt in kubeconfigs if crt['health'] in ['expired', 'warning']]
+ check_results['etcd'] = [crt for crt in etcd_certs if crt['health'] in ['expired', 'warning']]
+ else:
+ check_results['ocp_certs'] = ocp_certs
+ check_results['kubeconfigs'] = kubeconfigs
+ check_results['etcd'] = etcd_certs
+
+ # Sort the final results to report in order of ascending safety
+ # time. That is to say, the certificates which will expire sooner
+ # will be at the front of the list and certificates which will
+ # expire later are at the end.
+ check_results['ocp_certs'] = sorted(check_results['ocp_certs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining']))
+ check_results['kubeconfigs'] = sorted(check_results['kubeconfigs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining']))
+ check_results['etcd'] = sorted(check_results['etcd'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining']))
+
+ # This module will never change anything, but we might want to
+ # change the return code parameter if there is some catastrophic
+ # error we noticed earlier
+ module.exit_json(
+ check_results=check_results,
+ summary=res,
+ msg=msg,
+ rc=0,
+ changed=False
+ )
+
+######################################################################
+# It's just the way we do things in Ansible. So disable this warning
+#
+# pylint: disable=wrong-import-position,import-error
+from ansible.module_utils.basic import AnsibleModule
+if __name__ == '__main__':
+ main()