11 files changed, 148 insertions, 95 deletions
diff --git a/roles/openshift_health_checker/meta/main.yml b/roles/openshift_health_checker/meta/main.yml
index ed97d539c..bc8e7bdcf 100644
--- a/roles/openshift_health_checker/meta/main.yml
+++ b/roles/openshift_health_checker/meta/main.yml
@@ -1 +1,3 @@
 ---
+dependencies:
+- role: openshift_facts
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py
index f26008c9f..07ec6f7ef 100644
--- a/roles/openshift_health_checker/openshift_checks/__init__.py
+++ b/roles/openshift_health_checker/openshift_checks/__init__.py
@@ -10,6 +10,7 @@ from importlib import import_module
 
 from ansible.module_utils import six
 from ansible.module_utils.six.moves import reduce  # pylint: disable=import-error,redefined-builtin
+from ansible.plugins.filter.core import to_bool as ansible_to_bool
 
 
 class OpenShiftCheckException(Exception):
@@ -119,16 +120,59 @@ class OpenShiftCheck(object):
 
         Ansible task_vars structures are Python dicts, often mapping strings to
         other dicts. This helper makes it easier to get a nested value, raising
-        OpenShiftCheckException when a key is not found or returning a default value
-        provided as a keyword argument.
+        OpenShiftCheckException when a key is not found.
+
+        Keyword args:
+          default:
+            On missing key, return this as default value instead of raising exception.
+          convert:
+            Supply a function to apply to normalize the value before returning it.
+            None is the default (return as-is).
+            This function should raise ValueError if the user has provided a value
+            that cannot be converted, or OpenShiftCheckException if some other
+            problem needs to be described to the user.
         """
+        if len(keys) == 1:
+            keys = keys[0].split(".")
+
         try:
             value = reduce(operator.getitem, keys, self.task_vars)
         except (KeyError, TypeError):
-            if "default" in kwargs:
-                return kwargs["default"]
-            raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys))))
-        return value
+            if "default" not in kwargs:
+                raise OpenShiftCheckException(
+                    "This check expects the '{}' inventory variable to be defined\n"
+                    "in order to proceed, but it is undefined. There may be a bug\n"
+                    "in Ansible, the checks, or their dependencies."
+                    "".format(".".join(map(str, keys)))
+                )
+            value = kwargs["default"]
+
+        convert = kwargs.get("convert", None)
+        try:
+            if convert is None:
+                return value
+            elif convert is bool:  # interpret bool as Ansible does, instead of python truthiness
+                return ansible_to_bool(value)
+            else:
+                return convert(value)
+
+        except ValueError as error:  # user error in specifying value
+            raise OpenShiftCheckException(
+                'Cannot convert inventory variable to expected type:\n'
+                '  "{var}={value}"\n'
+                '{error}'.format(var=".".join(keys), value=value, error=error)
+            )
+
+        except OpenShiftCheckException:  # some other check-specific problem
+            raise
+
+        except Exception as error:  # probably a bug in the function
+            raise OpenShiftCheckException(
+                'There is a bug in this check. While trying to convert variable \n'
+                '  "{var}={value}"\n'
+                'the given converter cannot be used or failed unexpectedly:\n'
+                '{error}'.format(var=".".join(keys), value=value, error=error)
+            )
 
     @staticmethod
     def get_major_minor_version(openshift_image_tag):
@@ -153,6 +197,31 @@ class OpenShiftCheck(object):
         components = tuple(int(x) for x in components[:2])
         return components
 
+    def find_ansible_mount(self, path):
+        """Return the mount point for path from ansible_mounts."""
+
+        # reorganize list of mounts into dict by path
+        mount_for_path = {
+            mount['mount']: mount
+            for mount
+            in self.get_var('ansible_mounts')
+        }
+
+        # NOTE: including base cases '/' and '' to ensure the loop ends
+        mount_targets = set(mount_for_path.keys()) | {'/', ''}
+        mount_point = path
+        while mount_point not in mount_targets:
+            mount_point = os.path.dirname(mount_point)
+
+        try:
+            return mount_for_path[mount_point]
+        except KeyError:
+            known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path))
+            raise OpenShiftCheckException(
+                'Unable to determine mount point for path "{}".\n'
+                'Known mount points: {}.'.format(path, known_mounts or 'none')
+            )
+
 
 LOADER_EXCLUDES = (
     "__init__.py",
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py
index 39ac0e4ec..6d1dea9ce 100644
--- a/roles/openshift_health_checker/openshift_checks/disk_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py
@@ -1,6 +1,5 @@
 """Check that there is enough disk space in predefined paths."""
 
-import os.path
 import tempfile
 
 from openshift_checks import OpenShiftCheck, OpenShiftCheckException
@@ -55,9 +54,6 @@ class DiskAvailability(OpenShiftCheck):
 
     def run(self):
         group_names = self.get_var("group_names")
-        ansible_mounts = self.get_var("ansible_mounts")
-        ansible_mounts = {mount['mount']: mount for mount in ansible_mounts}
-
         user_config = self.get_var("openshift_check_min_host_disk_gb", default={})
         try:
             # For backwards-compatibility, if openshift_check_min_host_disk_gb
@@ -80,7 +76,7 @@ class DiskAvailability(OpenShiftCheck):
         # not part of the official recommendation but present in the user
         # configuration.
         for path, recommendation in self.recommended_disk_space_bytes.items():
-            free_bytes = self.free_bytes(path, ansible_mounts)
+            free_bytes = self.free_bytes(path)
             recommended_bytes = max(recommendation.get(name, 0) for name in group_names)
 
             config = user_config.get(path, {})
@@ -127,22 +123,17 @@ class DiskAvailability(OpenShiftCheck):
 
         return {}
 
-    @staticmethod
-    def free_bytes(path, ansible_mounts):
+    def free_bytes(self, path):
         """Return the size available in path based on ansible_mounts."""
-        mount_point = path
-        # arbitry value to prevent an infinite loop, in the unlike case that '/'
-        # is not in ansible_mounts.
-        max_depth = 32
-        while mount_point not in ansible_mounts and max_depth > 0:
-            mount_point = os.path.dirname(mount_point)
-            max_depth -= 1
-
+        mount = self.find_ansible_mount(path)
         try:
-            free_bytes = ansible_mounts[mount_point]['size_available']
+            return mount['size_available']
         except KeyError:
-            known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(ansible_mounts)) or 'none'
-            msg = 'Unable to determine disk availability for "{}". Known mount points: {}.'
-            raise OpenShiftCheckException(msg.format(path, known_mounts))
-
-        return free_bytes
+            raise OpenShiftCheckException(
+                'Unable to retrieve disk availability for "{path}".\n'
+                'Ansible facts included a matching mount point for this path:\n'
+                '  {mount}\n'
+                'however it is missing the size_available field.\n'
+                'To investigate, you can inspect the output of `ansible -m setup <host>`'
+                ''.format(path=path, mount=mount)
+            )
diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py
index 7ae384bd7..0558ddf14 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_storage.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py
@@ -1,6 +1,5 @@
 """Check Docker storage driver and usage."""
 import json
-import os.path
 import re
 from openshift_checks import OpenShiftCheck, OpenShiftCheckException
 from openshift_checks.mixins import DockerHostMixin
@@ -252,7 +251,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
                 "msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(threshold),
             }
 
-        mount = self.find_ansible_mount(path, self.get_var("ansible_mounts"))
+        mount = self.find_ansible_mount(path)
         try:
             free_bytes = mount['size_available']
             total_bytes = mount['size_total']
@@ -275,22 +274,3 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
             }
 
         return {}
-
-    # TODO(lmeyer): migrate to base class
-    @staticmethod
-    def find_ansible_mount(path, ansible_mounts):
-        """Return the mount point for path from ansible_mounts."""
-
-        mount_for_path = {mount['mount']: mount for mount in ansible_mounts}
-        mount_point = path
-        while mount_point not in mount_for_path:
-            if mount_point in ["/", ""]:  # "/" not in ansible_mounts???
-                break
-            mount_point = os.path.dirname(mount_point)
-
-        try:
-            return mount_for_path[mount_point]
-        except KeyError:
-            known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) or 'none'
-            msg = 'Unable to determine mount point for path "{}". Known mount points: {}.'
-            raise OpenShiftCheckException(msg.format(path, known_mounts))
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
index ae8460b7e..f4296753a 100644
--- a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
+++ b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
@@ -2,7 +2,7 @@
 Ansible module for determining if the size of OpenShift image data exceeds a specified limit in an etcd cluster.
 """
 
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException
+from openshift_checks import OpenShiftCheck
 
 
 class EtcdImageDataSize(OpenShiftCheck):
@@ -12,7 +12,7 @@ class EtcdImageDataSize(OpenShiftCheck):
     tags = ["etcd"]
 
     def run(self):
-        etcd_mountpath = self._get_etcd_mountpath(self.get_var("ansible_mounts"))
+        etcd_mountpath = self.find_ansible_mount("/var/lib/etcd")
         etcd_avail_diskspace = etcd_mountpath["size_available"]
         etcd_total_diskspace = etcd_mountpath["size_total"]
 
@@ -68,18 +68,5 @@ class EtcdImageDataSize(OpenShiftCheck):
         return {}
 
     @staticmethod
-    def _get_etcd_mountpath(ansible_mounts):
-        valid_etcd_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]
-
-        mount_for_path = {mnt.get("mount"): mnt for mnt in ansible_mounts}
-        for path in valid_etcd_mount_paths:
-            if path in mount_for_path:
-                return mount_for_path[path]
-
-        paths = ', '.join(sorted(mount_for_path)) or 'none'
-        msg = "Unable to determine a valid etcd mountpath. Paths mounted: {}.".format(paths)
-        raise OpenShiftCheckException(msg)
-
-    @staticmethod
     def _to_gigabytes(byte_size):
         return float(byte_size) / 10.0**9
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
index e55d55e91..e5d93ff3f 100644
--- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py
+++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
@@ -1,6 +1,6 @@
 """A health check for OpenShift clusters."""
 
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException
+from openshift_checks import OpenShiftCheck
 
 
 class EtcdVolume(OpenShiftCheck):
@@ -11,8 +11,8 @@ class EtcdVolume(OpenShiftCheck):
 
     # Default device usage threshold. Value should be in the range [0, 100].
     default_threshold_percent = 90
-    # Where to find ectd data, higher priority first.
-    supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]
+    # Where to find etcd data
+    etcd_mount_path = "/var/lib/etcd"
 
     def is_active(self):
         etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or []
@@ -20,7 +20,7 @@ class EtcdVolume(OpenShiftCheck):
         return super(EtcdVolume, self).is_active() and is_etcd_host
 
     def run(self):
-        mount_info = self._etcd_mount_info()
+        mount_info = self.find_ansible_mount(self.etcd_mount_path)
         available = mount_info["size_available"]
         total = mount_info["size_total"]
         used = total - available
@@ -41,15 +41,3 @@ class EtcdVolume(OpenShiftCheck):
             return {"failed": True, "msg": msg}
 
         return {}
-
-    def _etcd_mount_info(self):
-        ansible_mounts = self.get_var("ansible_mounts")
-        mounts = {mnt.get("mount"): mnt for mnt in ansible_mounts}
-
-        for path in self.supported_mount_paths:
-            if path in mounts:
-                return mounts[path]
-
-        paths = ', '.join(sorted(mounts)) or 'none'
-        msg = "Unable to find etcd storage mount point. Paths mounted: {}.".format(paths)
-        raise OpenShiftCheckException(msg)
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py
index 3b7c39760..ecd8adb64 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/logging.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py
@@ -27,7 +27,7 @@ class LoggingCheck(OpenShiftCheck):
     name = "logging"
 
     def is_active(self):
-        logging_deployed = self.get_var("openshift_hosted_logging_deploy", default=False)
+        logging_deployed = self.get_var("openshift_hosted_logging_deploy", convert=bool, default=False)
         return logging_deployed and super(LoggingCheck, self).is_active() and self.is_first_master()
 
     def is_first_master(self):
diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py
index 5720eeacf..f4fd2dfed 100644
--- a/roles/openshift_health_checker/test/disk_availability_test.py
+++ b/roles/openshift_health_checker/test/disk_availability_test.py
@@ -20,12 +20,24 @@ def test_is_active(group_names, is_active):
     assert DiskAvailability(None, task_vars).is_active() == is_active
 
 
-@pytest.mark.parametrize('ansible_mounts,extra_words', [
-    ([], ['none']),  # empty ansible_mounts
-    ([{'mount': '/mnt'}], ['/mnt']),  # missing relevant mount paths
-    ([{'mount': '/var'}], ['/var']),  # missing size_available
+@pytest.mark.parametrize('desc, ansible_mounts, expect_chunks', [
+    (
+        'empty ansible_mounts',
+        [],
+        ['determine mount point', 'none'],
+    ),
+    (
+        'missing relevant mount paths',
+        [{'mount': '/mnt'}],
+        ['determine mount point', '/mnt'],
+    ),
+    (
+        'missing size_available',
+        [{'mount': '/var'}, {'mount': '/usr'}, {'mount': '/tmp'}],
+        ['missing', 'size_available'],
+    ),
 ])
-def test_cannot_determine_available_disk(ansible_mounts, extra_words):
+def test_cannot_determine_available_disk(desc, ansible_mounts, expect_chunks):
     task_vars = dict(
         group_names=['masters'],
         ansible_mounts=ansible_mounts,
@@ -34,8 +46,8 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
     with pytest.raises(OpenShiftCheckException) as excinfo:
         DiskAvailability(fake_execute_module, task_vars).run()
 
-    for word in 'determine disk availability'.split() + extra_words:
-        assert word in str(excinfo.value)
+    for chunk in expect_chunks:
+        assert chunk in str(excinfo.value)
 
 
 @pytest.mark.parametrize('group_names,configured_min,ansible_mounts', [
@@ -97,7 +109,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
     assert not result.get('failed', False)
 
 
-@pytest.mark.parametrize('name,group_names,configured_min,ansible_mounts,extra_words', [
+@pytest.mark.parametrize('name,group_names,configured_min,ansible_mounts,expect_chunks', [
     (
         'test with no space available',
         ['masters'],
@@ -164,7 +176,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
         ['0.0 GB'],
     ),
 ], ids=lambda argval: argval[0])
-def test_fails_with_insufficient_disk_space(name, group_names, configured_min, ansible_mounts, extra_words):
+def test_fails_with_insufficient_disk_space(name, group_names, configured_min, ansible_mounts, expect_chunks):
     task_vars = dict(
         group_names=group_names,
         openshift_check_min_host_disk_gb=configured_min,
@@ -174,8 +186,8 @@ def test_fails_with_insufficient_disk_space(name, group_names, configured_min, a
     result = DiskAvailability(fake_execute_module, task_vars).run()
 
     assert result['failed']
-    for word in 'below recommended'.split() + extra_words:
-        assert word in result.get('msg', '')
+    for chunk in 'below recommended'.split() + expect_chunks:
+        assert chunk in result.get('msg', '')
 
 
 @pytest.mark.parametrize('name,group_names,context,ansible_mounts,failed,extra_words', [
diff --git a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py
index e3d6706fa..d3aae98f2 100644
--- a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py
+++ b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py
@@ -1,7 +1,8 @@
 import pytest
 
 from collections import namedtuple
-from openshift_checks.etcd_imagedata_size import EtcdImageDataSize, OpenShiftCheckException
+from openshift_checks.etcd_imagedata_size import EtcdImageDataSize
+from openshift_checks import OpenShiftCheckException
 from etcdkeysize import check_etcd_key_size
 
 
@@ -56,7 +57,7 @@ def test_cannot_determine_available_mountpath(ansible_mounts, extra_words):
     with pytest.raises(OpenShiftCheckException) as excinfo:
         check.run()
 
-    for word in 'determine valid etcd mountpath'.split() + extra_words:
+    for word in ['Unable to determine mount point'] + extra_words:
         assert word in str(excinfo.value)
 
 
diff --git a/roles/openshift_health_checker/test/etcd_volume_test.py b/roles/openshift_health_checker/test/etcd_volume_test.py
index 0b255136e..077cea3ea 100644
--- a/roles/openshift_health_checker/test/etcd_volume_test.py
+++ b/roles/openshift_health_checker/test/etcd_volume_test.py
@@ -1,6 +1,7 @@
 import pytest
 
-from openshift_checks.etcd_volume import EtcdVolume, OpenShiftCheckException
+from openshift_checks.etcd_volume import EtcdVolume
+from openshift_checks import OpenShiftCheckException
 
 
 @pytest.mark.parametrize('ansible_mounts,extra_words', [
@@ -15,7 +16,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
     with pytest.raises(OpenShiftCheckException) as excinfo:
         EtcdVolume(fake_execute_module, task_vars).run()
 
-    for word in 'Unable to find etcd storage mount point'.split() + extra_words:
+    for word in ['Unable to determine mount point'] + extra_words:
         assert word in str(excinfo.value)
 
 
diff --git a/roles/openshift_health_checker/test/openshift_check_test.py b/roles/openshift_health_checker/test/openshift_check_test.py
index 43aa875f4..789784c77 100644
--- a/roles/openshift_health_checker/test/openshift_check_test.py
+++ b/roles/openshift_health_checker/test/openshift_check_test.py
@@ -81,6 +81,7 @@ def dummy_check(task_vars):
 @pytest.mark.parametrize("keys,expected", [
     (("foo",), 42),
     (("bar", "baz"), "openshift"),
+    (("bar.baz",), "openshift"),
 ])
 def test_get_var_ok(task_vars, keys, expected):
     assert dummy_check(task_vars).get_var(*keys) == expected
@@ -94,3 +95,24 @@ def test_get_var_error(task_vars, missing_keys):
 def test_get_var_default(task_vars, missing_keys):
     default = object()
     assert dummy_check(task_vars).get_var(*missing_keys, default=default) == default
+
+
+@pytest.mark.parametrize("keys, convert, expected", [
+    (("foo",), str, "42"),
+    (("foo",), float, 42.0),
+    (("bar", "baz"), bool, False),
+])
+def test_get_var_convert(task_vars, keys, convert, expected):
+    assert dummy_check(task_vars).get_var(*keys, convert=convert) == expected
+
+
+@pytest.mark.parametrize("keys, convert", [
+    (("bar", "baz"), int),
+    (("bar.baz"), float),
+    (("foo"), "bogus"),
+    (("foo"), lambda a, b: 1),
+    (("foo"), lambda a: 1 / 0),
+])
+def test_get_var_convert_error(task_vars, keys, convert):
+    with pytest.raises(OpenShiftCheckException):
+        dummy_check(task_vars).get_var(*keys, convert=convert)