21 files changed, 956 insertions, 193 deletions
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
index a62e4331e..0390dc82e 100644
--- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py
+++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
@@ -38,14 +38,13 @@ class ActionModule(ActionBase):
 
         try:
             known_checks = self.load_known_checks()
+            args = self._task.args
+            resolved_checks = resolve_checks(args.get("checks", []), known_checks.values())
         except OpenShiftCheckException as e:
             result["failed"] = True
             result["msg"] = str(e)
             return result
 
-        args = self._task.args
-        resolved_checks = resolve_checks(args.get("checks", []), known_checks.values())
-
         result["checks"] = check_results = {}
 
         user_disabled_checks = [
diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
index 64c29a8d9..443b76ea1 100644
--- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
+++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
@@ -39,7 +39,8 @@ class CallbackModule(CallbackBase):
 
     def v2_runner_on_failed(self, result, ignore_errors=False):
         super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors)
-        self.__failures.append(dict(result=result, ignore_errors=ignore_errors))
+        if not ignore_errors:
+            self.__failures.append(dict(result=result, ignore_errors=ignore_errors))
 
     def v2_playbook_on_stats(self, stats):
         super(CallbackModule, self).v2_playbook_on_stats(stats)
diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py
index 4c205e48c..4f43ee751 100755
--- a/roles/openshift_health_checker/library/aos_version.py
+++ b/roles/openshift_health_checker/library/aos_version.py
@@ -19,6 +19,10 @@ the inventory, the version comparison checks just pass.
 '''
 
 from ansible.module_utils.basic import AnsibleModule
+# NOTE: because of the dependency on yum (Python 2-only), this module does not
+# work under Python 3. But since we run unit tests against both Python 2 and
+# Python 3, we use six for cross compatibility in this module alone:
+from ansible.module_utils.six import string_types
 
 IMPORT_EXCEPTION = None
 try:
@@ -122,12 +126,15 @@ def _check_precise_version_found(pkgs, expected_pkgs_dict):
     for pkg in pkgs:
         if pkg.name not in expected_pkgs_dict:
             continue
-        # does the version match, to the precision requested?
-        # and, is it strictly greater, at the precision requested?
-        expected_pkg_version = expected_pkgs_dict[pkg.name]["version"]
-        match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1])
-        if match_version == expected_pkg_version:
-            pkgs_precise_version_found.add(pkg.name)
+        expected_pkg_versions = expected_pkgs_dict[pkg.name]["version"]
+        if isinstance(expected_pkg_versions, string_types):
+            expected_pkg_versions = [expected_pkg_versions]
+        for expected_pkg_version in expected_pkg_versions:
+            # does the version match, to the precision requested?
+            # any one of the expected versions matching is sufficient.
+            match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1])
+            if match_version == expected_pkg_version:
+                pkgs_precise_version_found.add(pkg.name)
 
     not_found = []
     for name, pkg in expected_pkgs_dict.items():
@@ -157,8 +164,13 @@ def _check_higher_version_found(pkgs, expected_pkgs_dict):
     for pkg in pkgs:
         if pkg.name not in expected_pkg_names:
             continue
-        expected_pkg_version = expected_pkgs_dict[pkg.name]["version"]
-        req_release_arr = [int(segment) for segment in expected_pkg_version.split(".")]
+        expected_pkg_versions = expected_pkgs_dict[pkg.name]["version"]
+        if isinstance(expected_pkg_versions, string_types):
+            expected_pkg_versions = [expected_pkg_versions]
+        # NOTE: the list of versions is assumed to be sorted so that the highest
+        # desirable version is the last.
+        highest_desirable_version = expected_pkg_versions[-1]
+        req_release_arr = [int(segment) for segment in highest_desirable_version.split(".")]
         version = [int(segment) for segment in pkg.version.split(".")]
         too_high = version[:len(req_release_arr)] > req_release_arr
         higher_than_seen = version > higher_version_for_pkg.get(pkg.name, [])
diff --git a/roles/openshift_health_checker/library/search_journalctl.py b/roles/openshift_health_checker/library/search_journalctl.py
new file mode 100644
index 000000000..3631f71c8
--- /dev/null
+++ b/roles/openshift_health_checker/library/search_journalctl.py
@@ -0,0 +1,150 @@
+#!/usr/bin/python
+"""Interface to journalctl."""
+
+from time import time
+import json
+import re
+import subprocess
+
+from ansible.module_utils.basic import AnsibleModule
+
+
+class InvalidMatcherRegexp(Exception):
+    """Exception class for invalid matcher regexp."""
+    pass
+
+
+class InvalidLogEntry(Exception):
+    """Exception class for invalid / non-json log entries."""
+    pass
+
+
+class LogInputSubprocessError(Exception):
+    """Exception class for errors that occur while executing a subprocess."""
+    pass
+
+
+def main():
+    """Scan journalctl for messages matching the patterns in a given list of "log_matchers".
+    "log_matchers" is a list of dicts consisting of three keys that help fine-tune log searching:
+    'start_regexp', 'regexp', and 'unit'.
+
+    Sample "log_matchers" list:
+
+    [
+      {
+        'start_regexp': r'Beginning of systemd unit',
+        'regexp': r'the specific log message to find',
+        'unit': 'etcd',
+      }
+    ]
+    """
+    module = AnsibleModule(
+        argument_spec=dict(
+            log_count_limit=dict(type="int", default=500),
+            log_matchers=dict(type="list", required=True),
+        ),
+    )
+
+    timestamp_limit_seconds = time() - 60 * 60  # 1 hour
+
+    log_count_limit = module.params["log_count_limit"]
+    log_matchers = module.params["log_matchers"]
+
+    matched_regexp, errors = get_log_matches(log_matchers, log_count_limit, timestamp_limit_seconds)
+
+    module.exit_json(
+        changed=False,
+        failed=bool(errors),
+        errors=errors,
+        matched=matched_regexp,
+    )
+
+
+def get_log_matches(matchers, log_count_limit, timestamp_limit_seconds):
+    """Return (matched regexps, errors), scanning up to log_count_limit entries per matcher.
+
+    Log entries are only considered if newer than timestamp_limit_seconds.
+    """
+    matched_regexp = []
+    errors = []
+
+    for matcher in matchers:
+        try:
+            log_output = get_log_output(matcher)
+        except LogInputSubprocessError as err:
+            errors.append(str(err))
+            continue
+
+        try:
+            matched = find_matches(log_output, matcher, log_count_limit, timestamp_limit_seconds)
+            if matched:
+                matched_regexp.append(matcher.get("regexp", ""))
+        except InvalidMatcherRegexp as err:
+            errors.append(str(err))
+        except InvalidLogEntry as err:
+            errors.append(str(err))
+
+    return matched_regexp, errors
+
+
+def get_log_output(matcher):
+    """Return an iterator on the logs of a given matcher."""
+    try:
+        cmd_output = subprocess.Popen(list([
+            '/bin/journalctl',
+            '-ru', matcher.get("unit", ""),
+            '--output', 'json',
+        ]), stdout=subprocess.PIPE)
+
+        return iter(cmd_output.stdout.readline, '')
+
+    except subprocess.CalledProcessError as exc:
+        msg = "Could not obtain journalctl logs for the specified systemd unit: {}: {}"
+        raise LogInputSubprocessError(msg.format(matcher.get("unit", "<missing>"), str(exc)))
+    except OSError as exc:
+        raise LogInputSubprocessError(str(exc))
+
+
+def find_matches(log_output, matcher, log_count_limit, timestamp_limit_seconds):
+    """Return the first line of iterable log_output matched by a given matcher, or None.
+
+    Ignore any log_output items older than timestamp_limit_seconds.
+    """
+    try:
+        regexp = re.compile(matcher.get("regexp", ""))
+        start_regexp = re.compile(matcher.get("start_regexp", ""))
+    except re.error as err:
+        msg = "A log matcher object was provided with an invalid regular expression: {}"
+        raise InvalidMatcherRegexp(msg.format(str(err)))
+
+    matched = None
+
+    for log_count, line in enumerate(log_output):
+        if log_count >= log_count_limit:
+            break
+
+        try:
+            obj = json.loads(line)
+
+            # don't need to look past the most recent service restart
+            if start_regexp.match(obj["MESSAGE"]):
+                break
+
+            log_timestamp_seconds = float(obj["__REALTIME_TIMESTAMP"]) / 1000000
+            if log_timestamp_seconds < timestamp_limit_seconds:
+                break
+
+            if regexp.match(obj["MESSAGE"]):
+                matched = line
+                break
+
+        except ValueError:
+            msg = "Log entry for systemd unit {} contained invalid json syntax: {}"
+            raise InvalidLogEntry(msg.format(matcher.get("unit"), line))
+
+    return matched
+
+
+if __name__ == '__main__':
+    main()
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py
index 962148cb8..e93e81efa 100644
--- a/roles/openshift_health_checker/openshift_checks/disk_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py
@@ -1,9 +1,12 @@
-# pylint: disable=missing-docstring
+"""Check that there is enough disk space in predefined paths."""
+
+import os.path
+import tempfile
+
 from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
-from openshift_checks.mixins import NotContainerizedMixin
 
 
-class DiskAvailability(NotContainerizedMixin, OpenShiftCheck):
+class DiskAvailability(OpenShiftCheck):
     """Check that recommended disk space is available before a first-time install."""
 
     name = "disk_availability"
@@ -12,56 +15,101 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck):
     # Values taken from the official installation documentation:
     # https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements
     recommended_disk_space_bytes = {
-        "masters": 40 * 10**9,
-        "nodes": 15 * 10**9,
-        "etcd": 20 * 10**9,
+        '/var': {
+            'masters': 40 * 10**9,
+            'nodes': 15 * 10**9,
+            'etcd': 20 * 10**9,
+        },
+        # Used to copy client binaries into,
+        # see roles/openshift_cli/library/openshift_container_binary_sync.py.
+        '/usr/local/bin': {
+            'masters': 1 * 10**9,
+            'nodes': 1 * 10**9,
+            'etcd': 1 * 10**9,
+        },
+        # Used as temporary storage in several cases.
+        tempfile.gettempdir(): {
+            'masters': 1 * 10**9,
+            'nodes': 1 * 10**9,
+            'etcd': 1 * 10**9,
+        },
     }
 
     @classmethod
     def is_active(cls, task_vars):
         """Skip hosts that do not have recommended disk space requirements."""
         group_names = get_var(task_vars, "group_names", default=[])
-        has_disk_space_recommendation = bool(set(group_names).intersection(cls.recommended_disk_space_bytes))
+        active_groups = set()
+        for recommendation in cls.recommended_disk_space_bytes.values():
+            active_groups.update(recommendation.keys())
+        has_disk_space_recommendation = bool(active_groups.intersection(group_names))
         return super(DiskAvailability, cls).is_active(task_vars) and has_disk_space_recommendation
 
     def run(self, tmp, task_vars):
         group_names = get_var(task_vars, "group_names")
         ansible_mounts = get_var(task_vars, "ansible_mounts")
-        free_bytes = self.openshift_available_disk(ansible_mounts)
-
-        recommended_min = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names)
-        configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9
-        min_free_bytes = configured_min or recommended_min
-
-        if free_bytes < min_free_bytes:
-            return {
-                'failed': True,
-                'msg': (
-                    'Available disk space ({:.1f} GB) for the volume containing '
-                    '"/var" is below minimum recommended space ({:.1f} GB)'
-                ).format(float(free_bytes) / 10**9, float(min_free_bytes) / 10**9)
+        ansible_mounts = {mount['mount']: mount for mount in ansible_mounts}
+
+        user_config = get_var(task_vars, "openshift_check_min_host_disk_gb", default={})
+        try:
+            # For backwards-compatibility, if openshift_check_min_host_disk_gb
+            # is a number, then it overrides the required config for '/var'.
+            number = float(user_config)
+            user_config = {
+                '/var': {
+                    'masters': number,
+                    'nodes': number,
+                    'etcd': number,
+                },
             }
+        except TypeError:
+            # If it is not a number, then it should be a nested dict.
+            pass
+
+        # TODO: as suggested in
+        # https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021,
+        # maybe we could support checking disk availability in paths that are
+        # not part of the official recommendation but present in the user
+        # configuration.
+        for path, recommendation in self.recommended_disk_space_bytes.items():
+            free_bytes = self.free_bytes(path, ansible_mounts)
+            recommended_bytes = max(recommendation.get(name, 0) for name in group_names)
+
+            config = user_config.get(path, {})
+            # NOTE: the user config is in GB, but we compare bytes, thus the
+            # conversion.
+            config_bytes = max(config.get(name, 0) for name in group_names) * 10**9
+            recommended_bytes = config_bytes or recommended_bytes
+
+            if free_bytes < recommended_bytes:
+                free_gb = float(free_bytes) / 10**9
+                recommended_gb = float(recommended_bytes) / 10**9
+                return {
+                    'failed': True,
+                    'msg': (
+                        'Available disk space in "{}" ({:.1f} GB) '
+                        'is below minimum recommended ({:.1f} GB)'
+                    ).format(path, free_gb, recommended_gb)
+                }
 
         return {}
 
     @staticmethod
-    def openshift_available_disk(ansible_mounts):
-        """Determine the available disk space for an OpenShift installation.
-
-        ansible_mounts should be a list of dicts like the 'setup' Ansible module
-        returns.
-        """
-        # priority list in descending order
-        supported_mnt_paths = ["/var", "/"]
-        available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts}
+    def free_bytes(path, ansible_mounts):
+        """Return the size available in path based on ansible_mounts."""
+        mount_point = path
+        # arbitrary value to prevent an infinite loop, in the unlikely case that '/'
+        # is not in ansible_mounts.
+        max_depth = 32
+        while mount_point not in ansible_mounts and max_depth > 0:
+            mount_point = os.path.dirname(mount_point)
+            max_depth -= 1
 
         try:
-            for path in supported_mnt_paths:
-                if path in available_mnts:
-                    return available_mnts[path]["size_available"]
+            free_bytes = ansible_mounts[mount_point]['size_available']
         except KeyError:
-            pass
+            known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(ansible_mounts)) or 'none'
+            msg = 'Unable to determine disk availability for "{}". Known mount points: {}.'
+            raise OpenShiftCheckException(msg.format(path, known_mounts))
 
-        paths = ''.join(sorted(available_mnts)) or 'none'
-        msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths)
-        raise OpenShiftCheckException(msg)
+        return free_bytes
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 26bf4c09b..bde81ad2c 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -94,7 +94,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
         required = set()
         deployment_type = get_var(task_vars, "openshift_deployment_type")
         host_groups = get_var(task_vars, "group_names")
-        image_tag = get_var(task_vars, "openshift_image_tag")
+        # containerized etcd may not have openshift_image_tag, see bz 1466622
+        image_tag = get_var(task_vars, "openshift_image_tag", default="latest")
         image_info = DEPLOYMENT_IMAGE_INFO[deployment_type]
         if not image_info:
             return required
diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py
index 2bd615457..d2227d244 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_storage.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py
@@ -1,5 +1,6 @@
 """Check Docker storage driver and usage."""
 import json
+import os.path
 import re
 from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
 from openshift_checks.mixins import DockerHostMixin
@@ -17,13 +18,30 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
     tags = ["pre-install", "health", "preflight"]
 
     dependencies = ["python-docker-py"]
-    storage_drivers = ["devicemapper", "overlay2"]
+    storage_drivers = ["devicemapper", "overlay", "overlay2"]
     max_thinpool_data_usage_percent = 90.0
     max_thinpool_meta_usage_percent = 90.0
+    max_overlay_usage_percent = 90.0
+
+    # TODO(lmeyer): mention these in the output when check fails
+    configuration_variables = [
+        (
+            "max_thinpool_data_usage_percent",
+            "For 'devicemapper' storage driver, usage threshold percentage for data. "
+            "Format: float. Default: {:.1f}".format(max_thinpool_data_usage_percent),
+        ),
+        (
+            "max_thinpool_meta_usage_percent",
+            "For 'devicemapper' storage driver, usage threshold percentage for metadata. "
+            "Format: float. Default: {:.1f}".format(max_thinpool_meta_usage_percent),
+        ),
+        (
+            "max_overlay_usage_percent",
+            "For 'overlay' or 'overlay2' storage driver, usage threshold percentage. "
+            "Format: float. Default: {:.1f}".format(max_overlay_usage_percent),
+        ),
+    ]
 
-    # pylint: disable=too-many-return-statements
-    # Reason: permanent stylistic exception;
-    #         it is clearer to return on failures and there are just many ways to fail here.
     def run(self, tmp, task_vars):
         msg, failed, changed = self.ensure_dependencies(task_vars)
         if failed:
@@ -34,17 +52,17 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
             }
 
         # attempt to get the docker info hash from the API
-        info = self.execute_module("docker_info", {}, task_vars=task_vars)
-        if info.get("failed"):
+        docker_info = self.execute_module("docker_info", {}, task_vars=task_vars)
+        if docker_info.get("failed"):
             return {"failed": True, "changed": changed,
                     "msg": "Failed to query Docker API. Is docker running on this host?"}
-        if not info.get("info"):  # this would be very strange
+        if not docker_info.get("info"):  # this would be very strange
             return {"failed": True, "changed": changed,
-                    "msg": "Docker API query missing info:\n{}".format(json.dumps(info))}
-        info = info["info"]
+                    "msg": "Docker API query missing info:\n{}".format(json.dumps(docker_info))}
+        docker_info = docker_info["info"]
 
         # check if the storage driver we saw is valid
-        driver = info.get("Driver", "[NONE]")
+        driver = docker_info.get("Driver", "[NONE]")
         if driver not in self.storage_drivers:
             msg = (
                 "Detected unsupported Docker storage driver '{driver}'.\n"
@@ -53,26 +71,34 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
             return {"failed": True, "changed": changed, "msg": msg}
 
         # driver status info is a list of tuples; convert to dict and validate based on driver
-        driver_status = {item[0]: item[1] for item in info.get("DriverStatus", [])}
+        driver_status = {item[0]: item[1] for item in docker_info.get("DriverStatus", [])}
+
+        result = {}
+
         if driver == "devicemapper":
-            if driver_status.get("Data loop file"):
-                msg = (
-                    "Use of loopback devices with the Docker devicemapper storage driver\n"
-                    "(the default storage configuration) is unsupported in production.\n"
-                    "Please use docker-storage-setup to configure a backing storage volume.\n"
-                    "See http://red.ht/2rNperO for further information."
-                )
-                return {"failed": True, "changed": changed, "msg": msg}
-            result = self._check_dm_usage(driver_status, task_vars)
-            result['changed'] = result.get('changed', False) or changed
-            return result
+            result = self.check_devicemapper_support(driver_status, task_vars)
 
-        # TODO(lmeyer): determine how to check usage for overlay2
+        if driver in ['overlay', 'overlay2']:
+            result = self.check_overlay_support(docker_info, driver_status, task_vars)
 
-        return {"changed": changed}
+        result['changed'] = result.get('changed', False) or changed
+        return result
 
-    def _check_dm_usage(self, driver_status, task_vars):
-        """
+    def check_devicemapper_support(self, driver_status, task_vars):
+        """Check if dm storage driver is supported as configured. Return: result dict."""
+        if driver_status.get("Data loop file"):
+            msg = (
+                "Use of loopback devices with the Docker devicemapper storage driver\n"
+                "(the default storage configuration) is unsupported in production.\n"
+                "Please use docker-storage-setup to configure a backing storage volume.\n"
+                "See http://red.ht/2rNperO for further information."
+            )
+            return {"failed": True, "msg": msg}
+        result = self.check_dm_usage(driver_status, task_vars)
+        return result
+
+    def check_dm_usage(self, driver_status, task_vars):
+        """Check usage thresholds for Docker dm storage driver. Return: result dict.
         Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool
         implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures
         devicemapper storage. The LV is "thin" because it does not use all available storage
@@ -83,7 +109,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
         could run out of space first; so we check both.
         """
         vals = dict(
-            vg_free=self._get_vg_free(driver_status.get("Pool Name"), task_vars),
+            vg_free=self.get_vg_free(driver_status.get("Pool Name"), task_vars),
             data_used=driver_status.get("Data Space Used"),
             data_total=driver_status.get("Data Space Total"),
             metadata_used=driver_status.get("Metadata Space Used"),
@@ -93,7 +119,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
         # convert all human-readable strings to bytes
         for key, value in vals.copy().items():
             try:
-                vals[key + "_bytes"] = self._convert_to_bytes(value)
+                vals[key + "_bytes"] = self.convert_to_bytes(value)
             except ValueError as err:  # unlikely to hit this from API info, but just to be safe
                 return {
                     "failed": True,
@@ -131,10 +157,12 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
         vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."])
         return vals
 
-    def _get_vg_free(self, pool, task_vars):
-        # Determine which VG to examine according to the pool name, the only indicator currently
-        # available from the Docker API driver info. We assume a name that looks like
-        # "vg--name-docker--pool"; vg and lv names with inner hyphens doubled, joined by a hyphen.
+    def get_vg_free(self, pool, task_vars):
+        """Determine which VG to examine according to the pool name. Return: size vgs reports.
+        Pool name is the only indicator currently available from the Docker API driver info.
+        We assume a name that looks like "vg--name-docker--pool";
+        vg and lv names with inner hyphens doubled, joined by a hyphen.
+        """
         match = re.match(r'((?:[^-]|--)+)-(?!-)', pool)  # matches up to the first single hyphen
         if not match:  # unlikely, but... be clear if we assumed wrong
             raise OpenShiftCheckException(
@@ -143,7 +171,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
                 "so the available storage in the VG cannot be determined.".format(pool)
             )
         vg_name = match.groups()[0].replace("--", "-")
-        vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name
+        vgs_cmd = "/sbin/vgs --noheadings -o vg_free --units g --select vg_name=" + vg_name
         # should return free space like "  12.00g" if the VG exists; empty if it does not
 
         ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars)
@@ -163,7 +191,8 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
         return size
 
     @staticmethod
-    def _convert_to_bytes(string):
+    def convert_to_bytes(string):
+        """Convert string like "10.3 G" to bytes (binary units assumed). Return: float bytes."""
         units = dict(
             b=1,
             k=1024,
@@ -183,3 +212,87 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
             raise ValueError("Cannot convert to a byte size: " + string)
 
         return float(number) * multiplier
+
+    def check_overlay_support(self, docker_info, driver_status, task_vars):
+        """Check if overlay storage driver is supported for this host. Return: result dict."""
+        # check for xfs as backing store
+        backing_fs = driver_status.get("Backing Filesystem", "[NONE]")
+        if backing_fs != "xfs":
+            msg = (
+                "Docker storage drivers 'overlay' and 'overlay2' are only supported with\n"
+                "'xfs' as the backing storage, but this host's storage is type '{fs}'."
+            ).format(fs=backing_fs)
+            return {"failed": True, "msg": msg}
+
+        # check support for OS and kernel version
+        o_s = docker_info.get("OperatingSystem", "[NONE]")
+        if "Red Hat Enterprise Linux" in o_s or "CentOS" in o_s:
+            # keep it simple, only check enterprise kernel versions; assume everyone else is good
+            kernel = docker_info.get("KernelVersion", "[NONE]")
+            kernel_arr = [int(num) for num in re.findall(r'\d+', kernel)]
+            if kernel_arr < [3, 10, 0, 514]:  # rhel < 7.3
+                msg = (
+                    "Docker storage drivers 'overlay' and 'overlay2' are only supported beginning with\n"
+                    "kernel version 3.10.0-514; but Docker reports kernel version {version}."
+                ).format(version=kernel)
+                return {"failed": True, "msg": msg}
+            # NOTE: we could check for --selinux-enabled here but docker won't even start with
+            # that option until it's supported in the kernel so we don't need to.
+
+        return self.check_overlay_usage(docker_info, task_vars)
+
+    def check_overlay_usage(self, docker_info, task_vars):
+        """Check disk usage on OverlayFS backing store volume. Return: result dict."""
+        path = docker_info.get("DockerRootDir", "/var/lib/docker") + "/" + docker_info["Driver"]
+
+        threshold = get_var(task_vars, "max_overlay_usage_percent", default=self.max_overlay_usage_percent)
+        try:
+            threshold = float(threshold)
+        except ValueError:
+            return {
+                "failed": True,
+                "msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(threshold),
+            }
+
+        mount = self.find_ansible_mount(path, get_var(task_vars, "ansible_mounts"))
+        try:
+            free_bytes = mount['size_available']
+            total_bytes = mount['size_total']
+            usage = 100.0 * (total_bytes - free_bytes) / total_bytes
+        except (KeyError, ZeroDivisionError):
+            return {
+                "failed": True,
+                "msg": "The ansible_mount found for path {} is invalid.\n"
+                       "This is likely to be an Ansible bug. The record was:\n"
+                       "{}".format(path, json.dumps(mount, indent=2)),
+            }
+
+        if usage > threshold:
+            return {
+                "failed": True,
+                "msg": (
+                    "For Docker OverlayFS mount point {path},\n"
+                    "usage percentage {pct:.1f} is higher than threshold {thresh:.1f}."
+                ).format(path=mount["mount"], pct=usage, thresh=threshold)
+            }
+
+        return {}
+
+    # TODO(lmeyer): migrate to base class
+    @staticmethod
+    def find_ansible_mount(path, ansible_mounts):
+        """Return the ansible_mounts entry whose mount point contains path."""
+
+        mount_for_path = {mount['mount']: mount for mount in ansible_mounts}
+        mount_point = path
+        while mount_point not in mount_for_path:
+            if mount_point in ["/", ""]:  # "/" not in ansible_mounts???
+                break
+            mount_point = os.path.dirname(mount_point)
+
+        try:
+            return mount_for_path[mount_point]
+        except KeyError:
+            known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) or 'none'
+            msg = 'Unable to determine mount point for path "{}". Known mount points: {}.'
+            raise OpenShiftCheckException(msg.format(path, known_mounts))
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
new file mode 100644
index 000000000..40c87873d
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
@@ -0,0 +1,47 @@
+"""Check that scans journalctl for messages caused as a symptom of increased etcd traffic."""
+
+from openshift_checks import OpenShiftCheck, get_var
+
+
+class EtcdTraffic(OpenShiftCheck):
+    """Check if host is being affected by an increase in etcd traffic."""
+
+    name = "etcd_traffic"
+    tags = ["health", "etcd"]
+
+    @classmethod
+    def is_active(cls, task_vars):
+        """Activate only on etcd-group hosts whose short_version is 3.4, 3.5, 1.4 or 1.5."""
+        group_names = get_var(task_vars, "group_names", default=[])
+        valid_group_names = "etcd" in group_names
+
+        version = get_var(task_vars, "openshift", "common", "short_version")
+        valid_version = version in ("3.4", "3.5", "1.4", "1.5")
+
+        return super(EtcdTraffic, cls).is_active(task_vars) and valid_group_names and valid_version
+
+    def run(self, tmp, task_vars):
+        """Fail if the etcd journal shows slow-sync warnings, or if the journal search itself fails."""
+        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
+
+        log_matchers = [{
+            "start_regexp": r"Starting Etcd Server",
+            "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
+            "unit": "etcd_container" if is_containerized else "etcd",
+        }]
+
+        match = self.execute_module("search_journalctl", {"log_matchers": log_matchers}, task_vars)
+
+        if match.get("matched"):
+            msg = ("Higher than normal etcd traffic detected.\n"
+                   "OpenShift 3.4 introduced an increase in etcd traffic.\n"
+                   "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
+                   "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
+            return {"failed": True, "msg": msg}
+
+        if match.get("failed"):
+            # "errors" can be missing or empty on failure; joining None would raise TypeError
+            errors = match.get("errors") or [match.get("msg", "search_journalctl module failed")]
+            return {"failed": True, "msg": "\n".join(errors)}
+
+        return {}
diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
index 442f407b1..551e8dfa0 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
@@ -62,7 +62,7 @@ class Kibana(LoggingCheck):
             # TODO(lmeyer): give users option to validate certs
             status_code=302,
         )
-        result = self.execute_module('uri', args, task_vars)
+        result = self.execute_module('uri', args, None, task_vars)
         if result.get('failed'):
             return result['msg']
         return None
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py
index 1fddcd6f6..02a094007 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/logging.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py
@@ -55,12 +55,12 @@ class LoggingCheck(OpenShiftCheck):
         """Returns: list of pods not in a ready and running state"""
         return [
             pod for pod in pods
-            if any(
+            if not pod.get("status", {}).get("containerStatuses") or any(
                 container['ready'] is False
                 for container in pod['status']['containerStatuses']
             ) or not any(
                 condition['type'] == 'Ready' and condition['status'] == 'True'
-                for condition in pod['status']['conditions']
+                for condition in pod['status'].get('conditions', [])
             )
         ]
 
@@ -79,7 +79,7 @@ class LoggingCheck(OpenShiftCheck):
             "extra_args": list(extra_args) if extra_args else [],
         }
 
-        result = execute_module("ocutil", args, task_vars)
+        result = execute_module("ocutil", args, None, task_vars)
         if result.get("failed"):
             msg = (
                 'Unexpected error using `oc` to validate the logging stack components.\n'
diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py
index 6a76bb93d..204752bd0 100644
--- a/roles/openshift_health_checker/openshift_checks/package_version.py
+++ b/roles/openshift_health_checker/openshift_checks/package_version.py
@@ -10,8 +10,8 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
     tags = ["preflight"]
 
     openshift_to_ovs_version = {
-        "3.6": "2.6",
-        "3.5": "2.6",
+        "3.6": ["2.6", "2.7"],
+        "3.5": ["2.6", "2.7"],
         "3.4": "2.4",
     }
 
diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py
index 6ebf0ebb2..9383b233c 100644
--- a/roles/openshift_health_checker/test/action_plugin_test.py
+++ b/roles/openshift_health_checker/test/action_plugin_test.py
@@ -59,7 +59,7 @@ def failed(result, msg_has=None):
     if msg_has is not None:
         assert 'msg' in result
         for term in msg_has:
-            assert term in result['msg']
+            assert term.lower() in result['msg'].lower()
     return result.get('failed', False)
 
 
@@ -178,6 +178,16 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch):
     assert not skipped(result)
 
 
+def test_action_plugin_resolve_checks_exception(plugin, task_vars, monkeypatch):
+    """The task must fail, unchanged and unskipped, when no checks are known to the plugin."""
+    monkeypatch.setattr(plugin, 'load_known_checks', dict)
+    outcome = plugin.run(tmp=None, task_vars=task_vars)
+
+    assert failed(outcome, msg_has=['unknown', 'name'])
+    assert not changed(outcome)
+    assert not skipped(outcome)
+
+
 @pytest.mark.parametrize('names,all_checks,expected', [
     ([], [], set()),
     (
diff --git a/roles/openshift_health_checker/test/aos_version_test.py b/roles/openshift_health_checker/test/aos_version_test.py
index 697805dd2..4100f6c70 100644
--- a/roles/openshift_health_checker/test/aos_version_test.py
+++ b/roles/openshift_health_checker/test/aos_version_test.py
@@ -18,7 +18,43 @@ expected_pkgs = {
 }
 
 
-@pytest.mark.parametrize('pkgs, expect_not_found', [
+@pytest.mark.parametrize('pkgs,expected_pkgs_dict', [
+    (
+        # all found
+        [Package('spam', '3.2.1'), Package('eggs', '3.2.1')],
+        expected_pkgs,
+    ),
+    (
+        # found with more specific version
+        [Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')],
+        expected_pkgs,
+    ),
+    (
+        [Package('ovs', '2.6'), Package('ovs', '2.4')],
+        {
+            "ovs": {
+                "name": "ovs",
+                "version": ["2.6", "2.7"],
+                "check_multi": False,
+            }
+        },
+    ),
+    (
+        [Package('ovs', '2.7')],
+        {
+            "ovs": {
+                "name": "ovs",
+                "version": ["2.6", "2.7"],
+                "check_multi": False,
+            }
+        },
+    ),
+])
+def test_check_precise_version_found(pkgs, expected_pkgs_dict):
+    aos_version._check_precise_version_found(pkgs, expected_pkgs_dict)
+
+
+@pytest.mark.parametrize('pkgs,expect_not_found', [
     (
         [],
         {
@@ -55,14 +91,6 @@ expected_pkgs = {
         },  # not the right version
     ),
     (
-        [Package('spam', '3.2.1'), Package('eggs', '3.2.1')],
-        {},  # all found
-    ),
-    (
-        [Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')],
-        {},  # found with more specific version
-    ),
-    (
         [Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5')],
         {
             "spam": {
@@ -73,64 +101,86 @@ expected_pkgs = {
         },  # eggs found with multiple versions
     ),
 ])
-def test_check_pkgs_for_precise_version(pkgs, expect_not_found):
-    if expect_not_found:
-        with pytest.raises(aos_version.PreciseVersionNotFound) as e:
-            aos_version._check_precise_version_found(pkgs, expected_pkgs)
-
-        assert list(expect_not_found.values()) == e.value.problem_pkgs
-    else:
+def test_check_precise_version_found_fail(pkgs, expect_not_found):
+    with pytest.raises(aos_version.PreciseVersionNotFound) as e:
         aos_version._check_precise_version_found(pkgs, expected_pkgs)
+    assert list(expect_not_found.values()) == e.value.problem_pkgs
 
 
-@pytest.mark.parametrize('pkgs, expect_higher', [
+@pytest.mark.parametrize('pkgs,expected_pkgs_dict', [
     (
         [],
-        [],
+        expected_pkgs,
     ),
     (
+        # more precise but not strictly higher
         [Package('spam', '3.2.1.9')],
-        [],  # more precise but not strictly higher
+        expected_pkgs,
     ),
     (
+        [Package('ovs', '2.7')],
+        {
+            "ovs": {
+                "name": "ovs",
+                "version": ["2.6", "2.7"],
+                "check_multi": False,
+            }
+        },
+    ),
+])
+def test_check_higher_version_found(pkgs, expected_pkgs_dict):
+    aos_version._check_higher_version_found(pkgs, expected_pkgs_dict)
+
+
+@pytest.mark.parametrize('pkgs,expected_pkgs_dict,expect_higher', [
+    (
         [Package('spam', '3.3')],
+        expected_pkgs,
         ['spam-3.3'],  # lower precision, but higher
     ),
     (
         [Package('spam', '3.2.1'), Package('eggs', '3.3.2')],
+        expected_pkgs,
         ['eggs-3.3.2'],  # one too high
     ),
     (
         [Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5'), Package('eggs', '3.4')],
+        expected_pkgs,
         ['eggs-3.4'],  # multiple versions, one is higher
     ),
     (
         [Package('eggs', '3.2.1'), Package('eggs', '3.4'), Package('eggs', '3.3')],
+        expected_pkgs,
         ['eggs-3.4'],  # multiple versions, two are higher
     ),
+    (
+        [Package('ovs', '2.8')],
+        {
+            "ovs": {
+                "name": "ovs",
+                "version": ["2.6", "2.7"],
+                "check_multi": False,
+            }
+        },
+        ['ovs-2.8'],
+    ),
 ])
-def test_check_pkgs_for_greater_version(pkgs, expect_higher):
-    if expect_higher:
-        with pytest.raises(aos_version.FoundHigherVersion) as e:
-            aos_version._check_higher_version_found(pkgs, expected_pkgs)
-        assert set(expect_higher) == set(e.value.problem_pkgs)
-    else:
-        aos_version._check_higher_version_found(pkgs, expected_pkgs)
+def test_check_higher_version_found_fail(pkgs, expected_pkgs_dict, expect_higher):
+    with pytest.raises(aos_version.FoundHigherVersion) as e:
+        aos_version._check_higher_version_found(pkgs, expected_pkgs_dict)
+    assert set(expect_higher) == set(e.value.problem_pkgs)
 
 
-@pytest.mark.parametrize('pkgs, expect_to_flag_pkgs', [
-    (
-        [],
-        [],
-    ),
-    (
-        [Package('spam', '3.2.1')],
-        [],
-    ),
-    (
-        [Package('spam', '3.2.1'), Package('eggs', '3.2.2')],
-        [],
-    ),
+@pytest.mark.parametrize('pkgs', [
+    [],
+    [Package('spam', '3.2.1')],
+    [Package('spam', '3.2.1'), Package('eggs', '3.2.2')],
+])
+def test_check_multi_minor_release(pkgs):
+    aos_version._check_multi_minor_release(pkgs, expected_pkgs)
+
+
+@pytest.mark.parametrize('pkgs,expect_to_flag_pkgs', [
     (
         [Package('spam', '3.2.1'), Package('spam', '3.3.2')],
         ['spam'],
@@ -140,10 +190,7 @@ def test_check_pkgs_for_greater_version(pkgs, expect_higher):
         ['eggs'],
     ),
 ])
-def test_check_pkgs_for_multi_release(pkgs, expect_to_flag_pkgs):
-    if expect_to_flag_pkgs:
-        with pytest.raises(aos_version.FoundMultiRelease) as e:
-            aos_version._check_multi_minor_release(pkgs, expected_pkgs)
-        assert set(expect_to_flag_pkgs) == set(e.value.problem_pkgs)
-    else:
+def test_check_multi_minor_release_fail(pkgs, expect_to_flag_pkgs):
+    with pytest.raises(aos_version.FoundMultiRelease) as e:
         aos_version._check_multi_minor_release(pkgs, expected_pkgs)
+    assert set(expect_to_flag_pkgs) == set(e.value.problem_pkgs)
diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py
index b353fa610..945b9eafc 100644
--- a/roles/openshift_health_checker/test/disk_availability_test.py
+++ b/roles/openshift_health_checker/test/disk_availability_test.py
@@ -3,22 +3,19 @@ import pytest
 from openshift_checks.disk_availability import DiskAvailability, OpenShiftCheckException
 
 
-@pytest.mark.parametrize('group_names,is_containerized,is_active', [
-    (['masters'], False, True),
-    # ensure check is skipped on containerized installs
-    (['masters'], True, False),
-    (['nodes'], False, True),
-    (['etcd'], False, True),
-    (['masters', 'nodes'], False, True),
-    (['masters', 'etcd'], False, True),
-    ([], False, False),
-    (['lb'], False, False),
-    (['nfs'], False, False),
+@pytest.mark.parametrize('group_names,is_active', [
+    (['masters'], True),
+    (['nodes'], True),
+    (['etcd'], True),
+    (['masters', 'nodes'], True),
+    (['masters', 'etcd'], True),
+    ([], False),
+    (['lb'], False),
+    (['nfs'], False),
 ])
-def test_is_active(group_names, is_containerized, is_active):
+def test_is_active(group_names, is_active):
     task_vars = dict(
         group_names=group_names,
-        openshift=dict(common=dict(is_containerized=is_containerized)),
     )
     assert DiskAvailability.is_active(task_vars=task_vars) == is_active
 
@@ -38,7 +35,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
     with pytest.raises(OpenShiftCheckException) as excinfo:
         check.run(tmp=None, task_vars=task_vars)
 
-    for word in 'determine available disk'.split() + extra_words:
+    for word in 'determine disk availability'.split() + extra_words:
         assert word in str(excinfo.value)
 
 
@@ -81,7 +78,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
         [{
             # not enough space on / ...
             'mount': '/',
-            'size_available': 0,
+            'size_available': 2 * 10**9,
         }, {
             # ... but enough on /var
             'mount': '/var',
diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py
index 0a7c0f8d3..3b9e097fb 100644
--- a/roles/openshift_health_checker/test/docker_image_availability_test.py
+++ b/roles/openshift_health_checker/test/docker_image_availability_test.py
@@ -259,3 +259,17 @@ def test_required_images(deployment_type, is_containerized, groups, oreg_url, ex
     )
 
     assert expected == DockerImageAvailability("DUMMY").required_images(task_vars)
+
+
+def test_containerized_etcd():
+    """A containerized etcd host requires only the etcd image."""
+    host_vars = {
+        "openshift": {"common": {"is_containerized": True}},
+        "openshift_deployment_type": "origin",
+        "group_names": ["etcd"],
+    }
+    check = DockerImageAvailability("DUMMY")
+
+    required = check.required_images(host_vars)
+
+    assert required == {'registry.access.redhat.com/rhel7/etcd'}
diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py
index 876614b1d..99c529054 100644
--- a/roles/openshift_health_checker/test/docker_storage_test.py
+++ b/roles/openshift_health_checker/test/docker_storage_test.py
@@ -23,7 +23,8 @@ def test_is_active(is_containerized, group_names, is_active):
     assert DockerStorage.is_active(task_vars=task_vars) == is_active
 
 
-non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
+def non_atomic_task_vars():
+    return {"openshift": {"common": {"is_atomic": False}}}
 
 
 @pytest.mark.parametrize('docker_info, failed, expect_msg', [
@@ -56,7 +57,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
     (
         dict(info={
             "Driver": "overlay2",
-            "DriverStatus": []
+            "DriverStatus": [("Backing Filesystem", "xfs")],
         }),
         False,
         [],
@@ -64,9 +65,30 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
     (
         dict(info={
             "Driver": "overlay",
+            "DriverStatus": [("Backing Filesystem", "btrfs")],
         }),
         True,
-        ["unsupported Docker storage driver"],
+        ["storage is type 'btrfs'", "only supported with\n'xfs'"],
+    ),
+    (
+        dict(info={
+            "Driver": "overlay2",
+            "DriverStatus": [("Backing Filesystem", "xfs")],
+            "OperatingSystem": "Red Hat Enterprise Linux Server release 7.2 (Maipo)",
+            "KernelVersion": "3.10.0-327.22.2.el7.x86_64",
+        }),
+        True,
+        ["Docker reports kernel version 3.10.0-327"],
+    ),
+    (
+        dict(info={
+            "Driver": "overlay",
+            "DriverStatus": [("Backing Filesystem", "xfs")],
+            "OperatingSystem": "CentOS",
+            "KernelVersion": "3.10.0-514",
+        }),
+        False,
+        [],
     ),
     (
         dict(info={
@@ -85,8 +107,9 @@ def test_check_storage_driver(docker_info, failed, expect_msg):
         return docker_info
 
     check = dummy_check(execute_module=execute_module)
-    check._check_dm_usage = lambda status, task_vars: dict()  # stub out for this test
-    result = check.run(tmp=None, task_vars=non_atomic_task_vars)
+    check.check_dm_usage = lambda status, task_vars: dict()  # stub out for this test
+    check.check_overlay_usage = lambda info, task_vars: dict()  # stub out for this test
+    result = check.run(tmp=None, task_vars=non_atomic_task_vars())
 
     if failed:
         assert result["failed"]
@@ -146,8 +169,8 @@ not_enough_space = {
 ])
 def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg):
     check = dummy_check()
-    check._get_vg_free = lambda pool, task_vars: vg_free
-    result = check._check_dm_usage(driver_status, task_vars)
+    check.get_vg_free = lambda pool, task_vars: vg_free
+    result = check.check_dm_usage(driver_status, task_vars)
     result_success = not result.get("failed")
 
     assert result_success is success
@@ -195,10 +218,10 @@ def test_vg_free(pool, command_returns, raises, returns):
     check = dummy_check(execute_module=execute_module)
     if raises:
         with pytest.raises(OpenShiftCheckException) as err:
-            check._get_vg_free(pool, {})
+            check.get_vg_free(pool, {})
         assert raises in str(err.value)
     else:
-        ret = check._get_vg_free(pool, {})
+        ret = check.get_vg_free(pool, {})
         assert ret == returns
 
 
@@ -209,7 +232,7 @@ def test_vg_free(pool, command_returns, raises, returns):
     ("12g", 12.0 * 1024**3),
 ])
 def test_convert_to_bytes(string, expect_bytes):
-    got = DockerStorage._convert_to_bytes(string)
+    got = DockerStorage.convert_to_bytes(string)
     assert got == expect_bytes
 
 
@@ -219,6 +242,70 @@ def test_convert_to_bytes(string, expect_bytes):
 ])
 def test_convert_to_bytes_error(string):
     with pytest.raises(ValueError) as err:
-        DockerStorage._convert_to_bytes(string)
+        DockerStorage.convert_to_bytes(string)
     assert "Cannot convert" in str(err.value)
     assert string in str(err.value)
+
+
+ansible_mounts_enough = [{
+    'mount': '/var/lib/docker',
+    'size_available': 50 * 10**9,
+    'size_total': 50 * 10**9,
+}]
+ansible_mounts_not_enough = [{
+    'mount': '/var/lib/docker',
+    'size_available': 0,
+    'size_total': 50 * 10**9,
+}]
+ansible_mounts_missing_fields = [dict(mount='/var/lib/docker')]
+ansible_mounts_zero_size = [{
+    'mount': '/var/lib/docker',
+    'size_available': 0,
+    'size_total': 0,
+}]
+
+
+@pytest.mark.parametrize('ansible_mounts, threshold, expect_fail, expect_msg', [
+    (
+        ansible_mounts_enough,
+        None,
+        False,
+        [],
+    ),
+    (
+        ansible_mounts_not_enough,
+        None,
+        True,
+        ["usage percentage", "higher than threshold"],
+    ),
+    (
+        ansible_mounts_not_enough,
+        "bogus percent",
+        True,
+        ["is not a percentage"],
+    ),
+    (
+        ansible_mounts_missing_fields,
+        None,
+        True,
+        ["Ansible bug"],
+    ),
+    (
+        ansible_mounts_zero_size,
+        None,
+        True,
+        ["Ansible bug"],
+    ),
+])
+def test_overlay_usage(ansible_mounts, threshold, expect_fail, expect_msg):
+    """Compare check_overlay_usage()'s failed flag and message with the expectation per case."""
+    task_vars = non_atomic_task_vars()
+    task_vars["ansible_mounts"] = ansible_mounts
+    if threshold is not None:
+        task_vars["max_overlay_usage_percent"] = threshold
+    docker_info = {"DockerRootDir": "/var/lib/docker", "Driver": "overlay"}
+    result = dummy_check().check_overlay_usage(docker_info, task_vars)
+
+    assert bool(result.get("failed")) == expect_fail
+    for fragment in expect_msg:
+        assert fragment in result["msg"]
diff --git a/roles/openshift_health_checker/test/etcd_traffic_test.py b/roles/openshift_health_checker/test/etcd_traffic_test.py
new file mode 100644
index 000000000..287175e29
--- /dev/null
+++ b/roles/openshift_health_checker/test/etcd_traffic_test.py
@@ -0,0 +1,80 @@
+import pytest
+
+from openshift_checks.etcd_traffic import EtcdTraffic
+
+
+@pytest.mark.parametrize('group_names,version,is_active', [
+    (['masters'], "3.5", False),
+    (['masters'], "3.6", False),
+    (['nodes'], "3.4", False),
+    (['etcd'], "3.4", True),
+    (['etcd'], "3.5", True),
+    (['etcd'], "3.1", False),
+    (['masters', 'nodes'], "3.5", False),
+    (['masters', 'etcd'], "3.5", True),
+    ([], "3.4", False),
+])
+def test_is_active(group_names, version, is_active):
+    """Compare is_active() with the expectation for each group/version combination."""
+    host_vars = {
+        "group_names": group_names,
+        "openshift": {"common": {"short_version": version}},
+    }
+    active = EtcdTraffic.is_active(task_vars=host_vars)
+    assert active == is_active
+
+
+@pytest.mark.parametrize('group_names,matched,failed,extra_words', [
+    (["masters"], True, True, ["Higher than normal", "traffic"]),
+    (["masters", "etcd"], False, False, []),
+    (["etcd"], False, False, []),
+])
+def test_log_matches_high_traffic_msg(group_names, matched, failed, extra_words):
+    """Check run()'s failed flag and message against a canned search_journalctl result."""
+    module_result = {"matched": matched, "failed": failed}
+    host_vars = {
+        "group_names": group_names,
+        "openshift": {
+            "common": {"service_type": "origin", "is_containerized": False},
+        },
+    }
+
+    def execute_module(module_name, args, task_vars):
+        # canned module reply, regardless of the arguments passed in
+        return module_result
+
+    check = EtcdTraffic(execute_module=execute_module)
+    outcome = check.run(tmp=None, task_vars=host_vars)
+    message = outcome.get("msg", "")
+
+    for word in extra_words:
+        assert word in message
+    assert outcome.get("failed", False) == failed
+
+
+@pytest.mark.parametrize('is_containerized,expected_unit_value', [
+    (False, "etcd"),
+    (True, "etcd_container"),
+])
+def test_systemd_unit_matches_deployment_type(is_containerized, expected_unit_value):
+    """Every log matcher handed to search_journalctl must name the expected systemd unit."""
+    task_vars = dict(
+        openshift=dict(
+            common=dict(is_containerized=is_containerized),
+        )
+    )
+    seen_units = []
+
+    def execute_module(module_name, args, task_vars):
+        assert module_name == "search_journalctl"
+        seen_units.extend(matcher["unit"] for matcher in args["log_matchers"])
+        return {"failed": False}
+
+    EtcdTraffic(execute_module=execute_module).run(tmp=None, task_vars=task_vars)
+
+    # an empty list means the module was never called (or got no matchers): fail, don't pass vacuously
+    assert seen_units and all(unit == expected_unit_value for unit in seen_units)
+
+
+def fake_execute_module(*args):
+    raise AssertionError('this function should not be called')
diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py
index 19140a1b6..40a5d19d8 100644
--- a/roles/openshift_health_checker/test/kibana_test.py
+++ b/roles/openshift_health_checker/test/kibana_test.py
@@ -169,7 +169,7 @@ def test_get_kibana_url(route, expect_url, expect_error):
     ),
 ])
 def test_verify_url_internal_failure(exec_result, expect):
-    check = Kibana(execute_module=lambda module_name, args, task_vars: dict(failed=True, msg=exec_result))
+    check = Kibana(execute_module=lambda module_name, args, tmp, task_vars: dict(failed=True, msg=exec_result))
     check._get_kibana_url = lambda task_vars: ('url', None)
 
     error = check._check_kibana_route({})
diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py
index a19881e5b..4f71fbf52 100644
--- a/roles/openshift_health_checker/test/logging_check_test.py
+++ b/roles/openshift_health_checker/test/logging_check_test.py
@@ -50,6 +50,16 @@ plain_kibana_pod = {
     }
 }
 
+plain_kibana_pod_no_containerstatus = {
+    "metadata": {
+        "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+        "name": "logging-kibana-1",
+    },
+    "status": {
+        "conditions": [{"status": "True", "type": "Ready"}],
+    }
+}
+
 fluentd_pod_node1 = {
     "metadata": {
         "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
@@ -80,7 +90,7 @@ plain_curator_pod = {
     ("Permission denied", "Unexpected error using `oc`"),
 ])
 def test_oc_failure(problem, expect):
-    def execute_module(module_name, args, task_vars):
+    def execute_module(module_name, args, tmp, task_vars):
         if module_name == "ocutil":
             return dict(failed=True, result=problem)
         return dict(changed=False)
@@ -135,3 +145,23 @@ def test_get_pods_for_component(pod_output, expect_pods, expect_error):
         {}
     )
     assert_error(error, expect_error)
+
+
+@pytest.mark.parametrize('name, pods, expected_pods', [
+    (
+        'test single pod found, scheduled, but no containerStatuses field',
+        [plain_kibana_pod_no_containerstatus],
+        [plain_kibana_pod_no_containerstatus],
+    ),
+    (
+        'set of pods has at least one pod with containerStatuses (scheduled); should still fail',
+        [plain_kibana_pod_no_containerstatus, plain_kibana_pod],
+        [plain_kibana_pod_no_containerstatus],
+    ),
+    # pytest calls an `ids` function once per argvalue, so only the name string may become an id
+], ids=lambda argval: argval if isinstance(argval, str) else None)
+def test_get_not_running_pods_no_container_status(name, pods, expected_pods):
+    """Pods lacking containerStatuses must be reported as not running."""
+    check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: '')
+
+    assert check.not_running_pods(pods) == expected_pods
diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py
index 91eace512..1bb6371ae 100644
--- a/roles/openshift_health_checker/test/package_version_test.py
+++ b/roles/openshift_health_checker/test/package_version_test.py
@@ -72,36 +72,6 @@ def test_package_version(openshift_release):
     assert result is return_value
 
 
-@pytest.mark.parametrize('deployment_type,openshift_release,expected_ovs_version', [
-    ("openshift-enterprise", "3.5", "2.6"),
-    ("origin", "3.6", "2.6"),
-    ("openshift-enterprise", "3.4", "2.4"),
-    ("origin", "3.3", "2.4"),
-])
-def test_ovs_package_version(deployment_type, openshift_release, expected_ovs_version):
-    task_vars = dict(
-        openshift=dict(common=dict(service_type='origin')),
-        openshift_release=openshift_release,
-        openshift_image_tag='v' + openshift_release,
-        openshift_deployment_type=deployment_type,
-    )
-    return_value = object()
-
-    def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
-        assert module_name == 'aos_version'
-        assert "package_list" in module_args
-
-        for pkg in module_args["package_list"]:
-            if pkg["name"] == "openvswitch":
-                assert pkg["version"] == expected_ovs_version
-
-        return return_value
-
-    check = PackageVersion(execute_module=execute_module)
-    result = check.run(tmp=None, task_vars=task_vars)
-    assert result is return_value
-
-
 @pytest.mark.parametrize('deployment_type,openshift_release,expected_docker_version', [
     ("origin", "3.5", "1.12"),
     ("openshift-enterprise", "3.4", "1.12"),
diff --git a/roles/openshift_health_checker/test/search_journalctl_test.py b/roles/openshift_health_checker/test/search_journalctl_test.py
new file mode 100644
index 000000000..724928aa1
--- /dev/null
+++ b/roles/openshift_health_checker/test/search_journalctl_test.py
@@ -0,0 +1,157 @@
+import pytest
+import search_journalctl
+
+
+def canned_search_journalctl(get_log_output=None):
+    """Return the search_journalctl module, overriding its get_log_output when one is given."""
+    # The override is assigned on the imported module itself and is not undone afterwards.
+    if get_log_output:
+        search_journalctl.get_log_output = get_log_output
+    return search_journalctl
+
+
+DEFAULT_TIMESTAMP = 1496341364  # presumably Unix epoch seconds (2017-06-01 18:22:44 UTC) - TODO confirm
+
+
+def get_timestamp(modifier=0):  # DEFAULT_TIMESTAMP offset by `modifier` (same unit as the constant)
+    return DEFAULT_TIMESTAMP + modifier
+
+
+def get_timestamp_microseconds(modifier=0):  # get_timestamp(modifier) scaled by 1,000,000
+    return get_timestamp(modifier) * 1000000
+
+
+def create_test_log_object(stamp, msg):  # JSON object text; stamp and msg are inserted quoted, unescaped
+    return '{{"__REALTIME_TIMESTAMP": "{}", "MESSAGE": "{}"}}'.format(stamp, msg)
+
+
+@pytest.mark.parametrize('name,matchers,log_input,expected_matches,expected_errors', [
+    (
+        'test with valid params',
+        [
+            {
+                "start_regexp": r"Sample Logs Beginning",
+                "regexp": r"test log message",
+                "unit": "test",
+            },
+        ],
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"),
+        ],
+        ["test log message"],
+        [],
+    ),
+    (
+        'test with invalid json in log input',
+        [
+            {
+                "start_regexp": r"Sample Logs Beginning",
+                "regexp": r"test log message",
+                "unit": "test-unit",
+            },
+        ],
+        [
+            '{__REALTIME_TIMESTAMP: ' + str(get_timestamp_microseconds()) + ', "MESSAGE": "test log message"}',
+        ],
+        [],
+        [
+            ["invalid json", "test-unit", "test log message"],
+        ],
+    ),
+    (
+        'test with invalid regexp',
+        [
+            {
+                "start_regexp": r"Sample Logs Beginning",
+                "regexp": r"test [ log message",
+                "unit": "test",
+            },
+        ],
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+            create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+            create_test_log_object(get_timestamp_microseconds(), "dummy log message"),
+            create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"),
+        ],
+        [],
+        [
+            ["invalid regular expression"],
+        ],
+    ),
+], ids=['valid params', 'invalid json in log input', 'invalid regexp'])  # one id per case, in order
+def test_get_log_matches(name, matchers, log_input, expected_matches, expected_errors):
+    """Check the matches and errors get_log_matches reports when get_log_output is canned to log_input."""
+    module = canned_search_journalctl(lambda matcher: log_input)
+    matched_regexp, errors = module.get_log_matches(matchers, 500, 60 * 60)
+
+    # Compare as sets: the order in which matches come back is not checked.
+    assert set(matched_regexp) == set(expected_matches)
+    assert len(expected_errors) == len(errors)
+
+    # Each expected entry lists fragments that must all appear in the error at the same index.
+    for idx, partial_err_set in enumerate(expected_errors):
+        for partial_err_msg in partial_err_set:
+            assert partial_err_msg in errors[idx]
+
+
+@pytest.mark.parametrize('name,matcher,log_count_lim,stamp_lim_seconds,log_input,expected_match', [
+    (
+        'test with matching log message, but out of bounds of log_count_lim',
+        {
+            "start_regexp": r"Sample Logs Beginning",
+            "regexp": r"dummy log message",
+            "unit": "test",
+        },
+        3,
+        get_timestamp(-100 * 60 * 60),
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+            create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+            create_test_log_object(get_timestamp_microseconds(), "dummy log message"),
+            create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"),
+        ],
+        None,
+    ),
+    (
+        'test with matching log message, but with timestamp too old',
+        {
+            "start_regexp": r"Sample Logs Beginning",
+            "regexp": r"dummy log message",
+            "unit": "test",
+        },
+        100,
+        get_timestamp(-10),
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+            create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+            create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"),
+            create_test_log_object(get_timestamp_microseconds(-1000), "Sample Logs Beginning"),
+        ],
+        None,
+    ),
+    (
+        'test with matching log message, and timestamp within time limit',
+        {
+            "start_regexp": r"Sample Logs Beginning",
+            "regexp": r"dummy log message",
+            "unit": "test",
+        },
+        100,
+        get_timestamp(-1010),
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+            create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+            create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"),
+            create_test_log_object(get_timestamp_microseconds(-1000), "Sample Logs Beginning"),
+        ],
+        create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"),
+    ),
+], ids=['beyond log_count_lim', 'timestamp too old', 'timestamp within limit'])  # one id per case, in order
+def test_find_matches_skips_logs(name, matcher, log_count_lim, stamp_lim_seconds, log_input, expected_match):
+    """Check which log entry, if any, find_matches returns under the given count and timestamp limits."""
+    assert search_journalctl.find_matches(log_input, matcher, log_count_lim, stamp_lim_seconds) == expected_match