summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker/openshift_checks
diff options
context:
space:
mode:
authorOpenShift Bot <eparis+openshiftbot@redhat.com>2017-06-09 15:04:03 -0400
committerGitHub <noreply@github.com>2017-06-09 15:04:03 -0400
commit2f0b5b9b6b6e91f07c677e2a09822ab1f34ef957 (patch)
treeae8c6af817dd2499492029c0a120f6838d6b5964 /roles/openshift_health_checker/openshift_checks
parentbbeaa204e7db13de6957c1e5240b25b9a4e8f4d9 (diff)
parent3912fa895a2cd205cf7410e9c54f7b04dc9d9945 (diff)
downloadopenshift-2f0b5b9b6b6e91f07c677e2a09822ab1f34ef957.tar.gz
openshift-2f0b5b9b6b6e91f07c677e2a09822ab1f34ef957.tar.bz2
openshift-2f0b5b9b6b6e91f07c677e2a09822ab1f34ef957.tar.xz
openshift-2f0b5b9b6b6e91f07c677e2a09822ab1f34ef957.zip
Merge pull request #3787 from juanvallejo/jvallejo/docker-storage-check
Merged by openshift-bot
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks')
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_image_availability.py22
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_storage.py185
-rw-r--r--roles/openshift_health_checker/openshift_checks/mixins.py42
3 files changed, 229 insertions, 20 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 4588ed634..27e6fe383 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -1,8 +1,9 @@
# pylint: disable=missing-docstring
from openshift_checks import OpenShiftCheck, get_var
+from openshift_checks.mixins import DockerHostMixin
-class DockerImageAvailability(OpenShiftCheck):
+class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
"""Check that required Docker images are available.
This check attempts to ensure that required docker images are
@@ -36,19 +37,11 @@ class DockerImageAvailability(OpenShiftCheck):
def run(self, tmp, task_vars):
msg, failed, changed = self.ensure_dependencies(task_vars)
-
- # exit early if Skopeo update fails
if failed:
- if "No package matching" in msg:
- msg = "Ensure that all required dependencies can be installed via `yum`.\n"
return {
"failed": True,
"changed": changed,
- "msg": (
- "Unable to update or install required dependency packages on this host;\n"
- "These are required in order to check Docker image availability:"
- "\n {deps}\n{msg}"
- ).format(deps=',\n '.join(self.dependencies), msg=msg),
+ "msg": "Some dependencies are required in order to check Docker image availability.\n" + msg
}
required_images = self.required_images(task_vars)
@@ -168,12 +161,3 @@ class DockerImageAvailability(OpenShiftCheck):
args = {"_raw_params": cmd_str}
result = self.module_executor("command", args, task_vars)
return not result.get("failed", False) and result.get("rc", 0) == 0
-
- # ensures that the skopeo and python-docker-py packages exist
- # check is skipped on atomic installations
- def ensure_dependencies(self, task_vars):
- if get_var(task_vars, "openshift", "common", "is_atomic"):
- return "", False, False
-
- result = self.module_executor("yum", {"name": self.dependencies, "state": "latest"}, task_vars)
- return result.get("msg", ""), result.get("failed", False) or result.get("rc", 0) != 0, result.get("changed")
diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py
new file mode 100644
index 000000000..7f1751b36
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py
@@ -0,0 +1,185 @@
+"""Check Docker storage driver and usage."""
+import json
+import re
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+from openshift_checks.mixins import DockerHostMixin
+
+
+class DockerStorage(DockerHostMixin, OpenShiftCheck):
+ """Check Docker storage driver compatibility.
+
+ This check ensures that Docker is using a supported storage driver,
+ and that loopback is not being used (if using devicemapper).
+ Also that storage usage is not above threshold.
+ """
+
+ name = "docker_storage"
+ tags = ["pre-install", "health", "preflight"]
+
+ dependencies = ["python-docker-py"]
+ storage_drivers = ["devicemapper", "overlay2"]
+ max_thinpool_data_usage_percent = 90.0
+ max_thinpool_meta_usage_percent = 90.0
+
+ # pylint: disable=too-many-return-statements
+ # Reason: permanent stylistic exception;
+ # it is clearer to return on failures and there are just many ways to fail here.
+ def run(self, tmp, task_vars):
+ msg, failed, changed = self.ensure_dependencies(task_vars)
+ if failed:
+ return {
+ "failed": True,
+ "changed": changed,
+ "msg": "Some dependencies are required in order to query docker storage on host:\n" + msg
+ }
+
+ # attempt to get the docker info hash from the API
+ info = self.execute_module("docker_info", {}, task_vars)
+ if info.get("failed"):
+ return {"failed": True, "changed": changed,
+ "msg": "Failed to query Docker API. Is docker running on this host?"}
+ if not info.get("info"): # this would be very strange
+ return {"failed": True, "changed": changed,
+ "msg": "Docker API query missing info:\n{}".format(json.dumps(info))}
+ info = info["info"]
+
+ # check if the storage driver we saw is valid
+ driver = info.get("Driver", "[NONE]")
+ if driver not in self.storage_drivers:
+ msg = (
+ "Detected unsupported Docker storage driver '{driver}'.\n"
+ "Supported storage drivers are: {drivers}"
+ ).format(driver=driver, drivers=', '.join(self.storage_drivers))
+ return {"failed": True, "changed": changed, "msg": msg}
+
+ # driver status info is a list of tuples; convert to dict and validate based on driver
+ driver_status = {item[0]: item[1] for item in info.get("DriverStatus", [])}
+ if driver == "devicemapper":
+ if driver_status.get("Data loop file"):
+ msg = (
+ "Use of loopback devices with the Docker devicemapper storage driver\n"
+ "(the default storage configuration) is unsupported in production.\n"
+ "Please use docker-storage-setup to configure a backing storage volume.\n"
+ "See http://red.ht/2rNperO for further information."
+ )
+ return {"failed": True, "changed": changed, "msg": msg}
+ result = self._check_dm_usage(driver_status, task_vars)
+ result['changed'] = result.get('changed', False) or changed
+ return result
+
+ # TODO(lmeyer): determine how to check usage for overlay2
+
+ return {"changed": changed}
+
+ def _check_dm_usage(self, driver_status, task_vars):
+ """
+ Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool
+ implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures
+ devicemapper storage. The LV is "thin" because it does not use all available storage
+ from its VG, instead expanding as needed; so to determine available space, we gather
+ current usage as the Docker API reports for the driver as well as space available for
+ expansion in the pool's VG.
+ Usage within the LV is divided into pools allocated to data and metadata, either of which
+ could run out of space first; so we check both.
+ """
+ vals = dict(
+ vg_free=self._get_vg_free(driver_status.get("Pool Name"), task_vars),
+ data_used=driver_status.get("Data Space Used"),
+ data_total=driver_status.get("Data Space Total"),
+ metadata_used=driver_status.get("Metadata Space Used"),
+ metadata_total=driver_status.get("Metadata Space Total"),
+ )
+
+ # convert all human-readable strings to bytes
+ for key, value in vals.copy().items():
+ try:
+ vals[key + "_bytes"] = self._convert_to_bytes(value)
+ except ValueError as err: # unlikely to hit this from API info, but just to be safe
+ return {
+ "failed": True,
+ "values": vals,
+ "msg": "Could not interpret {} value '{}' as bytes: {}".format(key, value, str(err))
+ }
+
+ # determine the threshold percentages which usage should not exceed
+ for name, default in [("data", self.max_thinpool_data_usage_percent),
+ ("metadata", self.max_thinpool_meta_usage_percent)]:
+ percent = get_var(task_vars, "max_thinpool_" + name + "_usage_percent", default=default)
+ try:
+ vals[name + "_threshold"] = float(percent)
+ except ValueError:
+ return {
+ "failed": True,
+ "msg": "Specified thinpool {} usage limit '{}' is not a percentage".format(name, percent)
+ }
+
+ # test whether the thresholds are exceeded
+ messages = []
+ for name in ["data", "metadata"]:
+ vals[name + "_pct_used"] = 100 * vals[name + "_used_bytes"] / (
+ vals[name + "_total_bytes"] + vals["vg_free_bytes"])
+ if vals[name + "_pct_used"] > vals[name + "_threshold"]:
+ messages.append(
+ "Docker thinpool {name} usage percentage {pct:.1f} "
+ "is higher than threshold {thresh:.1f}.".format(
+ name=name,
+ pct=vals[name + "_pct_used"],
+ thresh=vals[name + "_threshold"],
+ ))
+ vals["failed"] = True
+
+ vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."])
+ return vals
+
+ def _get_vg_free(self, pool, task_vars):
+ # Determine which VG to examine according to the pool name, the only indicator currently
+ # available from the Docker API driver info. We assume a name that looks like
+ # "vg--name-docker--pool"; vg and lv names with inner hyphens doubled, joined by a hyphen.
+ match = re.match(r'((?:[^-]|--)+)-(?!-)', pool) # matches up to the first single hyphen
+ if not match: # unlikely, but... be clear if we assumed wrong
+ raise OpenShiftCheckException(
+ "This host's Docker reports it is using a storage pool named '{}'.\n"
+ "However this name does not have the expected format of 'vgname-lvname'\n"
+ "so the available storage in the VG cannot be determined.".format(pool)
+ )
+ vg_name = match.groups()[0].replace("--", "-")
+ vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name
+ # should return free space like " 12.00g" if the VG exists; empty if it does not
+
+ ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars)
+ if ret.get("failed") or ret.get("rc", 0) != 0:
+ raise OpenShiftCheckException(
+ "Is LVM installed? Failed to run /sbin/vgs "
+ "to determine docker storage usage:\n" + ret.get("msg", "")
+ )
+ size = ret.get("stdout", "").strip()
+ if not size:
+ raise OpenShiftCheckException(
+ "This host's Docker reports it is using a storage pool named '{pool}'.\n"
+ "which we expect to come from local VG '{vg}'.\n"
+ "However, /sbin/vgs did not find this VG. Is Docker for this host"
+ "running and using the storage on the host?".format(pool=pool, vg=vg_name)
+ )
+ return size
+
+ @staticmethod
+ def _convert_to_bytes(string):
+ units = dict(
+ b=1,
+ k=1024,
+ m=1024**2,
+ g=1024**3,
+ t=1024**4,
+ p=1024**5,
+ )
+ string = string or ""
+ match = re.match(r'(\d+(?:\.\d+)?)\s*(\w)?', string) # float followed by optional unit
+ if not match:
+ raise ValueError("Cannot convert to a byte size: " + string)
+
+ number, unit = match.groups()
+ multiplier = 1 if not unit else units.get(unit.lower())
+ if not multiplier:
+ raise ValueError("Cannot convert to a byte size: " + string)
+
+ return float(number) * multiplier
diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py
index 20d160eaf..7f3d78cc4 100644
--- a/roles/openshift_health_checker/openshift_checks/mixins.py
+++ b/roles/openshift_health_checker/openshift_checks/mixins.py
@@ -1,4 +1,3 @@
-# pylint: disable=missing-docstring,too-few-public-methods
"""
Mixin classes meant to be used with subclasses of OpenShiftCheck.
"""
@@ -8,8 +7,49 @@ from openshift_checks import get_var
class NotContainerizedMixin(object):
"""Mixin for checks that are only active when not in containerized mode."""
+ # permanent # pylint: disable=too-few-public-methods
+ # Reason: The mixin is not intended to stand on its own as a class.
@classmethod
def is_active(cls, task_vars):
+ """Only run on non-containerized hosts."""
is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
return super(NotContainerizedMixin, cls).is_active(task_vars) and not is_containerized
+
+
+class DockerHostMixin(object):
+ """Mixin for checks that are only active on hosts that require Docker."""
+
+ dependencies = []
+
+ @classmethod
+ def is_active(cls, task_vars):
+ """Only run on hosts that depend on Docker."""
+ is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
+ is_node = "nodes" in get_var(task_vars, "group_names", default=[])
+ return super(DockerHostMixin, cls).is_active(task_vars) and (is_containerized or is_node)
+
+ def ensure_dependencies(self, task_vars):
+ """
+ Ensure that docker-related packages exist, but not on atomic hosts
+ (which would not be able to install but should already have them).
+ Returns: msg, failed, changed
+ """
+ if get_var(task_vars, "openshift", "common", "is_atomic"):
+ return "", False, False
+
+ # NOTE: we would use the "package" module but it's actually an action plugin
+ # and it's not clear how to invoke one of those. This is about the same anyway:
+ pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum")
+ result = self.module_executor(pkg_manager, {"name": self.dependencies, "state": "present"}, task_vars)
+ msg = result.get("msg", "")
+ if result.get("failed"):
+ if "No package matching" in msg:
+ msg = "Ensure that all required dependencies can be installed via `yum`.\n"
+ msg = (
+ "Unable to install required packages on this host:\n"
+ " {deps}\n{msg}"
+ ).format(deps=',\n '.join(self.dependencies), msg=msg)
+ failed = result.get("failed", False) or result.get("rc", 0) != 0
+ changed = result.get("changed", False)
+ return msg, failed, changed