summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks/etcd_traffic.py')
-rw-r--r--roles/openshift_health_checker/openshift_checks/etcd_traffic.py47
1 files changed, 47 insertions, 0 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
new file mode 100644
index 000000000..40c87873d
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
@@ -0,0 +1,47 @@
+"""Check that scans journalctl for messages caused as a symptom of increased etcd traffic."""
+
+from openshift_checks import OpenShiftCheck, get_var
+
+
+class EtcdTraffic(OpenShiftCheck):
+ """Check if host is being affected by an increase in etcd traffic."""
+
+ name = "etcd_traffic"
+ tags = ["health", "etcd"]
+
+ @classmethod
+ def is_active(cls, task_vars):
+ """Skip hosts that do not have etcd in their group names."""
+ group_names = get_var(task_vars, "group_names", default=[])
+ valid_group_names = "etcd" in group_names
+
+ version = get_var(task_vars, "openshift", "common", "short_version")
+ valid_version = version in ("3.4", "3.5", "1.4", "1.5")
+
+ return super(EtcdTraffic, cls).is_active(task_vars) and valid_group_names and valid_version
+
+ def run(self, tmp, task_vars):
+ is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
+ unit = "etcd_container" if is_containerized else "etcd"
+
+ log_matchers = [{
+ "start_regexp": r"Starting Etcd Server",
+ "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
+ "unit": unit
+ }]
+
+ match = self.execute_module("search_journalctl", {
+ "log_matchers": log_matchers,
+ }, task_vars)
+
+ if match.get("matched"):
+ msg = ("Higher than normal etcd traffic detected.\n"
+ "OpenShift 3.4 introduced an increase in etcd traffic.\n"
+ "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
+ "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
+ return {"failed": True, "msg": msg}
+
+ if match.get("failed"):
+ return {"failed": True, "msg": "\n".join(match.get("errors"))}
+
+ return {}