summary | refs | log | tree | commit | diff | stats
path: root/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
diff options
context:
space:
mode:
author: juanvallejo <jvallejo@redhat.com> 2017-05-30 19:00:28 -0400
committer: juanvallejo <jvallejo@redhat.com> 2017-07-19 17:39:44 -0400
commit: 78955891fe5279d497730a49fe19d69e22b43a8b (patch)
tree: ffe6c70ea78b1e8970bbe2169d2c26c8a19bd824 /roles/openshift_health_checker/openshift_checks/etcd_traffic.py
parent: 75a46c184bda80158c085bcd38b40fd4d02aabd0 (diff)
download: openshift-78955891fe5279d497730a49fe19d69e22b43a8b.tar.gz
openshift-78955891fe5279d497730a49fe19d69e22b43a8b.tar.bz2
openshift-78955891fe5279d497730a49fe19d69e22b43a8b.tar.xz
openshift-78955891fe5279d497730a49fe19d69e22b43a8b.zip
add etcd increased-traffic check
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks/etcd_traffic.py')
-rw-r--r-- roles/openshift_health_checker/openshift_checks/etcd_traffic.py 47
1 files changed, 47 insertions, 0 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
new file mode 100644
index 000000000..40c87873d
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
@@ -0,0 +1,47 @@
+"""Check that scans journalctl for messages caused as a symptom of increased etcd traffic."""
+
+from openshift_checks import OpenShiftCheck, get_var
+
+
+class EtcdTraffic(OpenShiftCheck):
+    """Check if host is being affected by an increase in etcd traffic.
+
+    The check runs the ``search_journalctl`` module against the etcd unit and
+    fails when the journal contains etcd's "sync duration ... expected less
+    than 1s" message.
+    """
+
+    name = "etcd_traffic"
+    tags = ["health", "etcd"]
+
+    @classmethod
+    def is_active(cls, task_vars):
+        """Skip hosts that do not have etcd in their group names.
+
+        The check is additionally limited to hosts whose
+        ``openshift.common.short_version`` is one of 3.4, 3.5, 1.4 or 1.5.
+        """
+        group_names = get_var(task_vars, "group_names", default=[])
+        valid_group_names = "etcd" in group_names
+
+        # Looked up unconditionally (no default given), exactly as before.
+        version = get_var(task_vars, "openshift", "common", "short_version")
+        valid_version = version in ("3.4", "3.5", "1.4", "1.5")
+
+        return super(EtcdTraffic, cls).is_active(task_vars) and valid_group_names and valid_version
+
+    def run(self, tmp, task_vars):
+        """Search the etcd journal for the slow-sync message.
+
+        Returns a dict with ``failed`` and ``msg`` when the message is found
+        or when the ``search_journalctl`` module itself reports a failure;
+        returns an empty dict otherwise.
+        """
+        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
+        # Containerized installs use a different unit name for etcd.
+        unit = "etcd_container" if is_containerized else "etcd"
+
+        # NOTE(review): "start_regexp" presumably bounds the search to entries
+        # logged since the last etcd start -- confirm against search_journalctl.
+        log_matchers = [{
+            "start_regexp": r"Starting Etcd Server",
+            "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
+            "unit": unit
+        }]
+
+        match = self.execute_module("search_journalctl", {
+            "log_matchers": log_matchers,
+        }, task_vars)
+
+        if match.get("matched"):
+            msg = ("Higher than normal etcd traffic detected.\n"
+                   "OpenShift 3.4 introduced an increase in etcd traffic.\n"
+                   "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
+                   "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
+            return {"failed": True, "msg": msg}
+
+        if match.get("failed"):
+            # A failed result is not guaranteed to carry an "errors" list (a
+            # module-level failure may only provide "msg"). Joining None would
+            # raise TypeError and hide the real failure, so fall back.
+            errors = match.get("errors")
+            if not errors:
+                errors = [match.get("msg") or
+                          "The search_journalctl module failed without reporting any errors."]
+            return {"failed": True, "msg": "\n".join(errors)}
+
+        return {}