summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--playbooks/byo/openshift-cluster/config.yml13
-rw-r--r--playbooks/common/openshift-checks/health.yml11
-rw-r--r--playbooks/common/openshift-checks/pre-install.yml11
-rw-r--r--roles/openshift_health_checker/action_plugins/openshift_health_check.py26
-rw-r--r--roles/openshift_health_checker/callback_plugins/zz_failure_summary.py87
-rw-r--r--roles/openshift_health_checker/openshift_checks/disk_availability.py6
-rw-r--r--roles/openshift_health_checker/openshift_checks/memory_availability.py6
-rw-r--r--roles/openshift_health_checker/test/action_plugin_test.py16
-rw-r--r--roles/openshift_health_checker/test/disk_availability_test.py36
-rw-r--r--roles/openshift_health_checker/test/memory_availability_test.py43
10 files changed, 198 insertions, 57 deletions
diff --git a/playbooks/byo/openshift-cluster/config.yml b/playbooks/byo/openshift-cluster/config.yml
index acf5469bf..fd4a9eb26 100644
--- a/playbooks/byo/openshift-cluster/config.yml
+++ b/playbooks/byo/openshift-cluster/config.yml
@@ -3,6 +3,19 @@
tags:
- always
+- name: Verify Requirements
+ hosts: OSEv3
+ roles:
+ - openshift_health_checker
+ vars:
+ - r_openshift_health_checker_playbook_context: "install"
+ post_tasks:
+ - action: openshift_health_check
+ args:
+ checks:
+ - disk_availability
+ - memory_availability
+
- include: ../../common/openshift-cluster/std_include.yml
tags:
- always
diff --git a/playbooks/common/openshift-checks/health.yml b/playbooks/common/openshift-checks/health.yml
index fc0f523d5..1bee460e8 100644
--- a/playbooks/common/openshift-checks/health.yml
+++ b/playbooks/common/openshift-checks/health.yml
@@ -2,9 +2,10 @@
- name: Run OpenShift health checks
hosts: OSEv3
roles:
- - openshift_health_checker
+ - openshift_health_checker
+ vars:
+ - r_openshift_health_checker_playbook_context: "health"
post_tasks:
- - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513
- args:
- checks:
- - '@health'
+ - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513
+ args:
+ checks: ['@health']
diff --git a/playbooks/common/openshift-checks/pre-install.yml b/playbooks/common/openshift-checks/pre-install.yml
index c8ffc3d91..e01c6f38d 100644
--- a/playbooks/common/openshift-checks/pre-install.yml
+++ b/playbooks/common/openshift-checks/pre-install.yml
@@ -2,9 +2,10 @@
- hosts: OSEv3
name: run OpenShift pre-install checks
roles:
- - openshift_health_checker
+ - openshift_health_checker
+ vars:
+ - r_openshift_health_checker_playbook_context: "pre-install"
post_tasks:
- - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513
- args:
- checks:
- - '@preflight'
+ - action: openshift_health_check # https://github.com/ansible/ansible/issues/20513
+ args:
+ checks: ['@preflight']
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
index 03c40b78b..a62e4331e 100644
--- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py
+++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
@@ -25,9 +25,11 @@ class ActionModule(ActionBase):
def run(self, tmp=None, task_vars=None):
result = super(ActionModule, self).run(tmp, task_vars)
+ task_vars = task_vars or {}
- if task_vars is None:
- task_vars = {}
+ # vars are not supportably available in the callback plugin,
+ # so record any it will need in the result.
+ result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context')
if "openshift" not in task_vars:
result["failed"] = True
@@ -46,19 +48,27 @@ class ActionModule(ActionBase):
result["checks"] = check_results = {}
+ user_disabled_checks = [
+ check.strip()
+ for check in task_vars.get("openshift_disable_check", "").split(",")
+ ]
+
for check_name in resolved_checks:
display.banner("CHECK [{} : {}]".format(check_name, task_vars["ansible_host"]))
check = known_checks[check_name]
- if check.is_active(task_vars):
+ if not check.is_active(task_vars):
+ r = dict(skipped=True, skipped_reason="Not active for this host")
+ elif check_name in user_disabled_checks:
+ r = dict(skipped=True, skipped_reason="Disabled by user request")
+ else:
try:
r = check.run(tmp, task_vars)
except OpenShiftCheckException as e:
- r = {}
- r["failed"] = True
- r["msg"] = str(e)
- else:
- r = {"skipped": True}
+ r = dict(
+ failed=True,
+ msg=str(e),
+ )
check_results[check_name] = r
diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
index 7bce7f107..64c29a8d9 100644
--- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
+++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
@@ -2,6 +2,12 @@
Ansible callback plugin.
'''
+# Reason: In several locations below we disable pylint protected-access
+# for Ansible objects that do not give us any public way
+# to access the full details we need to report check failures.
+# Status: disabled permanently or until Ansible object has a public API.
+# This does leave the code more likely to be broken by future Ansible changes.
+
from pprint import pformat
from ansible.plugins.callback import CallbackBase
@@ -20,38 +26,37 @@ class CallbackModule(CallbackBase):
CALLBACK_TYPE = 'aggregate'
CALLBACK_NAME = 'failure_summary'
CALLBACK_NEEDS_WHITELIST = False
+ _playbook_file = None
def __init__(self):
super(CallbackModule, self).__init__()
self.__failures = []
+ def v2_playbook_on_start(self, playbook):
+ super(CallbackModule, self).v2_playbook_on_start(playbook)
+ # re: playbook attrs see top comment # pylint: disable=protected-access
+ self._playbook_file = playbook._file_name
+
def v2_runner_on_failed(self, result, ignore_errors=False):
super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors)
self.__failures.append(dict(result=result, ignore_errors=ignore_errors))
def v2_playbook_on_stats(self, stats):
super(CallbackModule, self).v2_playbook_on_stats(stats)
- # TODO: update condition to consider a host var or env var to
- # enable/disable the summary, so that we can control the output from a
- # play.
if self.__failures:
- self._print_failure_summary()
+ self._print_failure_details(self.__failures)
- def _print_failure_summary(self):
- '''Print a summary of failed tasks (including ignored failures).'''
+ def _print_failure_details(self, failures):
+ '''Print a summary of failed tasks or checks.'''
self._display.display(u'\nFailure summary:\n')
- # TODO: group failures by host or by task. If grouped by host, it is
- # easy to see all problems of a given host. If grouped by task, it is
- # easy to see what hosts needs the same fix.
-
- width = len(str(len(self.__failures)))
+ width = len(str(len(failures)))
initial_indent_format = u' {{:>{width}}}. '.format(width=width)
initial_indent_len = len(initial_indent_format.format(0))
subsequent_indent = u' ' * initial_indent_len
subsequent_extra_indent = u' ' * (initial_indent_len + 10)
- for i, failure in enumerate(self.__failures, 1):
+ for i, failure in enumerate(failures, 1):
entries = _format_failure(failure)
self._display.display(u'\n{}{}'.format(initial_indent_format.format(i), entries[0]))
for entry in entries[1:]:
@@ -59,11 +64,52 @@ class CallbackModule(CallbackBase):
indented = u'{}{}'.format(subsequent_indent, entry)
self._display.display(indented)
-
-# Reason: disable pylint protected-access because we need to access _*
-# attributes of a task result to implement this method.
-# Status: permanently disabled unless Ansible's API changes.
-# pylint: disable=protected-access
+ failed_checks = set()
+ playbook_context = None
+ # re: result attrs see top comment # pylint: disable=protected-access
+ for failure in failures:
+ # get context from check task result since callback plugins cannot access task vars
+ playbook_context = playbook_context or failure['result']._result.get('playbook_context')
+ failed_checks.update(
+ name
+ for name, result in failure['result']._result.get('checks', {}).items()
+ if result.get('failed')
+ )
+ if failed_checks:
+ self._print_check_failure_summary(failed_checks, playbook_context)
+
+ def _print_check_failure_summary(self, failed_checks, context):
+ checks = ','.join(sorted(failed_checks))
+ # NOTE: context is not set if all failures occurred prior to checks task
+ summary = (
+ '\n'
+ 'The execution of "{playbook}"\n'
+ 'includes checks designed to fail early if the requirements\n'
+ 'of the playbook are not met. One or more of these checks\n'
+ 'failed. To disregard these results, you may choose to\n'
+ 'disable failing checks by setting an Ansible variable:\n\n'
+ ' openshift_disable_check={checks}\n\n'
+ 'Failing check names are shown in the failure details above.\n'
+ 'Some checks may be configurable by variables if your requirements\n'
+ 'are different from the defaults; consult check documentation.\n'
+ 'Variables can be set in the inventory or passed on the\n'
+ 'command line using the -e flag to ansible-playbook.\n'
+ ).format(playbook=self._playbook_file, checks=checks)
+ if context in ['pre-install', 'health']:
+ summary = (
+ '\n'
+ 'You may choose to configure or disable failing checks by\n'
+ 'setting Ansible variables. To disable those above:\n\n'
+ ' openshift_disable_check={checks}\n\n'
+ 'Consult check documentation for configurable variables.\n'
+ 'Variables can be set in the inventory or passed on the\n'
+ 'command line using the -e flag to ansible-playbook.\n'
+ ).format(checks=checks)
+ # other expected contexts: install, upgrade
+ self._display.display(summary)
+
+
+# re: result attrs see top comment # pylint: disable=protected-access
def _format_failure(failure):
'''Return a list of pretty-formatted text entries describing a failure, including
relevant information about it. Expect that the list of text entries will be joined
@@ -100,11 +146,8 @@ def _format_failed_checks(checks):
return stringc(pformat(checks), C.COLOR_ERROR)
-# Reason: disable pylint protected-access because we need to access _*
-# attributes of obj to implement this function.
-# This is inspired by ansible.playbook.base.Base.dump_me.
-# Status: permanently disabled unless Ansible's API changes.
-# pylint: disable=protected-access
+# This is inspired by ansible.playbook.base.Base.dump_me.
+# re: play/task/block attrs see top comment # pylint: disable=protected-access
def _get_play(obj):
'''Given a task or block, recursively tries to find its parent play.'''
if hasattr(obj, '_play'):
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py
index c2792a0fe..962148cb8 100644
--- a/roles/openshift_health_checker/openshift_checks/disk_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py
@@ -27,10 +27,12 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck):
def run(self, tmp, task_vars):
group_names = get_var(task_vars, "group_names")
ansible_mounts = get_var(task_vars, "ansible_mounts")
-
- min_free_bytes = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names)
free_bytes = self.openshift_available_disk(ansible_mounts)
+ recommended_min = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names)
+ configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9
+ min_free_bytes = configured_min or recommended_min
+
if free_bytes < min_free_bytes:
return {
'failed': True,
diff --git a/roles/openshift_health_checker/openshift_checks/memory_availability.py b/roles/openshift_health_checker/openshift_checks/memory_availability.py
index 28805dc37..8b1a58ef4 100644
--- a/roles/openshift_health_checker/openshift_checks/memory_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/memory_availability.py
@@ -13,7 +13,7 @@ class MemoryAvailability(OpenShiftCheck):
recommended_memory_bytes = {
"masters": 16 * 10**9,
"nodes": 8 * 10**9,
- "etcd": 20 * 10**9,
+ "etcd": 8 * 10**9,
}
@classmethod
@@ -27,7 +27,9 @@ class MemoryAvailability(OpenShiftCheck):
group_names = get_var(task_vars, "group_names")
total_memory_bytes = get_var(task_vars, "ansible_memtotal_mb") * 10**6
- min_memory_bytes = max(self.recommended_memory_bytes.get(name, 0) for name in group_names)
+ recommended_min = max(self.recommended_memory_bytes.get(name, 0) for name in group_names)
+ configured_min = int(get_var(task_vars, "openshift_check_min_host_memory_gb", default=0)) * 10**9
+ min_memory_bytes = configured_min or recommended_min
if total_memory_bytes < min_memory_bytes:
return {
diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py
index 2693ae37b..6ebf0ebb2 100644
--- a/roles/openshift_health_checker/test/action_plugin_test.py
+++ b/roles/openshift_health_checker/test/action_plugin_test.py
@@ -67,6 +67,7 @@ def changed(result):
return result.get('changed', False)
+# tests whether task is skipped, not individual checks
def skipped(result):
return result.get('skipped', False)
@@ -101,7 +102,20 @@ def test_action_plugin_skip_non_active_checks(plugin, task_vars, monkeypatch):
result = plugin.run(tmp=None, task_vars=task_vars)
- assert result['checks']['fake_check'] == {'skipped': True}
+ assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Not active for this host")
+ assert not failed(result)
+ assert not changed(result)
+ assert not skipped(result)
+
+
+def test_action_plugin_skip_disabled_checks(plugin, task_vars, monkeypatch):
+ checks = [fake_check('fake_check', is_active=True)]
+ monkeypatch.setattr('openshift_checks.OpenShiftCheck.subclasses', classmethod(lambda cls: checks))
+
+ task_vars['openshift_disable_check'] = 'fake_check'
+ result = plugin.run(tmp=None, task_vars=task_vars)
+
+ assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Disabled by user request")
assert not failed(result)
assert not changed(result)
assert not skipped(result)
diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py
index 970b474d7..b353fa610 100644
--- a/roles/openshift_health_checker/test/disk_availability_test.py
+++ b/roles/openshift_health_checker/test/disk_availability_test.py
@@ -42,9 +42,10 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
assert word in str(excinfo.value)
-@pytest.mark.parametrize('group_names,ansible_mounts', [
+@pytest.mark.parametrize('group_names,configured_min,ansible_mounts', [
(
['masters'],
+ 0,
[{
'mount': '/',
'size_available': 40 * 10**9 + 1,
@@ -52,6 +53,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
),
(
['nodes'],
+ 0,
[{
'mount': '/',
'size_available': 15 * 10**9 + 1,
@@ -59,6 +61,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
),
(
['etcd'],
+ 0,
[{
'mount': '/',
'size_available': 20 * 10**9 + 1,
@@ -66,6 +69,15 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
),
(
['etcd'],
+ 1, # configure lower threshold
+ [{
+ 'mount': '/',
+ 'size_available': 1 * 10**9 + 1, # way smaller than recommended
+ }],
+ ),
+ (
+ ['etcd'],
+ 0,
[{
# not enough space on / ...
'mount': '/',
@@ -77,9 +89,10 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
}],
),
])
-def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):
+def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansible_mounts):
task_vars = dict(
group_names=group_names,
+ openshift_check_min_host_disk_gb=configured_min,
ansible_mounts=ansible_mounts,
)
@@ -89,9 +102,10 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):
assert not result.get('failed', False)
-@pytest.mark.parametrize('group_names,ansible_mounts,extra_words', [
+@pytest.mark.parametrize('group_names,configured_min,ansible_mounts,extra_words', [
(
['masters'],
+ 0,
[{
'mount': '/',
'size_available': 1,
@@ -99,7 +113,17 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):
['0.0 GB'],
),
(
+ ['masters'],
+ 100, # set a higher threshold
+ [{
+ 'mount': '/',
+ 'size_available': 50 * 10**9, # would normally be enough...
+ }],
+ ['100.0 GB'],
+ ),
+ (
['nodes'],
+ 0,
[{
'mount': '/',
'size_available': 1 * 10**9,
@@ -108,6 +132,7 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):
),
(
['etcd'],
+ 0,
[{
'mount': '/',
'size_available': 1,
@@ -116,6 +141,7 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):
),
(
['nodes', 'masters'],
+ 0,
[{
'mount': '/',
# enough space for a node, not enough for a master
@@ -125,6 +151,7 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):
),
(
['etcd'],
+ 0,
[{
# enough space on / ...
'mount': '/',
@@ -137,9 +164,10 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):
['0.0 GB'],
),
])
-def test_fails_with_insufficient_disk_space(group_names, ansible_mounts, extra_words):
+def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible_mounts, extra_words):
task_vars = dict(
group_names=group_names,
+ openshift_check_min_host_disk_gb=configured_min,
ansible_mounts=ansible_mounts,
)
diff --git a/roles/openshift_health_checker/test/memory_availability_test.py b/roles/openshift_health_checker/test/memory_availability_test.py
index e161a5b9e..1db203854 100644
--- a/roles/openshift_health_checker/test/memory_availability_test.py
+++ b/roles/openshift_health_checker/test/memory_availability_test.py
@@ -20,27 +20,37 @@ def test_is_active(group_names, is_active):
assert MemoryAvailability.is_active(task_vars=task_vars) == is_active
-@pytest.mark.parametrize('group_names,ansible_memtotal_mb', [
+@pytest.mark.parametrize('group_names,configured_min,ansible_memtotal_mb', [
(
['masters'],
+ 0,
17200,
),
(
['nodes'],
+ 0,
8200,
),
(
+ ['nodes'],
+ 1, # configure lower threshold
+ 2000, # too low for recommended but not for configured
+ ),
+ (
['etcd'],
- 22200,
+ 0,
+ 8200,
),
(
['masters', 'nodes'],
+ 0,
17000,
),
])
-def test_succeeds_with_recommended_memory(group_names, ansible_memtotal_mb):
+def test_succeeds_with_recommended_memory(group_names, configured_min, ansible_memtotal_mb):
task_vars = dict(
group_names=group_names,
+ openshift_check_min_host_memory_gb=configured_min,
ansible_memtotal_mb=ansible_memtotal_mb,
)
@@ -50,39 +60,56 @@ def test_succeeds_with_recommended_memory(group_names, ansible_memtotal_mb):
assert not result.get('failed', False)
-@pytest.mark.parametrize('group_names,ansible_memtotal_mb,extra_words', [
+@pytest.mark.parametrize('group_names,configured_min,ansible_memtotal_mb,extra_words', [
(
['masters'],
0,
+ 0,
['0.0 GB'],
),
(
['nodes'],
+ 0,
100,
['0.1 GB'],
),
(
+ ['nodes'],
+ 24, # configure higher threshold
+ 20000, # enough to meet recommended but not configured
+ ['20.0 GB'],
+ ),
+ (
['etcd'],
- -1,
- ['0.0 GB'],
+ 0,
+ 7000,
+ ['7.0 GB'],
+ ),
+ (
+ ['etcd', 'masters'],
+ 0,
+ 9000, # enough memory for etcd, not enough for a master
+ ['9.0 GB'],
),
(
['nodes', 'masters'],
+ 0,
# enough memory for a node, not enough for a master
11000,
['11.0 GB'],
),
])
-def test_fails_with_insufficient_memory(group_names, ansible_memtotal_mb, extra_words):
+def test_fails_with_insufficient_memory(group_names, configured_min, ansible_memtotal_mb, extra_words):
task_vars = dict(
group_names=group_names,
+ openshift_check_min_host_memory_gb=configured_min,
ansible_memtotal_mb=ansible_memtotal_mb,
)
check = MemoryAvailability(execute_module=fake_execute_module)
result = check.run(tmp=None, task_vars=task_vars)
- assert result['failed']
+ assert result.get('failed', False)
for word in 'below recommended'.split() + extra_words:
assert word in result['msg']