add elasticsearch, fluentd, kibana check

author: juanvallejo <jvallejo@redhat.com> 2017-03-22 15:52:35 -0400
committer: juanvallejo <jvallejo@redhat.com> 2017-06-02 16:44:07 -0400
commit: 2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df (patch)
tree: e2bcc99a895fc6647bc7623a8bcbf6c8fd5385f8 /roles/openshift_health_checker/test
parent: 46dca9b8b15ed67adfa2ca617f300e5d1df7c3e0 (diff)
download: openshift-2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df.tar.gz
openshift-2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df.tar.bz2
openshift-2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df.tar.xz
openshift-2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df.zip
5 files changed, 712 insertions, 0 deletions
diff --git a/roles/openshift_health_checker/test/curator_test.py b/roles/openshift_health_checker/test/curator_test.py
new file mode 100644
index 000000000..ae108c96e
--- /dev/null
+++ b/roles/openshift_health_checker/test/curator_test.py
@@ -0,0 +1,68 @@
+import pytest
+
+from openshift_checks.logging.curator import Curator
+
+
+def canned_curator(exec_oc=None):
+    """Create a Curator check object with canned exec_oc method"""
+    check = Curator("dummy")  # fails if a module is actually invoked
+    if exec_oc:
+        check._exec_oc = exec_oc
+    return check
+
+
+def assert_error(error, expect_error):
+    if expect_error:
+        assert error
+        assert expect_error in error
+    else:
+        assert not error
+
+
+plain_curator_pod = {
+    "metadata": {
+        "labels": {"component": "curator", "deploymentconfig": "logging-curator"},
+        "name": "logging-curator-1",
+    },
+    "status": {
+        "containerStatuses": [{"ready": True}],
+        "conditions": [{"status": "True", "type": "Ready"}],
+        "podIP": "10.10.10.10",
+    }
+}
+
+not_running_curator_pod = {
+    "metadata": {
+        "labels": {"component": "curator", "deploymentconfig": "logging-curator"},
+        "name": "logging-curator-2",
+    },
+    "status": {
+        "containerStatuses": [{"ready": False}],
+        "conditions": [{"status": "False", "type": "Ready"}],
+        "podIP": "10.10.10.10",
+    }
+}
+
+
+@pytest.mark.parametrize('pods, expect_error', [
+    (
+        [],
+        "no Curator pods",
+    ),
+    (
+        [plain_curator_pod],
+        None,
+    ),
+    (
+        [not_running_curator_pod],
+        "not currently in a running state",
+    ),
+    (
+        [plain_curator_pod, plain_curator_pod],
+        "more than one Curator pod",
+    ),
+])
+def test_get_curator_pods(pods, expect_error):
+    check = canned_curator()
+    error = check.check_curator(pods)
+    assert_error(error, expect_error)
diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py
new file mode 100644
index 000000000..b9d375d8c
--- /dev/null
+++ b/roles/openshift_health_checker/test/elasticsearch_test.py
@@ -0,0 +1,180 @@
+import pytest
+import json
+
+from openshift_checks.logging.elasticsearch import Elasticsearch
+
+task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin')))
+
+
+def canned_elasticsearch(exec_oc=None):
+    """Create an Elasticsearch check object with canned exec_oc method"""
+    check = Elasticsearch("dummy")  # fails if a module is actually invoked
+    if exec_oc:
+        check._exec_oc = exec_oc
+    return check
+
+
+def assert_error(error, expect_error):
+    if expect_error:
+        assert error
+        assert expect_error in error
+    else:
+        assert not error
+
+
+plain_es_pod = {
+    "metadata": {
+        "labels": {"component": "es", "deploymentconfig": "logging-es"},
+        "name": "logging-es",
+    },
+    "status": {
+        "conditions": [{"status": "True", "type": "Ready"}],
+        "containerStatuses": [{"ready": True}],
+        "podIP": "10.10.10.10",
+    },
+    "_test_master_name_str": "name logging-es",
+}
+
+split_es_pod = {
+    "metadata": {
+        "labels": {"component": "es", "deploymentconfig": "logging-es-2"},
+        "name": "logging-es-2",
+    },
+    "status": {
+        "conditions": [{"status": "True", "type": "Ready"}],
+        "containerStatuses": [{"ready": True}],
+        "podIP": "10.10.10.10",
+    },
+    "_test_master_name_str": "name logging-es-2",
+}
+
+
+def test_check_elasticsearch():
+    assert 'No logging Elasticsearch pods' in canned_elasticsearch().check_elasticsearch([], {})
+
+    # canned oc responses to match so all the checks pass
+    def _exec_oc(cmd, args, task_vars):
+        if '_cat/master' in cmd:
+            return 'name logging-es'
+        elif '/_nodes' in cmd:
+            return json.dumps(es_node_list)
+        elif '_cluster/health' in cmd:
+            return '{"status": "green"}'
+        elif ' df ' in cmd:
+            return 'IUse% Use%\n 3%  4%\n'
+        else:
+            raise Exception(cmd)
+
+    assert not canned_elasticsearch(_exec_oc).check_elasticsearch([plain_es_pod], {})
+
+
+def pods_by_name(pods):
+    return {pod['metadata']['name']: pod for pod in pods}
+
+
+@pytest.mark.parametrize('pods, expect_error', [
+    (
+        [],
+        'No logging Elasticsearch masters',
+    ),
+    (
+        [plain_es_pod],
+        None,
+    ),
+    (
+        [plain_es_pod, split_es_pod],
+        'Found multiple Elasticsearch masters',
+    ),
+])
+def test_check_elasticsearch_masters(pods, expect_error):
+    test_pods = list(pods)
+    check = canned_elasticsearch(lambda cmd, args, task_vars: test_pods.pop(0)['_test_master_name_str'])
+
+    errors = check._check_elasticsearch_masters(pods_by_name(pods), task_vars_config_base)
+    assert_error(''.join(errors), expect_error)
+
+
+es_node_list = {
+    'nodes': {
+        'random-es-name': {
+            'host': 'logging-es',
+        }}}
+
+
+@pytest.mark.parametrize('pods, node_list, expect_error', [
+    (
+        [],
+        {},
+        'No logging Elasticsearch masters',
+    ),
+    (
+        [plain_es_pod],
+        es_node_list,
+        None,
+    ),
+    (
+        [plain_es_pod],
+        {},  # empty list of nodes triggers KeyError
+        "Failed to query",
+    ),
+    (
+        [split_es_pod],
+        es_node_list,
+        'does not correspond to any known ES pod',
+    ),
+])
+def test_check_elasticsearch_node_list(pods, node_list, expect_error):
+    check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(node_list))
+
+    errors = check._check_elasticsearch_node_list(pods_by_name(pods), task_vars_config_base)
+    assert_error(''.join(errors), expect_error)
+
+
+@pytest.mark.parametrize('pods, health_data, expect_error', [
+    (
+        [plain_es_pod],
+        [{"status": "green"}],
+        None,
+    ),
+    (
+        [plain_es_pod],
+        [{"no-status": "should bomb"}],
+        'Could not retrieve cluster health status',
+    ),
+    (
+        [plain_es_pod, split_es_pod],
+        [{"status": "green"}, {"status": "red"}],
+        'Elasticsearch cluster health status is RED',
+    ),
+])
+def test_check_elasticsearch_cluster_health(pods, health_data, expect_error):
+    test_health_data = list(health_data)
+    check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(test_health_data.pop(0)))
+
+    errors = check._check_es_cluster_health(pods_by_name(pods), task_vars_config_base)
+    assert_error(''.join(errors), expect_error)
+
+
+@pytest.mark.parametrize('disk_data, expect_error', [
+    (
+        'df: /elasticsearch/persistent: No such file or directory\n',
+        'Could not retrieve storage usage',
+    ),
+    (
+        'IUse% Use%\n 3%  4%\n',
+        None,
+    ),
+    (
+        'IUse% Use%\n 95%  40%\n',
+        'Inode percent usage on the storage volume',
+    ),
+    (
+        'IUse% Use%\n 3%  94%\n',
+        'Disk percent usage on the storage volume',
+    ),
+])
+def test_check_elasticsearch_diskspace(disk_data, expect_error):
+    check = canned_elasticsearch(lambda cmd, args, task_vars: disk_data)
+
+    errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod]), task_vars_config_base)
+    assert_error(''.join(errors), expect_error)
diff --git a/roles/openshift_health_checker/test/fluentd_test.py b/roles/openshift_health_checker/test/fluentd_test.py
new file mode 100644
index 000000000..d151c0b19
--- /dev/null
+++ b/roles/openshift_health_checker/test/fluentd_test.py
@@ -0,0 +1,109 @@
+import pytest
+import json
+
+from openshift_checks.logging.fluentd import Fluentd
+
+
+def canned_fluentd(exec_oc=None):
+    """Create a Fluentd check object with canned exec_oc method"""
+    check = Fluentd("dummy")  # fails if a module is actually invoked
+    if exec_oc:
+        check._exec_oc = exec_oc
+    return check
+
+
+def assert_error(error, expect_error):
+    if expect_error:
+        assert error
+        assert expect_error in error
+    else:
+        assert not error
+
+
+fluentd_pod_node1 = {
+    "metadata": {
+        "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
+        "name": "logging-fluentd-1",
+    },
+    "spec": {"host": "node1", "nodeName": "node1"},
+    "status": {
+        "containerStatuses": [{"ready": True}],
+        "conditions": [{"status": "True", "type": "Ready"}],
+    }
+}
+fluentd_pod_node2_down = {
+    "metadata": {
+        "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
+        "name": "logging-fluentd-2",
+    },
+    "spec": {"host": "node2", "nodeName": "node2"},
+    "status": {
+        "containerStatuses": [{"ready": False}],
+        "conditions": [{"status": "False", "type": "Ready"}],
+    }
+}
+fluentd_node1 = {
+    "metadata": {
+        "labels": {"logging-infra-fluentd": "true", "kubernetes.io/hostname": "node1"},
+        "name": "node1",
+    },
+    "status": {"addresses": [{"type": "InternalIP", "address": "10.10.1.1"}]},
+}
+fluentd_node2 = {
+    "metadata": {
+        "labels": {"logging-infra-fluentd": "true", "kubernetes.io/hostname": "hostname"},
+        "name": "node2",
+    },
+    "status": {"addresses": [{"type": "InternalIP", "address": "10.10.1.2"}]},
+}
+fluentd_node3_unlabeled = {
+    "metadata": {
+        "labels": {"kubernetes.io/hostname": "hostname"},
+        "name": "node3",
+    },
+    "status": {"addresses": [{"type": "InternalIP", "address": "10.10.1.3"}]},
+}
+
+
+@pytest.mark.parametrize('pods, nodes, expect_error', [
+    (
+        [],
+        [],
+        'No nodes appear to be defined',
+    ),
+    (
+        [],
+        [fluentd_node3_unlabeled],
+        'There are no nodes with the fluentd label',
+    ),
+    (
+        [],
+        [fluentd_node1, fluentd_node3_unlabeled],
+        'Fluentd will not aggregate logs from these nodes.',
+    ),
+    (
+        [],
+        [fluentd_node2],
+        "nodes are supposed to have a Fluentd pod but do not",
+    ),
+    (
+        [fluentd_pod_node1, fluentd_pod_node1],
+        [fluentd_node1],
+        'more Fluentd pods running than nodes labeled',
+    ),
+    (
+        [fluentd_pod_node2_down],
+        [fluentd_node2],
+        "Fluentd pods are supposed to be running",
+    ),
+    (
+        [fluentd_pod_node1],
+        [fluentd_node1],
+        None,
+    ),
+])
+def test_get_fluentd_pods(pods, nodes, expect_error):
+    check = canned_fluentd(lambda cmd, args, task_vars: json.dumps(dict(items=nodes)))
+
+    error = check.check_fluentd(pods, {})
+    assert_error(error, expect_error)
diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py
new file mode 100644
index 000000000..19140a1b6
--- /dev/null
+++ b/roles/openshift_health_checker/test/kibana_test.py
@@ -0,0 +1,218 @@
+import pytest
+import json
+
+try:
+    import urllib2
+    from urllib2 import HTTPError, URLError
+except ImportError:
+    from urllib.error import HTTPError, URLError
+    import urllib.request as urllib2
+
+from openshift_checks.logging.kibana import Kibana
+
+
+def canned_kibana(exec_oc=None):
+    """Create a Kibana check object with canned exec_oc method"""
+    check = Kibana("dummy")  # fails if a module is actually invoked
+    if exec_oc:
+        check._exec_oc = exec_oc
+    return check
+
+
+def assert_error(error, expect_error):
+    if expect_error:
+        assert error
+        assert expect_error in error
+    else:
+        assert not error
+
+
+plain_kibana_pod = {
+    "metadata": {
+        "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+        "name": "logging-kibana-1",
+    },
+    "status": {
+        "containerStatuses": [{"ready": True}, {"ready": True}],
+        "conditions": [{"status": "True", "type": "Ready"}],
+    }
+}
+not_running_kibana_pod = {
+    "metadata": {
+        "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+        "name": "logging-kibana-2",
+    },
+    "status": {
+        "containerStatuses": [{"ready": True}, {"ready": False}],
+        "conditions": [{"status": "True", "type": "Ready"}],
+    }
+}
+
+
+@pytest.mark.parametrize('pods, expect_error', [
+    (
+        [],
+        "There are no Kibana pods deployed",
+    ),
+    (
+        [plain_kibana_pod],
+        None,
+    ),
+    (
+        [not_running_kibana_pod],
+        "No Kibana pod is in a running state",
+    ),
+    (
+        [plain_kibana_pod, not_running_kibana_pod],
+        "The following Kibana pods are not currently in a running state",
+    ),
+])
+def test_check_kibana(pods, expect_error):
+    check = canned_kibana()
+    error = check.check_kibana(pods)
+    assert_error(error, expect_error)
+
+
+@pytest.mark.parametrize('route, expect_url, expect_error', [
+    (
+        None,
+        None,
+        'no_route_exists',
+    ),
+
+    # test route with no ingress
+    (
+        {
+            "metadata": {
+                "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+                "name": "logging-kibana",
+            },
+            "status": {
+                "ingress": [],
+            },
+            "spec": {
+                "host": "hostname",
+            }
+        },
+        None,
+        'route_not_accepted',
+    ),
+
+    # test route with no host
+    (
+        {
+            "metadata": {
+                "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+                "name": "logging-kibana",
+            },
+            "status": {
+                "ingress": [{
+                    "status": True,
+                }],
+            },
+            "spec": {},
+        },
+        None,
+        'route_missing_host',
+    ),
+
+    # test route that looks fine
+    (
+        {
+            "metadata": {
+                "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+                "name": "logging-kibana",
+            },
+            "status": {
+                "ingress": [{
+                    "status": True,
+                }],
+            },
+            "spec": {
+                "host": "hostname",
+            },
+        },
+        "https://hostname/",
+        None,
+    ),
+])
+def test_get_kibana_url(route, expect_url, expect_error):
+    check = canned_kibana(lambda cmd, args, task_vars: json.dumps(route) if route else "")
+
+    url, error = check._get_kibana_url({})
+    if expect_url:
+        assert url == expect_url
+    else:
+        assert not url
+    if expect_error:
+        assert error == expect_error
+    else:
+        assert not error
+
+
+@pytest.mark.parametrize('exec_result, expect', [
+    (
+        'urlopen error [Errno 111] Connection refused',
+        'at least one router routing to it?',
+    ),
+    (
+        'urlopen error [Errno -2] Name or service not known',
+        'DNS configured for the Kibana hostname?',
+    ),
+    (
+        'Status code was not [302]: HTTP Error 500: Server error',
+        'did not return the correct status code',
+    ),
+    (
+        'bork bork bork',
+        'bork bork bork',  # should pass through
+    ),
+])
+def test_verify_url_internal_failure(exec_result, expect):
+    check = Kibana(execute_module=lambda module_name, args, task_vars: dict(failed=True, msg=exec_result))
+    check._get_kibana_url = lambda task_vars: ('url', None)
+
+    error = check._check_kibana_route({})
+    assert_error(error, expect)
+
+
+@pytest.mark.parametrize('lib_result, expect', [
+    (
+        HTTPError('url', 500, "it broke", hdrs=None, fp=None),
+        'it broke',
+    ),
+    (
+        URLError('it broke'),
+        'it broke',
+    ),
+    (
+        302,
+        'returned the wrong error code',
+    ),
+    (
+        200,
+        None,
+    ),
+])
+def test_verify_url_external_failure(lib_result, expect, monkeypatch):
+    """Check how _check_kibana_route reports a patched urlopen's return code or raised error."""
+    class _http_return:
+
+        def __init__(self, code):
+            self.code = code
+
+        def getcode(self):
+            return self.code
+
+    def urlopen(url, context):
+        if isinstance(lib_result, int):  # integer cases are HTTP status codes; anything else is raised
+            return _http_return(lib_result)
+        raise lib_result
+    monkeypatch.setattr(urllib2, 'urlopen', urlopen)
+
+    check = canned_kibana()
+    check._get_kibana_url = lambda task_vars: ('url', None)
+    check._verify_url_internal = lambda url, task_vars: None
+
+    error = check._check_kibana_route({})
+    assert_error(error, expect)
diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py
new file mode 100644
index 000000000..b6db34fe3
--- /dev/null
+++ b/roles/openshift_health_checker/test/logging_check_test.py
@@ -0,0 +1,137 @@
+import pytest
+import json
+
+from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException
+
+task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin')))
+
+
+logging_namespace = "logging"
+
+
+def canned_loggingcheck(exec_oc=None):
+    """Create a LoggingCheck object with canned exec_oc method"""
+    check = LoggingCheck("dummy")  # fails if a module is actually invoked
+    check.logging_namespace = 'logging'
+    if exec_oc:
+        check.exec_oc = exec_oc
+    return check
+
+
+def assert_error(error, expect_error):
+    if expect_error:
+        assert error
+        assert expect_error in error
+    else:
+        assert not error
+
+
+plain_es_pod = {
+    "metadata": {
+        "labels": {"component": "es", "deploymentconfig": "logging-es"},
+        "name": "logging-es",
+    },
+    "status": {
+        "conditions": [{"status": "True", "type": "Ready"}],
+        "containerStatuses": [{"ready": True}],
+        "podIP": "10.10.10.10",
+    },
+    "_test_master_name_str": "name logging-es",
+}
+
+plain_kibana_pod = {
+    "metadata": {
+        "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+        "name": "logging-kibana-1",
+    },
+    "status": {
+        "containerStatuses": [{"ready": True}, {"ready": True}],
+        "conditions": [{"status": "True", "type": "Ready"}],
+    }
+}
+
+fluentd_pod_node1 = {
+    "metadata": {
+        "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
+        "name": "logging-fluentd-1",
+    },
+    "spec": {"host": "node1", "nodeName": "node1"},
+    "status": {
+        "containerStatuses": [{"ready": True}],
+        "conditions": [{"status": "True", "type": "Ready"}],
+    }
+}
+
+plain_curator_pod = {
+    "metadata": {
+        "labels": {"component": "curator", "deploymentconfig": "logging-curator"},
+        "name": "logging-curator-1",
+    },
+    "status": {
+        "containerStatuses": [{"ready": True}],
+        "conditions": [{"status": "True", "type": "Ready"}],
+        "podIP": "10.10.10.10",
+    }
+}
+
+
+@pytest.mark.parametrize('problem, expect', [
+    ("[Errno 2] No such file or directory", "supposed to be a master"),
+    ("Permission denied", "Unexpected error using `oc`"),
+])
+def test_oc_failure(problem, expect):
+    """A failed `ocutil` module result must make exec_oc raise OpenShiftCheckException."""
+    def execute_module(module_name, args, task_vars):
+        if module_name == "ocutil":
+            return dict(failed=True, result=problem)
+        return dict(changed=False)
+
+    check = LoggingCheck({})
+    with pytest.raises(OpenShiftCheckException) as excinfo:
+        check.exec_oc(execute_module, logging_namespace, 'get foo', [], task_vars=task_vars_config_base)
+    assert expect in str(excinfo.value)  # inspect the exception message, not the ExceptionInfo wrapper
+
+
+groups_with_first_master = dict(masters=['this-host', 'other-host'])
+groups_with_second_master = dict(masters=['other-host', 'this-host'])
+groups_not_a_master = dict(masters=['other-host'])
+
+
+@pytest.mark.parametrize('groups, logging_deployed, is_active', [
+    (groups_with_first_master, True, True),
+    (groups_with_first_master, False, False),
+    (groups_not_a_master, True, False),
+    (groups_with_second_master, True, False),
+    (groups_not_a_master, True, False),
+])
+def test_is_active(groups, logging_deployed, is_active):
+    task_vars = dict(
+        ansible_ssh_host='this-host',
+        groups=groups,
+        openshift_hosted_logging_deploy=logging_deployed,
+    )
+
+    assert LoggingCheck.is_active(task_vars=task_vars) == is_active
+
+
+@pytest.mark.parametrize('pod_output, expect_pods, expect_error', [
+    (
+        'No resources found.',
+        None,
+        'There are no pods in the logging namespace',
+    ),
+    (
+        json.dumps({'items': [plain_kibana_pod, plain_es_pod, plain_curator_pod, fluentd_pod_node1]}),
+        [plain_es_pod],
+        None,
+    ),
+])
+def test_get_pods_for_component(pod_output, expect_pods, expect_error):
+    check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: pod_output)
+    pods, error = check.get_pods_for_component(
+        lambda name, args, task_vars: {},
+        logging_namespace,
+        "es",
+        {}
+    )
+    assert_error(error, expect_error)  # NOTE(review): pods is never checked against expect_pods; confirm intent
author	juanvallejo <jvallejo@redhat.com>	2017-03-22 15:52:35 -0400
committer	juanvallejo <jvallejo@redhat.com>	2017-06-02 16:44:07 -0400
commit	2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df (patch)
tree	e2bcc99a895fc6647bc7623a8bcbf6c8fd5385f8 /roles/openshift_health_checker/test
parent	46dca9b8b15ed67adfa2ca617f300e5d1df7c3e0 (diff)
download	openshift-2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df.tar.gz openshift-2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df.tar.bz2 openshift-2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df.tar.xz openshift-2e53dbb4c0d9bfe79cd79e0a0ece9db065b286df.zip