summaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
Diffstat (limited to 'roles')
-rw-r--r--roles/cockpit/defaults/main.yml8
-rw-r--r--roles/cockpit/meta/main.yml5
-rw-r--r--roles/cockpit/tasks/firewall.yml40
-rw-r--r--roles/cockpit/tasks/main.yml4
-rw-r--r--roles/docker/meta/main.yml1
-rw-r--r--roles/etcd/defaults/main.yaml12
-rw-r--r--roles/etcd/meta/main.yml7
-rw-r--r--roles/etcd/tasks/firewall.yml40
-rw-r--r--roles/etcd/tasks/main.yml4
-rw-r--r--roles/lib_os_firewall/README.md63
-rwxr-xr-xroles/lib_os_firewall/library/os_firewall_manage_iptables.py (renamed from roles/os_firewall/library/os_firewall_manage_iptables.py)0
-rw-r--r--roles/nuage_master/defaults/main.yml10
-rw-r--r--roles/nuage_master/handlers/main.yaml4
-rw-r--r--roles/nuage_master/meta/main.yml5
-rw-r--r--roles/nuage_master/tasks/firewall.yml40
-rw-r--r--roles/nuage_master/tasks/main.yaml5
-rw-r--r--roles/nuage_master/tasks/serviceaccount.yml40
-rw-r--r--roles/nuage_master/vars/main.yaml4
-rw-r--r--roles/nuage_node/defaults/main.yml12
-rw-r--r--roles/nuage_node/meta/main.yml7
-rw-r--r--roles/nuage_node/tasks/firewall.yml40
-rw-r--r--roles/nuage_node/tasks/main.yaml4
-rw-r--r--roles/nuage_node/templates/vsp-openshift.j22
-rw-r--r--roles/openshift_cfme/defaults/main.yml6
-rw-r--r--roles/openshift_cfme/handlers/main.yml9
-rw-r--r--roles/openshift_common/meta/main.yml1
-rw-r--r--roles/openshift_docker/meta/main.yml1
-rw-r--r--roles/openshift_facts/defaults/main.yml2
-rw-r--r--roles/openshift_facts/meta/main.yml3
-rw-r--r--roles/openshift_facts/tasks/main.yml118
-rw-r--r--roles/openshift_facts/vars/Fedora.yml6
-rw-r--r--roles/openshift_facts/vars/default.yml6
-rw-r--r--roles/openshift_facts/vars/main.yml5
-rw-r--r--roles/openshift_health_checker/action_plugins/openshift_health_check.py10
-rw-r--r--roles/openshift_health_checker/meta/main.yml1
-rw-r--r--roles/openshift_health_checker/openshift_checks/__init__.py110
-rw-r--r--roles/openshift_health_checker/openshift_checks/disk_availability.py33
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_image_availability.py10
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_storage.py32
-rw-r--r--roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py21
-rw-r--r--roles/openshift_health_checker/openshift_checks/etcd_volume.py22
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/curator.py32
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py166
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/fluentd.py191
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py17
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/kibana.py208
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/logging.py56
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py75
-rw-r--r--roles/openshift_health_checker/openshift_checks/mixins.py8
-rw-r--r--roles/openshift_health_checker/test/action_plugin_test.py14
-rw-r--r--roles/openshift_health_checker/test/curator_test.py45
-rw-r--r--roles/openshift_health_checker/test/disk_availability_test.py34
-rw-r--r--roles/openshift_health_checker/test/elasticsearch_test.py148
-rw-r--r--roles/openshift_health_checker/test/etcd_imagedata_size_test.py5
-rw-r--r--roles/openshift_health_checker/test/etcd_volume_test.py5
-rw-r--r--roles/openshift_health_checker/test/fluentd_config_test.py15
-rw-r--r--roles/openshift_health_checker/test/fluentd_test.py55
-rw-r--r--roles/openshift_health_checker/test/kibana_test.py149
-rw-r--r--roles/openshift_health_checker/test/logging_check_test.py49
-rw-r--r--roles/openshift_health_checker/test/logging_index_time_test.py82
-rw-r--r--roles/openshift_health_checker/test/openshift_check_test.py22
-rw-r--r--roles/openshift_hosted/defaults/main.yml15
-rw-r--r--roles/openshift_hosted/meta/main.yml6
-rw-r--r--roles/openshift_hosted/tasks/registry/firewall.yml40
-rw-r--r--roles/openshift_hosted/tasks/registry/registry.yml7
-rw-r--r--roles/openshift_hosted/tasks/router/firewall.yml40
-rw-r--r--roles/openshift_hosted/tasks/router/router.yml4
-rw-r--r--roles/openshift_hosted_logging/handlers/main.yml5
-rw-r--r--roles/openshift_hosted_metrics/handlers/main.yml9
-rw-r--r--roles/openshift_loadbalancer/defaults/main.yml13
-rw-r--r--roles/openshift_loadbalancer/meta/main.yml12
-rw-r--r--roles/openshift_loadbalancer/tasks/firewall.yml40
-rw-r--r--roles/openshift_loadbalancer/tasks/main.yml4
-rw-r--r--roles/openshift_logging/handlers/main.yml9
-rw-r--r--roles/openshift_logging/tasks/update_master_config.yaml1
-rw-r--r--roles/openshift_logging_fluentd/tasks/main.yaml2
-rw-r--r--roles/openshift_logging_kibana/tasks/main.yaml6
-rw-r--r--roles/openshift_master/defaults/main.yml17
-rw-r--r--roles/openshift_master/files/atomic-openshift-master.service23
-rw-r--r--roles/openshift_master/files/origin-master.service23
-rw-r--r--roles/openshift_master/handlers/main.yml9
-rw-r--r--roles/openshift_master/meta/main.yml16
-rw-r--r--roles/openshift_master/tasks/clean_systemd_units.yml5
l---------roles/openshift_master/tasks/files1
-rw-r--r--roles/openshift_master/tasks/firewall.yml40
-rw-r--r--roles/openshift_master/tasks/main.yml90
-rw-r--r--roles/openshift_master/tasks/system_container.yml14
-rw-r--r--roles/openshift_master/tasks/systemd_units.yml79
-rw-r--r--roles/openshift_master/templates/master.yaml.v1.j25
-rw-r--r--roles/openshift_master/templates/master_docker/master.docker.service.j218
-rw-r--r--roles/openshift_master/vars/main.yml1
-rw-r--r--roles/openshift_master_facts/tasks/main.yml2
-rw-r--r--roles/openshift_metrics/handlers/main.yml9
-rw-r--r--roles/openshift_metrics/tasks/update_master_config.yaml1
-rw-r--r--roles/openshift_node/defaults/main.yml15
-rw-r--r--roles/openshift_node/meta/main.yml27
-rw-r--r--roles/openshift_node/tasks/firewall.yml40
-rw-r--r--roles/openshift_node/tasks/main.yml32
-rw-r--r--roles/openshift_node_upgrade/tasks/main.yml2
-rw-r--r--roles/openshift_node_upgrade/tasks/restart.yml1
-rw-r--r--roles/openshift_provisioners/tasks/install_efs.yaml2
-rw-r--r--roles/openshift_service_catalog/tasks/wire_aggregator.yml8
-rw-r--r--roles/openshift_storage_nfs/defaults/main.yml8
-rw-r--r--roles/openshift_storage_nfs/meta/main.yml5
-rw-r--r--roles/openshift_storage_nfs/tasks/firewall.yml40
-rw-r--r--roles/openshift_storage_nfs/tasks/main.yml4
-rw-r--r--roles/os_firewall/README.md37
-rw-r--r--roles/os_firewall/defaults/main.yml2
-rw-r--r--roles/os_firewall/tasks/firewall/firewalld.yml16
-rw-r--r--roles/os_firewall/tasks/firewall/iptables.yml16
110 files changed, 1604 insertions, 1346 deletions
diff --git a/roles/cockpit/defaults/main.yml b/roles/cockpit/defaults/main.yml
new file mode 100644
index 000000000..cbe5bb92b
--- /dev/null
+++ b/roles/cockpit/defaults/main.yml
@@ -0,0 +1,8 @@
+---
+r_cockpit_firewall_enabled: True
+r_cockpit_use_firewalld: False
+
+r_cockpit_os_firewall_deny: []
+r_cockpit_os_firewall_allow:
+- service: cockpit-ws
+ port: 9090/tcp
diff --git a/roles/cockpit/meta/main.yml b/roles/cockpit/meta/main.yml
index 0f507e75e..8c0ed3cb8 100644
--- a/roles/cockpit/meta/main.yml
+++ b/roles/cockpit/meta/main.yml
@@ -12,7 +12,4 @@ galaxy_info:
categories:
- cloud
dependencies:
-- role: os_firewall
- os_firewall_allow:
- - service: cockpit-ws
- port: 9090/tcp
+- role: lib_os_firewall
diff --git a/roles/cockpit/tasks/firewall.yml b/roles/cockpit/tasks/firewall.yml
new file mode 100644
index 000000000..e597ac84d
--- /dev/null
+++ b/roles/cockpit/tasks/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_cockpit_firewall_enabled | bool and not r_cockpit_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_cockpit_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_cockpit_os_firewall_deny }}"
+
+- when: r_cockpit_firewall_enabled | bool and r_cockpit_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_cockpit_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_cockpit_os_firewall_deny }}"
diff --git a/roles/cockpit/tasks/main.yml b/roles/cockpit/tasks/main.yml
index 57f49ea11..066ee3f3b 100644
--- a/roles/cockpit/tasks/main.yml
+++ b/roles/cockpit/tasks/main.yml
@@ -1,4 +1,8 @@
---
+- name: setup firewall
+ include: firewall.yml
+ static: yes
+
- name: Install cockpit-ws
package: name={{ item }} state=present
with_items:
diff --git a/roles/docker/meta/main.yml b/roles/docker/meta/main.yml
index cd4083572..b773a417c 100644
--- a/roles/docker/meta/main.yml
+++ b/roles/docker/meta/main.yml
@@ -10,5 +10,4 @@ galaxy_info:
versions:
- 7
dependencies:
-- role: os_firewall
- role: lib_openshift
diff --git a/roles/etcd/defaults/main.yaml b/roles/etcd/defaults/main.yaml
index c0d1d5946..d12d7a358 100644
--- a/roles/etcd/defaults/main.yaml
+++ b/roles/etcd/defaults/main.yaml
@@ -1,4 +1,7 @@
---
+r_etcd_firewall_enabled: True
+r_etcd_use_firewalld: False
+
etcd_initial_cluster_state: new
etcd_initial_cluster_token: etcd-cluster-1
@@ -7,4 +10,13 @@ etcd_listen_peer_urls: "{{ etcd_peer_url_scheme }}://{{ etcd_ip }}:{{ etcd_peer_
etcd_advertise_client_urls: "{{ etcd_url_scheme }}://{{ etcd_ip }}:{{ etcd_client_port }}"
etcd_listen_client_urls: "{{ etcd_url_scheme }}://{{ etcd_ip }}:{{ etcd_client_port }}"
+etcd_client_port: 2379
+etcd_peer_port: 2380
+
etcd_systemd_dir: "/etc/systemd/system/{{ etcd_service }}.service.d"
+r_etcd_os_firewall_deny: []
+r_etcd_os_firewall_allow:
+- service: etcd
+ port: "{{etcd_client_port}}/tcp"
+- service: etcd peering
+ port: "{{ etcd_peer_port }}/tcp"
diff --git a/roles/etcd/meta/main.yml b/roles/etcd/meta/main.yml
index 689c07a84..9a955c822 100644
--- a/roles/etcd/meta/main.yml
+++ b/roles/etcd/meta/main.yml
@@ -17,11 +17,6 @@ galaxy_info:
- system
dependencies:
- role: lib_openshift
-- role: os_firewall
- os_firewall_allow:
- - service: etcd
- port: "{{etcd_client_port}}/tcp"
- - service: etcd peering
- port: "{{ etcd_peer_port }}/tcp"
+- role: lib_os_firewall
- role: etcd_server_certificates
- role: etcd_common
diff --git a/roles/etcd/tasks/firewall.yml b/roles/etcd/tasks/firewall.yml
new file mode 100644
index 000000000..4d0f6290a
--- /dev/null
+++ b/roles/etcd/tasks/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_etcd_firewall_enabled | bool and not r_etcd_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_etcd_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_etcd_os_firewall_deny }}"
+
+- when: r_etcd_firewall_enabled | bool and r_etcd_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_etcd_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_etcd_os_firewall_deny }}"
diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml
index 8c2f392ee..78e543ef1 100644
--- a/roles/etcd/tasks/main.yml
+++ b/roles/etcd/tasks/main.yml
@@ -6,6 +6,10 @@
etcd_hostname: "{{ etcd_hostname }}"
etcd_ip: "{{ etcd_ip }}"
+- name: setup firewall
+ include: firewall.yml
+ static: yes
+
- name: Install etcd
package: name=etcd{{ '-' + etcd_version if etcd_version is defined else '' }} state=present
when: not etcd_is_containerized | bool
diff --git a/roles/lib_os_firewall/README.md b/roles/lib_os_firewall/README.md
new file mode 100644
index 000000000..ba8c84865
--- /dev/null
+++ b/roles/lib_os_firewall/README.md
@@ -0,0 +1,63 @@
+lib_os_firewall
+===========
+
+lib_os_firewall manages iptables firewall settings for a minimal use
+case (Adding/Removing rules based on protocol and port number).
+
+Note: firewalld is not supported on Atomic Host
+https://bugzilla.redhat.com/show_bug.cgi?id=1403331
+
+Requirements
+------------
+
+Ansible 2.2
+
+Role Variables
+--------------
+
+| Name | Default | |
+|---------------------------|---------|----------------------------------------|
+| os_firewall_allow | [] | List of service,port mappings to allow |
+| os_firewall_deny | [] | List of service, port mappings to deny |
+
+Dependencies
+------------
+
+None.
+
+Example Playbook
+----------------
+
+Use iptables and open tcp ports 80 and 443:
+```
+---
+- hosts: servers
+ vars:
+ os_firewall_use_firewalld: false
+ os_firewall_allow:
+ - service: httpd
+ port: 80/tcp
+ - service: https
+ port: 443/tcp
+ tasks:
+ - include_role:
+ name: lib_os_firewall
+
+ - name: set allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ with_items: "{{ os_firewall_allow }}"
+```
+
+
+License
+-------
+
+Apache License, Version 2.0
+
+Author Information
+------------------
+Jason DeTiberus - jdetiber@redhat.com
diff --git a/roles/os_firewall/library/os_firewall_manage_iptables.py b/roles/lib_os_firewall/library/os_firewall_manage_iptables.py
index aeee3ede8..aeee3ede8 100755
--- a/roles/os_firewall/library/os_firewall_manage_iptables.py
+++ b/roles/lib_os_firewall/library/os_firewall_manage_iptables.py
diff --git a/roles/nuage_master/defaults/main.yml b/roles/nuage_master/defaults/main.yml
new file mode 100644
index 000000000..ffab25775
--- /dev/null
+++ b/roles/nuage_master/defaults/main.yml
@@ -0,0 +1,10 @@
+---
+r_nuage_master_firewall_enabled: True
+r_nuage_master_use_firewalld: False
+
+nuage_mon_rest_server_port: '9443'
+
+r_nuage_master_os_firewall_deny: []
+r_nuage_master_os_firewall_allow:
+- service: openshift-monitor
+ port: "{{ nuage_mon_rest_server_port }}/tcp"
diff --git a/roles/nuage_master/handlers/main.yaml b/roles/nuage_master/handlers/main.yaml
index 162aaae1a..ad7bbb111 100644
--- a/roles/nuage_master/handlers/main.yaml
+++ b/roles/nuage_master/handlers/main.yaml
@@ -3,10 +3,6 @@
become: yes
systemd: name=nuage-openshift-monitor state=restarted
-- name: restart master
- systemd: name={{ openshift.common.service_type }}-master state=restarted
- when: (not openshift_master_ha | bool) and (not master_service_status_changed | default(false))
-
- name: restart master api
systemd: name={{ openshift.common.service_type }}-master-api state=restarted
when: >
diff --git a/roles/nuage_master/meta/main.yml b/roles/nuage_master/meta/main.yml
index e3ed9ac71..3da340c85 100644
--- a/roles/nuage_master/meta/main.yml
+++ b/roles/nuage_master/meta/main.yml
@@ -16,8 +16,5 @@ dependencies:
- role: nuage_ca
- role: nuage_common
- role: openshift_etcd_client_certificates
-- role: os_firewall
- role: lib_openshift
- os_firewall_allow:
- - service: openshift-monitor
- port: "{{ nuage_mon_rest_server_port }}/tcp"
+- role: lib_os_firewall
diff --git a/roles/nuage_master/tasks/firewall.yml b/roles/nuage_master/tasks/firewall.yml
new file mode 100644
index 000000000..0057dc9ab
--- /dev/null
+++ b/roles/nuage_master/tasks/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_nuage_master_firewall_enabled | bool and not r_nuage_master_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_nuage_master_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_nuage_master_os_firewall_deny }}"
+
+- when: r_nuage_master_firewall_enabled | bool and r_nuage_master_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_nuage_master_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_nuage_master_os_firewall_deny }}"
diff --git a/roles/nuage_master/tasks/main.yaml b/roles/nuage_master/tasks/main.yaml
index 4f8adb63e..d0363c981 100644
--- a/roles/nuage_master/tasks/main.yaml
+++ b/roles/nuage_master/tasks/main.yaml
@@ -1,4 +1,8 @@
---
+- name: setup firewall
+ include: firewall.yml
+ static: yes
+
- name: Create directory /usr/share/nuage-openshift-monitor
become: yes
file: path=/usr/share/nuage-openshift-monitor state=directory
@@ -45,7 +49,6 @@
become: yes
template: src=nuage-openshift-monitor.j2 dest=/usr/share/nuage-openshift-monitor/nuage-openshift-monitor.yaml owner=root mode=0644
notify:
- - restart master
- restart master api
- restart master controllers
- restart nuage-openshift-monitor
diff --git a/roles/nuage_master/tasks/serviceaccount.yml b/roles/nuage_master/tasks/serviceaccount.yml
index eee448e2c..fbf2c4f8d 100644
--- a/roles/nuage_master/tasks/serviceaccount.yml
+++ b/roles/nuage_master/tasks/serviceaccount.yml
@@ -1,26 +1,6 @@
---
-- name: Create temporary directory for admin kubeconfig
- command: mktemp -u /tmp/openshift-ansible-XXXXXXX.kubeconfig
- register: nuage_tmp_conf_mktemp
- changed_when: False
- run_once: True
- delegate_to: "{{ nuage_ca_master }}"
-
-- set_fact:
- nuage_tmp_conf: "{{ nuage_tmp_conf_mktemp.stdout }}"
- run_once: True
- delegate_to: "{{ nuage_ca_master }}"
-
-- name: Copy Configuration to temporary conf
- command: >
- cp {{ openshift.common.config_base }}/master/admin.kubeconfig {{nuage_tmp_conf}}
- changed_when: false
- run_once: True
- delegate_to: "{{ nuage_ca_master }}"
-
- name: Create Admin Service Account
oc_serviceaccount:
- kubeconfig: "{{ openshift_master_config_dir }}/admin.kubeconfig"
name: nuage
namespace: default
state: present
@@ -28,13 +8,12 @@
delegate_to: "{{ nuage_ca_master }}"
- name: Configure role/user permissions
- command: >
- {{ openshift.common.client_binary }} adm {{item}}
- --config={{ nuage_tmp_conf }}
- with_items: "{{nuage_tasks}}"
- register: osnuage_perm_task
- failed_when: "'the object has been modified' not in osnuage_perm_task.stderr and osnuage_perm_task.rc != 0"
- changed_when: osnuage_perm_task.rc == 0
+ oc_adm_policy_user:
+ namespace: default
+ resource_name: "{{ item.resource_name }}"
+ resource_kind: "{{ item.resource_kind }}"
+ user: "{{ item.user }}"
+ with_items: "{{ nuage_tasks }}"
run_once: True
delegate_to: "{{ nuage_ca_master }}"
@@ -52,10 +31,3 @@
--user={{ nuage_service_account }}
delegate_to: "{{ nuage_ca_master }}"
run_once: True
-
-- name: Clean temporary configuration file
- command: >
- rm -f {{nuage_tmp_conf}}
- changed_when: false
- delegate_to: "{{ nuage_ca_master }}"
- run_once: True
diff --git a/roles/nuage_master/vars/main.yaml b/roles/nuage_master/vars/main.yaml
index 651d5775c..57d5d2595 100644
--- a/roles/nuage_master/vars/main.yaml
+++ b/roles/nuage_master/vars/main.yaml
@@ -23,4 +23,6 @@ nuage_master_crt_dir: /usr/share/nuage-openshift-monitor
nuage_service_account: system:serviceaccount:default:nuage
nuage_tasks:
- - policy add-cluster-role-to-user cluster-reader {{ nuage_service_account }}
+- resource_kind: cluster-role
+ resource_name: cluster-reader
+ user: "{{ nuage_service_account }}"
diff --git a/roles/nuage_node/defaults/main.yml b/roles/nuage_node/defaults/main.yml
new file mode 100644
index 000000000..b3d2e3cec
--- /dev/null
+++ b/roles/nuage_node/defaults/main.yml
@@ -0,0 +1,12 @@
+---
+r_nuage_node_firewall_enabled: True
+r_nuage_node_use_firewalld: False
+
+nuage_mon_rest_server_port: '9443'
+
+r_nuage_node_os_firewall_deny: []
+r_nuage_node_os_firewall_allow:
+- service: vxlan
+ port: 4789/udp
+- service: nuage-monitor
+ port: "{{ nuage_mon_rest_server_port }}/tcp"
diff --git a/roles/nuage_node/meta/main.yml b/roles/nuage_node/meta/main.yml
index 3e2a5e0c9..9b0315054 100644
--- a/roles/nuage_node/meta/main.yml
+++ b/roles/nuage_node/meta/main.yml
@@ -15,9 +15,4 @@ galaxy_info:
dependencies:
- role: nuage_common
- role: nuage_ca
-- role: os_firewall
- os_firewall_allow:
- - service: vxlan
- port: 4789/udp
- - service: nuage-monitor
- port: "{{ nuage_mon_rest_server_port }}/tcp"
+- role: lib_os_firewall
diff --git a/roles/nuage_node/tasks/firewall.yml b/roles/nuage_node/tasks/firewall.yml
new file mode 100644
index 000000000..baf600d57
--- /dev/null
+++ b/roles/nuage_node/tasks/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_nuage_node_firewall_enabled | bool and not r_nuage_node_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_nuage_node_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_nuage_node_os_firewall_deny }}"
+
+- when: r_nuage_node_firewall_enabled | bool and r_nuage_node_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_nuage_node_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_nuage_node_os_firewall_deny }}"
diff --git a/roles/nuage_node/tasks/main.yaml b/roles/nuage_node/tasks/main.yaml
index 928f9e2e6..9cd743304 100644
--- a/roles/nuage_node/tasks/main.yaml
+++ b/roles/nuage_node/tasks/main.yaml
@@ -54,3 +54,7 @@
- restart node
- include: iptables.yml
+
+- name: setup firewall
+ include: firewall.yml
+ static: yes
diff --git a/roles/nuage_node/templates/vsp-openshift.j2 b/roles/nuage_node/templates/vsp-openshift.j2
index 9fab53906..f6bccebc2 100644
--- a/roles/nuage_node/templates/vsp-openshift.j2
+++ b/roles/nuage_node/templates/vsp-openshift.j2
@@ -9,7 +9,7 @@ enterpriseName: {{ enterprise }}
# Name of the domain in which pods will reside
domainName: {{ domain }}
# Name of the VSD user in admin group
-vsdUser: {{ vsduser }}
+vsdUser: {{ vsd_user }}
# IP address and port number of master API server
masterApiServer: {{ api_server }}
# REST server URL
diff --git a/roles/openshift_cfme/defaults/main.yml b/roles/openshift_cfme/defaults/main.yml
index 79e59b410..27ed57703 100644
--- a/roles/openshift_cfme/defaults/main.yml
+++ b/roles/openshift_cfme/defaults/main.yml
@@ -35,9 +35,9 @@ openshift_cfme_nfs_server: "{{ groups.nfs.0 }}"
# --template=manageiq). If False everything UP TO 'new-app' is ran.
openshift_cfme_install_app: False
# Docker image to pull
-openshift_cfme_application_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-app' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods:app-latest-fine' }}"
-openshift_cfme_postgresql_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-postgresql' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods:app-latest-fine' }}"
-openshift_cfme_memcached_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-memcached' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods:app-latest-fine' }}"
+openshift_cfme_application_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-app' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods' }}"
+openshift_cfme_postgresql_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-postgresql' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods' }}"
+openshift_cfme_memcached_img_name: "{{ 'registry.access.redhat.com/cloudforms45/cfme-openshift-memcached' if openshift_deployment_type == 'openshift-enterprise' else 'docker.io/manageiq/manageiq-pods' }}"
openshift_cfme_application_img_tag: "{{ 'latest' if openshift_deployment_type == 'openshift-enterprise' else 'app-latest-fine' }}"
openshift_cfme_memcached_img_tag: "{{ 'latest' if openshift_deployment_type == 'openshift-enterprise' else 'memcached-latest-fine' }}"
openshift_cfme_postgresql_img_tag: "{{ 'latest' if openshift_deployment_type == 'openshift-enterprise' else 'postgresql-latest-fine' }}"
diff --git a/roles/openshift_cfme/handlers/main.yml b/roles/openshift_cfme/handlers/main.yml
index 476a5e030..7e90b09a4 100644
--- a/roles/openshift_cfme/handlers/main.yml
+++ b/roles/openshift_cfme/handlers/main.yml
@@ -6,19 +6,14 @@
# See: https://github.com/openshift/openshift-ansible/pull/4041#discussion_r118770782
######################################################################
-- name: restart master
- systemd: name={{ openshift.common.service_type }}-master state=restarted
- when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool))
- notify: Verify API Server
-
- name: restart master api
systemd: name={{ openshift.common.service_type }}-master-api state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
notify: Verify API Server
- name: restart master controllers
systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
- name: Verify API Server
# Using curl here since the uri module requires python-httplib2 and
diff --git a/roles/openshift_common/meta/main.yml b/roles/openshift_common/meta/main.yml
index cd8c75ec5..f1cf3e161 100644
--- a/roles/openshift_common/meta/main.yml
+++ b/roles/openshift_common/meta/main.yml
@@ -14,4 +14,3 @@ galaxy_info:
dependencies:
- role: openshift_facts
- role: openshift_repos
-- role: openshift_version
diff --git a/roles/openshift_docker/meta/main.yml b/roles/openshift_docker/meta/main.yml
index 10131f717..60efd4e45 100644
--- a/roles/openshift_docker/meta/main.yml
+++ b/roles/openshift_docker/meta/main.yml
@@ -12,6 +12,5 @@ galaxy_info:
categories:
- cloud
dependencies:
-- role: openshift_version
- role: openshift_docker_facts
- role: docker
diff --git a/roles/openshift_facts/defaults/main.yml b/roles/openshift_facts/defaults/main.yml
deleted file mode 100644
index cc4dc9365..000000000
--- a/roles/openshift_facts/defaults/main.yml
+++ /dev/null
@@ -1,2 +0,0 @@
----
-openshift_use_system_containers: false
diff --git a/roles/openshift_facts/meta/main.yml b/roles/openshift_facts/meta/main.yml
index 7eead2d6e..0be3afd24 100644
--- a/roles/openshift_facts/meta/main.yml
+++ b/roles/openshift_facts/meta/main.yml
@@ -12,5 +12,4 @@ galaxy_info:
categories:
- cloud
- system
-dependencies:
-- role: openshift_sanitize_inventory
+dependencies: []
diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml
deleted file mode 100644
index 4af02ab96..000000000
--- a/roles/openshift_facts/tasks/main.yml
+++ /dev/null
@@ -1,118 +0,0 @@
----
-- name: Detecting Operating System
- stat:
- path: /run/ostree-booted
- register: ostree_booted
-
-# Locally setup containerized facts for now
-- set_fact:
- l_is_atomic: "{{ ostree_booted.stat.exists }}"
- l_use_crio: "{{ openshift_docker_use_crio | default(false) }}"
-- set_fact:
- l_is_containerized: "{{ (l_is_atomic | bool) or (containerized | default(false) | bool) }}"
- l_is_openvswitch_system_container: "{{ (openshift_use_openvswitch_system_container | default(openshift_use_system_containers) | bool) }}"
- l_is_node_system_container: "{{ (openshift_use_node_system_container | default(openshift_use_system_containers) | bool) }}"
- l_is_master_system_container: "{{ (openshift_use_master_system_container | default(openshift_use_system_containers) | bool) }}"
- l_is_etcd_system_container: "{{ (openshift_use_etcd_system_container | default(openshift_use_system_containers) | bool) }}"
-- set_fact:
- l_any_system_container: "{{ l_is_etcd_system_container or l_is_openvswitch_system_container or l_is_node_system_container or l_is_master_system_container }}"
-- set_fact:
- l_etcd_runtime: "{{ 'runc' if l_is_etcd_system_container else 'docker' if l_is_containerized else 'host' }}"
-
-
-- name: Validate python version
- fail:
- msg: |
- openshift-ansible requires Python 3 for {{ ansible_distribution }};
- For information on enabling Python 3 with Ansible, see https://docs.ansible.com/ansible/python_3_support.html
- when:
- - ansible_distribution == 'Fedora'
- - ansible_python['version']['major'] != 3
- - r_openshift_facts_ran is not defined
-
-- name: Validate python version
- fail:
- msg: "openshift-ansible requires Python 2 for {{ ansible_distribution }}"
- when:
- - ansible_distribution != 'Fedora'
- - ansible_python['version']['major'] != 2
- - r_openshift_facts_ran is not defined
-
-# Fail as early as possible if Atomic and old version of Docker
-- block:
-
- # See https://access.redhat.com/articles/2317361
- # and https://github.com/ansible/ansible/issues/15892
- # NOTE: the "'s can not be removed at this level else the docker command will fail
- # NOTE: When ansible >2.2.1.x is used this can be updated per
- # https://github.com/openshift/openshift-ansible/pull/3475#discussion_r103525121
- - name: Determine Atomic Host Docker Version
- shell: 'CURLY="{"; docker version --format "$CURLY{json .Server.Version}}"'
- register: l_atomic_docker_version
-
- - assert:
- msg: Installation on Atomic Host requires Docker 1.12 or later. Please upgrade and restart the Atomic Host.
- that:
- - l_atomic_docker_version.stdout | replace('"', '') | version_compare('1.12','>=')
-
- when:
- - not l_use_crio
- - l_is_atomic | bool
- - r_openshift_facts_ran is not defined
-
-- name: Load variables
- include_vars: "{{ item }}"
- with_first_found:
- - "{{ ansible_distribution }}.yml"
- - "default.yml"
-
-- name: Ensure various deps are installed
- package: name={{ item }} state=present
- with_items: "{{ required_packages }}"
- when:
- - not l_is_atomic | bool
- - r_openshift_facts_ran is not defined
-
-- name: Ensure various deps for running system containers are installed
- package: name={{ item }} state=present
- with_items: "{{ required_system_containers_packages }}"
- when:
- - not l_is_atomic | bool
- - l_any_system_container | bool
- - r_openshift_facts_ran is not defined
-
-- name: Gather Cluster facts and set is_containerized if needed
- openshift_facts:
- role: common
- local_facts:
- debug_level: "{{ openshift_debug_level | default(2) }}"
- deployment_type: "{{ openshift_deployment_type }}"
- deployment_subtype: "{{ openshift_deployment_subtype | default(None) }}"
- cluster_id: "{{ openshift_cluster_id | default('default') }}"
- hostname: "{{ openshift_hostname | default(None) }}"
- ip: "{{ openshift_ip | default(None) }}"
- is_containerized: "{{ l_is_containerized | default(None) }}"
- is_openvswitch_system_container: "{{ l_is_openvswitch_system_container | default(false) }}"
- is_node_system_container: "{{ l_is_node_system_container | default(false) }}"
- is_master_system_container: "{{ l_is_master_system_container | default(false) }}"
- is_etcd_system_container: "{{ l_is_etcd_system_container | default(false) }}"
- etcd_runtime: "{{ l_etcd_runtime }}"
- system_images_registry: "{{ system_images_registry | default('') }}"
- public_hostname: "{{ openshift_public_hostname | default(None) }}"
- public_ip: "{{ openshift_public_ip | default(None) }}"
- portal_net: "{{ openshift_portal_net | default(openshift_master_portal_net) | default(None) }}"
- http_proxy: "{{ openshift_http_proxy | default(None) }}"
- https_proxy: "{{ openshift_https_proxy | default(None) }}"
- no_proxy: "{{ openshift_no_proxy | default(None) }}"
- generate_no_proxy_hosts: "{{ openshift_generate_no_proxy_hosts | default(True) }}"
- no_proxy_internal_hostnames: "{{ openshift_no_proxy_internal_hostnames | default(None) }}"
- sdn_network_plugin_name: "{{ os_sdn_network_plugin_name | default(None) }}"
- use_openshift_sdn: "{{ openshift_use_openshift_sdn | default(None) }}"
-
-- name: Set repoquery command
- set_fact:
- repoquery_cmd: "{{ 'dnf repoquery --latest-limit 1 -d 0' if ansible_pkg_mgr == 'dnf' else 'repoquery --plugins' }}"
-
-- name: Register that this already ran
- set_fact:
- r_openshift_facts_ran: True
diff --git a/roles/openshift_facts/vars/Fedora.yml b/roles/openshift_facts/vars/Fedora.yml
deleted file mode 100644
index 745f5f398..000000000
--- a/roles/openshift_facts/vars/Fedora.yml
+++ /dev/null
@@ -1,6 +0,0 @@
----
-required_packages:
- - iproute
- - python3-dbus
- - PyYAML
- - yum-utils
diff --git a/roles/openshift_facts/vars/default.yml b/roles/openshift_facts/vars/default.yml
deleted file mode 100644
index 3cd616d16..000000000
--- a/roles/openshift_facts/vars/default.yml
+++ /dev/null
@@ -1,6 +0,0 @@
----
-required_packages:
- - iproute
- - python-dbus
- - PyYAML
- - yum-utils
diff --git a/roles/openshift_facts/vars/main.yml b/roles/openshift_facts/vars/main.yml
deleted file mode 100644
index 89d4034d3..000000000
--- a/roles/openshift_facts/vars/main.yml
+++ /dev/null
@@ -1,5 +0,0 @@
----
-required_system_containers_packages:
- - atomic
- - ostree
- - runc
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
index 23da53940..05e53333d 100644
--- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py
+++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
@@ -68,13 +68,15 @@ class ActionModule(ActionBase):
msg=str(e),
)
+ if check.changed:
+ r["changed"] = True
check_results[check_name] = r
- if r.get("failed", False):
- result["failed"] = True
- result["msg"] = "One or more checks failed"
+ result["changed"] = any(r.get("changed") for r in check_results.values())
+ if any(r.get("failed") for r in check_results.values()):
+ result["failed"] = True
+ result["msg"] = "One or more checks failed"
- result["changed"] = any(r.get("changed", False) for r in check_results.values())
return result
def load_known_checks(self, tmp, task_vars):
diff --git a/roles/openshift_health_checker/meta/main.yml b/roles/openshift_health_checker/meta/main.yml
index 4d141974c..cd9b55902 100644
--- a/roles/openshift_health_checker/meta/main.yml
+++ b/roles/openshift_health_checker/meta/main.yml
@@ -2,4 +2,3 @@
dependencies:
- role: openshift_facts
- role: openshift_repos
- - role: openshift_version
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py
index 85cbc6224..07ec6f7ef 100644
--- a/roles/openshift_health_checker/openshift_checks/__init__.py
+++ b/roles/openshift_health_checker/openshift_checks/__init__.py
@@ -10,11 +10,34 @@ from importlib import import_module
from ansible.module_utils import six
from ansible.module_utils.six.moves import reduce # pylint: disable=import-error,redefined-builtin
+from ansible.plugins.filter.core import to_bool as ansible_to_bool
class OpenShiftCheckException(Exception):
- """Raised when a check cannot proceed."""
- pass
+ """Raised when a check encounters a failure condition."""
+
+ def __init__(self, name, msg=None):
+ # msg is for the message the user will see when this is raised.
+ # name is for test code to identify the error without looking at msg text.
+ if msg is None: # for parameter backward compatibility
+ msg = name
+ name = self.__class__.__name__
+ self.name = name
+ super(OpenShiftCheckException, self).__init__(msg)
+
+
+class OpenShiftCheckExceptionList(OpenShiftCheckException):
+ """A container for multiple logging errors that may be detected in one check."""
+ def __init__(self, errors):
+ self.errors = errors
+ super(OpenShiftCheckExceptionList, self).__init__(
+ 'OpenShiftCheckExceptionList',
+ '\n'.join(str(msg) for msg in errors)
+ )
+
+ # make iterable
+ def __getitem__(self, index):
+ return self.errors[index]
@six.add_metaclass(ABCMeta)
@@ -35,6 +58,9 @@ class OpenShiftCheck(object):
self.task_vars = task_vars or {}
self.tmp = tmp
+ # set to True when the check changes the host, for accurate total "changed" count
+ self.changed = False
+
@abstractproperty
def name(self):
"""The name of this check, usually derived from the class name."""
@@ -94,16 +120,59 @@ class OpenShiftCheck(object):
Ansible task_vars structures are Python dicts, often mapping strings to
other dicts. This helper makes it easier to get a nested value, raising
- OpenShiftCheckException when a key is not found or returning a default value
- provided as a keyword argument.
+ OpenShiftCheckException when a key is not found.
+
+ Keyword args:
+ default:
+ On missing key, return this as default value instead of raising exception.
+ convert:
+ Supply a function to apply to normalize the value before returning it.
+ None is the default (return as-is).
+ This function should raise ValueError if the user has provided a value
+ that cannot be converted, or OpenShiftCheckException if some other
+ problem needs to be described to the user.
"""
+ if len(keys) == 1:
+ keys = keys[0].split(".")
+
try:
value = reduce(operator.getitem, keys, self.task_vars)
except (KeyError, TypeError):
- if "default" in kwargs:
- return kwargs["default"]
- raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys))))
- return value
+ if "default" not in kwargs:
+ raise OpenShiftCheckException(
+ "This check expects the '{}' inventory variable to be defined\n"
+ "in order to proceed, but it is undefined. There may be a bug\n"
+ "in Ansible, the checks, or their dependencies."
+ "".format(".".join(map(str, keys)))
+ )
+ value = kwargs["default"]
+
+ convert = kwargs.get("convert", None)
+ try:
+ if convert is None:
+ return value
+ elif convert is bool: # interpret bool as Ansible does, instead of python truthiness
+ return ansible_to_bool(value)
+ else:
+ return convert(value)
+
+ except ValueError as error: # user error in specifying value
+ raise OpenShiftCheckException(
+ 'Cannot convert inventory variable to expected type:\n'
+ ' "{var}={value}"\n'
+ '{error}'.format(var=".".join(keys), value=value, error=error)
+ )
+
+ except OpenShiftCheckException: # some other check-specific problem
+ raise
+
+ except Exception as error: # probably a bug in the function
+ raise OpenShiftCheckException(
+ 'There is a bug in this check. While trying to convert variable \n'
+ ' "{var}={value}"\n'
+ 'the given converter cannot be used or failed unexpectedly:\n'
+ '{error}'.format(var=".".join(keys), value=value, error=error)
+ )
@staticmethod
def get_major_minor_version(openshift_image_tag):
@@ -128,6 +197,31 @@ class OpenShiftCheck(object):
components = tuple(int(x) for x in components[:2])
return components
+ def find_ansible_mount(self, path):
+ """Return the mount point for path from ansible_mounts."""
+
+ # reorganize list of mounts into dict by path
+ mount_for_path = {
+ mount['mount']: mount
+ for mount
+ in self.get_var('ansible_mounts')
+ }
+
+ # NOTE: including base cases '/' and '' to ensure the loop ends
+ mount_targets = set(mount_for_path.keys()) | {'/', ''}
+ mount_point = path
+ while mount_point not in mount_targets:
+ mount_point = os.path.dirname(mount_point)
+
+ try:
+ return mount_for_path[mount_point]
+ except KeyError:
+ known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path))
+ raise OpenShiftCheckException(
+ 'Unable to determine mount point for path "{}".\n'
+ 'Known mount points: {}.'.format(path, known_mounts or 'none')
+ )
+
LOADER_EXCLUDES = (
"__init__.py",
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py
index 39ac0e4ec..6d1dea9ce 100644
--- a/roles/openshift_health_checker/openshift_checks/disk_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py
@@ -1,6 +1,5 @@
"""Check that there is enough disk space in predefined paths."""
-import os.path
import tempfile
from openshift_checks import OpenShiftCheck, OpenShiftCheckException
@@ -55,9 +54,6 @@ class DiskAvailability(OpenShiftCheck):
def run(self):
group_names = self.get_var("group_names")
- ansible_mounts = self.get_var("ansible_mounts")
- ansible_mounts = {mount['mount']: mount for mount in ansible_mounts}
-
user_config = self.get_var("openshift_check_min_host_disk_gb", default={})
try:
# For backwards-compatibility, if openshift_check_min_host_disk_gb
@@ -80,7 +76,7 @@ class DiskAvailability(OpenShiftCheck):
# not part of the official recommendation but present in the user
# configuration.
for path, recommendation in self.recommended_disk_space_bytes.items():
- free_bytes = self.free_bytes(path, ansible_mounts)
+ free_bytes = self.free_bytes(path)
recommended_bytes = max(recommendation.get(name, 0) for name in group_names)
config = user_config.get(path, {})
@@ -127,22 +123,17 @@ class DiskAvailability(OpenShiftCheck):
return {}
- @staticmethod
- def free_bytes(path, ansible_mounts):
+ def free_bytes(self, path):
"""Return the size available in path based on ansible_mounts."""
- mount_point = path
- # arbitry value to prevent an infinite loop, in the unlike case that '/'
- # is not in ansible_mounts.
- max_depth = 32
- while mount_point not in ansible_mounts and max_depth > 0:
- mount_point = os.path.dirname(mount_point)
- max_depth -= 1
-
+ mount = self.find_ansible_mount(path)
try:
- free_bytes = ansible_mounts[mount_point]['size_available']
+ return mount['size_available']
except KeyError:
- known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(ansible_mounts)) or 'none'
- msg = 'Unable to determine disk availability for "{}". Known mount points: {}.'
- raise OpenShiftCheckException(msg.format(path, known_mounts))
-
- return free_bytes
+ raise OpenShiftCheckException(
+ 'Unable to retrieve disk availability for "{path}".\n'
+ 'Ansible facts included a matching mount point for this path:\n'
+ ' {mount}\n'
+ 'however it is missing the size_available field.\n'
+ 'To investigate, you can inspect the output of `ansible -m setup <host>`'
+ ''.format(path=path, mount=mount)
+ )
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 77180223e..85a922f86 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -41,11 +41,10 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
return super(DockerImageAvailability, self).is_active() and has_valid_deployment_type
def run(self):
- msg, failed, changed = self.ensure_dependencies()
+ msg, failed = self.ensure_dependencies()
if failed:
return {
"failed": True,
- "changed": changed,
"msg": "Some dependencies are required in order to check Docker image availability.\n" + msg
}
@@ -54,11 +53,11 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
# exit early if all images were found locally
if not missing_images:
- return {"changed": changed}
+ return {}
registries = self.known_docker_registries()
if not registries:
- return {"failed": True, "msg": "Unable to retrieve any docker registries.", "changed": changed}
+ return {"failed": True, "msg": "Unable to retrieve any docker registries."}
available_images = self.available_images(missing_images, registries)
unavailable_images = set(missing_images) - set(available_images)
@@ -70,10 +69,9 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
"One or more required Docker images are not available:\n {}\n"
"Configured registries: {}"
).format(",\n ".join(sorted(unavailable_images)), ", ".join(registries)),
- "changed": changed,
}
- return {"changed": changed}
+ return {}
def required_images(self):
"""
diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py
index dea15a56e..0558ddf14 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_storage.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py
@@ -1,6 +1,5 @@
"""Check Docker storage driver and usage."""
import json
-import os.path
import re
from openshift_checks import OpenShiftCheck, OpenShiftCheckException
from openshift_checks.mixins import DockerHostMixin
@@ -43,21 +42,20 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
]
def run(self):
- msg, failed, changed = self.ensure_dependencies()
+ msg, failed = self.ensure_dependencies()
if failed:
return {
"failed": True,
- "changed": changed,
"msg": "Some dependencies are required in order to query docker storage on host:\n" + msg
}
# attempt to get the docker info hash from the API
docker_info = self.execute_module("docker_info", {})
if docker_info.get("failed"):
- return {"failed": True, "changed": changed,
+ return {"failed": True,
"msg": "Failed to query Docker API. Is docker running on this host?"}
if not docker_info.get("info"): # this would be very strange
- return {"failed": True, "changed": changed,
+ return {"failed": True,
"msg": "Docker API query missing info:\n{}".format(json.dumps(docker_info))}
docker_info = docker_info["info"]
@@ -68,7 +66,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
"Detected unsupported Docker storage driver '{driver}'.\n"
"Supported storage drivers are: {drivers}"
).format(driver=driver, drivers=', '.join(self.storage_drivers))
- return {"failed": True, "changed": changed, "msg": msg}
+ return {"failed": True, "msg": msg}
# driver status info is a list of tuples; convert to dict and validate based on driver
driver_status = {item[0]: item[1] for item in docker_info.get("DriverStatus", [])}
@@ -81,7 +79,6 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
if driver in ['overlay', 'overlay2']:
result = self.check_overlay_support(docker_info, driver_status)
- result['changed'] = result.get('changed', False) or changed
return result
def check_devicemapper_support(self, driver_status):
@@ -254,7 +251,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
"msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(threshold),
}
- mount = self.find_ansible_mount(path, self.get_var("ansible_mounts"))
+ mount = self.find_ansible_mount(path)
try:
free_bytes = mount['size_available']
total_bytes = mount['size_total']
@@ -277,22 +274,3 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
}
return {}
-
- # TODO(lmeyer): migrate to base class
- @staticmethod
- def find_ansible_mount(path, ansible_mounts):
- """Return the mount point for path from ansible_mounts."""
-
- mount_for_path = {mount['mount']: mount for mount in ansible_mounts}
- mount_point = path
- while mount_point not in mount_for_path:
- if mount_point in ["/", ""]: # "/" not in ansible_mounts???
- break
- mount_point = os.path.dirname(mount_point)
-
- try:
- return mount_for_path[mount_point]
- except KeyError:
- known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) or 'none'
- msg = 'Unable to determine mount point for path "{}". Known mount points: {}.'
- raise OpenShiftCheckException(msg.format(path, known_mounts))
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
index 28c38504d..f4296753a 100644
--- a/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
+++ b/roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
@@ -2,7 +2,7 @@
Ansible module for determining if the size of OpenShift image data exceeds a specified limit in an etcd cluster.
"""
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException
+from openshift_checks import OpenShiftCheck
class EtcdImageDataSize(OpenShiftCheck):
@@ -12,7 +12,7 @@ class EtcdImageDataSize(OpenShiftCheck):
tags = ["etcd"]
def run(self):
- etcd_mountpath = self._get_etcd_mountpath(self.get_var("ansible_mounts"))
+ etcd_mountpath = self.find_ansible_mount("/var/lib/etcd")
etcd_avail_diskspace = etcd_mountpath["size_available"]
etcd_total_diskspace = etcd_mountpath["size_total"]
@@ -56,7 +56,7 @@ class EtcdImageDataSize(OpenShiftCheck):
reason = etcdkeysize["module_stderr"]
msg = msg.format(host=etcd_host, reason=reason)
- return {"failed": True, "changed": False, "msg": msg}
+ return {"failed": True, "msg": msg}
if etcdkeysize["size_limit_exceeded"]:
limit = self._to_gigabytes(etcd_imagedata_size_limit)
@@ -65,20 +65,7 @@ class EtcdImageDataSize(OpenShiftCheck):
"Use the `oadm prune images` command to cleanup unused Docker images.")
return {"failed": True, "msg": msg.format(host=etcd_host, limit=limit)}
- return {"changed": False}
-
- @staticmethod
- def _get_etcd_mountpath(ansible_mounts):
- valid_etcd_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]
-
- mount_for_path = {mnt.get("mount"): mnt for mnt in ansible_mounts}
- for path in valid_etcd_mount_paths:
- if path in mount_for_path:
- return mount_for_path[path]
-
- paths = ', '.join(sorted(mount_for_path)) or 'none'
- msg = "Unable to determine a valid etcd mountpath. Paths mounted: {}.".format(paths)
- raise OpenShiftCheckException(msg)
+ return {}
@staticmethod
def _to_gigabytes(byte_size):
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
index da7d0364a..e5d93ff3f 100644
--- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py
+++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
@@ -1,6 +1,6 @@
"""A health check for OpenShift clusters."""
-from openshift_checks import OpenShiftCheck, OpenShiftCheckException
+from openshift_checks import OpenShiftCheck
class EtcdVolume(OpenShiftCheck):
@@ -11,8 +11,8 @@ class EtcdVolume(OpenShiftCheck):
# Default device usage threshold. Value should be in the range [0, 100].
default_threshold_percent = 90
- # Where to find ectd data, higher priority first.
- supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]
+ # Where to find etcd data
+ etcd_mount_path = "/var/lib/etcd"
def is_active(self):
etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or []
@@ -20,7 +20,7 @@ class EtcdVolume(OpenShiftCheck):
return super(EtcdVolume, self).is_active() and is_etcd_host
def run(self):
- mount_info = self._etcd_mount_info()
+ mount_info = self.find_ansible_mount(self.etcd_mount_path)
available = mount_info["size_available"]
total = mount_info["size_total"]
used = total - available
@@ -40,16 +40,4 @@ class EtcdVolume(OpenShiftCheck):
)
return {"failed": True, "msg": msg}
- return {"changed": False}
-
- def _etcd_mount_info(self):
- ansible_mounts = self.get_var("ansible_mounts")
- mounts = {mnt.get("mount"): mnt for mnt in ansible_mounts}
-
- for path in self.supported_mount_paths:
- if path in mounts:
- return mounts[path]
-
- paths = ', '.join(sorted(mounts)) or 'none'
- msg = "Unable to find etcd storage mount point. Paths mounted: {}.".format(paths)
- raise OpenShiftCheckException(msg)
+ return {}
diff --git a/roles/openshift_health_checker/openshift_checks/logging/curator.py b/roles/openshift_health_checker/openshift_checks/logging/curator.py
index 32d853d57..b27f97172 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/curator.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/curator.py
@@ -1,6 +1,6 @@
"""Check for an aggregated logging Curator deployment"""
-from openshift_checks.logging.logging import LoggingCheck
+from openshift_checks.logging.logging import OpenShiftCheckException, LoggingCheck
class Curator(LoggingCheck):
@@ -12,27 +12,17 @@ class Curator(LoggingCheck):
def run(self):
"""Check various things and gather errors. Returns: result as hash"""
- self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging")
- curator_pods, error = self.get_pods_for_component(
- self.logging_namespace,
- "curator",
- )
- if error:
- return {"failed": True, "changed": False, "msg": error}
- check_error = self.check_curator(curator_pods)
-
- if check_error:
- msg = ("The following Curator deployment issue was found:"
- "{}".format(check_error))
- return {"failed": True, "changed": False, "msg": msg}
-
+ curator_pods = self.get_pods_for_component("curator")
+ self.check_curator(curator_pods)
# TODO(lmeyer): run it all again for the ops cluster
- return {"failed": False, "changed": False, "msg": 'No problems found with Curator deployment.'}
+
+ return {}
def check_curator(self, pods):
"""Check to see if curator is up and working. Returns: error string"""
if not pods:
- return (
+ raise OpenShiftCheckException(
+ "MissingComponentPods",
"There are no Curator pods for the logging stack,\n"
"so nothing will prune Elasticsearch indexes.\n"
"Is Curator correctly deployed?"
@@ -40,14 +30,14 @@ class Curator(LoggingCheck):
not_running = self.not_running_pods(pods)
if len(not_running) == len(pods):
- return (
+ raise OpenShiftCheckException(
+ "CuratorNotRunning",
"The Curator pod is not currently in a running state,\n"
"so Elasticsearch indexes may increase without bound."
)
if len(pods) - len(not_running) > 1:
- return (
+ raise OpenShiftCheckException(
+ "TooManyCurators",
"There is more than one Curator pod running. This should not normally happen.\n"
"Although this doesn't cause any problems, you may want to investigate."
)
-
- return None
diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
index 8bdda1f32..7fc843fd7 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py
@@ -3,6 +3,7 @@
import json
import re
+from openshift_checks import OpenShiftCheckException, OpenShiftCheckExceptionList
from openshift_checks.logging.logging import LoggingCheck
@@ -15,168 +16,178 @@ class Elasticsearch(LoggingCheck):
def run(self):
"""Check various things and gather errors. Returns: result as hash"""
- self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging")
- es_pods, error = self.get_pods_for_component(
- self.logging_namespace,
- "es",
- )
- if error:
- return {"failed": True, "changed": False, "msg": error}
- check_error = self.check_elasticsearch(es_pods)
-
- if check_error:
- msg = ("The following Elasticsearch deployment issue was found:"
- "{}".format(check_error))
- return {"failed": True, "changed": False, "msg": msg}
-
+ es_pods = self.get_pods_for_component("es")
+ self.check_elasticsearch(es_pods)
# TODO(lmeyer): run it all again for the ops cluster
- return {"failed": False, "changed": False, "msg": 'No problems found with Elasticsearch deployment.'}
- def _not_running_elasticsearch_pods(self, es_pods):
- """Returns: list of pods that are not running, list of errors about non-running pods"""
- not_running = self.not_running_pods(es_pods)
- if not_running:
- return not_running, [(
- 'The following Elasticsearch pods are not running:\n'
- '{pods}'
- 'These pods will not aggregate logs from their nodes.'
- ).format(pods=''.join(
- " {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None'))
- for pod in not_running
- ))]
- return not_running, []
+ return {}
def check_elasticsearch(self, es_pods):
- """Various checks for elasticsearch. Returns: error string"""
- not_running_pods, error_msgs = self._not_running_elasticsearch_pods(es_pods)
- running_pods = [pod for pod in es_pods if pod not in not_running_pods]
+ """Perform checks for Elasticsearch. Raises OpenShiftCheckExceptionList on any errors."""
+ running_pods, errors = self.running_elasticsearch_pods(es_pods)
pods_by_name = {
pod['metadata']['name']: pod for pod in running_pods
# Filter out pods that are not members of a DC
if pod['metadata'].get('labels', {}).get('deploymentconfig')
}
if not pods_by_name:
- return 'No logging Elasticsearch pods were found. Is logging deployed?'
- error_msgs += self._check_elasticsearch_masters(pods_by_name)
- error_msgs += self._check_elasticsearch_node_list(pods_by_name)
- error_msgs += self._check_es_cluster_health(pods_by_name)
- error_msgs += self._check_elasticsearch_diskspace(pods_by_name)
- return '\n'.join(error_msgs)
+ # nothing running, cannot run the rest of the check
+ errors.append(OpenShiftCheckException(
+ 'NoRunningPods',
+ 'No logging Elasticsearch pods were found running, so no logs are being aggregated.'
+ ))
+ raise OpenShiftCheckExceptionList(errors)
+
+ errors += self.check_elasticsearch_masters(pods_by_name)
+ errors += self.check_elasticsearch_node_list(pods_by_name)
+ errors += self.check_es_cluster_health(pods_by_name)
+ errors += self.check_elasticsearch_diskspace(pods_by_name)
+ if errors:
+ raise OpenShiftCheckExceptionList(errors)
+
+ def running_elasticsearch_pods(self, es_pods):
+ """Returns: list of running pods, list of errors about non-running pods"""
+ not_running = self.not_running_pods(es_pods)
+ running_pods = [pod for pod in es_pods if pod not in not_running]
+ if not_running:
+ return running_pods, [OpenShiftCheckException(
+ 'PodNotRunning',
+ 'The following Elasticsearch pods are defined but not running:\n'
+ '{pods}'.format(pods=''.join(
+ " {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None'))
+ for pod in not_running
+ ))
+ )]
+ return running_pods, []
@staticmethod
def _build_es_curl_cmd(pod_name, url):
base = "exec {name} -- curl -s --cert {base}cert --key {base}key --cacert {base}ca -XGET '{url}'"
return base.format(base="/etc/elasticsearch/secret/admin-", name=pod_name, url=url)
- def _check_elasticsearch_masters(self, pods_by_name):
- """Check that Elasticsearch masters are sane. Returns: list of error strings"""
+ def check_elasticsearch_masters(self, pods_by_name):
+ """Check that Elasticsearch masters are sane. Returns: list of errors"""
es_master_names = set()
- error_msgs = []
+ errors = []
for pod_name in pods_by_name.keys():
# Compare what each ES node reports as master and compare for split brain
get_master_cmd = self._build_es_curl_cmd(pod_name, "https://localhost:9200/_cat/master")
- master_name_str = self.exec_oc(self.logging_namespace, get_master_cmd, [])
+ master_name_str = self.exec_oc(get_master_cmd, [])
master_names = (master_name_str or '').split(' ')
if len(master_names) > 1:
es_master_names.add(master_names[1])
else:
- error_msgs.append(
- 'No master? Elasticsearch {pod} returned bad string when asked master name:\n'
+ errors.append(OpenShiftCheckException(
+ 'NoMasterName',
+ 'Elasticsearch {pod} gave unexpected response when asked master name:\n'
' {response}'.format(pod=pod_name, response=master_name_str)
- )
+ ))
if not es_master_names:
- error_msgs.append('No logging Elasticsearch masters were found. Is logging deployed?')
- return '\n'.join(error_msgs)
+ errors.append(OpenShiftCheckException(
+ 'NoMasterFound',
+ 'No logging Elasticsearch masters were found.'
+ ))
+ return errors
if len(es_master_names) > 1:
- error_msgs.append(
+ errors.append(OpenShiftCheckException(
+ 'SplitBrainMasters',
'Found multiple Elasticsearch masters according to the pods:\n'
'{master_list}\n'
'This implies that the masters have "split brain" and are not correctly\n'
'replicating data for the logging cluster. Log loss is likely to occur.'
.format(master_list='\n'.join(' ' + master for master in es_master_names))
- )
+ ))
- return error_msgs
+ return errors
- def _check_elasticsearch_node_list(self, pods_by_name):
- """Check that reported ES masters are accounted for by pods. Returns: list of error strings"""
+ def check_elasticsearch_node_list(self, pods_by_name):
+ """Check that reported ES masters are accounted for by pods. Returns: list of errors"""
if not pods_by_name:
- return ['No logging Elasticsearch masters were found. Is logging deployed?']
+ return [OpenShiftCheckException(
+ 'MissingComponentPods',
+ 'No logging Elasticsearch pods were found.'
+ )]
# get ES cluster nodes
node_cmd = self._build_es_curl_cmd(list(pods_by_name.keys())[0], 'https://localhost:9200/_nodes')
- cluster_node_data = self.exec_oc(self.logging_namespace, node_cmd, [])
+ cluster_node_data = self.exec_oc(node_cmd, [])
try:
cluster_nodes = json.loads(cluster_node_data)['nodes']
except (ValueError, KeyError):
- return [
+ return [OpenShiftCheckException(
+ 'MissingNodeList',
'Failed to query Elasticsearch for the list of ES nodes. The output was:\n' +
cluster_node_data
- ]
+ )]
# Try to match all ES-reported node hosts to known pods.
- error_msgs = []
+ errors = []
for node in cluster_nodes.values():
# Note that with 1.4/3.4 the pod IP may be used as the master name
if not any(node['host'] in (pod_name, pod['status'].get('podIP'))
for pod_name, pod in pods_by_name.items()):
- error_msgs.append(
+ errors.append(OpenShiftCheckException(
+ 'EsPodNodeMismatch',
'The Elasticsearch cluster reports a member node "{node}"\n'
'that does not correspond to any known ES pod.'.format(node=node['host'])
- )
+ ))
- return error_msgs
+ return errors
- def _check_es_cluster_health(self, pods_by_name):
+ def check_es_cluster_health(self, pods_by_name):
"""Exec into the elasticsearch pods and check the cluster health. Returns: list of errors"""
- error_msgs = []
+ errors = []
for pod_name in pods_by_name.keys():
cluster_health_cmd = self._build_es_curl_cmd(pod_name, 'https://localhost:9200/_cluster/health?pretty=true')
- cluster_health_data = self.exec_oc(self.logging_namespace, cluster_health_cmd, [])
+ cluster_health_data = self.exec_oc(cluster_health_cmd, [])
try:
health_res = json.loads(cluster_health_data)
if not health_res or not health_res.get('status'):
raise ValueError()
except ValueError:
- error_msgs.append(
+ errors.append(OpenShiftCheckException(
+ 'BadEsResponse',
'Could not retrieve cluster health status from logging ES pod "{pod}".\n'
'Response was:\n{output}'.format(pod=pod_name, output=cluster_health_data)
- )
+ ))
continue
if health_res['status'] not in ['green', 'yellow']:
- error_msgs.append(
+ errors.append(OpenShiftCheckException(
+ 'EsClusterHealthRed',
'Elasticsearch cluster health status is RED according to pod "{}"'.format(pod_name)
- )
+ ))
- return error_msgs
+ return errors
- def _check_elasticsearch_diskspace(self, pods_by_name):
+ def check_elasticsearch_diskspace(self, pods_by_name):
"""
Exec into an ES pod and query the diskspace on the persistent volume.
Returns: list of errors
"""
- error_msgs = []
+ errors = []
for pod_name in pods_by_name.keys():
df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name)
- disk_output = self.exec_oc(self.logging_namespace, df_cmd, [])
+ disk_output = self.exec_oc(df_cmd, [])
lines = disk_output.splitlines()
# expecting one header looking like 'IUse% Use%' and one body line
body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$'
if len(lines) != 2 or len(lines[0].split()) != 2 or not re.match(body_re, lines[1]):
- error_msgs.append(
+ errors.append(OpenShiftCheckException(
+ 'BadDfResponse',
'Could not retrieve storage usage from logging ES pod "{pod}".\n'
'Response to `df` command was:\n{output}'.format(pod=pod_name, output=disk_output)
- )
+ ))
continue
inode_pct, disk_pct = re.match(body_re, lines[1]).groups()
inode_pct_thresh = self.get_var('openshift_check_efk_es_inode_pct', default='90')
if int(inode_pct) >= int(inode_pct_thresh):
- error_msgs.append(
+ errors.append(OpenShiftCheckException(
+ 'InodeUsageTooHigh',
'Inode percent usage on the storage volume for logging ES pod "{pod}"\n'
' is {pct}, greater than threshold {limit}.\n'
' Note: threshold can be specified in inventory with {param}'.format(
@@ -184,10 +195,11 @@ class Elasticsearch(LoggingCheck):
pct=str(inode_pct),
limit=str(inode_pct_thresh),
param='openshift_check_efk_es_inode_pct',
- ))
+ )))
disk_pct_thresh = self.get_var('openshift_check_efk_es_storage_pct', default='80')
if int(disk_pct) >= int(disk_pct_thresh):
- error_msgs.append(
+ errors.append(OpenShiftCheckException(
+ 'DiskUsageTooHigh',
'Disk percent usage on the storage volume for logging ES pod "{pod}"\n'
' is {pct}, greater than threshold {limit}.\n'
' Note: threshold can be specified in inventory with {param}'.format(
@@ -195,6 +207,6 @@ class Elasticsearch(LoggingCheck):
pct=str(disk_pct),
limit=str(disk_pct_thresh),
param='openshift_check_efk_es_storage_pct',
- ))
+ )))
- return error_msgs
+ return errors
diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py
index b3485bf44..3b192a281 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py
@@ -2,6 +2,8 @@
import json
+
+from openshift_checks import OpenShiftCheckException, OpenShiftCheckExceptionList
from openshift_checks.logging.logging import LoggingCheck
@@ -12,57 +14,96 @@ class Fluentd(LoggingCheck):
tags = ["health", "logging"]
def run(self):
- """Check various things and gather errors. Returns: result as hash"""
+ """Check the Fluentd deployment and raise an error if any problems are found."""
+
+ fluentd_pods = self.get_pods_for_component("fluentd")
+ self.check_fluentd(fluentd_pods)
+ return {}
+
+ def check_fluentd(self, pods):
+ """Verify fluentd is running everywhere. Raises OpenShiftCheckExceptionList if error(s) found."""
- self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging")
- fluentd_pods, error = super(Fluentd, self).get_pods_for_component(
- self.logging_namespace,
- "fluentd",
+ node_selector = self.get_var(
+ 'openshift_logging_fluentd_nodeselector',
+ default='logging-infra-fluentd=true'
)
- if error:
- return {"failed": True, "changed": False, "msg": error}
- check_error = self.check_fluentd(fluentd_pods)
- if check_error:
- msg = ("The following Fluentd deployment issue was found:"
- "{}".format(check_error))
- return {"failed": True, "changed": False, "msg": msg}
+ nodes_by_name = self.get_nodes_by_name()
+ fluentd_nodes = self.filter_fluentd_labeled_nodes(nodes_by_name, node_selector)
- # TODO(lmeyer): run it all again for the ops cluster
- return {"failed": False, "changed": False, "msg": 'No problems found with Fluentd deployment.'}
+ errors = []
+ errors += self.check_node_labeling(nodes_by_name, fluentd_nodes, node_selector)
+ errors += self.check_nodes_have_fluentd(pods, fluentd_nodes)
+ errors += self.check_fluentd_pods_running(pods)
+
+ # Make sure there are no extra fluentd pods
+ if len(pods) > len(fluentd_nodes):
+ errors.append(OpenShiftCheckException(
+ 'TooManyFluentdPods',
+ 'There are more Fluentd pods running than nodes labeled.\n'
+ 'This may not cause problems with logging but it likely indicates something wrong.'
+ ))
+
+ if errors:
+ raise OpenShiftCheckExceptionList(errors)
+
+ def get_nodes_by_name(self):
+ """Retrieve all the node definitions. Returns: dict(name: node)"""
+ nodes_json = self.exec_oc("get nodes -o json", [])
+ try:
+ nodes = json.loads(nodes_json)
+ except ValueError: # no valid json - should not happen
+ raise OpenShiftCheckException(
+ "BadOcNodeList",
+ "Could not obtain a list of nodes to validate fluentd.\n"
+ "Output from oc get:\n" + nodes_json
+ )
+ if not nodes or not nodes.get('items'): # also should not happen
+ raise OpenShiftCheckException(
+ "NoNodesDefined",
+ "No nodes appear to be defined according to the API."
+ )
+ return {
+ node['metadata']['name']: node
+ for node in nodes['items']
+ }
@staticmethod
- def _filter_fluentd_labeled_nodes(nodes_by_name, node_selector):
- """Filter to all nodes with fluentd label. Returns dict(name: node), error string"""
+ def filter_fluentd_labeled_nodes(nodes_by_name, node_selector):
+ """Filter to all nodes with fluentd label. Returns dict(name: node)"""
label, value = node_selector.split('=', 1)
fluentd_nodes = {
name: node for name, node in nodes_by_name.items()
if node['metadata']['labels'].get(label) == value
}
if not fluentd_nodes:
- return None, (
+ raise OpenShiftCheckException(
+ 'NoNodesLabeled',
'There are no nodes with the fluentd label {label}.\n'
- 'This means no logs will be aggregated from the nodes.'
- ).format(label=node_selector)
- return fluentd_nodes, None
+ 'This means no logs will be aggregated from the nodes.'.format(label=node_selector)
+ )
+ return fluentd_nodes
- def _check_node_labeling(self, nodes_by_name, fluentd_nodes, node_selector):
- """Note if nodes are not labeled as expected. Returns: error string"""
+ def check_node_labeling(self, nodes_by_name, fluentd_nodes, node_selector):
+ """Note if nodes are not labeled as expected. Returns: error list"""
intended_nodes = self.get_var('openshift_logging_fluentd_hosts', default=['--all'])
if not intended_nodes or '--all' in intended_nodes:
intended_nodes = nodes_by_name.keys()
nodes_missing_labels = set(intended_nodes) - set(fluentd_nodes.keys())
if nodes_missing_labels:
- return (
+ return [OpenShiftCheckException(
+ 'NodesUnlabeled',
'The following nodes are supposed to be labeled with {label} but are not:\n'
' {nodes}\n'
- 'Fluentd will not aggregate logs from these nodes.'
- ).format(label=node_selector, nodes=', '.join(nodes_missing_labels))
- return None
+ 'Fluentd will not aggregate logs from these nodes.'.format(
+ label=node_selector, nodes=', '.join(nodes_missing_labels)
+ ))]
+
+ return []
@staticmethod
- def _check_nodes_have_fluentd(pods, fluentd_nodes):
- """Make sure fluentd is on all the labeled nodes. Returns: error string"""
+ def check_nodes_have_fluentd(pods, fluentd_nodes):
+ """Make sure fluentd is on all the labeled nodes. Returns: error list"""
unmatched_nodes = fluentd_nodes.copy()
node_names_by_label = {
node['metadata']['labels']['kubernetes.io/hostname']: name
@@ -82,80 +123,32 @@ class Fluentd(LoggingCheck):
]:
unmatched_nodes.pop(name, None)
if unmatched_nodes:
- return (
+ return [OpenShiftCheckException(
+ 'MissingFluentdPod',
'The following nodes are supposed to have a Fluentd pod but do not:\n'
- '{nodes}'
- 'These nodes will not have their logs aggregated.'
- ).format(nodes=''.join(
- " {}\n".format(name)
- for name in unmatched_nodes.keys()
- ))
- return None
+ ' {nodes}\n'
+ 'These nodes will not have their logs aggregated.'.format(
+ nodes='\n '.join(unmatched_nodes.keys())
+ ))]
+
+ return []
- def _check_fluentd_pods_running(self, pods):
+ def check_fluentd_pods_running(self, pods):
"""Make sure all fluentd pods are running. Returns: error string"""
not_running = super(Fluentd, self).not_running_pods(pods)
if not_running:
- return (
+ return [OpenShiftCheckException(
+ 'FluentdNotRunning',
'The following Fluentd pods are supposed to be running but are not:\n'
- '{pods}'
- 'These pods will not aggregate logs from their nodes.'
- ).format(pods=''.join(
- " {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None'))
- for pod in not_running
- ))
- return None
-
- def check_fluentd(self, pods):
- """Verify fluentd is running everywhere. Returns: error string"""
-
- node_selector = self.get_var(
- 'openshift_logging_fluentd_nodeselector',
- default='logging-infra-fluentd=true'
- )
-
- nodes_by_name, error = self.get_nodes_by_name()
-
- if error:
- return error
- fluentd_nodes, error = self._filter_fluentd_labeled_nodes(nodes_by_name, node_selector)
- if error:
- return error
-
- error_msgs = []
- error = self._check_node_labeling(nodes_by_name, fluentd_nodes, node_selector)
- if error:
- error_msgs.append(error)
- error = self._check_nodes_have_fluentd(pods, fluentd_nodes)
- if error:
- error_msgs.append(error)
- error = self._check_fluentd_pods_running(pods)
- if error:
- error_msgs.append(error)
-
- # Make sure there are no extra fluentd pods
- if len(pods) > len(fluentd_nodes):
- error_msgs.append(
- 'There are more Fluentd pods running than nodes labeled.\n'
- 'This may not cause problems with logging but it likely indicates something wrong.'
- )
-
- return '\n'.join(error_msgs)
-
- def get_nodes_by_name(self):
- """Retrieve all the node definitions. Returns: dict(name: node), error string"""
- nodes_json = self.exec_oc(
- self.logging_namespace,
- "get nodes -o json",
- []
- )
- try:
- nodes = json.loads(nodes_json)
- except ValueError: # no valid json - should not happen
- return None, "Could not obtain a list of nodes to validate fluentd. Output from oc get:\n" + nodes_json
- if not nodes or not nodes.get('items'): # also should not happen
- return None, "No nodes appear to be defined according to the API."
- return {
- node['metadata']['name']: node
- for node in nodes['items']
- }, None
+ ' {pods}\n'
+ 'These pods will not aggregate logs from their nodes.'.format(
+ pods='\n'.join(
+ " {name} ({host})".format(
+ name=pod['metadata']['name'],
+ host=pod['spec'].get('host', 'None')
+ )
+ for pod in not_running
+ )
+ ))]
+
+ return []
diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py
index 0970f0a63..d783e6760 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py
@@ -24,7 +24,6 @@ class FluentdConfig(LoggingCheck):
def run(self):
"""Check that Fluentd has running pods, and that its logging config matches Docker's logging config."""
- self.logging_namespace = self.get_var("openshift_logging_namespace", default=self.logging_namespace)
config_error = self.check_logging_config()
if config_error:
msg = ("The following Fluentd logging configuration problem was found:"
@@ -120,19 +119,13 @@ class FluentdConfig(LoggingCheck):
def running_fluentd_pods(self):
"""Return a list of running fluentd pods."""
- fluentd_pods, error = self.get_pods_for_component(
- self.logging_namespace,
- "fluentd",
- )
- if error:
- msg = 'Unable to retrieve any pods for the "fluentd" logging component: {}'.format(error)
- raise OpenShiftCheckException(msg)
+ fluentd_pods = self.get_pods_for_component("fluentd")
running_fluentd_pods = [pod for pod in fluentd_pods if pod['status']['phase'] == 'Running']
if not running_fluentd_pods:
- msg = ('No Fluentd pods were found to be in the "Running" state. '
- 'At least one Fluentd pod is required in order to perform this check.')
-
- raise OpenShiftCheckException(msg)
+ raise OpenShiftCheckException(
+ 'No Fluentd pods were found to be in the "Running" state. '
+ 'At least one Fluentd pod is required in order to perform this check.'
+ )
return running_fluentd_pods
diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
index efb14ab42..3b1cf8baa 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
@@ -12,7 +12,7 @@ except ImportError:
from urllib.error import HTTPError, URLError
import urllib.request as urllib2
-from openshift_checks.logging.logging import LoggingCheck
+from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException
class Kibana(LoggingCheck):
@@ -24,25 +24,12 @@ class Kibana(LoggingCheck):
def run(self):
"""Check various things and gather errors. Returns: result as hash"""
- self.logging_namespace = self.get_var("openshift_logging_namespace", default="logging")
- kibana_pods, error = self.get_pods_for_component(
- self.logging_namespace,
- "kibana",
- )
- if error:
- return {"failed": True, "changed": False, "msg": error}
- check_error = self.check_kibana(kibana_pods)
-
- if not check_error:
- check_error = self._check_kibana_route()
-
- if check_error:
- msg = ("The following Kibana deployment issue was found:"
- "{}".format(check_error))
- return {"failed": True, "changed": False, "msg": msg}
-
+ kibana_pods = self.get_pods_for_component("kibana")
+ self.check_kibana(kibana_pods)
+ self.check_kibana_route()
# TODO(lmeyer): run it all again for the ops cluster
- return {"failed": False, "changed": False, "msg": 'No problems found with Kibana deployment.'}
+
+ return {}
def _verify_url_internal(self, url):
"""
@@ -65,7 +52,7 @@ class Kibana(LoggingCheck):
def _verify_url_external(url):
"""
Try to reach a URL from ansible control host.
- Returns: success (bool), reason (for failure)
+ Raise an OpenShiftCheckException if anything goes wrong.
"""
# This actually checks from the ansible control host, which may or may not
# really be "external" to the cluster.
@@ -91,130 +78,149 @@ class Kibana(LoggingCheck):
return None
def check_kibana(self, pods):
- """Check to see if Kibana is up and working. Returns: error string."""
+ """Check to see if Kibana is up and working. Raises OpenShiftCheckException if not."""
if not pods:
- return "There are no Kibana pods deployed, so no access to the logging UI."
+ raise OpenShiftCheckException(
+ "MissingComponentPods",
+ "There are no Kibana pods deployed, so no access to the logging UI."
+ )
not_running = self.not_running_pods(pods)
if len(not_running) == len(pods):
- return "No Kibana pod is in a running state, so there is no access to the logging UI."
+ raise OpenShiftCheckException(
+ "NoRunningPods",
+ "No Kibana pod is in a running state, so there is no access to the logging UI."
+ )
elif not_running:
- return (
+ raise OpenShiftCheckException(
+ "PodNotRunning",
"The following Kibana pods are not currently in a running state:\n"
- "{pods}"
- "However at least one is, so service may not be impacted."
- ).format(pods="".join(" " + pod['metadata']['name'] + "\n" for pod in not_running))
-
- return None
+ " {pods}\n"
+ "However at least one is, so service may not be impacted.".format(
+ pods="\n ".join(pod['metadata']['name'] for pod in not_running)
+ )
+ )
def _get_kibana_url(self):
"""
Get kibana route or report error.
- Returns: url (or empty), reason for failure
+ Returns: url
"""
# Get logging url
- get_route = self.exec_oc(
- self.logging_namespace,
- "get route logging-kibana -o json",
- [],
- )
+ get_route = self.exec_oc("get route logging-kibana -o json", [])
if not get_route:
- return None, 'no_route_exists'
+ raise OpenShiftCheckException(
+ 'no_route_exists',
+ 'No route is defined for Kibana in the logging namespace,\n'
+ 'so the logging stack is not accessible. Is logging deployed?\n'
+ 'Did something remove the logging-kibana route?'
+ )
- route = json.loads(get_route)
+ try:
+ route = json.loads(get_route)
+ # check that the route has been accepted by a router
+ ingress = route["status"]["ingress"]
+ except (ValueError, KeyError):
+ raise OpenShiftCheckException(
+ 'get_route_failed',
+ '"oc get route" returned an unexpected response:\n' + get_route
+ )
- # check that the route has been accepted by a router
- ingress = route["status"]["ingress"]
# ingress can be null if there is no router, or empty if not routed
if not ingress or not ingress[0]:
- return None, 'route_not_accepted'
+ raise OpenShiftCheckException(
+ 'route_not_accepted',
+ 'The logging-kibana route is not being routed by any router.\n'
+ 'Is the router deployed and working?'
+ )
host = route.get("spec", {}).get("host")
if not host:
- return None, 'route_missing_host'
+ raise OpenShiftCheckException(
+ 'route_missing_host',
+ 'The logging-kibana route has no hostname defined,\n'
+ 'which should never happen. Did something alter its definition?'
+ )
- return 'https://{}/'.format(host), None
+ return 'https://{}/'.format(host)
- def _check_kibana_route(self):
+ def check_kibana_route(self):
"""
Check to see if kibana route is up and working.
- Returns: error string
+ Raises exception if not.
"""
- known_errors = dict(
- no_route_exists=(
- 'No route is defined for Kibana in the logging namespace,\n'
- 'so the logging stack is not accessible. Is logging deployed?\n'
- 'Did something remove the logging-kibana route?'
- ),
- route_not_accepted=(
- 'The logging-kibana route is not being routed by any router.\n'
- 'Is the router deployed and working?'
- ),
- route_missing_host=(
- 'The logging-kibana route has no hostname defined,\n'
- 'which should never happen. Did something alter its definition?'
- ),
- )
- kibana_url, error = self._get_kibana_url()
- if not kibana_url:
- return known_errors.get(error, error)
+ kibana_url = self._get_kibana_url()
# first, check that kibana is reachable from the master.
error = self._verify_url_internal(kibana_url)
if error:
if 'urlopen error [Errno 111] Connection refused' in error:
- error = (
+ raise OpenShiftCheckException(
+ 'FailedToConnectInternal',
'Failed to connect from this master to Kibana URL {url}\n'
- 'Is kibana running, and is at least one router routing to it?'
- ).format(url=kibana_url)
+ 'Is kibana running, and is at least one router routing to it?'.format(url=kibana_url)
+ )
elif 'urlopen error [Errno -2] Name or service not known' in error:
- error = (
+ raise OpenShiftCheckException(
+ 'FailedToResolveInternal',
'Failed to connect from this master to Kibana URL {url}\n'
'because the hostname does not resolve.\n'
- 'Is DNS configured for the Kibana hostname?'
- ).format(url=kibana_url)
+ 'Is DNS configured for the Kibana hostname?'.format(url=kibana_url)
+ )
elif 'Status code was not' in error:
- error = (
+ raise OpenShiftCheckException(
+ 'WrongReturnCodeInternal',
'A request from this master to the Kibana URL {url}\n'
'did not return the correct status code (302).\n'
'This could mean that Kibana is malfunctioning, the hostname is\n'
'resolving incorrectly, or other network issues. The output was:\n'
- ' {error}'
- ).format(url=kibana_url, error=error)
- return 'Error validating the logging Kibana route:\n' + error
+ ' {error}'.format(url=kibana_url, error=error)
+ )
+ raise OpenShiftCheckException(
+ 'MiscRouteErrorInternal',
+ 'Error validating the logging Kibana route internally:\n' + error
+ )
# in production we would like the kibana route to work from outside the
# cluster too; but that may not be the case, so allow disabling just this part.
- if not self.get_var("openshift_check_efk_kibana_external", default=True):
- return None
+ if self.get_var("openshift_check_efk_kibana_external", default="True").lower() != "true":
+ return
error = self._verify_url_external(kibana_url)
- if error:
- if 'urlopen error [Errno 111] Connection refused' in error:
- error = (
- 'Failed to connect from the Ansible control host to Kibana URL {url}\n'
- 'Is the router for the Kibana hostname exposed externally?'
- ).format(url=kibana_url)
- elif 'urlopen error [Errno -2] Name or service not known' in error:
- error = (
- 'Failed to resolve the Kibana hostname in {url}\n'
- 'from the Ansible control host.\n'
- 'Is DNS configured to resolve this Kibana hostname externally?'
- ).format(url=kibana_url)
- elif 'Expected success (200)' in error:
- error = (
- 'A request to Kibana at {url}\n'
- 'returned the wrong error code:\n'
- ' {error}\n'
- 'This could mean that Kibana is malfunctioning, the hostname is\n'
- 'resolving incorrectly, or other network issues.'
- ).format(url=kibana_url, error=error)
- error = (
- 'Error validating the logging Kibana route:\n{error}\n'
- 'To disable external Kibana route validation, set in your inventory:\n'
- ' openshift_check_efk_kibana_external=False'
- ).format(error=error)
- return error
- return None
+
+ if not error:
+ return
+
+ error_fmt = (
+ 'Error validating the logging Kibana route:\n{error}\n'
+ 'To disable external Kibana route validation, set the variable:\n'
+ ' openshift_check_efk_kibana_external=False'
+ )
+ if 'urlopen error [Errno 111] Connection refused' in error:
+ msg = (
+ 'Failed to connect from the Ansible control host to Kibana URL {url}\n'
+ 'Is the router for the Kibana hostname exposed externally?'
+ ).format(url=kibana_url)
+ raise OpenShiftCheckException('FailedToConnect', error_fmt.format(error=msg))
+ elif 'urlopen error [Errno -2] Name or service not known' in error:
+ msg = (
+ 'Failed to resolve the Kibana hostname in {url}\n'
+ 'from the Ansible control host.\n'
+ 'Is DNS configured to resolve this Kibana hostname externally?'
+ ).format(url=kibana_url)
+ raise OpenShiftCheckException('FailedToResolve', error_fmt.format(error=msg))
+ elif 'Expected success (200)' in error:
+ msg = (
+ 'A request to Kibana at {url}\n'
+ 'returned the wrong error code:\n'
+ ' {error}\n'
+ 'This could mean that Kibana is malfunctioning, the hostname is\n'
+ 'resolving incorrectly, or other network issues.'
+ ).format(url=kibana_url, error=error)
+ raise OpenShiftCheckException('WrongReturnCode', error_fmt.format(error=msg))
+ raise OpenShiftCheckException(
+ 'MiscRouteError',
+ 'Error validating the logging Kibana route externally:\n' + error
+ )
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py
index 43ba6c406..ecd8adb64 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/logging.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py
@@ -8,6 +8,16 @@ import os
from openshift_checks import OpenShiftCheck, OpenShiftCheckException
+class MissingComponentPods(OpenShiftCheckException):
+ """Raised when a component has no pods in the namespace."""
+ pass
+
+
+class CouldNotUseOc(OpenShiftCheckException):
+ """Raised when ocutil has a failure running oc."""
+ pass
+
+
class LoggingCheck(OpenShiftCheck):
"""Base class for OpenShift aggregated logging component checks"""
@@ -15,10 +25,9 @@ class LoggingCheck(OpenShiftCheck):
# run by itself.
name = "logging"
- logging_namespace = "logging"
def is_active(self):
- logging_deployed = self.get_var("openshift_hosted_logging_deploy", default=False)
+ logging_deployed = self.get_var("openshift_hosted_logging_deploy", convert=bool, default=False)
return logging_deployed and super(LoggingCheck, self).is_active() and self.is_first_master()
def is_first_master(self):
@@ -32,22 +41,24 @@ class LoggingCheck(OpenShiftCheck):
def run(self):
return {}
- def get_pods_for_component(self, namespace, logging_component):
- """Get all pods for a given component. Returns: list of pods for component, error string"""
+ def get_pods_for_component(self, logging_component):
+ """Get all pods for a given component. Returns: list of pods."""
pod_output = self.exec_oc(
- namespace,
"get pods -l component={} -o json".format(logging_component),
[],
)
try:
- pods = json.loads(pod_output)
- if not pods or not pods.get('items'):
+ pods = json.loads(pod_output) # raises ValueError if deserialize fails
+ if not pods or not pods.get('items'): # also a broken response, treat the same
raise ValueError()
except ValueError:
- # successful run but non-parsing data generally means there were no pods in the namespace
- return None, 'No pods were found for the "{}" logging component.'.format(logging_component)
+ # successful run but non-parsing data generally means there were no pods to be found
+ raise MissingComponentPods(
+ 'There are no "{}" component pods in the "{}" namespace.\n'
+ 'Is logging deployed?'.format(logging_component, self.logging_namespace())
+ )
- return pods['items'], None
+ return pods['items']
@staticmethod
def not_running_pods(pods):
@@ -63,15 +74,19 @@ class LoggingCheck(OpenShiftCheck):
)
]
- def exec_oc(self, namespace="logging", cmd_str="", extra_args=None):
+ def logging_namespace(self):
+ """Returns the namespace in which logging is configured to deploy."""
+ return self.get_var("openshift_logging_namespace", default="logging")
+
+ def exec_oc(self, cmd_str="", extra_args=None):
"""
Execute an 'oc' command in the remote host.
Returns: output of command and namespace,
- or raises OpenShiftCheckException on error
+ or raises CouldNotUseOc on error
"""
config_base = self.get_var("openshift", "common", "config_base")
args = {
- "namespace": namespace,
+ "namespace": self.logging_namespace(),
"config_file": os.path.join(config_base, "master", "admin.kubeconfig"),
"cmd": cmd_str,
"extra_args": list(extra_args) if extra_args else [],
@@ -79,17 +94,16 @@ class LoggingCheck(OpenShiftCheck):
result = self.execute_module("ocutil", args)
if result.get("failed"):
- msg = (
- 'Unexpected error using `oc` to validate the logging stack components.\n'
- 'Error executing `oc {cmd}`:\n'
- '{error}'
- ).format(cmd=args['cmd'], error=result['result'])
-
if result['result'] == '[Errno 2] No such file or directory':
- msg = (
+ raise CouldNotUseOc(
"This host is supposed to be a master but does not have the `oc` command where expected.\n"
"Has an installation been run on this host yet?"
)
- raise OpenShiftCheckException(msg)
+
+ raise CouldNotUseOc(
+ 'Unexpected error using `oc` to validate the logging stack components.\n'
+ 'Error executing `oc {cmd}`:\n'
+ '{error}'.format(cmd=args['cmd'], error=result['result'])
+ )
return result.get("result", "")
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py
index b24e88e05..d781db649 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py
@@ -19,8 +19,6 @@ class LoggingIndexTime(LoggingCheck):
name = "logging_index_time"
tags = ["health", "logging"]
- logging_namespace = "logging"
-
def run(self):
"""Add log entry by making unique request to Kibana. Check for unique entry in the ElasticSearch pod logs."""
try:
@@ -28,29 +26,25 @@ class LoggingIndexTime(LoggingCheck):
self.get_var("openshift_check_logging_index_timeout_seconds", default=ES_CMD_TIMEOUT_SECONDS)
)
except ValueError:
- return {
- "failed": True,
- "msg": ('Invalid value provided for "openshift_check_logging_index_timeout_seconds". '
- 'Value must be an integer representing an amount in seconds.'),
- }
+ raise OpenShiftCheckException(
+ 'InvalidTimeout',
+ 'Invalid value provided for "openshift_check_logging_index_timeout_seconds". '
+ 'Value must be an integer representing an amount in seconds.'
+ )
running_component_pods = dict()
# get all component pods
- self.logging_namespace = self.get_var("openshift_logging_namespace", default=self.logging_namespace)
for component, name in (['kibana', 'Kibana'], ['es', 'Elasticsearch']):
- pods, error = self.get_pods_for_component(self.logging_namespace, component)
-
- if error:
- msg = 'Unable to retrieve pods for the {} logging component: {}'
- return {"failed": True, "changed": False, "msg": msg.format(name, error)}
-
+ pods = self.get_pods_for_component(component)
running_pods = self.running_pods(pods)
if not running_pods:
- msg = ('No {} pods in the "Running" state were found.'
- 'At least one pod is required in order to perform this check.')
- return {"failed": True, "changed": False, "msg": msg.format(name)}
+ raise OpenShiftCheckException(
+ component + 'NoRunningPods',
+ 'No {} pods in the "Running" state were found.'
+ 'At least one pod is required in order to perform this check.'.format(name)
+ )
running_component_pods[component] = running_pods
@@ -65,8 +59,11 @@ class LoggingIndexTime(LoggingCheck):
interval = 1
while not self.query_es_from_es(es_pod, uuid):
if time.time() + interval > deadline:
- msg = "expecting match in Elasticsearch for message with uuid {}, but no matches were found after {}s."
- raise OpenShiftCheckException(msg.format(uuid, timeout_secs))
+ raise OpenShiftCheckException(
+ "NoMatchFound",
+ "expecting match in Elasticsearch for message with uuid {}, "
+ "but no matches were found after {}s.".format(uuid, timeout_secs)
+ )
time.sleep(interval)
def curl_kibana_with_uuid(self, kibana_pod):
@@ -76,22 +73,23 @@ class LoggingIndexTime(LoggingCheck):
exec_cmd = "exec {pod_name} -c kibana -- curl --max-time 30 -s http://localhost:5601/{uuid}"
exec_cmd = exec_cmd.format(pod_name=pod_name, uuid=uuid)
- error_str = self.exec_oc(self.logging_namespace, exec_cmd, [])
+ error_str = self.exec_oc(exec_cmd, [])
try:
error_code = json.loads(error_str)["statusCode"]
- except KeyError:
- msg = ('invalid response returned from Kibana request (Missing "statusCode" key):\n'
- 'Command: {}\nResponse: {}').format(exec_cmd, error_str)
- raise OpenShiftCheckException(msg)
- except ValueError:
- msg = ('invalid response returned from Kibana request (Non-JSON output):\n'
- 'Command: {}\nResponse: {}').format(exec_cmd, error_str)
- raise OpenShiftCheckException(msg)
+ except (KeyError, ValueError):
+ raise OpenShiftCheckException(
+ 'kibanaInvalidResponse',
+ 'invalid response returned from Kibana request:\n'
+ 'Command: {}\nResponse: {}'.format(exec_cmd, error_str)
+ )
if error_code != 404:
- msg = 'invalid error code returned from Kibana request. Expecting error code "404", but got "{}" instead.'
- raise OpenShiftCheckException(msg.format(error_code))
+ raise OpenShiftCheckException(
+ 'kibanaInvalidReturnCode',
+ 'invalid error code returned from Kibana request.\n'
+ 'Expecting error code "404", but got "{}" instead.'.format(error_code)
+ )
return uuid
@@ -105,17 +103,18 @@ class LoggingIndexTime(LoggingCheck):
"--key /etc/elasticsearch/secret/admin-key "
"https://logging-es:9200/project.{namespace}*/_count?q=message:{uuid}"
)
- exec_cmd = exec_cmd.format(pod_name=pod_name, namespace=self.logging_namespace, uuid=uuid)
- result = self.exec_oc(self.logging_namespace, exec_cmd, [])
+ exec_cmd = exec_cmd.format(pod_name=pod_name, namespace=self.logging_namespace(), uuid=uuid)
+ result = self.exec_oc(exec_cmd, [])
try:
count = json.loads(result)["count"]
- except KeyError:
- msg = 'invalid response from Elasticsearch query:\n"{}"\nMissing "count" key:\n{}'
- raise OpenShiftCheckException(msg.format(exec_cmd, result))
- except ValueError:
- msg = 'invalid response from Elasticsearch query:\n"{}"\nNon-JSON output:\n{}'
- raise OpenShiftCheckException(msg.format(exec_cmd, result))
+ except (KeyError, ValueError):
+ raise OpenShiftCheckException(
+ 'esInvalidResponse',
+ 'Invalid response from Elasticsearch query:\n'
+ ' {}\n'
+ 'Response was:\n{}'.format(exec_cmd, result)
+ )
return count
diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py
index 3b2c64e6a..e9bae60a3 100644
--- a/roles/openshift_health_checker/openshift_checks/mixins.py
+++ b/roles/openshift_health_checker/openshift_checks/mixins.py
@@ -29,10 +29,10 @@ class DockerHostMixin(object):
"""
Ensure that docker-related packages exist, but not on atomic hosts
(which would not be able to install but should already have them).
- Returns: msg, failed, changed
+ Returns: msg, failed
"""
if self.get_var("openshift", "common", "is_atomic"):
- return "", False, False
+ return "", False
# NOTE: we would use the "package" module but it's actually an action plugin
# and it's not clear how to invoke one of those. This is about the same anyway:
@@ -49,5 +49,5 @@ class DockerHostMixin(object):
" {deps}\n{msg}"
).format(deps=',\n '.join(self.dependencies), msg=msg)
failed = result.get("failed", False) or result.get("rc", 0) != 0
- changed = result.get("changed", False)
- return msg, failed, changed
+ self.changed = result.get("changed", False)
+ return msg, failed
diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py
index 2d068be3d..f5161d6f5 100644
--- a/roles/openshift_health_checker/test/action_plugin_test.py
+++ b/roles/openshift_health_checker/test/action_plugin_test.py
@@ -6,7 +6,7 @@ from openshift_health_check import ActionModule, resolve_checks
from openshift_checks import OpenShiftCheckException
-def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, run_exception=None):
+def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, run_exception=None, changed=False):
"""Returns a new class that is compatible with OpenShiftCheck for testing."""
_name, _tags = name, tags
@@ -14,6 +14,7 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru
class FakeCheck(object):
name = _name
tags = _tags or []
+ changed = False
def __init__(self, execute_module=None, task_vars=None, tmp=None):
pass
@@ -22,6 +23,7 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru
return is_active
def run(self):
+ self.changed = changed
if run_exception is not None:
raise run_exception
return run_return
@@ -135,14 +137,15 @@ def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch):
def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch):
- check_return_value = {'ok': 'test', 'changed': True}
- check_class = fake_check(run_return=check_return_value)
+ check_return_value = {'ok': 'test'}
+ check_class = fake_check(run_return=check_return_value, changed=True)
monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
result = plugin.run(tmp=None, task_vars=task_vars)
assert result['checks']['fake_check'] == check_return_value
+ assert changed(result['checks']['fake_check'])
assert not failed(result)
assert changed(result)
assert not skipped(result)
@@ -165,7 +168,7 @@ def test_action_plugin_run_check_fail(plugin, task_vars, monkeypatch):
def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch):
exception_msg = 'fake check has an exception'
run_exception = OpenShiftCheckException(exception_msg)
- check_class = fake_check(run_exception=run_exception)
+ check_class = fake_check(run_exception=run_exception, changed=True)
monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()})
monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
@@ -173,7 +176,8 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch):
assert failed(result['checks']['fake_check'], msg_has=exception_msg)
assert failed(result, msg_has=['failed'])
- assert not changed(result)
+ assert changed(result['checks']['fake_check'])
+ assert changed(result)
assert not skipped(result)
diff --git a/roles/openshift_health_checker/test/curator_test.py b/roles/openshift_health_checker/test/curator_test.py
index ae108c96e..62c680b74 100644
--- a/roles/openshift_health_checker/test/curator_test.py
+++ b/roles/openshift_health_checker/test/curator_test.py
@@ -1,22 +1,6 @@
import pytest
-from openshift_checks.logging.curator import Curator
-
-
-def canned_curator(exec_oc=None):
- """Create a Curator check object with canned exec_oc method"""
- check = Curator("dummy") # fails if a module is actually invoked
- if exec_oc:
- check._exec_oc = exec_oc
- return check
-
-
-def assert_error(error, expect_error):
- if expect_error:
- assert error
- assert expect_error in error
- else:
- assert not error
+from openshift_checks.logging.curator import Curator, OpenShiftCheckException
plain_curator_pod = {
@@ -44,25 +28,30 @@ not_running_curator_pod = {
}
+def test_get_curator_pods():
+ check = Curator()
+ check.get_pods_for_component = lambda *_: [plain_curator_pod]
+ result = check.run()
+ assert "failed" not in result or not result["failed"]
+
+
@pytest.mark.parametrize('pods, expect_error', [
(
[],
- "no Curator pods",
- ),
- (
- [plain_curator_pod],
- None,
+ 'MissingComponentPods',
),
(
[not_running_curator_pod],
- "not currently in a running state",
+ 'CuratorNotRunning',
),
(
[plain_curator_pod, plain_curator_pod],
- "more than one Curator pod",
+ 'TooManyCurators',
),
])
-def test_get_curator_pods(pods, expect_error):
- check = canned_curator()
- error = check.check_curator(pods)
- assert_error(error, expect_error)
+def test_get_curator_pods_fail(pods, expect_error):
+ check = Curator()
+ check.get_pods_for_component = lambda *_: pods
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.run()
+ assert excinfo.value.name == expect_error
diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py
index 5720eeacf..f4fd2dfed 100644
--- a/roles/openshift_health_checker/test/disk_availability_test.py
+++ b/roles/openshift_health_checker/test/disk_availability_test.py
@@ -20,12 +20,24 @@ def test_is_active(group_names, is_active):
assert DiskAvailability(None, task_vars).is_active() == is_active
-@pytest.mark.parametrize('ansible_mounts,extra_words', [
- ([], ['none']), # empty ansible_mounts
- ([{'mount': '/mnt'}], ['/mnt']), # missing relevant mount paths
- ([{'mount': '/var'}], ['/var']), # missing size_available
+@pytest.mark.parametrize('desc, ansible_mounts, expect_chunks', [
+ (
+ 'empty ansible_mounts',
+ [],
+ ['determine mount point', 'none'],
+ ),
+ (
+ 'missing relevant mount paths',
+ [{'mount': '/mnt'}],
+ ['determine mount point', '/mnt'],
+ ),
+ (
+ 'missing size_available',
+ [{'mount': '/var'}, {'mount': '/usr'}, {'mount': '/tmp'}],
+ ['missing', 'size_available'],
+ ),
])
-def test_cannot_determine_available_disk(ansible_mounts, extra_words):
+def test_cannot_determine_available_disk(desc, ansible_mounts, expect_chunks):
task_vars = dict(
group_names=['masters'],
ansible_mounts=ansible_mounts,
@@ -34,8 +46,8 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
with pytest.raises(OpenShiftCheckException) as excinfo:
DiskAvailability(fake_execute_module, task_vars).run()
- for word in 'determine disk availability'.split() + extra_words:
- assert word in str(excinfo.value)
+ for chunk in expect_chunks:
+ assert chunk in str(excinfo.value)
@pytest.mark.parametrize('group_names,configured_min,ansible_mounts', [
@@ -97,7 +109,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
assert not result.get('failed', False)
-@pytest.mark.parametrize('name,group_names,configured_min,ansible_mounts,extra_words', [
+@pytest.mark.parametrize('name,group_names,configured_min,ansible_mounts,expect_chunks', [
(
'test with no space available',
['masters'],
@@ -164,7 +176,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib
['0.0 GB'],
),
], ids=lambda argval: argval[0])
-def test_fails_with_insufficient_disk_space(name, group_names, configured_min, ansible_mounts, extra_words):
+def test_fails_with_insufficient_disk_space(name, group_names, configured_min, ansible_mounts, expect_chunks):
task_vars = dict(
group_names=group_names,
openshift_check_min_host_disk_gb=configured_min,
@@ -174,8 +186,8 @@ def test_fails_with_insufficient_disk_space(name, group_names, configured_min, a
result = DiskAvailability(fake_execute_module, task_vars).run()
assert result['failed']
- for word in 'below recommended'.split() + extra_words:
- assert word in result.get('msg', '')
+ for chunk in 'below recommended'.split() + expect_chunks:
+ assert chunk in result.get('msg', '')
@pytest.mark.parametrize('name,group_names,context,ansible_mounts,failed,extra_words', [
diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py
index 67408609a..09bacd9ac 100644
--- a/roles/openshift_health_checker/test/elasticsearch_test.py
+++ b/roles/openshift_health_checker/test/elasticsearch_test.py
@@ -1,17 +1,26 @@
import pytest
import json
-from openshift_checks.logging.elasticsearch import Elasticsearch
+from openshift_checks.logging.elasticsearch import Elasticsearch, OpenShiftCheckExceptionList
+
task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin')))
-def assert_error(error, expect_error):
- if expect_error:
- assert error
- assert expect_error in error
- else:
- assert not error
+def canned_elasticsearch(task_vars=None, exec_oc=None):
+ """Create an Elasticsearch check object with stubbed exec_oc method"""
+ check = Elasticsearch(None, task_vars or {})
+ if exec_oc:
+ check.exec_oc = exec_oc
+ return check
+
+
+def assert_error_in_list(expect_err, errorlist):
+ assert any(err.name == expect_err for err in errorlist), "{} in {}".format(str(expect_err), str(errorlist))
+
+
+def pods_by_name(pods):
+ return {pod['metadata']['name']: pod for pod in pods}
plain_es_pod = {
@@ -19,6 +28,7 @@ plain_es_pod = {
"labels": {"component": "es", "deploymentconfig": "logging-es"},
"name": "logging-es",
},
+ "spec": {},
"status": {
"conditions": [{"status": "True", "type": "Ready"}],
"containerStatuses": [{"ready": True}],
@@ -32,6 +42,7 @@ split_es_pod = {
"labels": {"component": "es", "deploymentconfig": "logging-es-2"},
"name": "logging-es-2",
},
+ "spec": {},
"status": {
"conditions": [{"status": "True", "type": "Ready"}],
"containerStatuses": [{"ready": True}],
@@ -40,12 +51,28 @@ split_es_pod = {
"_test_master_name_str": "name logging-es-2",
}
+unready_es_pod = {
+ "metadata": {
+ "labels": {"component": "es", "deploymentconfig": "logging-es-3"},
+ "name": "logging-es-3",
+ },
+ "spec": {},
+ "status": {
+ "conditions": [{"status": "False", "type": "Ready"}],
+ "containerStatuses": [{"ready": False}],
+ "podIP": "10.10.10.10",
+ },
+ "_test_master_name_str": "BAD_NAME_RESPONSE",
+}
+
def test_check_elasticsearch():
- assert 'No logging Elasticsearch pods' in Elasticsearch().check_elasticsearch([])
+ with pytest.raises(OpenShiftCheckExceptionList) as excinfo:
+ canned_elasticsearch().check_elasticsearch([])
+ assert_error_in_list('NoRunningPods', excinfo.value)
# canned oc responses to match so all the checks pass
- def _exec_oc(ns, cmd, args):
+ def exec_oc(cmd, args):
if '_cat/master' in cmd:
return 'name logging-es'
elif '/_nodes' in cmd:
@@ -57,35 +84,41 @@ def test_check_elasticsearch():
else:
raise Exception(cmd)
- check = Elasticsearch(None, {})
- check.exec_oc = _exec_oc
- assert not check.check_elasticsearch([plain_es_pod])
+ check = canned_elasticsearch({}, exec_oc)
+ check.get_pods_for_component = lambda *_: [plain_es_pod]
+ assert {} == check.run()
-def pods_by_name(pods):
- return {pod['metadata']['name']: pod for pod in pods}
+def test_check_running_es_pods():
+ pods, errors = Elasticsearch().running_elasticsearch_pods([plain_es_pod, unready_es_pod])
+ assert plain_es_pod in pods
+ assert_error_in_list('PodNotRunning', errors)
+
+
+def test_check_elasticsearch_masters():
+ pods = [plain_es_pod]
+ check = canned_elasticsearch(task_vars_config_base, lambda *_: plain_es_pod['_test_master_name_str'])
+ assert not check.check_elasticsearch_masters(pods_by_name(pods))
@pytest.mark.parametrize('pods, expect_error', [
(
[],
- 'No logging Elasticsearch masters',
+ 'NoMasterFound',
),
(
- [plain_es_pod],
- None,
+ [unready_es_pod],
+ 'NoMasterName',
),
(
[plain_es_pod, split_es_pod],
- 'Found multiple Elasticsearch masters',
+ 'SplitBrainMasters',
),
])
-def test_check_elasticsearch_masters(pods, expect_error):
+def test_check_elasticsearch_masters_error(pods, expect_error):
test_pods = list(pods)
- check = Elasticsearch(None, task_vars_config_base)
- check.execute_module = lambda cmd, args: {'result': test_pods.pop(0)['_test_master_name_str']}
- errors = check._check_elasticsearch_masters(pods_by_name(pods))
- assert_error(''.join(errors), expect_error)
+ check = canned_elasticsearch(task_vars_config_base, lambda *_: test_pods.pop(0)['_test_master_name_str'])
+ assert_error_in_list(expect_error, check.check_elasticsearch_masters(pods_by_name(pods)))
es_node_list = {
@@ -95,83 +128,76 @@ es_node_list = {
}}}
+def test_check_elasticsearch_node_list():
+ check = canned_elasticsearch(task_vars_config_base, lambda *_: json.dumps(es_node_list))
+ assert not check.check_elasticsearch_node_list(pods_by_name([plain_es_pod]))
+
+
@pytest.mark.parametrize('pods, node_list, expect_error', [
(
[],
{},
- 'No logging Elasticsearch masters',
- ),
- (
- [plain_es_pod],
- es_node_list,
- None,
+ 'MissingComponentPods',
),
(
[plain_es_pod],
{}, # empty list of nodes triggers KeyError
- "Failed to query",
+ 'MissingNodeList',
),
(
[split_es_pod],
es_node_list,
- 'does not correspond to any known ES pod',
+ 'EsPodNodeMismatch',
),
])
-def test_check_elasticsearch_node_list(pods, node_list, expect_error):
- check = Elasticsearch(None, task_vars_config_base)
- check.execute_module = lambda cmd, args: {'result': json.dumps(node_list)}
+def test_check_elasticsearch_node_list_errors(pods, node_list, expect_error):
+ check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(node_list))
+ assert_error_in_list(expect_error, check.check_elasticsearch_node_list(pods_by_name(pods)))
- errors = check._check_elasticsearch_node_list(pods_by_name(pods))
- assert_error(''.join(errors), expect_error)
+
+def test_check_elasticsearch_cluster_health():
+ test_health_data = [{"status": "green"}]
+ check = canned_elasticsearch(exec_oc=lambda *_: json.dumps(test_health_data.pop(0)))
+ assert not check.check_es_cluster_health(pods_by_name([plain_es_pod]))
@pytest.mark.parametrize('pods, health_data, expect_error', [
(
[plain_es_pod],
- [{"status": "green"}],
- None,
- ),
- (
- [plain_es_pod],
[{"no-status": "should bomb"}],
- 'Could not retrieve cluster health status',
+ 'BadEsResponse',
),
(
[plain_es_pod, split_es_pod],
[{"status": "green"}, {"status": "red"}],
- 'Elasticsearch cluster health status is RED',
+ 'EsClusterHealthRed',
),
])
-def test_check_elasticsearch_cluster_health(pods, health_data, expect_error):
+def test_check_elasticsearch_cluster_health_errors(pods, health_data, expect_error):
test_health_data = list(health_data)
- check = Elasticsearch(None, task_vars_config_base)
- check.execute_module = lambda cmd, args: {'result': json.dumps(test_health_data.pop(0))}
+ check = canned_elasticsearch(exec_oc=lambda *_: json.dumps(test_health_data.pop(0)))
+ assert_error_in_list(expect_error, check.check_es_cluster_health(pods_by_name(pods)))
- errors = check._check_es_cluster_health(pods_by_name(pods))
- assert_error(''.join(errors), expect_error)
+
+def test_check_elasticsearch_diskspace():
+ check = canned_elasticsearch(exec_oc=lambda *_: 'IUse% Use%\n 3% 4%\n')
+ assert not check.check_elasticsearch_diskspace(pods_by_name([plain_es_pod]))
@pytest.mark.parametrize('disk_data, expect_error', [
(
'df: /elasticsearch/persistent: No such file or directory\n',
- 'Could not retrieve storage usage',
- ),
- (
- 'IUse% Use%\n 3% 4%\n',
- None,
+ 'BadDfResponse',
),
(
'IUse% Use%\n 95% 40%\n',
- 'Inode percent usage on the storage volume',
+ 'InodeUsageTooHigh',
),
(
'IUse% Use%\n 3% 94%\n',
- 'Disk percent usage on the storage volume',
+ 'DiskUsageTooHigh',
),
])
-def test_check_elasticsearch_diskspace(disk_data, expect_error):
- check = Elasticsearch(None, task_vars_config_base)
- check.execute_module = lambda cmd, args: {'result': disk_data}
-
- errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod]))
- assert_error(''.join(errors), expect_error)
+def test_check_elasticsearch_diskspace_errors(disk_data, expect_error):
+ check = canned_elasticsearch(exec_oc=lambda *_: disk_data)
+ assert_error_in_list(expect_error, check.check_elasticsearch_diskspace(pods_by_name([plain_es_pod])))
diff --git a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py
index e3d6706fa..d3aae98f2 100644
--- a/roles/openshift_health_checker/test/etcd_imagedata_size_test.py
+++ b/roles/openshift_health_checker/test/etcd_imagedata_size_test.py
@@ -1,7 +1,8 @@
import pytest
from collections import namedtuple
-from openshift_checks.etcd_imagedata_size import EtcdImageDataSize, OpenShiftCheckException
+from openshift_checks.etcd_imagedata_size import EtcdImageDataSize
+from openshift_checks import OpenShiftCheckException
from etcdkeysize import check_etcd_key_size
@@ -56,7 +57,7 @@ def test_cannot_determine_available_mountpath(ansible_mounts, extra_words):
with pytest.raises(OpenShiftCheckException) as excinfo:
check.run()
- for word in 'determine valid etcd mountpath'.split() + extra_words:
+ for word in ['Unable to determine mount point'] + extra_words:
assert word in str(excinfo.value)
diff --git a/roles/openshift_health_checker/test/etcd_volume_test.py b/roles/openshift_health_checker/test/etcd_volume_test.py
index 0b255136e..077cea3ea 100644
--- a/roles/openshift_health_checker/test/etcd_volume_test.py
+++ b/roles/openshift_health_checker/test/etcd_volume_test.py
@@ -1,6 +1,7 @@
import pytest
-from openshift_checks.etcd_volume import EtcdVolume, OpenShiftCheckException
+from openshift_checks.etcd_volume import EtcdVolume
+from openshift_checks import OpenShiftCheckException
@pytest.mark.parametrize('ansible_mounts,extra_words', [
@@ -15,7 +16,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
with pytest.raises(OpenShiftCheckException) as excinfo:
EtcdVolume(fake_execute_module, task_vars).run()
- for word in 'Unable to find etcd storage mount point'.split() + extra_words:
+ for word in ['Unable to determine mount point'] + extra_words:
assert word in str(excinfo.value)
diff --git a/roles/openshift_health_checker/test/fluentd_config_test.py b/roles/openshift_health_checker/test/fluentd_config_test.py
index 8a2d8b72b..10db253bc 100644
--- a/roles/openshift_health_checker/test/fluentd_config_test.py
+++ b/roles/openshift_health_checker/test/fluentd_config_test.py
@@ -198,12 +198,9 @@ def test_check_logging_config_master(name, pods, logging_driver, extra_words):
),
)
- def get_pods(namespace, logging_component):
- return pods, None
-
check = FluentdConfig(execute_module, task_vars)
check.execute_module = execute_module
- check.get_pods_for_component = get_pods
+ check.get_pods_for_component = lambda _: pods
error = check.check_logging_config()
assert error is None
@@ -283,12 +280,9 @@ def test_check_logging_config_master_failed(name, pods, logging_driver, words):
),
)
- def get_pods(namespace, logging_component):
- return pods, None
-
check = FluentdConfig(execute_module, task_vars)
check.execute_module = execute_module
- check.get_pods_for_component = get_pods
+ check.get_pods_for_component = lambda _: pods
error = check.check_logging_config()
assert error is not None
@@ -343,11 +337,8 @@ def test_check_logging_config_master_fails_on_unscheduled_deployment(name, pods,
),
)
- def get_pods(namespace, logging_component):
- return pods, None
-
check = FluentdConfig(execute_module, task_vars)
- check.get_pods_for_component = get_pods
+ check.get_pods_for_component = lambda _: pods
with pytest.raises(OpenShiftCheckException) as error:
check.check_logging_config()
diff --git a/roles/openshift_health_checker/test/fluentd_test.py b/roles/openshift_health_checker/test/fluentd_test.py
index a84d89cef..e7bf9818b 100644
--- a/roles/openshift_health_checker/test/fluentd_test.py
+++ b/roles/openshift_health_checker/test/fluentd_test.py
@@ -1,15 +1,11 @@
import pytest
import json
-from openshift_checks.logging.fluentd import Fluentd
+from openshift_checks.logging.fluentd import Fluentd, OpenShiftCheckExceptionList, OpenShiftCheckException
-def assert_error(error, expect_error):
- if expect_error:
- assert error
- assert expect_error in error
- else:
- assert not error
+def assert_error_in_list(expect_err, errorlist):
+ assert any(err.name == expect_err for err in errorlist), "{} in {}".format(str(expect_err), str(errorlist))
fluentd_pod_node1 = {
@@ -57,45 +53,60 @@ fluentd_node3_unlabeled = {
}
+def test_get_fluentd_pods():
+ check = Fluentd()
+ check.exec_oc = lambda *_: json.dumps(dict(items=[fluentd_node1]))
+ check.get_pods_for_component = lambda *_: [fluentd_pod_node1]
+ assert not check.run()
+
+
@pytest.mark.parametrize('pods, nodes, expect_error', [
(
[],
[],
- 'No nodes appear to be defined',
+ 'NoNodesDefined',
),
(
[],
[fluentd_node3_unlabeled],
- 'There are no nodes with the fluentd label',
+ 'NoNodesLabeled',
),
(
[],
[fluentd_node1, fluentd_node3_unlabeled],
- 'Fluentd will not aggregate logs from these nodes.',
+ 'NodesUnlabeled',
),
(
[],
[fluentd_node2],
- "nodes are supposed to have a Fluentd pod but do not",
+ 'MissingFluentdPod',
),
(
[fluentd_pod_node1, fluentd_pod_node1],
[fluentd_node1],
- 'more Fluentd pods running than nodes labeled',
+ 'TooManyFluentdPods',
),
(
[fluentd_pod_node2_down],
[fluentd_node2],
- "Fluentd pods are supposed to be running",
- ),
- (
- [fluentd_pod_node1],
- [fluentd_node1],
- None,
+ 'FluentdNotRunning',
),
])
-def test_get_fluentd_pods(pods, nodes, expect_error):
+def test_get_fluentd_pods_errors(pods, nodes, expect_error):
+ check = Fluentd()
+ check.exec_oc = lambda *_: json.dumps(dict(items=nodes))
+
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.check_fluentd(pods)
+ if isinstance(excinfo.value, OpenShiftCheckExceptionList):
+ assert_error_in_list(expect_error, excinfo.value)
+ else:
+ assert expect_error == excinfo.value.name
+
+
+def test_bad_oc_node_list():
check = Fluentd()
- check.exec_oc = lambda ns, cmd, args: json.dumps(dict(items=nodes))
- error = check.check_fluentd(pods)
- assert_error(error, expect_error)
+ check.exec_oc = lambda *_: "this isn't even json"
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.get_nodes_by_name()
+ assert 'BadOcNodeList' == excinfo.value.name
diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py
index 0bf492511..04a5e89c4 100644
--- a/roles/openshift_health_checker/test/kibana_test.py
+++ b/roles/openshift_health_checker/test/kibana_test.py
@@ -8,15 +8,7 @@ except ImportError:
from urllib.error import HTTPError, URLError
import urllib.request as urllib2
-from openshift_checks.logging.kibana import Kibana
-
-
-def assert_error(error, expect_error):
- if expect_error:
- assert error
- assert expect_error in error
- else:
- assert not error
+from openshift_checks.logging.kibana import Kibana, OpenShiftCheckException
plain_kibana_pod = {
@@ -41,39 +33,45 @@ not_running_kibana_pod = {
}
+def test_check_kibana():
+ # should run without exception:
+ Kibana().check_kibana([plain_kibana_pod])
+
+
@pytest.mark.parametrize('pods, expect_error', [
(
[],
- "There are no Kibana pods deployed",
- ),
- (
- [plain_kibana_pod],
- None,
+ "MissingComponentPods",
),
(
[not_running_kibana_pod],
- "No Kibana pod is in a running state",
+ "NoRunningPods",
),
(
[plain_kibana_pod, not_running_kibana_pod],
- "The following Kibana pods are not currently in a running state",
+ "PodNotRunning",
),
])
-def test_check_kibana(pods, expect_error):
- check = Kibana()
- error = check.check_kibana(pods)
- assert_error(error, expect_error)
+def test_check_kibana_error(pods, expect_error):
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ Kibana().check_kibana(pods)
+ assert expect_error == excinfo.value.name
-@pytest.mark.parametrize('route, expect_url, expect_error', [
+@pytest.mark.parametrize('comment, route, expect_error', [
(
+ "No route returned",
None,
- None,
- 'no_route_exists',
+ "no_route_exists",
),
- # test route with no ingress
(
+ "broken route response",
+ {"status": {}},
+ "get_route_failed",
+ ),
+ (
+ "route with no ingress",
{
"metadata": {
"labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
@@ -86,12 +84,11 @@ def test_check_kibana(pods, expect_error):
"host": "hostname",
}
},
- None,
- 'route_not_accepted',
+ "route_not_accepted",
),
- # test route with no host
(
+ "route with no host",
{
"metadata": {
"labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
@@ -104,12 +101,21 @@ def test_check_kibana(pods, expect_error):
},
"spec": {},
},
- None,
- 'route_missing_host',
+ "route_missing_host",
),
+])
+def test_get_kibana_url_error(comment, route, expect_error):
+ check = Kibana()
+ check.exec_oc = lambda *_: json.dumps(route) if route else ""
+
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check._get_kibana_url()
+ assert excinfo.value.name == expect_error
- # test route that looks fine
+
+@pytest.mark.parametrize('comment, route, expect_url', [
(
+ "test route that looks fine",
{
"metadata": {
"labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
@@ -125,62 +131,57 @@ def test_check_kibana(pods, expect_error):
},
},
"https://hostname/",
- None,
),
])
-def test_get_kibana_url(route, expect_url, expect_error):
+def test_get_kibana_url(comment, route, expect_url):
check = Kibana()
- check.exec_oc = lambda ns, cmd, args: json.dumps(route) if route else ""
-
- url, error = check._get_kibana_url()
- if expect_url:
- assert url == expect_url
- else:
- assert not url
- if expect_error:
- assert error == expect_error
- else:
- assert not error
+ check.exec_oc = lambda *_: json.dumps(route)
+ assert expect_url == check._get_kibana_url()
@pytest.mark.parametrize('exec_result, expect', [
(
'urlopen error [Errno 111] Connection refused',
- 'at least one router routing to it?',
+ 'FailedToConnectInternal',
),
(
'urlopen error [Errno -2] Name or service not known',
- 'DNS configured for the Kibana hostname?',
+ 'FailedToResolveInternal',
),
(
'Status code was not [302]: HTTP Error 500: Server error',
- 'did not return the correct status code',
+ 'WrongReturnCodeInternal',
),
(
'bork bork bork',
- 'bork bork bork', # should pass through
+ 'MiscRouteErrorInternal',
),
])
def test_verify_url_internal_failure(exec_result, expect):
check = Kibana(execute_module=lambda *_: dict(failed=True, msg=exec_result))
- check._get_kibana_url = lambda: ('url', None)
+ check._get_kibana_url = lambda: 'url'
- error = check._check_kibana_route()
- assert_error(error, expect)
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.check_kibana_route()
+ assert expect == excinfo.value.name
@pytest.mark.parametrize('lib_result, expect', [
(
- HTTPError('url', 500, "it broke", hdrs=None, fp=None),
- 'it broke',
+ HTTPError('url', 500, 'it broke', hdrs=None, fp=None),
+ 'MiscRouteError',
+ ),
+ (
+ URLError('urlopen error [Errno 111] Connection refused'),
+ 'FailedToConnect',
),
(
- URLError('it broke'),
- 'it broke',
+ URLError('urlopen error [Errno -2] Name or service not known'),
+ 'FailedToResolve',
),
(
302,
- 'returned the wrong error code',
+ 'WrongReturnCode',
),
(
200,
@@ -204,8 +205,40 @@ def test_verify_url_external_failure(lib_result, expect, monkeypatch):
monkeypatch.setattr(urllib2, 'urlopen', urlopen)
check = Kibana()
- check._get_kibana_url = lambda: ('url', None)
+ check._get_kibana_url = lambda: 'url'
check._verify_url_internal = lambda url: None
- error = check._check_kibana_route()
- assert_error(error, expect)
+ if not expect:
+ check.check_kibana_route()
+ return
+
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.check_kibana_route()
+ assert expect == excinfo.value.name
+
+
+def test_verify_url_external_skip():
+ check = Kibana(lambda *_: {}, dict(openshift_check_efk_kibana_external="false"))
+ check._get_kibana_url = lambda: 'url'
+ check.check_kibana_route()
+
+
+# this is kind of silly but it adds coverage for the run() method...
+def test_run():
+ pods = ["foo"]
+ ran = dict(check_kibana=False, check_route=False)
+
+ def check_kibana(pod_list):
+ ran["check_kibana"] = True
+ assert pod_list == pods
+
+ def check_kibana_route():
+ ran["check_route"] = True
+
+ check = Kibana()
+ check.get_pods_for_component = lambda *_: pods
+ check.check_kibana = check_kibana
+ check.check_kibana_route = check_kibana_route
+
+ check.run()
+ assert ran["check_kibana"] and ran["check_route"]
diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py
index 6f1697ee6..1a1c190f6 100644
--- a/roles/openshift_health_checker/test/logging_check_test.py
+++ b/roles/openshift_health_checker/test/logging_check_test.py
@@ -1,18 +1,14 @@
import pytest
import json
-from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException
+from openshift_checks.logging.logging import LoggingCheck, MissingComponentPods, CouldNotUseOc
task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin')))
-logging_namespace = "logging"
-
-
-def canned_loggingcheck(exec_oc=None):
+def canned_loggingcheck(exec_oc=None, execute_module=None):
"""Create a LoggingCheck object with canned exec_oc method"""
- check = LoggingCheck() # fails if a module is actually invoked
- check.logging_namespace = 'logging'
+ check = LoggingCheck(execute_module)
if exec_oc:
check.exec_oc = exec_oc
return check
@@ -97,8 +93,8 @@ def test_oc_failure(problem, expect):
check = LoggingCheck(execute_module, task_vars_config_base)
- with pytest.raises(OpenShiftCheckException) as excinfo:
- check.exec_oc(logging_namespace, 'get foo', [])
+ with pytest.raises(CouldNotUseOc) as excinfo:
+ check.exec_oc('get foo', [])
assert expect in str(excinfo)
@@ -124,25 +120,32 @@ def test_is_active(groups, logging_deployed, is_active):
assert LoggingCheck(None, task_vars).is_active() == is_active
-@pytest.mark.parametrize('pod_output, expect_pods, expect_error', [
+@pytest.mark.parametrize('pod_output, expect_pods', [
+ (
+ json.dumps({'items': [plain_es_pod]}),
+ [plain_es_pod],
+ ),
+])
+def test_get_pods_for_component(pod_output, expect_pods):
+ check = canned_loggingcheck(lambda *_: pod_output)
+ pods = check.get_pods_for_component("es")
+ assert pods == expect_pods
+
+
+@pytest.mark.parametrize('exec_oc_output, expect_error', [
(
'No resources found.',
- None,
- 'No pods were found for the "es"',
+ MissingComponentPods,
),
(
- json.dumps({'items': [plain_kibana_pod, plain_es_pod, plain_curator_pod, fluentd_pod_node1]}),
- [plain_es_pod],
- None,
+ '{"items": null}',
+ MissingComponentPods,
),
])
-def test_get_pods_for_component(pod_output, expect_pods, expect_error):
- check = canned_loggingcheck(lambda namespace, cmd, args: pod_output)
- pods, error = check.get_pods_for_component(
- logging_namespace,
- "es",
- )
- assert_error(error, expect_error)
+def test_get_pods_for_component_fail(exec_oc_output, expect_error):
+ check = canned_loggingcheck(lambda *_: exec_oc_output)
+ with pytest.raises(expect_error):
+ check.get_pods_for_component("es")
@pytest.mark.parametrize('name, pods, expected_pods', [
@@ -159,7 +162,7 @@ def test_get_pods_for_component(pod_output, expect_pods, expect_error):
], ids=lambda argvals: argvals[0])
def test_get_not_running_pods_no_container_status(name, pods, expected_pods):
- check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: '')
+ check = canned_loggingcheck(lambda *_: '')
result = check.not_running_pods(pods)
assert result == expected_pods
diff --git a/roles/openshift_health_checker/test/logging_index_time_test.py b/roles/openshift_health_checker/test/logging_index_time_test.py
index 178d7cd84..22566b295 100644
--- a/roles/openshift_health_checker/test/logging_index_time_test.py
+++ b/roles/openshift_health_checker/test/logging_index_time_test.py
@@ -69,7 +69,29 @@ def test_check_running_pods(pods, expect_pods):
assert pods == expect_pods
-@pytest.mark.parametrize('name, json_response, uuid, timeout, extra_words', [
+def test_bad_config_param():
+ with pytest.raises(OpenShiftCheckException) as error:
+ LoggingIndexTime(task_vars=dict(openshift_check_logging_index_timeout_seconds="foo")).run()
+ assert 'InvalidTimeout' == error.value.name
+
+
+def test_no_running_pods():
+ check = LoggingIndexTime()
+ check.get_pods_for_component = lambda *_: [not_running_kibana_pod]
+ with pytest.raises(OpenShiftCheckException) as error:
+ check.run()
+ assert 'kibanaNoRunningPods' == error.value.name
+
+
+def test_with_running_pods():
+ check = LoggingIndexTime()
+ check.get_pods_for_component = lambda *_: [plain_running_kibana_pod, plain_running_elasticsearch_pod]
+ check.curl_kibana_with_uuid = lambda *_: SAMPLE_UUID
+ check.wait_until_cmd_or_err = lambda *_: None
+ assert not check.run().get("failed")
+
+
+@pytest.mark.parametrize('name, json_response, uuid, timeout', [
(
'valid count in response',
{
@@ -77,94 +99,72 @@ def test_check_running_pods(pods, expect_pods):
},
SAMPLE_UUID,
0.001,
- [],
),
], ids=lambda argval: argval[0])
-def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout, extra_words):
+def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout):
check = canned_loggingindextime(lambda *_: json.dumps(json_response))
check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout)
-@pytest.mark.parametrize('name, json_response, uuid, timeout, extra_words', [
+@pytest.mark.parametrize('name, json_response, timeout, expect_error', [
(
'invalid json response',
{
"invalid_field": 1,
},
- SAMPLE_UUID,
0.001,
- ["invalid response", "Elasticsearch"],
+ 'esInvalidResponse',
),
(
'empty response',
{},
- SAMPLE_UUID,
0.001,
- ["invalid response", "Elasticsearch"],
+ 'esInvalidResponse',
),
(
'valid response but invalid match count',
{
"count": 0,
},
- SAMPLE_UUID,
0.005,
- ["expecting match", SAMPLE_UUID, "0.005s"],
+ 'NoMatchFound',
)
], ids=lambda argval: argval[0])
-def test_wait_until_cmd_or_err(name, json_response, uuid, timeout, extra_words):
+def test_wait_until_cmd_or_err(name, json_response, timeout, expect_error):
check = canned_loggingindextime(lambda *_: json.dumps(json_response))
with pytest.raises(OpenShiftCheckException) as error:
- check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout)
+ check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, SAMPLE_UUID, timeout)
- for word in extra_words:
- assert word in str(error)
+ assert expect_error == error.value.name
-@pytest.mark.parametrize('name, json_response, uuid, extra_words', [
- (
- 'correct response code, found unique id is returned',
- {
- "statusCode": 404,
- },
- "sample unique id",
- ["sample unique id"],
- ),
-], ids=lambda argval: argval[0])
-def test_curl_kibana_with_uuid(name, json_response, uuid, extra_words):
- check = canned_loggingindextime(lambda *_: json.dumps(json_response))
- check.generate_uuid = lambda: uuid
-
- result = check.curl_kibana_with_uuid(plain_running_kibana_pod)
-
- for word in extra_words:
- assert word in result
+def test_curl_kibana_with_uuid():
+ check = canned_loggingindextime(lambda *_: json.dumps({"statusCode": 404}))
+ check.generate_uuid = lambda: SAMPLE_UUID
+ assert SAMPLE_UUID == check.curl_kibana_with_uuid(plain_running_kibana_pod)
-@pytest.mark.parametrize('name, json_response, uuid, extra_words', [
+@pytest.mark.parametrize('name, json_response, expect_error', [
(
'invalid json response',
{
"invalid_field": "invalid",
},
- SAMPLE_UUID,
- ["invalid response returned", 'Missing "statusCode" key'],
+ 'kibanaInvalidResponse',
),
(
'wrong error code in response',
{
"statusCode": 500,
},
- SAMPLE_UUID,
- ["Expecting error code", "500"],
+ 'kibanaInvalidReturnCode',
),
], ids=lambda argval: argval[0])
-def test_failed_curl_kibana_with_uuid(name, json_response, uuid, extra_words):
+def test_failed_curl_kibana_with_uuid(name, json_response, expect_error):
check = canned_loggingindextime(lambda *_: json.dumps(json_response))
- check.generate_uuid = lambda: uuid
+ check.generate_uuid = lambda: SAMPLE_UUID
with pytest.raises(OpenShiftCheckException) as error:
check.curl_kibana_with_uuid(plain_running_kibana_pod)
- for word in extra_words:
- assert word in str(error)
+ assert expect_error == error.value.name
diff --git a/roles/openshift_health_checker/test/openshift_check_test.py b/roles/openshift_health_checker/test/openshift_check_test.py
index 43aa875f4..789784c77 100644
--- a/roles/openshift_health_checker/test/openshift_check_test.py
+++ b/roles/openshift_health_checker/test/openshift_check_test.py
@@ -81,6 +81,7 @@ def dummy_check(task_vars):
@pytest.mark.parametrize("keys,expected", [
(("foo",), 42),
(("bar", "baz"), "openshift"),
+ (("bar.baz",), "openshift"),
])
def test_get_var_ok(task_vars, keys, expected):
assert dummy_check(task_vars).get_var(*keys) == expected
@@ -94,3 +95,24 @@ def test_get_var_error(task_vars, missing_keys):
def test_get_var_default(task_vars, missing_keys):
default = object()
assert dummy_check(task_vars).get_var(*missing_keys, default=default) == default
+
+
+@pytest.mark.parametrize("keys, convert, expected", [
+ (("foo",), str, "42"),
+ (("foo",), float, 42.0),
+ (("bar", "baz"), bool, False),
+])
+def test_get_var_convert(task_vars, keys, convert, expected):
+ assert dummy_check(task_vars).get_var(*keys, convert=convert) == expected
+
+
+@pytest.mark.parametrize("keys, convert", [
+ (("bar", "baz"), int),
+ (("bar.baz"), float),
+ (("foo"), "bogus"),
+ (("foo"), lambda a, b: 1),
+ (("foo"), lambda a: 1 / 0),
+])
+def test_get_var_convert_error(task_vars, keys, convert):
+ with pytest.raises(OpenShiftCheckException):
+ dummy_check(task_vars).get_var(*keys, convert=convert)
diff --git a/roles/openshift_hosted/defaults/main.yml b/roles/openshift_hosted/defaults/main.yml
index 0391e5602..13cbfb14e 100644
--- a/roles/openshift_hosted/defaults/main.yml
+++ b/roles/openshift_hosted/defaults/main.yml
@@ -1,4 +1,10 @@
---
+r_openshift_hosted_router_firewall_enabled: True
+r_openshift_hosted_router_use_firewalld: False
+
+r_openshift_hosted_registry_firewall_enabled: True
+r_openshift_hosted_registry_use_firewalld: False
+
registry_volume_claim: 'registry-claim'
openshift_hosted_router_edits:
@@ -26,12 +32,15 @@ openshift_hosted_routers:
- 443:443
certificate: "{{ openshift_hosted_router_certificate | default({}) }}"
-
openshift_hosted_router_certificate: {}
openshift_hosted_registry_cert_expire_days: 730
openshift_hosted_router_create_certificate: True
-os_firewall_allow:
+r_openshift_hosted_router_os_firewall_deny: []
+r_openshift_hosted_router_os_firewall_allow: []
+
+r_openshift_hosted_registry_os_firewall_deny: []
+r_openshift_hosted_registry_os_firewall_allow:
- service: Docker Registry Port
port: 5000/tcp
- when: openshift.common.use_calico | bool
+ cond: "{{ r_openshift_hosted_use_calico }}"
diff --git a/roles/openshift_hosted/meta/main.yml b/roles/openshift_hosted/meta/main.yml
index 9e3f37130..28fd396d6 100644
--- a/roles/openshift_hosted/meta/main.yml
+++ b/roles/openshift_hosted/meta/main.yml
@@ -15,8 +15,4 @@ dependencies:
- role: openshift_cli
- role: openshift_hosted_facts
- role: lib_openshift
-- role: os_firewall
- os_firewall_allow:
- - service: Docker Registry Port
- port: 5000/tcp
- when: openshift.common.use_calico | bool
+- role: lib_os_firewall
diff --git a/roles/openshift_hosted/tasks/registry/firewall.yml b/roles/openshift_hosted/tasks/registry/firewall.yml
new file mode 100644
index 000000000..775b7d6d7
--- /dev/null
+++ b/roles/openshift_hosted/tasks/registry/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_openshift_hosted_registry_firewall_enabled | bool and not r_openshift_hosted_registry_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_hosted_registry_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_hosted_registry_os_firewall_deny }}"
+
+- when: r_openshift_hosted_registry_firewall_enabled | bool and r_openshift_hosted_registry_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_hosted_registry_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_hosted_registry_os_firewall_deny }}"
diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml
index b946ec8ca..dcd9c87fc 100644
--- a/roles/openshift_hosted/tasks/registry/registry.yml
+++ b/roles/openshift_hosted/tasks/registry/registry.yml
@@ -1,6 +1,10 @@
---
-- block:
+- name: setup firewall
+ include: firewall.yml
+ static: yes
+- when: openshift.hosted.registry.replicas | default(none) is none
+ block:
- name: Retrieve list of openshift nodes matching registry selector
oc_obj:
state: list
@@ -28,7 +32,6 @@
l_default_replicas: "{{ l_node_count if openshift.hosted.registry.storage.kind | default(none) is not none else 1 }}"
when: l_node_count | int > 0
- when: openshift.hosted.registry.replicas | default(none) is none
- name: set openshift_hosted facts
set_fact:
diff --git a/roles/openshift_hosted/tasks/router/firewall.yml b/roles/openshift_hosted/tasks/router/firewall.yml
new file mode 100644
index 000000000..ff90f3372
--- /dev/null
+++ b/roles/openshift_hosted/tasks/router/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_openshift_hosted_router_firewall_enabled | bool and not r_openshift_hosted_router_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_hosted_router_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_hosted_router_os_firewall_deny }}"
+
+- when: r_openshift_hosted_router_firewall_enabled | bool and r_openshift_hosted_router_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_hosted_router_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_hosted_router_os_firewall_deny }}"
diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml
index dd485a64a..72a1ead80 100644
--- a/roles/openshift_hosted/tasks/router/router.yml
+++ b/roles/openshift_hosted/tasks/router/router.yml
@@ -1,4 +1,8 @@
---
+- name: setup firewall
+ include: firewall.yml
+ static: yes
+
- name: Retrieve list of openshift nodes matching router selector
oc_obj:
state: list
diff --git a/roles/openshift_hosted_logging/handlers/main.yml b/roles/openshift_hosted_logging/handlers/main.yml
index ffb812271..d7e83fe9a 100644
--- a/roles/openshift_hosted_logging/handlers/main.yml
+++ b/roles/openshift_hosted_logging/handlers/main.yml
@@ -1,9 +1,4 @@
---
-- name: restart master
- systemd: name={{ openshift.common.service_type }}-master state=restarted
- when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool))
- notify: Verify API Server
-
- name: Verify API Server
# Using curl here since the uri module requires python-httplib2 and
# wait_for port doesn't provide health information.
diff --git a/roles/openshift_hosted_metrics/handlers/main.yml b/roles/openshift_hosted_metrics/handlers/main.yml
index 69c5a1663..ce7688581 100644
--- a/roles/openshift_hosted_metrics/handlers/main.yml
+++ b/roles/openshift_hosted_metrics/handlers/main.yml
@@ -1,17 +1,12 @@
---
-- name: restart master
- systemd: name={{ openshift.common.service_type }}-master state=restarted
- when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool))
- notify: Verify API Server
-
- name: restart master api
systemd: name={{ openshift.common.service_type }}-master-api state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
notify: Verify API Server
- name: restart master controllers
systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
- name: Verify API Server
# Using curl here since the uri module requires python-httplib2 and
diff --git a/roles/openshift_loadbalancer/defaults/main.yml b/roles/openshift_loadbalancer/defaults/main.yml
index 6190383b6..3f6409233 100644
--- a/roles/openshift_loadbalancer/defaults/main.yml
+++ b/roles/openshift_loadbalancer/defaults/main.yml
@@ -1,4 +1,7 @@
---
+r_openshift_loadbalancer_firewall_enabled: True
+r_openshift_loadbalancer_use_firewalld: False
+
haproxy_frontends:
- name: main
binds:
@@ -12,3 +15,13 @@ haproxy_backends:
- name: web01
address: 127.0.0.1:9000
opts: check
+
+r_openshift_loadbalancer_os_firewall_deny: []
+r_openshift_loadbalancer_os_firewall_allow:
+- service: haproxy stats
+ port: "9000/tcp"
+- service: haproxy balance
+ port: "{{ openshift_master_api_port | default(8443) }}/tcp"
+- service: nuage mon
+ port: "{{ nuage_mon_rest_server_port | default(9443) }}/tcp"
+ cond: "{{ openshift_use_nuage | default(false) | bool }}"
diff --git a/roles/openshift_loadbalancer/meta/main.yml b/roles/openshift_loadbalancer/meta/main.yml
index 0dffb545f..073bdd94d 100644
--- a/roles/openshift_loadbalancer/meta/main.yml
+++ b/roles/openshift_loadbalancer/meta/main.yml
@@ -10,16 +10,6 @@ galaxy_info:
versions:
- 7
dependencies:
+- role: lib_os_firewall
- role: openshift_facts
-- role: os_firewall
- os_firewall_allow:
- - service: haproxy stats
- port: "9000/tcp"
- - service: haproxy balance
- port: "{{ openshift_master_api_port | default(8443) }}/tcp"
-- role: os_firewall
- os_firewall_allow:
- - service: nuage mon
- port: "{{ nuage_mon_rest_server_port | default(9443) }}/tcp"
- when: openshift_use_nuage | default(false) | bool
- role: openshift_repos
diff --git a/roles/openshift_loadbalancer/tasks/firewall.yml b/roles/openshift_loadbalancer/tasks/firewall.yml
new file mode 100644
index 000000000..7d6e8ff36
--- /dev/null
+++ b/roles/openshift_loadbalancer/tasks/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_openshift_loadbalancer_firewall_enabled | bool and not r_openshift_loadbalancer_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_loadbalancer_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_loadbalancer_os_firewall_deny }}"
+
+- when: r_openshift_loadbalancer_firewall_enabled | bool and r_openshift_loadbalancer_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_loadbalancer_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_loadbalancer_os_firewall_deny }}"
diff --git a/roles/openshift_loadbalancer/tasks/main.yml b/roles/openshift_loadbalancer/tasks/main.yml
index 68bb4ace8..69b061fc5 100644
--- a/roles/openshift_loadbalancer/tasks/main.yml
+++ b/roles/openshift_loadbalancer/tasks/main.yml
@@ -1,4 +1,8 @@
---
+- name: setup firewall
+ include: firewall.yml
+ static: yes
+
- name: Install haproxy
package: name=haproxy state=present
when: not openshift.common.is_containerized | bool
diff --git a/roles/openshift_logging/handlers/main.yml b/roles/openshift_logging/handlers/main.yml
index 69c5a1663..ce7688581 100644
--- a/roles/openshift_logging/handlers/main.yml
+++ b/roles/openshift_logging/handlers/main.yml
@@ -1,17 +1,12 @@
---
-- name: restart master
- systemd: name={{ openshift.common.service_type }}-master state=restarted
- when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool))
- notify: Verify API Server
-
- name: restart master api
systemd: name={{ openshift.common.service_type }}-master-api state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
notify: Verify API Server
- name: restart master controllers
systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
- name: Verify API Server
# Using curl here since the uri module requires python-httplib2 and
diff --git a/roles/openshift_logging/tasks/update_master_config.yaml b/roles/openshift_logging/tasks/update_master_config.yaml
index 10f522b61..b96b8e29d 100644
--- a/roles/openshift_logging/tasks/update_master_config.yaml
+++ b/roles/openshift_logging/tasks/update_master_config.yaml
@@ -5,7 +5,6 @@
yaml_key: assetConfig.loggingPublicURL
yaml_value: "https://{{ openshift_logging_kibana_hostname }}"
notify:
- - restart master
- restart master api
- restart master controllers
tags:
diff --git a/roles/openshift_logging_fluentd/tasks/main.yaml b/roles/openshift_logging_fluentd/tasks/main.yaml
index 9dfc6fc86..74b4d7db4 100644
--- a/roles/openshift_logging_fluentd/tasks/main.yaml
+++ b/roles/openshift_logging_fluentd/tasks/main.yaml
@@ -1,7 +1,7 @@
---
- fail:
msg: Only one Fluentd nodeselector key pair should be provided
- when: "{{ openshift_logging_fluentd_nodeselector.keys() | count }} > 1"
+ when: openshift_logging_fluentd_nodeselector.keys() | count > 1
- fail:
msg: Application logs destination is required
diff --git a/roles/openshift_logging_kibana/tasks/main.yaml b/roles/openshift_logging_kibana/tasks/main.yaml
index 62bc26e37..166f102f7 100644
--- a/roles/openshift_logging_kibana/tasks/main.yaml
+++ b/roles/openshift_logging_kibana/tasks/main.yaml
@@ -99,17 +99,17 @@
# TODO: set up these certs differently?
- set_fact:
kibana_key: "{{ lookup('file', openshift_logging_kibana_key) | b64encode }}"
- when: "{{ openshift_logging_kibana_key | trim | length > 0 }}"
+ when: openshift_logging_kibana_key | trim | length > 0
changed_when: false
- set_fact:
kibana_cert: "{{ lookup('file', openshift_logging_kibana_cert) | b64encode }}"
- when: "{{ openshift_logging_kibana_cert | trim | length > 0 }}"
+ when: openshift_logging_kibana_cert | trim | length > 0
changed_when: false
- set_fact:
kibana_ca: "{{ lookup('file', openshift_logging_kibana_ca) | b64encode }}"
- when: "{{ openshift_logging_kibana_ca | trim | length > 0 }}"
+ when: openshift_logging_kibana_ca | trim | length > 0
changed_when: false
- set_fact:
diff --git a/roles/openshift_master/defaults/main.yml b/roles/openshift_master/defaults/main.yml
index 2d3ce5bcd..a4c178908 100644
--- a/roles/openshift_master/defaults/main.yml
+++ b/roles/openshift_master/defaults/main.yml
@@ -1,4 +1,21 @@
---
+r_openshift_master_firewall_enabled: True
+r_openshift_master_use_firewalld: False
+
openshift_node_ips: []
r_openshift_master_clean_install: false
r_openshift_master_etcd3_storage: false
+r_openshift_master_os_firewall_enable: true
+r_openshift_master_os_firewall_deny: []
+r_openshift_master_os_firewall_allow:
+- service: api server https
+ port: "{{ openshift.master.api_port }}/tcp"
+- service: api controllers https
+ port: "{{ openshift.master.controllers_port }}/tcp"
+- service: skydns tcp
+ port: "{{ openshift.master.dns_port }}/tcp"
+- service: skydns udp
+ port: "{{ openshift.master.dns_port }}/udp"
+- service: etcd embedded
+ port: 4001/tcp
+ cond: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
diff --git a/roles/openshift_master/files/atomic-openshift-master.service b/roles/openshift_master/files/atomic-openshift-master.service
deleted file mode 100644
index 02af4dd16..000000000
--- a/roles/openshift_master/files/atomic-openshift-master.service
+++ /dev/null
@@ -1,23 +0,0 @@
-[Unit]
-Description=Atomic OpenShift Master
-Documentation=https://github.com/openshift/origin
-After=network-online.target
-After=etcd.service
-Before=atomic-openshift-node.service
-Requires=network-online.target
-
-[Service]
-Type=notify
-EnvironmentFile=/etc/sysconfig/atomic-openshift-master
-Environment=GOTRACEBACK=crash
-ExecStart=/usr/bin/openshift start master --config=${CONFIG_FILE} $OPTIONS
-LimitNOFILE=131072
-LimitCORE=infinity
-WorkingDirectory=/var/lib/origin/
-SyslogIdentifier=atomic-openshift-master
-Restart=always
-RestartSec=5s
-
-[Install]
-WantedBy=multi-user.target
-WantedBy=atomic-openshift-node.service
diff --git a/roles/openshift_master/files/origin-master.service b/roles/openshift_master/files/origin-master.service
deleted file mode 100644
index cf79dda02..000000000
--- a/roles/openshift_master/files/origin-master.service
+++ /dev/null
@@ -1,23 +0,0 @@
-[Unit]
-Description=Origin Master Service
-Documentation=https://github.com/openshift/origin
-After=network-online.target
-After=etcd.service
-Before=origin-node.service
-Requires=network-online.target
-
-[Service]
-Type=notify
-EnvironmentFile=/etc/sysconfig/origin-master
-Environment=GOTRACEBACK=crash
-ExecStart=/usr/bin/openshift start master --config=${CONFIG_FILE} $OPTIONS
-LimitNOFILE=131072
-LimitCORE=infinity
-WorkingDirectory=/var/lib/origin/
-SyslogIdentifier=origin-master
-Restart=always
-RestartSec=5s
-
-[Install]
-WantedBy=multi-user.target
-WantedBy=origin-node.service
diff --git a/roles/openshift_master/handlers/main.yml b/roles/openshift_master/handlers/main.yml
index 69c5a1663..ce7688581 100644
--- a/roles/openshift_master/handlers/main.yml
+++ b/roles/openshift_master/handlers/main.yml
@@ -1,17 +1,12 @@
---
-- name: restart master
- systemd: name={{ openshift.common.service_type }}-master state=restarted
- when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool))
- notify: Verify API Server
-
- name: restart master api
systemd: name={{ openshift.common.service_type }}-master-api state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
notify: Verify API Server
- name: restart master controllers
systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
- name: Verify API Server
# Using curl here since the uri module requires python-httplib2 and
diff --git a/roles/openshift_master/meta/main.yml b/roles/openshift_master/meta/main.yml
index 907f25bc5..bd2383f61 100644
--- a/roles/openshift_master/meta/main.yml
+++ b/roles/openshift_master/meta/main.yml
@@ -13,6 +13,7 @@ galaxy_info:
- cloud
dependencies:
- role: lib_openshift
+- role: lib_os_firewall
- role: openshift_master_facts
- role: openshift_hosted_facts
- role: openshift_master_certificates
@@ -25,21 +26,6 @@ dependencies:
- role: openshift_cloud_provider
- role: openshift_builddefaults
- role: openshift_buildoverrides
-- role: os_firewall
- os_firewall_allow:
- - service: api server https
- port: "{{ openshift.master.api_port }}/tcp"
- - service: api controllers https
- port: "{{ openshift.master.controllers_port }}/tcp"
- - service: skydns tcp
- port: "{{ openshift.master.dns_port }}/tcp"
- - service: skydns udp
- port: "{{ openshift.master.dns_port }}/udp"
-- role: os_firewall
- os_firewall_allow:
- - service: etcd embedded
- port: 4001/tcp
- when: groups.oo_etcd_to_config | default([]) | length == 0
- role: nickhammond.logrotate
- role: contiv
contiv_role: netmaster
diff --git a/roles/openshift_master/tasks/clean_systemd_units.yml b/roles/openshift_master/tasks/clean_systemd_units.yml
new file mode 100644
index 000000000..590692c10
--- /dev/null
+++ b/roles/openshift_master/tasks/clean_systemd_units.yml
@@ -0,0 +1,5 @@
+---
+
+- name: Disable master service
+ systemd: name={{ openshift.common.service_type }}-master state=stopped enabled=no masked=yes
+ ignore_errors: true
diff --git a/roles/openshift_master/tasks/files b/roles/openshift_master/tasks/files
deleted file mode 120000
index feb122881..000000000
--- a/roles/openshift_master/tasks/files
+++ /dev/null
@@ -1 +0,0 @@
-../files \ No newline at end of file
diff --git a/roles/openshift_master/tasks/firewall.yml b/roles/openshift_master/tasks/firewall.yml
new file mode 100644
index 000000000..e51eeb56e
--- /dev/null
+++ b/roles/openshift_master/tasks/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_openshift_master_firewall_enabled | bool and not r_openshift_master_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_master_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_master_os_firewall_deny }}"
+
+- when: r_openshift_master_firewall_enabled | bool and r_openshift_master_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_master_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_master_os_firewall_deny }}"
diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml
index 1f182a25c..b1412c3d9 100644
--- a/roles/openshift_master/tasks/main.yml
+++ b/roles/openshift_master/tasks/main.yml
@@ -12,16 +12,20 @@
# HA Variable Validation
- fail:
msg: "openshift_master_cluster_method must be set to either 'native' or 'pacemaker' for multi-master installations"
- when: openshift_master_ha | bool and ((openshift_master_cluster_method is not defined) or (openshift_master_cluster_method is defined and openshift_master_cluster_method not in ["native", "pacemaker"]))
+ when: openshift.master.ha | bool and ((openshift.master.cluster_method is not defined) or (openshift.master.cluster_method is defined and openshift.master.cluster_method not in ["native", "pacemaker"]))
- fail:
msg: "'native' high availability is not supported for the requested OpenShift version"
- when: openshift_master_ha | bool and openshift_master_cluster_method == "native" and not openshift.common.version_gte_3_1_or_1_1 | bool
+ when: openshift.master.ha | bool and openshift.master.cluster_method == "native" and not openshift.common.version_gte_3_1_or_1_1 | bool
- fail:
msg: "openshift_master_cluster_password must be set for multi-master installations"
- when: openshift_master_ha | bool and openshift_master_cluster_method == "pacemaker" and (openshift_master_cluster_password is not defined or not openshift_master_cluster_password)
+ when: openshift.master.ha | bool and openshift.master.cluster_method == "pacemaker" and (openshift_master_cluster_password is not defined or not openshift_master_cluster_password)
- fail:
msg: "Pacemaker based HA is not supported at this time when used with containerized installs"
- when: openshift_master_ha | bool and openshift_master_cluster_method == "pacemaker" and openshift.common.is_containerized | bool
+ when: openshift.master.ha | bool and openshift.master.cluster_method == "pacemaker" and openshift.common.is_containerized | bool
+
+- name: Open up firewall ports
+ include: firewall.yml
+ static: yes
- name: Install Master package
package:
@@ -57,7 +61,6 @@
args:
creates: "{{ openshift_master_policy }}"
notify:
- - restart master
- restart master api
- restart master controllers
@@ -67,7 +70,6 @@
dest: "{{ openshift_master_scheduler_conf }}"
backup: true
notify:
- - restart master
- restart master api
- restart master controllers
@@ -146,6 +148,9 @@
local_facts:
no_proxy_etcd_host_ips: "{{ openshift_no_proxy_etcd_host_ips }}"
+- name: Remove the legacy master service if it exists
+ include: clean_systemd_units.yml
+
- name: Install the systemd units
include: systemd_units.yml
@@ -162,7 +167,6 @@
mode: 0600
when: openshift.master.session_auth_secrets is defined and openshift.master.session_encryption_secrets is defined
notify:
- - restart master
- restart master api
- set_fact:
@@ -178,66 +182,18 @@
group: root
mode: 0600
notify:
- - restart master
- restart master api
- restart master controllers
- include: set_loopback_context.yml
when: openshift.common.version_gte_3_2_or_1_2
-# TODO: Master startup can fail when ec2 transparently reallocates the block
-# storage, causing etcd writes to temporarily fail. Retry failures blindly just
-# once to allow time for this transient condition to to resolve and for systemd
-# to restart the master (which will eventually succeed).
-#
-# https://github.com/coreos/etcd/issues/3864
-# https://github.com/openshift/origin/issues/6065
-# https://github.com/openshift/origin/issues/6447
-- name: Start and enable master
- systemd:
- daemon_reload: yes
- name: "{{ openshift.common.service_type }}-master"
- enabled: yes
- state: started
- when: not openshift_master_ha | bool
- register: start_result
- until: not start_result | failed
- retries: 1
- delay: 60
- notify: Verify API Server
-
-- name: Dump logs from master service if it failed
- command: journalctl --no-pager -n 100 -u {{ openshift.common.service_type }}-master
- when: start_result | failed
-
-- name: Stop and disable non-HA master when running HA
- systemd:
- name: "{{ openshift.common.service_type }}-master"
- enabled: no
- state: stopped
- when: openshift_master_ha | bool
- register: task_result
- failed_when: task_result|failed and 'could not' not in task_result.msg|lower
-
-- set_fact:
- master_service_status_changed: "{{ start_result | changed }}"
- when: not openshift_master_ha | bool
-
-- name: Mask master service
- systemd:
- name: "{{ openshift.common.service_type }}-master"
- masked: yes
- when: >
- openshift_master_ha | bool and
- openshift.master.cluster_method == 'native' and
- not openshift.common.is_containerized | bool
-
- name: Start and enable master api on first master
systemd:
name: "{{ openshift.common.service_type }}-master-api"
enabled: yes
state: started
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
+ when: openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
register: start_result
until: not start_result | failed
retries: 1
@@ -249,18 +205,18 @@
- set_fact:
master_api_service_status_changed: "{{ start_result | changed }}"
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
+ when: openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
- pause:
seconds: 15
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
+ when: openshift.master.ha | bool and openshift.master.cluster_method == 'native'
- name: Start and enable master api all masters
systemd:
name: "{{ openshift.common.service_type }}-master-api"
enabled: yes
state: started
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
+ when: openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
register: start_result
until: not start_result | failed
retries: 1
@@ -272,7 +228,7 @@
- set_fact:
master_api_service_status_changed: "{{ start_result | changed }}"
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
+ when: openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
# A separate wait is required here for native HA since notifies will
# be resolved after all tasks in the role.
@@ -293,14 +249,14 @@
delay: 1
run_once: true
changed_when: false
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and master_api_service_status_changed | bool
+ when: openshift.master.cluster_method == 'native' and master_api_service_status_changed | bool
- name: Start and enable master controller on first master
systemd:
name: "{{ openshift.common.service_type }}-master-controllers"
enabled: yes
state: started
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
+ when: openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
register: start_result
until: not start_result | failed
retries: 1
@@ -313,14 +269,14 @@
- name: Wait for master controller service to start on first master
pause:
seconds: 15
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
+ when: openshift.master.cluster_method == 'native'
- name: Start and enable master controller on all masters
systemd:
name: "{{ openshift.common.service_type }}-master-controllers"
enabled: yes
state: started
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
+ when: openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
register: start_result
until: not start_result | failed
retries: 1
@@ -332,11 +288,11 @@
- set_fact:
master_controllers_service_status_changed: "{{ start_result | changed }}"
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
+ when: openshift.master.cluster_method == 'native'
- name: Install cluster packages
package: name=pcs state=present
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'pacemaker'
+ when: openshift.master.cluster_method == 'pacemaker'
and not openshift.common.is_containerized | bool
register: install_result
@@ -345,7 +301,7 @@
name: pcsd
enabled: yes
state: started
- when: openshift_master_ha | bool and openshift.master.cluster_method == 'pacemaker'
+ when: openshift.master.cluster_method == 'pacemaker'
and not openshift.common.is_containerized | bool
- name: Set the cluster user password
diff --git a/roles/openshift_master/tasks/system_container.yml b/roles/openshift_master/tasks/system_container.yml
index 9944682cc..8d343336f 100644
--- a/roles/openshift_master/tasks/system_container.yml
+++ b/roles/openshift_master/tasks/system_container.yml
@@ -10,14 +10,6 @@
atomic containers list --no-trunc -a -f container={{ openshift.common.service_type }}-master
register: result
-- name: Install or Update master system container
- oc_atomic_container:
- name: "{{ openshift.common.service_type }}-master"
- image: "{{ 'docker:' if openshift.common.system_images_registry == 'docker' else openshift.common.system_images_registry + '/' }}{{ openshift.master.master_system_image }}:{{ openshift_image_tag }}"
- state: latest
- when:
- - not l_is_ha
-
# HA
- name: Install or Update HA api master system container
oc_atomic_container:
@@ -26,15 +18,11 @@
state: latest
values:
- COMMAND=api
- when:
- - l_is_ha
- name: Install or Update HA controller master system container
oc_atomic_container:
name: "{{ openshift.common.service_type }}-master-controllers"
- image: "{{{ 'docker:' if openshift.common.system_images_registry == 'docker' else openshift.common.system_images_registry + '/' }}{ openshift.master.master_system_image }}:{{ openshift_image_tag }}"
+ image: "{{ 'docker:' if openshift.common.system_images_registry == 'docker' else openshift.common.system_images_registry + '/' }}{{ openshift.master.master_system_image }}:{{ openshift_image_tag }}"
state: latest
values:
- COMMAND=controllers
- when:
- - l_is_ha
diff --git a/roles/openshift_master/tasks/systemd_units.yml b/roles/openshift_master/tasks/systemd_units.yml
index d71ad3459..723bdb0c4 100644
--- a/roles/openshift_master/tasks/systemd_units.yml
+++ b/roles/openshift_master/tasks/systemd_units.yml
@@ -22,34 +22,12 @@
changed_when: "'Downloaded newer image' in pull_result.stdout"
when: openshift.common.is_containerized | bool and not openshift.common.is_master_system_container | bool
-# workaround for missing systemd unit files
-- name: "Create the {{ openshift.common.service_type }} systemd unit file"
- template:
- src: "master_docker/master.docker.service.j2"
- dest: "{{ containerized_svc_dir }}/{{ openshift.common.service_type }}-master.service"
- when:
- - openshift.common.is_containerized | bool and (openshift.master.ha is not defined or not openshift.master.ha) | bool
- - not openshift.common.is_master_system_container | bool
- register: create_master_unit_file
-
-- name: "Install {{ openshift.common.service_type }} systemd unit file"
- copy:
- dest: "/etc/systemd/system/{{ openshift.common.service_type }}-master.service"
- src: "{{ openshift.common.service_type }}-master.service"
- register: create_master_unit_file
- when:
- - not openshift.common.is_containerized | bool
- - (openshift.master.ha is not defined or not openshift.master.ha) | bool
-
-- command: systemctl daemon-reload
- when: create_master_unit_file | changed
-
-- name: Create the ha systemd unit files for api and controller services
+- name: Create the ha systemd unit files
template:
src: "{{ ha_svc_template_path }}/atomic-openshift-master-{{ item }}.service.j2"
dest: "{{ containerized_svc_dir }}/{{ openshift.common.service_type }}-master-{{ item }}.service"
when:
- - openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ - openshift.master.cluster_method == "native"
- not openshift.common.is_master_system_container | bool
with_items:
- api
@@ -63,14 +41,14 @@
- name: Preserve Master API Proxy Config options
command: grep PROXY /etc/sysconfig/{{ openshift.common.service_type }}-master-api
register: master_api_proxy
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
failed_when: false
changed_when: false
- name: Preserve Master API AWS options
command: grep AWS_ /etc/sysconfig/{{ openshift.common.service_type }}-master-api
register: master_api_aws
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
failed_when: false
changed_when: false
@@ -79,12 +57,12 @@
src: "{{ ha_svc_template_path }}/atomic-openshift-master-api.j2"
dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-api
backup: true
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
notify:
- restart master api
- name: Restore Master API Proxy Config Options
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
and master_api_proxy.rc == 0 and 'http_proxy' not in openshift.common and 'https_proxy' not in openshift.common
lineinfile:
dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-api
@@ -92,7 +70,7 @@
with_items: "{{ master_api_proxy.stdout_lines | default([]) }}"
- name: Restore Master API AWS Options
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
and master_api_aws.rc == 0 and
not (openshift_cloudprovider_kind is defined and openshift_cloudprovider_kind == 'aws' and openshift_cloudprovider_aws_access_key is defined and openshift_cloudprovider_aws_secret_key is defined)
lineinfile:
@@ -104,14 +82,14 @@
- name: Preserve Master Controllers Proxy Config options
command: grep PROXY /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers
register: master_controllers_proxy
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
failed_when: false
changed_when: false
- name: Preserve Master Controllers AWS options
command: grep AWS_ /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers
register: master_controllers_aws
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
failed_when: false
changed_when: false
@@ -120,7 +98,7 @@
src: "{{ ha_svc_template_path }}/atomic-openshift-master-controllers.j2"
dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers
backup: true
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
notify:
- restart master controllers
@@ -129,7 +107,7 @@
dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers
line: "{{ item }}"
with_items: "{{ master_controllers_proxy.stdout_lines | default([]) }}"
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
and master_controllers_proxy.rc == 0 and 'http_proxy' not in openshift.common and 'https_proxy' not in openshift.common
- name: Restore Master Controllers AWS Options
@@ -137,39 +115,6 @@
dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers
line: "{{ item }}"
with_items: "{{ master_controllers_aws.stdout_lines | default([]) }}"
- when: openshift.master.ha is defined and openshift.master.ha | bool and openshift_master_cluster_method == "native"
+ when: openshift.master.cluster_method == "native"
and master_controllers_aws.rc == 0 and
not (openshift_cloudprovider_kind is defined and openshift_cloudprovider_kind == 'aws' and openshift_cloudprovider_aws_access_key is defined and openshift_cloudprovider_aws_secret_key is defined)
-
-- name: Install Master docker service file
- template:
- dest: "/etc/systemd/system/{{ openshift.common.service_type }}-master.service"
- src: master_docker/master.docker.service.j2
- register: install_result
- when: openshift.common.is_containerized | bool and openshift.master.ha is defined and not openshift.master.ha | bool and not openshift.common.is_master_system_container | bool
-
-- name: Preserve Master Proxy Config options
- command: grep PROXY /etc/sysconfig/{{ openshift.common.service_type }}-master
- register: master_proxy_result
- failed_when: false
- changed_when: false
-
-- set_fact:
- master_proxy: "{{ master_proxy_result.stdout_lines | default([]) }}"
-
-- name: Preserve Master AWS options
- command: grep AWS_ /etc/sysconfig/{{ openshift.common.service_type }}-master
- register: master_aws_result
- failed_when: false
- changed_when: false
-
-- set_fact:
- master_aws: "{{ master_aws_result.stdout_lines | default([]) }}"
-
-- name: Create the master service env file
- template:
- src: "atomic-openshift-master.j2"
- dest: /etc/sysconfig/{{ openshift.common.service_type }}-master
- backup: true
- notify:
- - restart master
diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2
index 7964bbb48..c14579435 100644
--- a/roles/openshift_master/templates/master.yaml.v1.j2
+++ b/roles/openshift_master/templates/master.yaml.v1.j2
@@ -47,11 +47,10 @@ assetConfig:
{% if openshift.master.audit_config | default(none) is not none and openshift.common.version_gte_3_2_or_1_2 | bool %}
auditConfig:{{ openshift.master.audit_config | to_padded_yaml(level=1) }}
{% endif %}
-{% if openshift_master_ha | bool %}
-controllerLeaseTTL: {{ openshift.master.controller_lease_ttl | default('30') }}
-{% endif %}
{% if openshift.common.version_gte_3_3_or_1_3 | bool %}
controllerConfig:
+ election:
+ lockName: openshift-master-controllers
serviceServingCert:
signer:
certFile: service-signer.crt
diff --git a/roles/openshift_master/templates/master_docker/master.docker.service.j2 b/roles/openshift_master/templates/master_docker/master.docker.service.j2
deleted file mode 100644
index 31c1dfc33..000000000
--- a/roles/openshift_master/templates/master_docker/master.docker.service.j2
+++ /dev/null
@@ -1,18 +0,0 @@
-[Unit]
-After={{ openshift.docker.service_name }}.service
-Requires={{ openshift.docker.service_name }}.service
-PartOf={{ openshift.docker.service_name }}.service
-After=etcd_container.service
-Wants=etcd_container.service
-
-[Service]
-EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-master
-ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type }}-master
-ExecStart=/usr/bin/docker run --rm --privileged --net=host --name {{ openshift.common.service_type }}-master --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-master -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }} -v /var/log:/var/log -v /var/run/docker.sock:/var/run/docker.sock -v {{ openshift.common.config_base }}:{{ openshift.common.config_base }} {% if openshift_cloudprovider_kind | default('') != '' -%} -v {{ openshift.common.config_base }}/cloudprovider:{{ openshift.common.config_base}}/cloudprovider {% endif -%} -v /etc/pki:/etc/pki:ro {{ openshift.master.master_image }}:${IMAGE_VERSION} start master --config=${CONFIG_FILE} $OPTIONS
-ExecStartPost=/usr/bin/sleep 10
-ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-master
-Restart=always
-RestartSec=5s
-
-[Install]
-WantedBy={{ openshift.docker.service_name }}.service
diff --git a/roles/openshift_master/vars/main.yml b/roles/openshift_master/vars/main.yml
index 7745d014f..cf39b73f6 100644
--- a/roles/openshift_master/vars/main.yml
+++ b/roles/openshift_master/vars/main.yml
@@ -19,5 +19,4 @@ openshift_master_valid_grant_methods:
- prompt
- deny
-l_is_ha: "{{ openshift.master.ha is defined and openshift.master.ha | bool }}"
openshift_master_is_scaleup_host: False
diff --git a/roles/openshift_master_facts/tasks/main.yml b/roles/openshift_master_facts/tasks/main.yml
index ef8dcd5fd..fa228af2a 100644
--- a/roles/openshift_master_facts/tasks/main.yml
+++ b/roles/openshift_master_facts/tasks/main.yml
@@ -32,7 +32,7 @@
openshift_facts:
role: master
local_facts:
- cluster_method: "{{ openshift_master_cluster_method | default(None) }}"
+ cluster_method: "{{ openshift_master_cluster_method | default('native') }}"
cluster_hostname: "{{ openshift_master_cluster_hostname | default(None) }}"
cluster_public_hostname: "{{ openshift_master_cluster_public_hostname | default(None) }}"
debug_level: "{{ openshift_master_debug_level | default(openshift.common.debug_level) }}"
diff --git a/roles/openshift_metrics/handlers/main.yml b/roles/openshift_metrics/handlers/main.yml
index 69c5a1663..ce7688581 100644
--- a/roles/openshift_metrics/handlers/main.yml
+++ b/roles/openshift_metrics/handlers/main.yml
@@ -1,17 +1,12 @@
---
-- name: restart master
- systemd: name={{ openshift.common.service_type }}-master state=restarted
- when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool))
- notify: Verify API Server
-
- name: restart master api
systemd: name={{ openshift.common.service_type }}-master-api state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
notify: Verify API Server
- name: restart master controllers
systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
- when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+ when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
- name: Verify API Server
# Using curl here since the uri module requires python-httplib2 and
diff --git a/roles/openshift_metrics/tasks/update_master_config.yaml b/roles/openshift_metrics/tasks/update_master_config.yaml
index be1e3c3a0..5059d8d94 100644
--- a/roles/openshift_metrics/tasks/update_master_config.yaml
+++ b/roles/openshift_metrics/tasks/update_master_config.yaml
@@ -5,7 +5,6 @@
yaml_key: assetConfig.metricsPublicURL
yaml_value: "https://{{ openshift_metrics_hawkular_hostname}}/hawkular/metrics"
notify:
- - restart master
- restart master api
- restart master controllers
tags:
diff --git a/roles/openshift_node/defaults/main.yml b/roles/openshift_node/defaults/main.yml
index 47073ee0f..973b3a619 100644
--- a/roles/openshift_node/defaults/main.yml
+++ b/roles/openshift_node/defaults/main.yml
@@ -1,5 +1,8 @@
---
-os_firewall_allow:
+r_openshift_node_firewall_enabled: True
+r_openshift_node_use_firewalld: False
+r_openshift_node_os_firewall_deny: []
+r_openshift_node_os_firewall_allow:
- service: Kubernetes kubelet
port: 10250/tcp
- service: http
@@ -8,7 +11,13 @@ os_firewall_allow:
port: 443/tcp
- service: OpenShift OVS sdn
port: 4789/udp
- when: openshift.common.use_openshift_sdn | default(true) | bool
+ cond: openshift.common.use_openshift_sdn | default(true) | bool
- service: Calico BGP Port
port: 179/tcp
- when: openshift.common.use_calico | bool
+ cond: "{{ openshift.common.use_calico | bool }}"
+- service: Kubernetes service NodePort TCP
+ port: "{{ openshift_node_port_range | default('') }}/tcp"
+ cond: "{{ openshift_node_port_range is defined }}"
+- service: Kubernetes service NodePort UDP
+ port: "{{ openshift_node_port_range | default('') }}/udp"
+ cond: "{{ openshift_node_port_range is defined }}"
diff --git a/roles/openshift_node/meta/main.yml b/roles/openshift_node/meta/main.yml
index 4fb841add..06373de04 100644
--- a/roles/openshift_node/meta/main.yml
+++ b/roles/openshift_node/meta/main.yml
@@ -14,36 +14,11 @@ galaxy_info:
dependencies:
- role: openshift_node_facts
- role: lib_openshift
+- role: lib_os_firewall
- role: openshift_common
- role: openshift_clock
- role: openshift_docker
- role: openshift_node_certificates
- role: openshift_cloud_provider
-- role: os_firewall
- os_firewall_allow:
- - service: Kubernetes kubelet
- port: 10250/tcp
- - service: http
- port: 80/tcp
- - service: https
- port: 443/tcp
-- role: os_firewall
- os_firewall_allow:
- - service: OpenShift OVS sdn
- port: 4789/udp
- when: openshift.common.use_openshift_sdn | default(true) | bool
-- role: os_firewall
- os_firewall_allow:
- - service: Calico BGP Port
- port: 179/tcp
- when: openshift.common.use_calico | bool
-
-- role: os_firewall
- os_firewall_allow:
- - service: Kubernetes service NodePort TCP
- port: "{{ openshift_node_port_range | default('') }}/tcp"
- - service: Kubernetes service NodePort UDP
- port: "{{ openshift_node_port_range | default('') }}/udp"
- when: openshift_node_port_range is defined
- role: openshift_node_dnsmasq
when: openshift.common.use_dnsmasq | bool
diff --git a/roles/openshift_node/tasks/firewall.yml b/roles/openshift_node/tasks/firewall.yml
new file mode 100644
index 000000000..255aa886a
--- /dev/null
+++ b/roles/openshift_node/tasks/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_openshift_node_firewall_enabled | bool and not r_openshift_node_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_node_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_node_os_firewall_deny }}"
+
+- when: r_openshift_node_firewall_enabled | bool and r_openshift_node_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_node_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_node_os_firewall_deny }}"
diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index ca4fef360..3353a22e3 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -6,6 +6,38 @@
- (not ansible_selinux or ansible_selinux.status != 'enabled') and deployment_type in ['enterprise', 'online', 'atomic-enterprise', 'openshift-enterprise']
- not openshift_docker_use_crio | default(false)
+- name: setup firewall
+ include: firewall.yml
+ static: yes
+
+- name: Set node facts
+ openshift_facts:
+ role: "{{ item.role }}"
+ local_facts: "{{ item.local_facts }}"
+ with_items:
+ # Reset node labels to an empty dictionary.
+ - role: node
+ local_facts:
+ labels: {}
+ - role: node
+ local_facts:
+ annotations: "{{ openshift_node_annotations | default(none) }}"
+ debug_level: "{{ openshift_node_debug_level | default(openshift.common.debug_level) }}"
+ iptables_sync_period: "{{ openshift_node_iptables_sync_period | default(None) }}"
+ kubelet_args: "{{ openshift_node_kubelet_args | default(None) }}"
+ labels: "{{ lookup('oo_option', 'openshift_node_labels') | default( openshift_node_labels | default(none), true) }}"
+ registry_url: "{{ oreg_url_node | default(oreg_url) | default(None) }}"
+ schedulable: "{{ openshift_schedulable | default(openshift_scheduleable) | default(None) }}"
+ sdn_mtu: "{{ openshift_node_sdn_mtu | default(None) }}"
+ storage_plugin_deps: "{{ osn_storage_plugin_deps | default(None) }}"
+ set_node_ip: "{{ openshift_set_node_ip | default(None) }}"
+ node_image: "{{ osn_image | default(None) }}"
+ ovs_image: "{{ osn_ovs_image | default(None) }}"
+ proxy_mode: "{{ openshift_node_proxy_mode | default('iptables') }}"
+ local_quota_per_fsgroup: "{{ openshift_node_local_quota_per_fsgroup | default(None) }}"
+ dns_ip: "{{ openshift_dns_ip | default(none) | get_dns_ip(hostvars[inventory_hostname])}}"
+ env_vars: "{{ openshift_node_env_vars | default(None) }}"
+
# https://docs.openshift.com/container-platform/3.4/admin_guide/overcommit.html#disabling-swap-memory
- name: Check for swap usage
command: grep "^[^#].*swap" /etc/fstab
diff --git a/roles/openshift_node_upgrade/tasks/main.yml b/roles/openshift_node_upgrade/tasks/main.yml
index f984a04b2..bc092c26c 100644
--- a/roles/openshift_node_upgrade/tasks/main.yml
+++ b/roles/openshift_node_upgrade/tasks/main.yml
@@ -24,7 +24,6 @@
name: "{{ item }}"
state: stopped
with_items:
- - "{{ openshift.common.service_type }}-master"
- "{{ openshift.common.service_type }}-master-controllers"
- "{{ openshift.common.service_type }}-master-api"
- etcd_container
@@ -81,7 +80,6 @@
with_items:
- etcd_container
- openvswitch
- - "{{ openshift.common.service_type }}-master"
- "{{ openshift.common.service_type }}-master-api"
- "{{ openshift.common.service_type }}-master-controllers"
- "{{ openshift.common.service_type }}-node"
diff --git a/roles/openshift_node_upgrade/tasks/restart.yml b/roles/openshift_node_upgrade/tasks/restart.yml
index f228b6e08..a4fa51172 100644
--- a/roles/openshift_node_upgrade/tasks/restart.yml
+++ b/roles/openshift_node_upgrade/tasks/restart.yml
@@ -31,7 +31,6 @@
with_items:
- etcd_container
- openvswitch
- - "{{ openshift.common.service_type }}-master"
- "{{ openshift.common.service_type }}-master-api"
- "{{ openshift.common.service_type }}-master-controllers"
- "{{ openshift.common.service_type }}-node"
diff --git a/roles/openshift_provisioners/tasks/install_efs.yaml b/roles/openshift_provisioners/tasks/install_efs.yaml
index b53b6afa1..4a6e00513 100644
--- a/roles/openshift_provisioners/tasks/install_efs.yaml
+++ b/roles/openshift_provisioners/tasks/install_efs.yaml
@@ -67,4 +67,4 @@
register: efs_output
failed_when: efs_output.rc == 1 and 'exists' not in efs_output.stderr
check_mode: no
- when: efs_anyuid.stdout.find("system:serviceaccount:{{openshift_provisioners_project}}:provisioners-efs") == -1
+ when: efs_anyuid.stdout.find("system:serviceaccount:" + openshift_provisioners_project + ":provisioners-efs") == -1
diff --git a/roles/openshift_service_catalog/tasks/wire_aggregator.yml b/roles/openshift_service_catalog/tasks/wire_aggregator.yml
index d5291a99a..1c788470a 100644
--- a/roles/openshift_service_catalog/tasks/wire_aggregator.yml
+++ b/roles/openshift_service_catalog/tasks/wire_aggregator.yml
@@ -156,24 +156,16 @@
register: yedit_output
#restart master serially here
-- name: restart master
- systemd: name={{ openshift.common.service_type }}-master state=restarted
- when:
- - yedit_output.changed
- - openshift.master.ha is not defined or not openshift.master.ha | bool
-
- name: restart master api
systemd: name={{ openshift.common.service_type }}-master-api state=restarted
when:
- yedit_output.changed
- - openshift.master.ha is defined and openshift.master.ha | bool
- openshift.master.cluster_method == 'native'
- name: restart master controllers
systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
when:
- yedit_output.changed
- - openshift.master.ha is defined and openshift.master.ha | bool
- openshift.master.cluster_method == 'native'
- name: Verify API Server
diff --git a/roles/openshift_storage_nfs/defaults/main.yml b/roles/openshift_storage_nfs/defaults/main.yml
index 7f3c054e7..4a2bc6141 100644
--- a/roles/openshift_storage_nfs/defaults/main.yml
+++ b/roles/openshift_storage_nfs/defaults/main.yml
@@ -1,4 +1,12 @@
---
+r_openshift_storage_nfs_firewall_enabled: True
+r_openshift_storage_nfs_use_firewalld: False
+
+r_openshift_storage_nfs_os_firewall_deny: []
+r_openshift_storage_nfs_os_firewall_allow:
+- service: nfs
+ port: "2049/tcp"
+
openshift:
hosted:
registry:
diff --git a/roles/openshift_storage_nfs/meta/main.yml b/roles/openshift_storage_nfs/meta/main.yml
index 62e38bd8c..b360d0658 100644
--- a/roles/openshift_storage_nfs/meta/main.yml
+++ b/roles/openshift_storage_nfs/meta/main.yml
@@ -10,9 +10,6 @@ galaxy_info:
versions:
- 7
dependencies:
-- role: os_firewall
- os_firewall_allow:
- - service: nfs
- port: "2049/tcp"
+- role: lib_os_firewall
- role: openshift_hosted_facts
- role: openshift_repos
diff --git a/roles/openshift_storage_nfs/tasks/firewall.yml b/roles/openshift_storage_nfs/tasks/firewall.yml
new file mode 100644
index 000000000..c1c318ff4
--- /dev/null
+++ b/roles/openshift_storage_nfs/tasks/firewall.yml
@@ -0,0 +1,40 @@
+---
+- when: r_openshift_storage_nfs_firewall_enabled | bool and not r_openshift_storage_nfs_use_firewalld | bool
+ block:
+ - name: Add iptables allow rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: add
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_storage_nfs_os_firewall_allow }}"
+
+ - name: Remove iptables rules
+ os_firewall_manage_iptables:
+ name: "{{ item.service }}"
+ action: remove
+ protocol: "{{ item.port.split('/')[1] }}"
+ port: "{{ item.port.split('/')[0] }}"
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_storage_nfs_os_firewall_deny }}"
+
+- when: r_openshift_storage_nfs_firewall_enabled | bool and r_openshift_storage_nfs_use_firewalld | bool
+ block:
+ - name: Add firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: enabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_storage_nfs_os_firewall_allow }}"
+
+ - name: Remove firewalld allow rules
+ firewalld:
+ port: "{{ item.port }}"
+ permanent: true
+ immediate: true
+ state: disabled
+ when: item.cond | default(True)
+ with_items: "{{ r_openshift_storage_nfs_os_firewall_deny }}"
diff --git a/roles/openshift_storage_nfs/tasks/main.yml b/roles/openshift_storage_nfs/tasks/main.yml
index 019ada2fb..51f8f4e0e 100644
--- a/roles/openshift_storage_nfs/tasks/main.yml
+++ b/roles/openshift_storage_nfs/tasks/main.yml
@@ -1,4 +1,8 @@
---
+- name: setup firewall
+ include: firewall.yml
+ static: yes
+
- name: Install nfs-utils
package: name=nfs-utils state=present
diff --git a/roles/os_firewall/README.md b/roles/os_firewall/README.md
index e7ef544f4..be0b8291a 100644
--- a/roles/os_firewall/README.md
+++ b/roles/os_firewall/README.md
@@ -1,8 +1,8 @@
OS Firewall
===========
-OS Firewall manages firewalld and iptables firewall settings for a minimal use
-case (Adding/Removing rules based on protocol and port number).
+OS Firewall manages firewalld and iptables installation.
+case.
Note: firewalld is not supported on Atomic Host
https://bugzilla.redhat.com/show_bug.cgi?id=1403331
@@ -18,8 +18,6 @@ Role Variables
| Name | Default | |
|---------------------------|---------|----------------------------------------|
| os_firewall_use_firewalld | False | If false, use iptables |
-| os_firewall_allow | [] | List of service,port mappings to allow |
-| os_firewall_deny | [] | List of service, port mappings to deny |
Dependencies
------------
@@ -29,34 +27,27 @@ None.
Example Playbook
----------------
-Use iptables and open tcp ports 80 and 443:
+Use iptables:
```
---
- hosts: servers
- vars:
- os_firewall_use_firewalld: false
- os_firewall_allow:
- - service: httpd
- port: 80/tcp
- - service: https
- port: 443/tcp
- roles:
- - os_firewall
+ task:
+ - include_role:
+ name: os_firewall
+ vars:
+ os_firewall_use_firewalld: false
```
-Use firewalld and open tcp port 443 and close previously open tcp port 80:
+Use firewalld:
```
---
- hosts: servers
vars:
- os_firewall_allow:
- - service: https
- port: 443/tcp
- os_firewall_deny:
- - service: httpd
- port: 80/tcp
- roles:
- - os_firewall
+ tasks:
+ - include_role:
+ name: os_firewall
+ vars:
+ os_firewall_use_firewalld: true
```
License
diff --git a/roles/os_firewall/defaults/main.yml b/roles/os_firewall/defaults/main.yml
index 01859e5fc..f96a80f1c 100644
--- a/roles/os_firewall/defaults/main.yml
+++ b/roles/os_firewall/defaults/main.yml
@@ -3,5 +3,3 @@ os_firewall_enabled: True
# firewalld is not supported on Atomic Host
# https://bugzilla.redhat.com/show_bug.cgi?id=1403331
os_firewall_use_firewalld: "{{ False }}"
-os_firewall_allow: []
-os_firewall_deny: []
diff --git a/roles/os_firewall/tasks/firewall/firewalld.yml b/roles/os_firewall/tasks/firewall/firewalld.yml
index 509655b0c..2cc7af478 100644
--- a/roles/os_firewall/tasks/firewall/firewalld.yml
+++ b/roles/os_firewall/tasks/firewall/firewalld.yml
@@ -49,19 +49,3 @@
until: pkaction.rc == 0
retries: 6
delay: 10
-
-- name: Add firewalld allow rules
- firewalld:
- port: "{{ item.port }}"
- permanent: true
- immediate: true
- state: enabled
- with_items: "{{ os_firewall_allow }}"
-
-- name: Remove firewalld allow rules
- firewalld:
- port: "{{ item.port }}"
- permanent: true
- immediate: true
- state: disabled
- with_items: "{{ os_firewall_deny }}"
diff --git a/roles/os_firewall/tasks/firewall/iptables.yml b/roles/os_firewall/tasks/firewall/iptables.yml
index 55f2fc471..ccb3c4713 100644
--- a/roles/os_firewall/tasks/firewall/iptables.yml
+++ b/roles/os_firewall/tasks/firewall/iptables.yml
@@ -33,19 +33,3 @@
- name: need to pause here, otherwise the iptables service starting can sometimes cause ssh to fail
pause: seconds=10
when: result | changed
-
-- name: Add iptables allow rules
- os_firewall_manage_iptables:
- name: "{{ item.service }}"
- action: add
- protocol: "{{ item.port.split('/')[1] }}"
- port: "{{ item.port.split('/')[0] }}"
- with_items: "{{ os_firewall_allow }}"
-
-- name: Remove iptables rules
- os_firewall_manage_iptables:
- name: "{{ item.service }}"
- action: remove
- protocol: "{{ item.port.split('/')[1] }}"
- port: "{{ item.port.split('/')[0] }}"
- with_items: "{{ os_firewall_deny }}"