summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Gugino <mgugino@redhat.com>2017-12-08 18:30:09 -0500
committerMichael Gugino <mgugino@redhat.com>2017-12-12 18:00:05 -0500
commita6d5c625956a5051b7bbd9fc48430b9df11084ee (patch)
treed6ad72c967c3fcf2bdecb195b93f03a4c4941b47
parent1dccaa2ad90b363cf7f2c04d5781693e69451ffb (diff)
downloadopenshift-a6d5c625956a5051b7bbd9fc48430b9df11084ee.tar.gz
openshift-a6d5c625956a5051b7bbd9fc48430b9df11084ee.tar.bz2
openshift-a6d5c625956a5051b7bbd9fc48430b9df11084ee.tar.xz
openshift-a6d5c625956a5051b7bbd9fc48430b9df11084ee.zip
Refactor node upgrade to include less serial tasks
This commit moves the pulling of images, packages, and updating config files into a non-serialized play. The serialized play is now in charge of marking unschedulable, draining, stopping and restarting services, and marking schedulable. If rpm install / container download takes 60s per host, this will save 3 hours and 10 minutes at 200 hosts per cluster and forks of 20 hosts.
-rw-r--r--playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml33
-rw-r--r--roles/openshift_node/handlers/main.yml6
-rw-r--r--roles/openshift_node/tasks/dnsmasq.yml43
-rw-r--r--roles/openshift_node/tasks/dnsmasq_install.yml43
-rw-r--r--roles/openshift_node/tasks/docker/upgrade.yml27
-rw-r--r--roles/openshift_node/tasks/main.yml1
-rw-r--r--roles/openshift_node/tasks/upgrade.yml127
-rw-r--r--roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml11
-rw-r--r--roles/openshift_node/tasks/upgrade/restart.yml9
-rw-r--r--roles/openshift_node/tasks/upgrade_pre.yml118
10 files changed, 218 insertions, 200 deletions
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index f7a85545b..a3cb1d0f9 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -1,16 +1,25 @@
---
+- name: Prepull images and rpms before doing rolling restart
+ hosts: oo_nodes_to_upgrade:!oo_masters_to_config
+ roles:
+ - role: openshift_facts
+ tasks:
+ - include_role:
+ name: openshift_node
+ tasks_from: upgrade_pre.yml
+ vars:
+ openshift_node_upgrade_in_progress: True
+
- name: Drain and upgrade nodes
hosts: oo_nodes_to_upgrade:!oo_masters_to_config
# This var must be set with -e on invocation, as it is not a per-host inventory var
# and is evaluated early. Values such as "20%" can also be used.
serial: "{{ openshift_upgrade_nodes_serial | default(1) }}"
max_fail_percentage: "{{ openshift_upgrade_nodes_max_fail_percentage | default(0) }}"
-
+ roles:
+ - lib_openshift
+ - openshift_facts
pre_tasks:
- - name: Load lib_openshift modules
- import_role:
- name: lib_openshift
-
# TODO: To better handle re-trying failed upgrades, it would be nice to check if the node
# or docker actually needs an upgrade before proceeding. Perhaps best to save this until
# we merge upgrade functionality into the base roles and a normal config.yml playbook run.
@@ -33,18 +42,12 @@
retries: 60
delay: 60
- roles:
- - openshift_facts
post_tasks:
- include_role:
name: openshift_node
tasks_from: upgrade.yml
vars:
openshift_node_upgrade_in_progress: True
- - include_role:
- name: openshift_excluder
- vars:
- r_openshift_excluder_action: enable
- name: Set node schedulability
oc_adm_manage_node:
node: "{{ openshift.node.nodename | lower }}"
@@ -55,3 +58,11 @@
register: node_schedulable
until: node_schedulable|succeeded
when: node_unschedulable|changed
+
+- name: Re-enable excluders
+ hosts: oo_nodes_to_upgrade:!oo_masters_to_config
+ tasks:
+ - include_role:
+ name: openshift_excluder
+ vars:
+ r_openshift_excluder_action: enable
diff --git a/roles/openshift_node/handlers/main.yml b/roles/openshift_node/handlers/main.yml
index 170a3dc6e..1d9797f84 100644
--- a/roles/openshift_node/handlers/main.yml
+++ b/roles/openshift_node/handlers/main.yml
@@ -4,11 +4,15 @@
name: NetworkManager
state: restarted
enabled: True
+ when:
+ - (not skip_node_svc_handlers | default(False) | bool)
- name: restart dnsmasq
systemd:
name: dnsmasq
state: restarted
+ when:
+ - (not skip_node_svc_handlers | default(False) | bool)
- name: restart openvswitch
systemd:
@@ -47,3 +51,5 @@
- name: reload systemd units
command: systemctl daemon-reload
+ when:
+ - (not skip_node_svc_handlers | default(False) | bool)
diff --git a/roles/openshift_node/tasks/dnsmasq.yml b/roles/openshift_node/tasks/dnsmasq.yml
index f210a3a21..31ca46ec0 100644
--- a/roles/openshift_node/tasks/dnsmasq.yml
+++ b/roles/openshift_node/tasks/dnsmasq.yml
@@ -1,43 +1,4 @@
---
-- name: Check for NetworkManager service
- command: >
- systemctl show NetworkManager
- register: nm_show
- changed_when: false
- ignore_errors: True
-
-- name: Set fact using_network_manager
- set_fact:
- network_manager_active: "{{ True if 'ActiveState=active' in nm_show.stdout else False }}"
-
-- name: Install dnsmasq
- package: name=dnsmasq state=installed
- when: not openshift.common.is_atomic | bool
- register: result
- until: result | success
-
-- name: ensure origin/node directory exists
- file:
- state: directory
- path: "{{ item }}"
- owner: root
- group: root
- mode: '0700'
- with_items:
- - /etc/origin
- - /etc/origin/node
-
-# this file is copied to /etc/dnsmasq.d/ when the node starts and is removed
-# when the node stops. A dbus-message is sent to dnsmasq to add the same entries
-# so that dnsmasq doesn't need to be restarted. Once we can use dnsmasq 2.77 or
-# newer we can use --server-file option to update the servers dynamically and
-# reload them by sending dnsmasq a SIGHUP. We write the file in case someone else
-# triggers a restart of dnsmasq but not a node restart.
-- name: Install node-dnsmasq.conf
- template:
- src: node-dnsmasq.conf.j2
- dest: /etc/origin/node/node-dnsmasq.conf
-
- name: Install dnsmasq configuration
template:
src: origin-dns.conf.j2
@@ -63,7 +24,3 @@
# Dynamic NetworkManager based dispatcher
- include_tasks: dnsmasq/network-manager.yml
when: network_manager_active | bool
-
-# Relies on ansible in order to configure static config
-- include_tasks: dnsmasq/no-network-manager.yml
- when: not network_manager_active | bool
diff --git a/roles/openshift_node/tasks/dnsmasq_install.yml b/roles/openshift_node/tasks/dnsmasq_install.yml
new file mode 100644
index 000000000..9f66bf12d
--- /dev/null
+++ b/roles/openshift_node/tasks/dnsmasq_install.yml
@@ -0,0 +1,43 @@
+---
+- name: Check for NetworkManager service
+ command: >
+ systemctl show NetworkManager
+ register: nm_show
+ changed_when: false
+ ignore_errors: True
+
+- name: Set fact using_network_manager
+ set_fact:
+ network_manager_active: "{{ True if 'ActiveState=active' in nm_show.stdout else False }}"
+
+- name: Install dnsmasq
+ package: name=dnsmasq state=installed
+ when: not openshift.common.is_atomic | bool
+ register: result
+ until: result | success
+
+- name: ensure origin/node directory exists
+ file:
+ state: directory
+ path: "{{ item }}"
+ owner: root
+ group: root
+ mode: '0700'
+ with_items:
+ - /etc/origin
+ - /etc/origin/node
+
+# this file is copied to /etc/dnsmasq.d/ when the node starts and is removed
+# when the node stops. A dbus-message is sent to dnsmasq to add the same entries
+# so that dnsmasq doesn't need to be restarted. Once we can use dnsmasq 2.77 or
+# newer we can use --server-file option to update the servers dynamically and
+# reload them by sending dnsmasq a SIGHUP. We write the file in case someone else
+# triggers a restart of dnsmasq but not a node restart.
+- name: Install node-dnsmasq.conf
+ template:
+ src: node-dnsmasq.conf.j2
+ dest: /etc/origin/node/node-dnsmasq.conf
+
+# Relies on ansible in order to configure static config
+- include_tasks: dnsmasq/no-network-manager.yml
+ when: not network_manager_active | bool
diff --git a/roles/openshift_node/tasks/docker/upgrade.yml b/roles/openshift_node/tasks/docker/upgrade.yml
deleted file mode 100644
index bbe9c71f5..000000000
--- a/roles/openshift_node/tasks/docker/upgrade.yml
+++ /dev/null
@@ -1,27 +0,0 @@
----
-# input variables:
-# - openshift_service_type
-# - openshift.common.is_containerized
-# - docker_version
-# - skip_docker_restart
-
-- name: Check Docker image count
- shell: "docker images -aq | wc -l"
- register: docker_image_count
-
-- debug: var=docker_image_count.stdout
-
-- service:
- name: docker
- state: stopped
- register: l_openshift_node_upgrade_docker_stop_result
- until: not l_openshift_node_upgrade_docker_stop_result | failed
- retries: 3
- delay: 30
-
-- name: Upgrade Docker
- package: name=docker{{ '-' + docker_version }} state=present
- register: result
- until: result | success
-
-# starting docker happens back in ../main.yml where it calls ../restart.yml
diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index 32c5f495f..946deb4d3 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -6,6 +6,7 @@
- deployment_type == 'openshift-enterprise'
- not openshift_use_crio
+- include_tasks: dnsmasq_install.yml
- include_tasks: dnsmasq.yml
- name: setup firewall
diff --git a/roles/openshift_node/tasks/upgrade.yml b/roles/openshift_node/tasks/upgrade.yml
index 9f333645a..ede73f22c 100644
--- a/roles/openshift_node/tasks/upgrade.yml
+++ b/roles/openshift_node/tasks/upgrade.yml
@@ -10,8 +10,6 @@
# tasks file for openshift_node_upgrade
-- include_tasks: registry_auth.yml
-
- name: Stop node and openvswitch services
service:
name: "{{ item }}"
@@ -21,58 +19,6 @@
- openvswitch
failed_when: false
-- name: Stop additional containerized services
- service:
- name: "{{ item }}"
- state: stopped
- with_items:
- - "{{ openshift_service_type }}-master-controllers"
- - "{{ openshift_service_type }}-master-api"
- - etcd_container
- failed_when: false
- when: openshift.common.is_containerized | bool
-
-- name: Pre-pull node image
- command: >
- docker pull {{ openshift.node.node_image }}:{{ openshift_image_tag }}
- register: pull_result
- changed_when: "'Downloaded newer image' in pull_result.stdout"
- when: openshift.common.is_containerized | bool
-
-- name: Pre-pull openvswitch image
- command: >
- docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }}
- register: pull_result
- changed_when: "'Downloaded newer image' in pull_result.stdout"
- when:
- - openshift.common.is_containerized | bool
- - openshift_use_openshift_sdn | bool
-
-- include_tasks: docker/upgrade.yml
- vars:
- # We will restart Docker ourselves after everything is ready:
- skip_docker_restart: True
- when:
- - l_docker_upgrade is defined
- - l_docker_upgrade | bool
-
-- include_tasks: "{{ node_config_hook }}"
- when: node_config_hook is defined
-
-- include_tasks: upgrade/rpm_upgrade.yml
- vars:
- component: "node"
- openshift_version: "{{ openshift_pkg_version | default('') }}"
- when: not openshift.common.is_containerized | bool
-
-- name: Remove obsolete docker-sdn-ovs.conf
- file:
- path: "/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf"
- state: absent
-
-- include_tasks: upgrade/containerized_node_upgrade.yml
- when: openshift.common.is_containerized | bool
-
- name: Ensure containerized services stopped before Docker restart
service:
name: "{{ item }}"
@@ -86,6 +32,17 @@
failed_when: false
when: openshift.common.is_containerized | bool
+- service:
+ name: docker
+ state: stopped
+ register: l_openshift_node_upgrade_docker_stop_result
+ until: not l_openshift_node_upgrade_docker_stop_result | failed
+ retries: 3
+ delay: 30
+ when:
+ - l_docker_upgrade is defined
+ - l_docker_upgrade | bool
+
- name: Stop rpm based services
service:
name: "{{ item }}"
@@ -96,56 +53,19 @@
failed_when: false
when: not openshift.common.is_containerized | bool
+- include_tasks: "{{ node_config_hook }}"
+ when: node_config_hook is defined
+
# https://bugzilla.redhat.com/show_bug.cgi?id=1513054
- name: Clean up dockershim data
file:
path: "/var/lib/dockershim/sandbox/"
state: absent
-- name: Upgrade openvswitch
- package:
- name: openvswitch
- state: latest
- when: not openshift.common.is_containerized | bool
- register: result
- until: result | success
-
-- name: Update oreg value
- yedit:
- src: "{{ openshift.common.config_base }}/node/node-config.yaml"
- key: 'imageConfig.format'
- value: "{{ oreg_url | default(oreg_url_node) }}"
- when: oreg_url is defined or oreg_url_node is defined
-
-# https://docs.openshift.com/container-platform/3.4/admin_guide/overcommit.html#disabling-swap-memory
-- name: Check for swap usage
- command: grep "^[^#].*swap" /etc/fstab
- # grep: match any lines which don't begin with '#' and contain 'swap'
- changed_when: false
- failed_when: false
- register: swap_result
-
- # Disable Swap Block
-- block:
-
- - name: Disable swap
- command: swapoff --all
-
- - name: Remove swap entries from /etc/fstab
- replace:
- dest: /etc/fstab
- regexp: '(^[^#].*swap.*)'
- replace: '# \1'
- backup: yes
-
- - name: Add notice about disabling swap
- lineinfile:
- dest: /etc/fstab
- line: '# OpenShift-Ansible Installer disabled swap per overcommit guidelines'
- state: present
-
+- name: Disable swap
+ command: swapoff --all
when:
- - swap_result.stdout_lines | length > 0
+ - openshift_node_upgrade_swap_result | default(False) | bool
- openshift_disable_swap | default(true) | bool
# End Disable Swap Block
@@ -155,17 +75,6 @@
- ansible_selinux is defined
- ansible_selinux.status == 'enabled'
-- name: Apply 3.6 dns config changes
- yedit:
- src: /etc/origin/node/node-config.yaml
- key: "{{ item.key }}"
- value: "{{ item.value }}"
- with_items:
- - key: "dnsBindAddress"
- value: "127.0.0.1:53"
- - key: "dnsRecursiveResolvConf"
- value: "/etc/origin/node/resolv.conf"
-
# Restart all services
- include_tasks: upgrade/restart.yml
@@ -182,3 +91,5 @@
delay: 5
- include_tasks: dnsmasq.yml
+
+- meta: flush_handlers
diff --git a/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml b/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml
index 245de60a7..8e547351b 100644
--- a/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml
+++ b/roles/openshift_node/tasks/upgrade/containerized_node_upgrade.yml
@@ -1,14 +1,3 @@
---
-# This is a hack to allow us to use systemd_units.yml, but skip the handlers which
-# restart services. We will unconditionally restart all containerized services
-# because we have to unconditionally restart Docker:
-- set_fact:
- skip_node_svc_handlers: True
-
- name: Update systemd units
include_tasks: ../systemd_units.yml
-
-# This is a no-op because of skip_node_svc_handlers, but lets us trigger it before end of
-# play when the node has already been marked schedulable again. (this would look strange
-# in logs otherwise)
-- meta: flush_handlers
diff --git a/roles/openshift_node/tasks/upgrade/restart.yml b/roles/openshift_node/tasks/upgrade/restart.yml
index 65c301783..717cfa712 100644
--- a/roles/openshift_node/tasks/upgrade/restart.yml
+++ b/roles/openshift_node/tasks/upgrade/restart.yml
@@ -13,6 +13,15 @@
- name: Reload systemd to ensure latest unit files
command: systemctl daemon-reload
+- name: Restart support services
+ service:
+ name: "{{ item }}"
+ state: restarted
+ enabled: True
+ with_items:
+ - NetworkManager
+ - dnsmasq
+
- name: Restart container runtime
service:
name: "{{ openshift_docker_service_name }}"
diff --git a/roles/openshift_node/tasks/upgrade_pre.yml b/roles/openshift_node/tasks/upgrade_pre.yml
new file mode 100644
index 000000000..5d7961a24
--- /dev/null
+++ b/roles/openshift_node/tasks/upgrade_pre.yml
@@ -0,0 +1,118 @@
+---
+# This is a hack to allow us to update various components without restarting
+# services. This will persist into the upgrade play as well, so everything
+# needs to be restarted by hand.
+- set_fact:
+ skip_node_svc_handlers: True
+
+- include_tasks: registry_auth.yml
+
+- name: Check Docker image count
+ shell: "docker images -aq | wc -l"
+ register: docker_image_count
+ when:
+ - l_docker_upgrade is defined
+ - l_docker_upgrade | bool
+
+- debug: var=docker_image_count.stdout
+ when:
+ - l_docker_upgrade is defined
+ - l_docker_upgrade | bool
+
+- name: Upgrade Docker
+ package: name=docker{{ '-' + docker_version }} state=present
+ register: result
+ until: result | success
+ when:
+ - l_docker_upgrade is defined
+ - l_docker_upgrade | bool
+
+- name: Pre-pull node image
+ command: >
+ docker pull {{ openshift.node.node_image }}:{{ openshift_image_tag }}
+ register: pull_result
+ changed_when: "'Downloaded newer image' in pull_result.stdout"
+ when: openshift.common.is_containerized | bool
+
+- name: Pre-pull openvswitch image
+ command: >
+ docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }}
+ register: pull_result
+ changed_when: "'Downloaded newer image' in pull_result.stdout"
+ when:
+ - openshift.common.is_containerized | bool
+ - openshift_use_openshift_sdn | bool
+
+- include_tasks: upgrade/rpm_upgrade.yml
+ vars:
+ component: "node"
+ openshift_version: "{{ openshift_pkg_version | default('') }}"
+ when: not openshift.common.is_containerized | bool
+
+- name: Remove obsolete docker-sdn-ovs.conf
+ file:
+ path: "/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf"
+ state: absent
+
+- include_tasks: upgrade/containerized_node_upgrade.yml
+ when: openshift.common.is_containerized | bool
+
+- name: Upgrade openvswitch
+ package:
+ name: openvswitch
+ state: latest
+ when: not openshift.common.is_containerized | bool
+ register: result
+ until: result | success
+
+- name: Update oreg value
+ yedit:
+ src: "{{ openshift.common.config_base }}/node/node-config.yaml"
+ key: 'imageConfig.format'
+ value: "{{ oreg_url | default(oreg_url_node) }}"
+ when: oreg_url is defined or oreg_url_node is defined
+
+# https://docs.openshift.com/container-platform/3.4/admin_guide/overcommit.html#disabling-swap-memory
+- name: Check for swap usage
+ command: grep "^[^#].*swap" /etc/fstab
+ # grep: match any lines which don't begin with '#' and contain 'swap'
+ changed_when: false
+ failed_when: false
+ register: swap_result
+
+# Set this fact here so we can use it during the next play, which is serial.
+- name: set_fact swap_result
+ set_fact:
+ openshift_node_upgrade_swap_result: "{{ swap_result.stdout_lines | length > 0 | bool }}"
+
+# Disable Swap Block (pre)
+- block:
+ - name: Remove swap entries from /etc/fstab
+ replace:
+ dest: /etc/fstab
+ regexp: '(^[^#].*swap.*)'
+ replace: '# \1'
+ backup: yes
+
+ - name: Add notice about disabling swap
+ lineinfile:
+ dest: /etc/fstab
+ line: '# OpenShift-Ansible Installer disabled swap per overcommit guidelines'
+ state: present
+ when:
+ - openshift_node_upgrade_swap_result | default(False) | bool
+ - openshift_disable_swap | default(true) | bool
+ # End Disable Swap Block
+
+- name: Apply 3.6 dns config changes
+ yedit:
+ src: /etc/origin/node/node-config.yaml
+ key: "{{ item.key }}"
+ value: "{{ item.value }}"
+ with_items:
+ - key: "dnsBindAddress"
+ value: "127.0.0.1:53"
+ - key: "dnsRecursiveResolvConf"
+ value: "/etc/origin/node/resolv.conf"
+
+- include_tasks: dnsmasq_install.yml