From 1a868e61fbab8f1e2095c0952031656c47926220 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Fri, 19 May 2017 16:01:03 -0400 Subject: Tolerate failures in the node upgrade playbook --- inventory/byo/hosts.origin.example | 25 +++++++++++++++++++++++++ inventory/byo/hosts.ose.example | 25 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'inventory') diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 20f342023..310b8ab44 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -788,6 +788,31 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # #etcd_ca_default_days=1825 +# Upgrade Control +# +# By default nodes are upgraded in a serial manner one at a time and all failures +# are fatal +#openshift_upgrade_nodes_serial=1 +#openshift_upgrade_nodes_max_fail_percentage=0 +# +# You can specify the number of nodes to upgrade at once. We do not currently +# attempt to verify that you have capacity to drain this many nodes at once +# so please be careful when specifying these values. You should also verify that +# the expected number of nodes are all schedulable and ready before starting an +# upgrade. If it's not possible to drain the requested nodes the upgrade will +# stall indefinitely until the drain is successful. +# +# If you're upgrading more than one node at a time you can specify the maximum +# percentage of failure within the batch before the upgrade is aborted. Any +# nodes that do fail are ignored for the rest of the playbook run and you should +# take care to investigate the failure and return the node to service so that +# your cluster returns to full capacity.
+# +# The percentage of failed nodes must exceed this value for the upgrade to abort; this would fail on two failures +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 +# whereas this would not +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 + # host group for masters [masters] ose3-master[1:3]-ansible.test.example.com diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index f75a47bb8..e126bbcab 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -785,6 +785,31 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # #etcd_ca_default_days=1825 +# Upgrade Control +# +# By default nodes are upgraded in a serial manner one at a time and all failures +# are fatal +#openshift_upgrade_nodes_serial=1 +#openshift_upgrade_nodes_max_fail_percentage=0 +# +# You can specify the number of nodes to upgrade at once. We do not currently +# attempt to verify that you have capacity to drain this many nodes at once +# so please be careful when specifying these values. You should also verify that +# the expected number of nodes are all schedulable and ready before starting an +# upgrade. If it's not possible to drain the requested nodes the upgrade will +# stall indefinitely until the drain is successful. +# +# If you're upgrading more than one node at a time you can specify the maximum +# percentage of failure within the batch before the upgrade is aborted. Any +# nodes that do fail are ignored for the rest of the playbook run and you should +# take care to investigate the failure and return the node to service so that +# your cluster returns to full capacity.
+# +# The percentage of failed nodes must exceed this value for the upgrade to abort; this would fail on two failures +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 +# whereas this would not +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 + # host group for masters [masters] ose3-master[1:3]-ansible.test.example.com -- cgit v1.2.1 From 2d8e3d2b28ce19569c76c56102e9639a6f26b0c2 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Mon, 22 May 2017 14:51:50 -0400 Subject: Add separate variables for control plane nodes --- inventory/byo/hosts.origin.example | 6 +++++- inventory/byo/hosts.ose.example | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'inventory') diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 310b8ab44..023930954 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -791,9 +791,13 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # Upgrade Control # # By default nodes are upgraded in a serial manner one at a time and all failures -# are fatal +# are fatal; there is one set of variables for normal nodes and one set for +# nodes that are part of the control plane, as the number of hosts may differ +# in those two groups. #openshift_upgrade_nodes_serial=1 #openshift_upgrade_nodes_max_fail_percentage=0 +#openshift_upgrade_control_plane_nodes_serial=1 +#openshift_upgrade_control_plane_nodes_max_fail_percentage=0 # # You can specify the number of nodes to upgrade at once.
We do not currently # attempt to verify that you have capacity to drain this many nodes at once diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index e126bbcab..407ef19fb 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -788,9 +788,13 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # Upgrade Control # # By default nodes are upgraded in a serial manner one at a time and all failures -# are fatal +# are fatal; there is one set of variables for normal nodes and one set for +# nodes that are part of the control plane, as the number of hosts may differ +# in those two groups. #openshift_upgrade_nodes_serial=1 #openshift_upgrade_nodes_max_fail_percentage=0 +#openshift_upgrade_control_plane_nodes_serial=1 +#openshift_upgrade_control_plane_nodes_max_fail_percentage=0 # # You can specify the number of nodes to upgrade at once. We do not currently # attempt to verify that you have capacity to drain this many nodes at once -- cgit v1.2.1