From 6154f7d49847813dfdea9ad73aaaed86f18aa9de Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Wed, 8 Nov 2017 18:20:46 -0500 Subject: Initial upgrade for scale groups. --- .../upgrades/v3_7/upgrade_scale_groups.yml | 7 +++ .../upgrades/upgrade_scale_group.yml | 59 ++++++++++++++++++++++ roles/openshift_aws/defaults/main.yml | 23 ++++++--- .../filter_plugins/openshift_aws_filters.py | 15 +++++- roles/openshift_aws/meta/main.yml | 1 + roles/openshift_aws/tasks/accept_nodes.yml | 34 +++++++++++++ roles/openshift_aws/tasks/elb.yml | 1 + roles/openshift_aws/tasks/launch_config_create.yml | 2 +- roles/openshift_aws/tasks/provision_nodes.yml | 32 +++--------- roles/openshift_aws/tasks/remove_scale_group.yml | 27 ++++++++++ roles/openshift_aws/tasks/scale_group.yml | 10 +--- roles/openshift_aws/tasks/setup_master_group.yml | 2 +- .../tasks/setup_scale_group_facts.yml | 37 ++++++++++++++ roles/openshift_aws/tasks/upgrade_node_group.yml | 16 ++++++ roles/openshift_aws/tasks/vpc_and_subnet_id.yml | 7 ++- roles/openshift_aws/tasks/wait_for_groups.yml | 31 ++++++++++++ .../openshift_master/tasks/bootstrap_settings.yml | 1 + roles/openshift_node_group/tasks/create_config.yml | 4 ++ 18 files changed, 261 insertions(+), 48 deletions(-) create mode 100644 playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_scale_groups.yml create mode 100644 playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml create mode 100644 roles/openshift_aws/tasks/accept_nodes.yml create mode 100644 roles/openshift_aws/tasks/remove_scale_group.yml create mode 100644 roles/openshift_aws/tasks/setup_scale_group_facts.yml create mode 100644 roles/openshift_aws/tasks/upgrade_node_group.yml create mode 100644 roles/openshift_aws/tasks/wait_for_groups.yml diff --git a/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_scale_groups.yml b/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_scale_groups.yml new file mode 100644 index 000000000..14b0f85d4 --- /dev/null +++ b/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_scale_groups.yml @@ -0,0 +1,7 @@ +--- +# +# Node Scale Group Upgrade Playbook +# +# Upgrades scale group nodes only. +# +- include: ../../../../common/openshift-cluster/upgrades/upgrade_scale_group.yml diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml new file mode 100644 index 000000000..d9ce3a7e3 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml @@ -0,0 +1,59 @@ +--- +- name: create new scale group + hosts: localhost + tasks: + - name: build upgrade scale groups + include_role: + name: openshift_aws + tasks_from: upgrade_node_group.yml + + - fail: + msg: "Ensure that new scale groups were provisioned before proceeding to update." + when: + - "'oo_sg_new_nodes' not in groups or groups.oo_sg_new_nodes|length == 0" + +- name: initialize upgrade bits + include: init.yml + +- name: Drain and upgrade nodes + hosts: oo_sg_current_nodes + # This var must be set with -e on invocation, as it is not a per-host inventory var + # and is evaluated early. Values such as "20%" can also be used. 
+ serial: "{{ openshift_upgrade_nodes_serial | default(1) }}" + max_fail_percentage: "{{ openshift_upgrade_nodes_max_fail_percentage | default(0) }}" + + pre_tasks: + - name: Load lib_openshift modules + include_role: + name: ../roles/lib_openshift + + # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node + # or docker actually needs an upgrade before proceeding. Perhaps best to save this until + # we merge upgrade functionality into the base roles and a normal config.yml playbook run. + - name: Mark node unschedulable + oc_adm_manage_node: + node: "{{ openshift.node.nodename | lower }}" + schedulable: False + delegate_to: "{{ groups.oo_first_master.0 }}" + retries: 10 + delay: 5 + register: node_unschedulable + until: node_unschedulable|succeeded + + - name: Drain Node for Kubelet upgrade + command: > + {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets + delegate_to: "{{ groups.oo_first_master.0 }}" + register: l_upgrade_nodes_drain_result + until: not l_upgrade_nodes_drain_result | failed + retries: 60 + delay: 60 + +# Alright, let's clean up! +- name: clean up the old scale group + hosts: localhost + tasks: + - name: clean up scale group + include_role: + name: openshift_aws + tasks_from: remove_scale_group.yml diff --git a/roles/openshift_aws/defaults/main.yml b/roles/openshift_aws/defaults/main.yml index c9a429675..42ef22846 100644 --- a/roles/openshift_aws/defaults/main.yml +++ b/roles/openshift_aws/defaults/main.yml @@ -6,6 +6,9 @@ openshift_aws_create_security_groups: True openshift_aws_create_launch_config: True openshift_aws_create_scale_group: True +openshift_aws_current_version: '' +openshift_aws_new_version: '' + openshift_aws_wait_for_ssh: True openshift_aws_clusterid: default @@ -39,6 +42,7 @@ openshift_aws_ami_tags: bootstrap: "true" openshift-created: "true" clusterid: "{{ openshift_aws_clusterid }}" + parent: "{{ openshift_aws_base_ami | default('unknown') }}" openshift_aws_s3_mode: create openshift_aws_s3_bucket_name: "{{ openshift_aws_clusterid }}-docker-registry" @@ -114,7 +118,6 @@ openshift_aws_node_group_config_tags: "{{ openshift_aws_clusterid | build_instan openshift_aws_node_group_termination_policy: Default openshift_aws_node_group_replace_instances: [] openshift_aws_node_group_replace_all_instances: False -openshift_aws_node_group_config_extra_labels: {} openshift_aws_ami_map: master: "{{ openshift_aws_ami }}" @@ -135,8 +138,8 @@ openshift_aws_master_group_config: tags: host-type: master sub-host-type: default - labels: - type: master + runtime: docker + version: "{{ openshift_aws_new_version }}" wait_for_instances: True termination_policy: "{{ openshift_aws_node_group_termination_policy }}" replace_all_instances: "{{ openshift_aws_node_group_replace_all_instances }}" @@ -159,8 +162,8 @@ openshift_aws_node_group_config: tags: host-type: node sub-host-type: compute - labels: - type: compute + runtime: docker + version: "{{ openshift_aws_new_version }}" termination_policy: "{{ openshift_aws_node_group_termination_policy }}" replace_all_instances: "{{ openshift_aws_node_group_replace_all_instances }}" iam_role: "{{ openshift_aws_iam_role_name }}" @@ -179,8 +182,8 @@ openshift_aws_node_group_config: tags: host-type: node sub-host-type: infra - labels: - type: infra + runtime: docker + version: "{{ openshift_aws_new_version }}" termination_policy: "{{ 
openshift_aws_node_group_termination_policy }}" replace_all_instances: "{{ openshift_aws_node_group_replace_all_instances }}" iam_role: "{{ openshift_aws_iam_role_name }}" @@ -262,7 +265,7 @@ openshift_aws_node_security_groups: openshift_aws_vpc_tags: Name: "{{ openshift_aws_vpc_name }}" -openshift_aws_subnet_name: us-east-1c +openshift_aws_subnet_az: us-east-1c openshift_aws_vpc: name: "{{ openshift_aws_vpc_name }}" @@ -280,6 +283,10 @@ openshift_aws_node_run_bootstrap_startup: True openshift_aws_node_user_data: '' openshift_aws_node_config_namespace: openshift-node +openshift_aws_node_groups: nodes + +openshift_aws_masters_groups: masters,etcd,nodes + # If creating extra node groups, you'll need to define all of the following # The format is the same as openshift_aws_node_group_config, but the top-level diff --git a/roles/openshift_aws/filter_plugins/openshift_aws_filters.py b/roles/openshift_aws/filter_plugins/openshift_aws_filters.py index a9893c0a7..e707abd3f 100644 --- a/roles/openshift_aws/filter_plugins/openshift_aws_filters.py +++ b/roles/openshift_aws/filter_plugins/openshift_aws_filters.py @@ -8,6 +8,18 @@ Custom filters for use in openshift_aws class FilterModule(object): ''' Custom ansible filters for use by openshift_aws role''' + @staticmethod + def scale_groups_match_capacity(scale_group_info): + ''' This function will verify that the scale group instance count matches + the scale group desired capacity + + ''' + for scale_group in scale_group_info: + if scale_group['desired_capacity'] != len(scale_group['instances']): + return False + + return True + @staticmethod def build_instance_tags(clusterid): ''' This function will return a dictionary of the instance tags. @@ -25,4 +37,5 @@ class FilterModule(object): def filters(self): ''' returns a mapping of filters to methods ''' - return {'build_instance_tags': self.build_instance_tags} + return {'build_instance_tags': self.build_instance_tags, + 'scale_groups_match_capacity': self.scale_groups_match_capacity} diff --git a/roles/openshift_aws/meta/main.yml b/roles/openshift_aws/meta/main.yml index 875efcb8f..fa0a24a33 100644 --- a/roles/openshift_aws/meta/main.yml +++ b/roles/openshift_aws/meta/main.yml @@ -1,3 +1,4 @@ --- dependencies: - lib_utils +- lib_openshift diff --git a/roles/openshift_aws/tasks/accept_nodes.yml b/roles/openshift_aws/tasks/accept_nodes.yml new file mode 100644 index 000000000..ae320962f --- /dev/null +++ b/roles/openshift_aws/tasks/accept_nodes.yml @@ -0,0 +1,34 @@ +--- +- name: fetch masters + ec2_remote_facts: + region: "{{ openshift_aws_region | default('us-east-1') }}" + filters: + "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid, + 'tag:host-type': 'master', 'instance-state-name': 'running'} }}" + register: mastersout + retries: 20 + delay: 3 + until: "'instances' in mastersout and mastersout.instances|length > 0" + +- name: fetch new node instances + ec2_remote_facts: + region: "{{ openshift_aws_region }}" + filters: + "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid, + 'tag:host-type': 'node', + 'instance-state-name': 'running'} }}" + register: instancesout + retries: 20 + delay: 3 + until: "'instances' in instancesout and instancesout.instances|length > 0" + +- debug: + msg: "{{ instancesout.instances|map(attribute='private_dns_name') | list }}" + +- name: approve nodes + oc_adm_csr: + #approve_all: True + nodes: "{{ instancesout.instances|map(attribute='private_dns_name') | list }}" + timeout: 60 + register: nodeout + 
delegate_to: "{{ mastersout.instances[0].public_ip_address }}" diff --git a/roles/openshift_aws/tasks/elb.yml b/roles/openshift_aws/tasks/elb.yml index a543222d5..5d371ec7a 100644 --- a/roles/openshift_aws/tasks/elb.yml +++ b/roles/openshift_aws/tasks/elb.yml @@ -17,6 +17,7 @@ listeners: "{{ item.value }}" scheme: "{{ openshift_aws_elb_scheme }}" tags: "{{ openshift_aws_elb_tags }}" + wait: True register: new_elb with_dict: "{{ l_elb_dict_item.value }}" diff --git a/roles/openshift_aws/tasks/launch_config_create.yml b/roles/openshift_aws/tasks/launch_config_create.yml index a688496d2..f7f0f0953 100644 --- a/roles/openshift_aws/tasks/launch_config_create.yml +++ b/roles/openshift_aws/tasks/launch_config_create.yml @@ -10,7 +10,7 @@ # Create the scale group config - name: Create the node scale group launch config ec2_lc: - name: "{{ openshift_aws_launch_config_basename }}-{{ launch_config_item.key }}-{{ l_epoch_time }}" + name: "{{ openshift_aws_launch_config_basename }}-{{ launch_config_item.key }}{{'-' ~ openshift_aws_new_version if openshift_aws_new_version != '' else '' }}" region: "{{ openshift_aws_region }}" image_id: "{{ l_aws_ami_map[launch_config_item.key] | default(openshift_aws_ami) }}" instance_type: "{{ launch_config_item.value.instance_type }}" diff --git a/roles/openshift_aws/tasks/provision_nodes.yml b/roles/openshift_aws/tasks/provision_nodes.yml index 1b40f24d3..3e84666a2 100644 --- a/roles/openshift_aws/tasks/provision_nodes.yml +++ b/roles/openshift_aws/tasks/provision_nodes.yml @@ -12,7 +12,9 @@ register: instancesout retries: 20 delay: 3 - until: instancesout.instances|length > 0 + until: + - "'instances' in instancesout" + - instancesout.instances|length > 0 - name: slurp down the bootstrap.kubeconfig slurp: @@ -42,29 +44,7 @@ l_launch_config_security_groups: "{{ openshift_aws_launch_config_security_groups_extra }}" l_aws_ami_map: "{{ openshift_aws_ami_map_extra }}" - +# instances aren't scaling fast enough here, we need to wait for them - when: openshift_aws_wait_for_ssh | bool - block: - - name: pause and allow for instances to scale before we query them - pause: - seconds: 10 - - - name: fetch newly created instances - ec2_remote_facts: - region: "{{ openshift_aws_region }}" - filters: - "tag:clusterid": "{{ openshift_aws_clusterid }}" - "tag:host-type": node - instance-state-name: running - register: instancesout - retries: 20 - delay: 3 - until: instancesout.instances|length > 0 - - - name: wait for ssh to become available - wait_for: - port: 22 - host: "{{ item.public_ip_address }}" - timeout: 300 - search_regex: OpenSSH - with_items: "{{ instancesout.instances }}" + name: wait for our new nodes to come up + include: wait_for_groups.yml diff --git a/roles/openshift_aws/tasks/remove_scale_group.yml b/roles/openshift_aws/tasks/remove_scale_group.yml new file mode 100644 index 000000000..55d1af2b5 --- /dev/null +++ b/roles/openshift_aws/tasks/remove_scale_group.yml @@ -0,0 +1,27 @@ +--- +- name: fetch the scale groups + ec2_asg_facts: + region: "{{ openshift_aws_region }}" + tags: + "{{ {'kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid, + 'version': openshift_aws_current_version} }}" + register: qasg + +- name: remove non-master scale groups + ec2_asg: + region: "{{ openshift_aws_region }}" + state: absent + name: "{{ item.auto_scaling_group_name }}" + when: "'master' not in item.auto_scaling_group_name" + register: asg_results + with_items: "{{ qasg.results }}" + async: 600 + poll: 0 + +- name: join the asynch scale group removals + 
async_status: + jid: "{{ item.ansible_job_id }}" + register: jobs_results + with_items: "{{ asg_results.results }}" + until: jobs_results.finished + retries: 200 diff --git a/roles/openshift_aws/tasks/scale_group.yml b/roles/openshift_aws/tasks/scale_group.yml index 097859af2..30df7545d 100644 --- a/roles/openshift_aws/tasks/scale_group.yml +++ b/roles/openshift_aws/tasks/scale_group.yml @@ -1,16 +1,8 @@ --- -- name: fetch the subnet to use in scale group - ec2_vpc_subnet_facts: - region: "{{ openshift_aws_region }}" - filters: - "tag:Name": "{{ openshift_aws_subnet_name }}" - vpc-id: "{{ vpcout.vpcs[0].id }}" - register: subnetout - - name: Create the scale group ec2_asg: name: "{{ openshift_aws_scale_group_basename }} {{ item.key }}" - launch_config_name: "{{ openshift_aws_launch_config_basename }}-{{ item.key }}-{{ l_epoch_time }}" + launch_config_name: "{{ openshift_aws_launch_config_basename }}-{{ item.key }}{{ '-' ~ openshift_aws_new_version if openshift_aws_new_version != '' else '' }}" health_check_period: "{{ item.value.health_check.period }}" health_check_type: "{{ item.value.health_check.type }}" min_size: "{{ item.value.min_size }}" diff --git a/roles/openshift_aws/tasks/setup_master_group.yml b/roles/openshift_aws/tasks/setup_master_group.yml index 166f3b938..05b68f460 100644 --- a/roles/openshift_aws/tasks/setup_master_group.yml +++ b/roles/openshift_aws/tasks/setup_master_group.yml @@ -21,7 +21,7 @@ - name: add new master to masters group add_host: - groups: "masters,etcd,nodes" + groups: "{{ openshift_aws_masters_groups }}" name: "{{ item.public_dns_name }}" hostname: "{{ openshift_aws_clusterid }}-master-{{ item.id[:-5] }}" with_items: "{{ instancesout.instances }}" diff --git a/roles/openshift_aws/tasks/setup_scale_group_facts.yml b/roles/openshift_aws/tasks/setup_scale_group_facts.yml new file mode 100644 index 000000000..d65fdc2de --- /dev/null +++ b/roles/openshift_aws/tasks/setup_scale_group_facts.yml @@ -0,0 +1,37 @@ +--- +- name: group scale group nodes + ec2_remote_facts: + region: "{{ openshift_aws_region }}" + filters: + "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid }}}" + register: qinstances + +- name: Build new node group + add_host: + groups: oo_sg_new_nodes + ansible_ssh_host: "{{ item.public_dns_name }}" + name: "{{ item.public_dns_name }}" + hostname: "{{ item.public_dns_name }}" + when: + - (item.tags.version | default(False)) == openshift_aws_new_version + - "'node' in item.tags['host-type']" + with_items: "{{ qinstances.instances }}" + +- name: Build current node group + add_host: + groups: oo_sg_current_nodes + ansible_ssh_host: "{{ item.public_dns_name }}" + name: "{{ item.public_dns_name }}" + hostname: "{{ item.public_dns_name }}" + when: + - (item.tags.version | default('')) == openshift_aws_current_version + - "'node' in item.tags['host-type']" + with_items: "{{ qinstances.instances }}" + +- name: place all nodes into nodes group + add_host: + groups: nodes + ansible_ssh_host: "{{ item.public_dns_name }}" + name: "{{ item.public_dns_name }}" + hostname: "{{ item.public_dns_name }}" + with_items: "{{ qinstances.instances }}" diff --git a/roles/openshift_aws/tasks/upgrade_node_group.yml b/roles/openshift_aws/tasks/upgrade_node_group.yml new file mode 100644 index 000000000..d7851d887 --- /dev/null +++ b/roles/openshift_aws/tasks/upgrade_node_group.yml @@ -0,0 +1,16 @@ +--- +- fail: + msg: 'Please ensure the current_version and new_version variables are not the same.' 
+ when: + - openshift_aws_current_version == openshift_aws_new_version + +- include: provision_nodes.yml + +- include: accept_nodes.yml + +- include: setup_scale_group_facts.yml + +- include: setup_master_group.yml + vars: + # we do not set etcd here as its limited to 1 or 3 + openshift_aws_masters_groups: masters,nodes diff --git a/roles/openshift_aws/tasks/vpc_and_subnet_id.yml b/roles/openshift_aws/tasks/vpc_and_subnet_id.yml index aaf9b300f..1b754f863 100644 --- a/roles/openshift_aws/tasks/vpc_and_subnet_id.yml +++ b/roles/openshift_aws/tasks/vpc_and_subnet_id.yml @@ -6,13 +6,16 @@ 'tag:Name': "{{ openshift_aws_vpc_name }}" register: vpcout -- name: debug +- name: debug vcpout debug: var=vpcout - name: fetch the default subnet id ec2_vpc_subnet_facts: region: "{{ openshift_aws_region }}" filters: - "tag:Name": "{{ openshift_aws_subnet_name }}" + "availability_zone": "{{ openshift_aws_subnet_az }}" vpc-id: "{{ vpcout.vpcs[0].id }}" register: subnetout + +- name: debug subnetout + debug: var=subnetout diff --git a/roles/openshift_aws/tasks/wait_for_groups.yml b/roles/openshift_aws/tasks/wait_for_groups.yml new file mode 100644 index 000000000..9f1a68a2a --- /dev/null +++ b/roles/openshift_aws/tasks/wait_for_groups.yml @@ -0,0 +1,31 @@ +--- +# The idea here is to wait until all scale groups are at +# their desired capacity before continuing. +- name: fetch the scale groups + ec2_asg_facts: + region: "{{ openshift_aws_region }}" + tags: + "{{ {'kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid} }}" + register: qasg + until: qasg.results | scale_groups_match_capacity | bool + delay: 10 + retries: 60 + +- name: fetch newly created instances + ec2_remote_facts: + region: "{{ openshift_aws_region }}" + filters: + "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid, + 'tag:version': openshift_aws_new_version} }}" + register: instancesout + until: instancesout.instances|length > 0 + delay: 5 + retries: 60 + +- name: wait for ssh to become available + wait_for: + port: 22 + host: "{{ item.public_ip_address }}" + timeout: 300 + search_regex: OpenSSH + with_items: "{{ instancesout.instances }}" diff --git a/roles/openshift_master/tasks/bootstrap_settings.yml b/roles/openshift_master/tasks/bootstrap_settings.yml index cbd7f587b..10e0828eb 100644 --- a/roles/openshift_master/tasks/bootstrap_settings.yml +++ b/roles/openshift_master/tasks/bootstrap_settings.yml @@ -11,4 +11,5 @@ - /etc/origin/master/ca.key notify: - restart master controllers + - restart master api when: openshift_master_bootstrap_enabled | default(False) diff --git a/roles/openshift_node_group/tasks/create_config.yml b/roles/openshift_node_group/tasks/create_config.yml index 02ec30a62..8b3218e7b 100644 --- a/roles/openshift_node_group/tasks/create_config.yml +++ b/roles/openshift_node_group/tasks/create_config.yml @@ -9,6 +9,7 @@ - name: debug node config debug: var=configout + run_once: true - when: - configout.results.results.0 == {} or (configout.results.results.0 != {} and openshift_node_group_edits|length > 0) @@ -24,6 +25,7 @@ dest: "{{ mktempout.stdout }}/node-config.yaml" when: - configout.results.results.0 == {} + run_once: true - name: lay down the config from the existing configmap copy: @@ -31,6 +33,7 @@ dest: "{{ mktempout.stdout }}/node-config.yaml" when: - configout.results.results.0 != {} + run_once: true - name: "specialize the generated configs for {{ openshift_node_group_name }}" yedit: @@ -42,6 +45,7 @@ run_once: true - debug: var=yeditout + run_once: true 
- name: create node-config.yaml configmap oc_configmap: -- cgit v1.2.1
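
A minimal extra-vars sketch for driving the new entry point,
playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade_scale_groups.yml
(the cluster id and version values below are illustrative assumptions, not
taken from this commit):

    # openshift_aws_new_version is stamped onto the replacement launch configs
    # and the scale groups' "version" tag, while openshift_aws_current_version
    # selects the old groups that get drained and then deleted by
    # remove_scale_group.yml. The serial/max-fail knobs must be supplied with
    # -e, since the drain play evaluates them before per-host inventory vars
    # are available.
    openshift_aws_clusterid: mycluster
    openshift_aws_region: us-east-1
    openshift_aws_current_version: '3.6'
    openshift_aws_new_version: '3.7'
    openshift_upgrade_nodes_serial: "20%"
    openshift_upgrade_nodes_max_fail_percentage: 10

Passing these with something like "-e @upgrade_vars.yml" (a hypothetical file
name) lets the fail-fast check in upgrade_node_group.yml confirm that the two
version values differ before any new scale groups are built.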