From a5dc813bab19dffe02dadfb58c19b455df199a08 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan"
Date: Mon, 13 Aug 2018 21:33:02 +0200
Subject: Prevent IB from crashing Fedora systems, mount Ands shared NFS storage, support development files on CentOS

---
Review notes (free-form text placed below the "---" marker, ahead of the
diffstat, so it is not part of the commit message):

* Layout: the one-record-per-line structure of the mail headers, the
  diffstat and the hunks has been restored. The leading whitespace of the
  YAML lines is reconstructed (two-space nesting assumed) -- TODO confirm
  the context lines of install.yml and roles/common/tasks/main.yml against
  the tree before applying.
* roles/ib/templates/00-ibnm.rules.j2: a comma now separates the match key
  and the assignment key in the first rule, as udev(7) documents and as
  the second rule already does. The blob id on that file's "index" line
  predates this change.
* roles/common/tasks/main.yml: the "Configure git" task turns off
  http.sslVerify globally, writes one fixed user.name/user.email, and ends
  with "exit 0", so a failing "git config" is still reported as success.
  NOTE(review): left unchanged -- confirm this is intended.

 devel.yml                           |  6 ++++++
 ib.yml                              |  5 +++++
 install.yml                         |  6 ++++++
 inventories/ipe.erb                 | 12 ++++++++++--
 roles/common/tasks/main.yml         |  9 +++++++++
 roles/devel/tasks/dnf.yml           |  5 +++++
 roles/devel/tasks/main.yml          |  5 +++++
 roles/devel/tasks/yum.yml           |  5 +++++
 roles/ib/handlers/main.yml          | 16 ++++++++++++++++
 roles/ib/tasks/main.yml             | 16 ++++++++++++++++
 roles/ib/templates/00-ibnm.rules.j2 |  2 ++
 roles/ib/templates/mlx.conf.j2      |  1 +
 roles/storage/tasks/main.yml        |  9 +++++++++
 storage.yml                         |  6 ++++++
 14 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 devel.yml
 create mode 100644 ib.yml
 create mode 100644 roles/devel/tasks/dnf.yml
 create mode 100644 roles/devel/tasks/main.yml
 create mode 100644 roles/devel/tasks/yum.yml
 create mode 100644 roles/ib/handlers/main.yml
 create mode 100644 roles/ib/tasks/main.yml
 create mode 100644 roles/ib/templates/00-ibnm.rules.j2
 create mode 100644 roles/ib/templates/mlx.conf.j2
 create mode 100644 roles/storage/tasks/main.yml
 create mode 100644 storage.yml

diff --git a/devel.yml b/devel.yml
new file mode 100644
index 0000000..f744e47
--- /dev/null
+++ b/devel.yml
@@ -0,0 +1,6 @@
+- name: Common Software
+  hosts: all
+  remote_user: root
+  roles:
+    - role: devel
+
diff --git a/ib.yml b/ib.yml
new file mode 100644
index 0000000..b822e88
--- /dev/null
+++ b/ib.yml
@@ -0,0 +1,5 @@
+- name: Common Software
+  hosts: ib
+  remote_user: root
+  roles:
+    - role: ib
diff --git a/install.yml b/install.yml
index 2cd3768..278dac9 100644
--- a/install.yml
+++ b/install.yml
@@ -4,6 +4,12 @@
   roles:
     - role: common
 
+- name: Infiniband
+  hosts: ib
+  remote_user: root
+  roles:
+    - role: ib
+
 - name: CUDA
   hosts: cuda
   remote_user: root
diff --git a/inventories/ipe.erb b/inventories/ipe.erb
index 5c33cdb..df62890 100644
--- a/inventories/ipe.erb
+++ b/inventories/ipe.erb
@@ -1,3 +1,6 @@
+[ands]
+192.168.26.[140:149]
+
 [camera]
 192.168.26.[80:89]
 
@@ -7,5 +10,10 @@
 [desktop:children]
 student
 
-[cuda:children]
-camera
+[cuda]
+192.168.26.[80:84]
+192.168.26.[86:89]
+
+[ib]
+192.168.26.[60:69]
+192.168.26.[80:89]
diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml
index 8070bc6..286a027 100644
--- a/roles/common/tasks/main.yml
+++ b/roles/common/tasks/main.yml
@@ -16,3 +16,12 @@
 
 - name: Install additional software
   include_tasks: software.yml
+
+- name: Configure git
+  shell: |
+    git config --global http.sslVerify false
+    git config --global user.name "Suren A. Chilingaryan"
+    git config --global user.email csa@suren.me
+    exit 0
+  args:
+    executable: /bin/bash
diff --git a/roles/devel/tasks/dnf.yml b/roles/devel/tasks/dnf.yml
new file mode 100644
index 0000000..1bf34f7
--- /dev/null
+++ b/roles/devel/tasks/dnf.yml
@@ -0,0 +1,5 @@
+- name: Install various ansible requirements
+  dnf: name={{item}} state=present
+  with_items:
+    - "@Development and Creative Workstation"
+    - "libconfig-devel"
\ No newline at end of file
diff --git a/roles/devel/tasks/main.yml b/roles/devel/tasks/main.yml
new file mode 100644
index 0000000..02ddce1
--- /dev/null
+++ b/roles/devel/tasks/main.yml
@@ -0,0 +1,5 @@
+- include_tasks: dnf.yml
+  when: ansible_distribution | lower == 'fedora'
+
+- include_tasks: yum.yml
+  when: (ansible_os_family | lower == 'redhat') and (ansible_distribution | lower != 'fedora')
diff --git a/roles/devel/tasks/yum.yml b/roles/devel/tasks/yum.yml
new file mode 100644
index 0000000..4613ada
--- /dev/null
+++ b/roles/devel/tasks/yum.yml
@@ -0,0 +1,5 @@
+- name: Install various ansible requirements
+  yum: name={{item}} state=present
+  with_items:
+    - "@^Development and Creative Workstation"
+    - "libconfig-devel"
diff --git a/roles/ib/handlers/main.yml b/roles/ib/handlers/main.yml
new file mode 100644
index 0000000..75661b9
--- /dev/null
+++ b/roles/ib/handlers/main.yml
@@ -0,0 +1,16 @@
+---
+- name: Unload mlx4_ib
+  modprobe: name="mlx4_ib" state="absent"
+  listen: mlx-reload
+
+- name: Unload mlx4_en
+  modprobe: name="mlx4_en" state="absent"
+  listen: mlx-reload
+
+- name: Unload mlx4_core
+  modprobe: name="mlx4_core" state="absent"
+  listen: mlx-reload
+
+- name: Load mlx4_en
+  modprobe: name="mlx4_en" state="present"
+  listen: mlx-reload
diff --git a/roles/ib/tasks/main.yml b/roles/ib/tasks/main.yml
new file mode 100644
index 0000000..45612ec
--- /dev/null
+++ b/roles/ib/tasks/main.yml
@@ -0,0 +1,16 @@
+---
+- name: Find infiniband interface files
+  find: paths="/etc/sysconfig/network-scripts/" patterns="ifcfg-ib*"
+  register: result
+
+- name: Remove infiniband files from sysconfig
+  file: path="{{ item.path }}" state="absent"
+  with_items: "{{ result.files }}"
+
+- name: Set udev rules
+  template: src="00-ibnm.rules.j2" dest="/etc/udev/rules.d/00-ibnm.rules" owner="root" group="root" mode="0644"
+
+- name: Set mellanox options
+  template: src="mlx.conf.j2" dest="/etc/modprobe.d/mlx.conf" owner="root" group="root" mode="0644"
+  notify: mlx-reload
+
diff --git a/roles/ib/templates/00-ibnm.rules.j2 b/roles/ib/templates/00-ibnm.rules.j2
new file mode 100644
index 0000000..7025eab
--- /dev/null
+++ b/roles/ib/templates/00-ibnm.rules.j2
@@ -0,0 +1,2 @@
+ENV{INTERFACE}=="ib[0-9]*", ENV{NM_UNMANAGED}="1"
+ENV{ID_NET_DRIVER}=="mlx4_*", ENV{NM_UNMANAGED}="1"
diff --git a/roles/ib/templates/mlx.conf.j2 b/roles/ib/templates/mlx.conf.j2
new file mode 100644
index 0000000..df8a3ab
--- /dev/null
+++ b/roles/ib/templates/mlx.conf.j2
@@ -0,0 +1 @@
+options mlx4_core log_num_mgm_entry_size=-1
diff --git a/roles/storage/tasks/main.yml b/roles/storage/tasks/main.yml
new file mode 100644
index 0000000..871e785
--- /dev/null
+++ b/roles/storage/tasks/main.yml
@@ -0,0 +1,9 @@
+---
+- name: Ensure NFS common is installed.
+  package: name=nfs-utils state=present
+
+- name: Create mountable dir
+  file: path=/mnt/ands state=directory mode=755 owner=root group=root
+
+- name: set mountpoints
+  mount: name=/mnt/ands src=192.168.26.140:/mnt/ands fstype=nfs4 opts=defaults,minorversion=1,_netdev,nofail,soft,nodiratime,noatime dump=0 passno=0 state=mounted
diff --git a/storage.yml b/storage.yml
new file mode 100644
index 0000000..787f0b5
--- /dev/null
+++ b/storage.yml
@@ -0,0 +1,6 @@
+- name: Common Software
+  hosts: all
+  remote_user: root
+  roles:
+    - role: storage
+
-- 
cgit v1.2.1