summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSuren A. Chilingaryan <csa@suren.me>2020-08-20 04:02:45 +0200
committerSuren A. Chilingaryan <csa@suren.me>2020-08-20 04:02:45 +0200
commit430b52b32bb44e05516b0178501cf2878909aaec (patch)
treebf3e01323b69cf465c48c3242083422e08193c57
parent5de35a8ae4e76f283abf159f84bfa9c9b17efddb (diff)
downloadands-430b52b32bb44e05516b0178501cf2878909aaec.tar.gz
ands-430b52b32bb44e05516b0178501cf2878909aaec.tar.bz2
ands-430b52b32bb44e05516b0178501cf2878909aaec.tar.xz
ands-430b52b32bb44e05516b0178501cf2878909aaec.zip
EPICS namespace and documentation update
-rw-r--r--docs/performance/gluster.txt45
-rw-r--r--docs/projects/epics.txt1
-rw-r--r--docs/samples/access/externalip.yaml28
-rw-r--r--docs/samples/mysql/templates/01-sds-secrets.yml.j226
-rw-r--r--docs/samples/mysql/vars/sds.yml24
-rw-r--r--docs/troubleshooting.txt11
-rw-r--r--docs/vision.txt4
-rw-r--r--docs/webservices.txt8
-rw-r--r--group_vars/OSEv3.yml1
-rw-r--r--group_vars/production.yml1
-rw-r--r--group_vars/staging.yml1
-rw-r--r--group_vars/testing.yml1
-rw-r--r--setup/configs/openshift.yml19
-rw-r--r--setup/configs/secrets.yml19
-rw-r--r--setup/configs/security.yml1
-rw-r--r--setup/external_ip.txt8
-rw-r--r--setup/users/htpasswd2
17 files changed, 182 insertions, 18 deletions
diff --git a/docs/performance/gluster.txt b/docs/performance/gluster.txt
new file mode 100644
index 0000000..ada9a9c
--- /dev/null
+++ b/docs/performance/gluster.txt
@@ -0,0 +1,45 @@
+# Changed (for katrin_data)
+gluster volume set katrin_data server.event-threads 8 # 1
+gluster volume set katrin_data client.event-threads 8 # 2
+gluster volume set katrin_data performance.io-thread-count 32 # 16
+gluster volume set katrin_data cluster.lookup-optimize on # off
+gluster volume set katrin_data cluster.readdir-optimize on # off
+
+# Already set
+performance.stat-prefetch: on
+performance.readdir-ahead: on
+performance.io-cache: on
+cluster.choose-local: true
+performance.flush-behind: on
+performance.write-behind-window-size: 1MB
+
+# Shall we ?
+gluster volume set katrin_data server.outstanding-rpc-limit # 64
+
+# Does this cache (per-client size) make sense with a 1-10s delay?
+gluster volume set katrin_data performance.cache-refresh-timeout 10 # 1 (seconds)
+gluster volume set katrin_data performance.cache-size 1GB # 32MB
+gluster volume set katrin_data performance.write-behind-window-size # 1MB
+gluster volume set katrin_data performance.cache-max-file-size 2MB # 0 (unlimited)
+
+# Major, but seems to affect only Samba/NFS in 3.x
+gluster volume set katrin_data features.cache-invalidation on # off
+gluster volume set katrin_data performance.cache-invalidation on # off
+gluster volume set katrin_data features.cache-invalidation-timeout 600 # 60
+gluster volume set katrin_data performance.md-cache-timeout 600 # 1
+
+# Not recommended
+gluster volume set katrin_data performance.client-io-threads on # off
+
+
+Sysctl (not applied as we use rdma anyway)
+======
+sysctl -w net.ipv4.tcp_congestion_control=htcp # cubic
+sysctl -w net.ipv4.tcp_mtu_probing=1 # recommended for hosts with jumbo frames enabled
+
+# Optimal value unclear (big value may harm small file performance)
+sysctl -w net.ipv4.tcp_rmem="4096 87380 33554432" # increase Linux autotuning TCP buffer limit to 32MB
+sysctl -w net.ipv4.tcp_wmem="4096 87380 33554432"
+sysctl -w net.core.rmem_max=67108864 # allow testing with buffers up to 64MB
+sysctl -w net.core.wmem_max=67108864
+sysctl -w net.core.netdev_max_backlog=30000 # increase the length of the processor input queue
diff --git a/docs/projects/epics.txt b/docs/projects/epics.txt
new file mode 100644
index 0000000..6190dbd
--- /dev/null
+++ b/docs/projects/epics.txt
@@ -0,0 +1 @@
+EPICS_CA_ADDR_LIST="172.30.14.13" caget -w 3 -t darwin:ist:ts1
diff --git a/docs/samples/access/externalip.yaml b/docs/samples/access/externalip.yaml
new file mode 100644
index 0000000..3827968
--- /dev/null
+++ b/docs/samples/access/externalip.yaml
@@ -0,0 +1,28 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: epics-softioc-external
+ namespace: epics
+spec:
+ type: ClusterIP
+ selector:
+ deploymentconfig: epics-softioc
+ externalIPs:
+ - 192.168.130.1
+ ports:
+ - name: 5064-tcp
+ port: 5064
+ protocol: TCP
+ targetPort: 5064
+ - name: 5064-udp
+ port: 5064
+ protocol: UDP
+ targetPort: 5064
+ - name: 5065-tcp
+ port: 5065
+ protocol: TCP
+ targetPort: 5065
+ - name: 5065-udp
+ port: 5065
+ protocol: UDP
+ targetPort: 5065
diff --git a/docs/samples/mysql/templates/01-sds-secrets.yml.j2 b/docs/samples/mysql/templates/01-sds-secrets.yml.j2
new file mode 100644
index 0000000..2922118
--- /dev/null
+++ b/docs/samples/mysql/templates/01-sds-secrets.yml.j2
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: Template
+metadata:
+ name: sds-secrets
+ labels:
+ app: sds
+ annotations:
+ descriptions: "SymmetricDS Secrets"
+objects:
+- apiVersion: v1
+ kind: Secret
+ metadata:
+ annotations:
+ template.openshift.io/expose-root_password: '{.data[''root-password'']}'
+ template.openshift.io/expose-database_password: '{.data[''database-password'']}'
+ name: sds
+ stringData:
+ root-password: "${DATABASE_PASSWORD}"
+ database-password: "${DATABASE_PASSWORD}"
+parameters:
+- description: SymmetricDS Database Password
+ displayName: SymmetricDS Database Password
+ from: '[a-zA-Z0-9]{16}'
+ generate: expression
+ name: DATABASE_PASSWORD
+ required: true
diff --git a/docs/samples/mysql/vars/sds.yml b/docs/samples/mysql/vars/sds.yml
new file mode 100644
index 0000000..abe0f4f
--- /dev/null
+++ b/docs/samples/mysql/vars/sds.yml
@@ -0,0 +1,24 @@
+sds:
+ pods:
+ sds-mysql:
+ service: { ports: [ 3306 ] }
+ sched: { replicas: 1, strategy: "Recreate" }
+ groups: [ "services_sds" ]
+ images:
+ - stream: "openshift/mysql:5.7"
+ env:
+ - { name: "MYSQL_USER", value: "sds" }
+ - { name: "MYSQL_PASSWORD", value: "secret@sds/database-password" }
+ - { name: "MYSQL_ROOT_PASSWORD", value: "secret@sds/root-password" }
+ - { name: "MYSQL_DATABASE", value: "sds" }
+ - { name: "MYSQL_MAX_CONNECTIONS", value: "50" }
+ mappings:
+ - { name: "db", path: "sds", mount: "/var/lib/mysql/data" }
+ resources: { limit: { cpu: 1000m, mem: 2Gi } }
+# probes:
+# - { port: 3306 }
+ probes:
+ - { type: "liveness", port: 3306 }
+ - { type: "readiness", command: [ /bin/sh, -i, -c, MYSQL_PWD="$MYSQL_PASSWORD" mysql -h 127.0.0.1 -u $MYSQL_USER -D $MYSQL_DATABASE -e 'SELECT 1' ], delay: "15", timeout: "5" }
+ hooks:
+ - { type: "postStart", command: [ /bin/sh, -i, -c, sleep 10; MYSQL_PWD="$MYSQL_ROOT_PASSWORD" mysql -h 127.0.0.1 -u root -D $MYSQL_DATABASE -e "GRANT ALL ON *.* TO 'sds'@'%'; UPDATE mysql.user SET Super_Priv='Y' WHERE user='sds' AND host='%'; FLUSH PRIVILEGES;" ] }
diff --git a/docs/troubleshooting.txt b/docs/troubleshooting.txt
index 1f52fe9..5eb0cc7 100644
--- a/docs/troubleshooting.txt
+++ b/docs/troubleshooting.txt
@@ -263,6 +263,17 @@ pods: very slow scheduling (normal start time in seconds range), failed pods, ro
and the pods should be allowed to access files. Possible errors:
unable to create pods: pods "mongodb-2-" is forbidden: no providers available to validate pod request
+Pod Networking
+==============
+- Run commands in pod network, particularly execute packet sniffers (which would not work in the container due to missing capabilities)
+ * Get container cid
+ docker ps -f label=io.kubernetes.pod.name=epics-archappl-46-h6j62 -f label=io.kubernetes.pod.namespace=epics -f label=io.kubernetes.docker.type=podsandbox -q
+ * Run command with container networking, e.g. tcpdump
+ nsenter -n -t $(docker inspect --format "{{ .State.Pid }}" "f5a0ad4f5793") tcpdump -nv -i eth0
+
+- Check if service properly exposed ports
+ * 'nc' cannot reliably tell whether a UDP port is open or not due to the underlying firewall.
+ iptables -n -L -t nat | grep 5064 | grep 172
Builds
diff --git a/docs/vision.txt b/docs/vision.txt
index bf6de57..fdc921d 100644
--- a/docs/vision.txt
+++ b/docs/vision.txt
@@ -2,12 +2,14 @@ Ands v.2
========
- Try overlay2 storage driver (LVM is used in Ands v.1). Check also further docker configuration options: 'cgroup-driver', ...
* This actually seems problematic in CentOS-8. Something like 'rsync portage portage/.tmp' is EXTREMELY slow (<1 MB/s). Just check eix-sync.
- - Integrate fast Ethernet and use conteiner native networking. OpenVSwitch is slow and causes problems.
+ - Integrate fast Ethernet and use container native networking. OpenVSwitch is slow and causes problems. Alternatively, can we rely on some hardware
+ features of novel network cards, e.g. Mellanox ASAP2 (Accelerated Switch and Packet Processing)?
- Do not run pods on Master nodes, but Gluster and a few databases pods (MySQL) are OK (multiple reasons, especially mounting a lot of Gluster Volumes)
* Restrict all periodic jobs to a specific node: easy to re-install (non-master), fast SSD storage, ...?
- Object Storage should be integrated, either Gluster Block is ready for production or we have to use Ceph as well
- Automatic provisioning would be much better than handling volumes through Ands. Basically, this will render Ands redundant. We can switch to Helm, etc.
But, we need the ability to easily understand which volume belongs to which pod/namespace and automatically kill redundant volumes.
+ - Avoid conflicts with SCC private vlans (KIT WiFi, VPN, ...?)
Questions
=========
diff --git a/docs/webservices.txt b/docs/webservices.txt
index 2545bd5..0edfdeb 100644
--- a/docs/webservices.txt
+++ b/docs/webservices.txt
@@ -10,12 +10,18 @@ Architecture
by setting 'haproxy.router.openshift.io/balance' to 'source' in route metadata. Then, the destination
replica will be determined based on the client IP.
* HAProxy has configured a default timeout. If replica does not send data within '30s' the connection
- will be terminated. It can be increased with 'haproxy.router.openshift.io/timeout'
+ will be terminated. It can be increased with 'haproxy.router.openshift.io/timeout' in route metadata.
* There are several ways to configure certificates for HTTPS services, defined by the type of TLS termination
in the route specification. With 'passthrough' the container is expected to handle certificates itself.
In the edge termination mode, the certificates are configured in the route and HAProxy manages secure
communication with clients and provides unencrypted data to the service in the cluster.
+ - Sample metadata configuration for route:
+ kind: Route
+ metadata:
+ annotations:
+ haproxy.router.openshift.io/balance: 'source'
+ haproxy.router.openshift.io/timeout: 300s
Updating/Generating certificates for the router
===============================================
diff --git a/group_vars/OSEv3.yml b/group_vars/OSEv3.yml
index 4f99f5a..a92487f 100644
--- a/group_vars/OSEv3.yml
+++ b/group_vars/OSEv3.yml
@@ -46,6 +46,7 @@ openshift_master_cluster_hostname: "{{ ands_use_inner_lb | ternary(ands_inner_lb
openshift_master_cluster_public_hostname: "{{ ands_openshift_lb }}"
openshift_master_default_subdomain: "{{ ands_openshift_subdomain | default(ands_openshift_lb) }}"
openshift_master_ingress_ip_network_cidr: "{{ ands_openshift_ingress_network }}"
+openshift_master_external_ip_network_cidrs: "{{ ands_openshift_external_network }}"
#openshift_portal_net:
#osm_host_subnet_length:
diff --git a/group_vars/production.yml b/group_vars/production.yml
index c731873..e661c58 100644
--- a/group_vars/production.yml
+++ b/group_vars/production.yml
@@ -8,6 +8,7 @@ ands_openshift_subdomain: kaas.kit.edu
ands_openshift_network: 192.168.13.0/24
ands_openshift_public_network: 192.168.26.0/24
ands_openshift_ingress_network: 192.168.16.0/24
+ands_openshift_external_network: [192.168.128.0/18]
ands_global_network: 141.52.64.0/23
#ands_inner_domain: ""
diff --git a/group_vars/staging.yml b/group_vars/staging.yml
index 00ec146..94c13fd 100644
--- a/group_vars/staging.yml
+++ b/group_vars/staging.yml
@@ -9,6 +9,7 @@ ands_openshift_subdomain: openshift.suren.me
ands_openshift_network: 192.168.213.0/24
ands_openshift_public_network: 192.168.226.0/24
ands_openshift_ingress_network: 192.168.216.0/24
+ands_openshift_external_network: [192.168.128.0/18]
ands_inner_domain: ""
ands_use_inner_lb: true
diff --git a/group_vars/testing.yml b/group_vars/testing.yml
index f7e04cf..af3ba4b 100644
--- a/group_vars/testing.yml
+++ b/group_vars/testing.yml
@@ -7,6 +7,7 @@ ands_openshift_subdomain: kaas.kit.edu
ands_openshift_network: 192.168.13.0/24
ands_openshift_public_network: 192.168.26.0/24
ands_openshift_ingress_network: 192.168.16.0/24
+ands_openshift_external_network: [192.168.128.0/18]
#ands_inner_domain: ""
ands_openshift_set_hostname: false
diff --git a/setup/configs/openshift.yml b/setup/configs/openshift.yml
index fc1743a..878d687 100644
--- a/setup/configs/openshift.yml
+++ b/setup/configs/openshift.yml
@@ -8,6 +8,7 @@ ands_openshift_projects:
kaas: KaaS router and common resources
katrin: KArlsruhe TRItium Neutrino
status: KATRIN status display
+ epics: EPICS control system
adei: ADEI
adai: ADAI
bora: Build Once Run Always
@@ -24,6 +25,9 @@ ands_openshift_users:
kopmann: { name: "Andreas Kopmann", email: "kopmann@kit.edu" }
ntj: { name: "Nicholas Tan Jerome", email: "nicholas.jerome@kit.edu" }
jonasteufel: { name: "Jonas Teufel", email: "jonseb1998@gmail.com" }
+ jalal: { name: "Jalal Mostafa", email: "jalal.mostapha@outlook.com" }
+ gil: { name: "Woosik Gil", email: "gil@kit.edu" }
+
ands_openshift_roles:
cluster-admin: csa
@@ -31,14 +35,15 @@ ands_openshift_roles:
katrin/admin: katrin
status/admin: katrin
adei/admin: csa
- adei/view: pdv, kopmann
- adei/kaas-maintain: pdv, kopmann
- adai/admin: csa, kopmann
- bora/admin: csa, ntj, katrin, kopmann
+ adei/view: pdv, kopmann, jalal
+ adei/kaas-maintain: pdv, kopmann, jalal
+ adai/admin: csa, kopmann, jalal
+ bora/admin: csa, ntj, gil, jalal, katrin, kopmann
+ epics/admin: csa, jalal
wave/admin: csa, ntj
- services/admin: csa
- web/admin: kopmann, jonasteufel
- mon/admin: csa
+ services/admin: csa, jalal, katrin
+ web/admin: kopmann, jonasteufel, jalal
+ mon/admin: csa, jalal
test/admin: csa, ntj, kopmann, katrin
ands_repos:
diff --git a/setup/configs/secrets.yml b/setup/configs/secrets.yml
index 5005be0..0d04ece 100644
--- a/setup/configs/secrets.yml
+++ b/setup/configs/secrets.yml
@@ -1,10 +1,11 @@
$ANSIBLE_VAULT;1.1;AES256
-30383738386265633133306363326639656331333736303966633133333661383561373533303966
-6361396564626437656237663035616461656661316265610a306336373231616136393330616632
-39376265346133303332363235303635383239336463633234616261643161643639313732313536
-3264636131353136640a623238663037336261303063313564303665386561643030373064356536
-61633136393138656533336563346635656531376161376639656436343437326538366336643734
-35363464646131316366626234613737366633626166376339313832646239626265333637613261
-32353535356537323533373831396138326239643937623865613731373165393633626331623839
-66323039393136313431383264633731653965386261613336376263396531333862306562313337
-38346465613831613566353233346634373032663537353633643330363136343264
+65363962636236613634613837653134633264656465386635633664366365316666393163646130
+3566376130643337626562393361646565396465396661300a333031313732373061393036616366
+62633866616631316261373135666164356161303332613862643132333230376532356237393539
+3263663537613631350a633965366532333361613164346162626138336565303935393862633933
+32653639336436623563666438343230376563643730636635653239393034393663383333643032
+63363330356165666266623463666137393861643930366632373763613236633661643166633439
+30633638393864343430366538343933333039366565326231633233643263383832356361633439
+62666661643963346431643138663538313761316639623065333437386463313231626532323561
+66616531346566366663393632643533636236633464353162613965316634666535313530623337
+3435386536643263316131303736396238366630393037346664
diff --git a/setup/configs/security.yml b/setup/configs/security.yml
index a35b33e..171fdf6 100644
--- a/setup/configs/security.yml
+++ b/setup/configs/security.yml
@@ -20,6 +20,7 @@ ands_openshift_gid_ranges:
kaas: "4000/10"
katrin: "5000/10"
status: "5100/10"
+ epics: "5200/10"
adei: "6000/10"
adai: "6050/10"
bora: "6100/10"
diff --git a/setup/external_ip.txt b/setup/external_ip.txt
new file mode 100644
index 0000000..e9e6826
--- /dev/null
+++ b/setup/external_ip.txt
@@ -0,0 +1,8 @@
+External networks, ranges 192.168.128.x - 192.168.191.x
+=================
+ - 192.168.128.0/24 KaaS VMs (reserved, unused)
+ - 192.168.129.0/24 ADEI VMs (reserved, unused)
+ - 192.168.130.0/24 EPICS VMs (managed by Jalal)
+
+
+ \ No newline at end of file
diff --git a/setup/users/htpasswd b/setup/users/htpasswd
index 8965caf..4386b9e 100644
--- a/setup/users/htpasswd
+++ b/setup/users/htpasswd
@@ -4,3 +4,5 @@ katrin:$apr1$94lAgTxt$LVOWdwye92nsZVqVT7VaG1
ntj:$apr1$G5/ThWdp$kFLsj/hO9jIYYP.Zab9kC/
kopmann:$apr1$jU8jCdPh$u7ZUBiT3gzxlf1xPJl6FI.
jonasteufel:$apr1$2dsiiZ1p$Us/5i8DEt9fxeliGy7L6h/
+jalal:$apr1$hwKRrL2x$RbtSQbfZZqPuvHL9YhCKp.
+gil:$apr1$p2khs49v$7poH4dUbTpCyhEO5JmgLx0