From df21c126d6034ef1034f153f7934af570d5964cc Mon Sep 17 00:00:00 2001 From: Dianjin Wang Date: Wed, 20 May 2026 12:33:39 +0800 Subject: [PATCH 1/2] DevOps: add Ansible playbook for cluster deployment Introduces an Ansible-based automation for deploying Apache Cloudberry on bare-metal or virtual machines, covering the full deployment workflow from OS configuration to cluster initialization. Key features: - Automates all pre-deployment OS tuning: SELinux, firewall, sysctl, PAM limits, THP, IPC, SSH thresholds, and chronyd - Dynamically calculates memory-dependent sysctl parameters per host: kernel.shmall/shmmax (via PAGE_SIZE and _PHYS_PAGES), vm.overcommit_ratio (via gp_vmem formula), vm.dirty_* (bytes mode for >64GB, ratio mode for <=64GB), and vm.min_free_kbytes (3% of MemTotal) - Installs RPM/DEB packages and configures N-N passwordless SSH via gpssh-exkeys - Initializes the cluster with gpinitsystem including standby coordinator (gpinitstandby with sync verification) - Supports variable cluster sizes via inventory/hosts Files added: - ansible/site.yml main playbook - ansible/group_vars/all.yml deployment variables with inline documentation - ansible/inventory/hosts sample inventory for a 5-node cluster - ansible/ansible.cfg disables host key checking for first-run - ansible/README.md usage guide and prerequisites Assisted-by: Claude Code --- devops/deploy/ansible/README.md | 103 ++++ devops/deploy/ansible/ansible.cfg | 19 + devops/deploy/ansible/group_vars/all.yml | 76 +++ devops/deploy/ansible/inventory/hosts | 37 ++ devops/deploy/ansible/site.yml | 570 +++++++++++++++++++++++ 5 files changed, 805 insertions(+) create mode 100644 devops/deploy/ansible/README.md create mode 100644 devops/deploy/ansible/ansible.cfg create mode 100644 devops/deploy/ansible/group_vars/all.yml create mode 100644 devops/deploy/ansible/inventory/hosts create mode 100644 devops/deploy/ansible/site.yml diff --git a/devops/deploy/ansible/README.md b/devops/deploy/ansible/README.md new 
file mode 100644 index 00000000000..44423c5d75f --- /dev/null +++ b/devops/deploy/ansible/README.md @@ -0,0 +1,103 @@ + + +# Apache Cloudberry Deployment Via Ansible + +This directory contains an Ansible playbook for deploying Apache Cloudberry on physical or virtual machines. + +## Quick Start + +```bash +# 1. Edit inventory and variables +vi ansible/inventory/hosts # set hostnames and IPs +vi ansible/group_vars/all.yml # set password, disk, segments, etc. + +# 2. Run the playbook +ansible-playbook ansible/site.yml -i ansible/inventory/hosts \ + -e package_path=./apache-cloudberry-db-incubating-2.1.0-1.el9.x86_64.rpm +``` + +## Cluster Layout (default) + +| Host | Role | +|------|------| +| cdw | Coordinator | +| scdw | Standby Coordinator | +| sdw1 | Segment Host 1 | +| sdw2 | Segment Host 2 | +| sdw3 | Segment Host 3 | + +Each segment host runs 2 primary segments and 2 mirror segments (spread mirroring). + +## Prerequisites + +- Ansible installed on the control machine (tested with ansible-core 2.14+) +- Root SSH access from the control machine to all hosts (password authentication, as used by `ansible_password` in the sample inventory, additionally requires `sshpass` on the control machine) +- All hosts have Rocky Linux 8/9 or compatible OS installed +- Apache Cloudberry RPM/DEB package downloaded to the control machine + +Ansible 2.10+ requires the following collections to be installed separately: + +```bash +ansible-galaxy collection install ansible.posix community.general community.crypto +``` + +To suppress the Python `crypt` module deprecation warning, install `passlib`: + +```bash +pip3 install passlib +``` + +## Directory Structure + +``` +ansible/ +├── ansible.cfg # disable host key checking +├── site.yml # main playbook +├── inventory/ +│ └── hosts # hostnames and IPs +└── group_vars/ + └── all.yml # deployment variables +``` + +## What the Playbook Does + +1. Disable SELinux and firewall +2. Configure hostnames and `/etc/hosts` +3. Set kernel parameters (`sysctl`) +4. Set resource limits (`limits.conf`) +5. Configure XFS mount and disk I/O settings +6. 
Disable Transparent Huge Pages +7. Disable IPC object removal +8. Configure SSH thresholds +9. Synchronize system clocks (chronyd) +10. Create `gpadmin` user with sudo +11. Install Apache Cloudberry package on all hosts +12. Configure passwordless SSH for gpadmin (N-N) +13. Create data storage directories +14. Initialize the cluster with `gpinitsystem` +15. Set environment variables in `.bashrc` + +## After Deployment + +```bash +su - gpadmin +psql -d warehouse # connect to the database +gpstate -s # check cluster status +``` diff --git a/devops/deploy/ansible/ansible.cfg b/devops/deploy/ansible/ansible.cfg new file mode 100644 index 00000000000..bbc14a5f7f7 --- /dev/null +++ b/devops/deploy/ansible/ansible.cfg @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[defaults] +host_key_checking = False diff --git a/devops/deploy/ansible/group_vars/all.yml b/devops/deploy/ansible/group_vars/all.yml new file mode 100644 index 00000000000..188f2a9cc3a --- /dev/null +++ b/devops/deploy/ansible/group_vars/all.yml @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Apache Cloudberry Deployment Variables +# +# Note: kernel.shmall, kernel.shmmax, vm.overcommit_ratio, vm.min_free_kbytes, +# and vm.dirty_* parameters are NOT set here. They are dynamically calculated +# at deploy time based on each host's actual RAM and swap size. +# See the "Calculate dynamic sysctl values" tasks in site.yml. + +# Cloudberry version +cloudberry_version: "2.1.0" + +# Admin user +cloudberry_admin_user: "gpadmin" +cloudberry_admin_password: "changeme" + +# Package path (override via -e package_path=... on the command line) +# Example: -e package_path=/root/apache-cloudberry-db-incubating-2.1.0-1.el9.x86_64.rpm +# package_path: /home/gpadmin/apache-cloudberry-db-incubating-2.1.0-1.el9.x86_64.rpm + +# Data disk device and mount point. +# Must be set manually before running the playbook. +# Run 'lsblk' on each host to identify the correct device name. 
+# +# Common device names by environment: +# /dev/sdb — physical servers, VMware +# /dev/vdb — KVM / OpenStack / Cloud ECS +# /dev/nvme0n1 — NVMe SSD (physical or cloud) +# /dev/xvdb — AWS EC2 (older instance types) +# +# If you are using a cloud VM with only a single system disk (no dedicated +# data disk), leave data_disk empty and create the data directory manually: +# mkdir -p /data && chown -R gpadmin:gpadmin /data +# The XFS formatting and mount steps in site.yml will be skipped automatically. +data_disk: "/dev/sdb" +data_mount: "/data" + +# Data directories +# coordinator_data_dir is used on both cdw and scdw. +# primary_data_dir and mirror_data_dir are used on segment hosts. +coordinator_data_dir: "/data/coordinator" +primary_data_dir: "/data/primary" +mirror_data_dir: "/data/mirror" + +# Number of primary segment instances per segment host. +# Mirror instances are created 1:1 with primaries. +# Recommended: set to the number of CPU cores / 4, or 2 for test environments. +segments_per_host: 2 + +# Ports +# Ensure these ranges do not overlap with net.ipv4.ip_local_port_range (10000-65535). +coordinator_port: 5432 +port_base: 6000 +mirror_port_base: 7000 + +# Default database created during gpinitsystem +database_name: "warehouse" + +# Coordinator and standby hostnames (must match inventory/hosts and /etc/hosts) +coordinator_hostname: "cdw" +standby_hostname: "scdw" diff --git a/devops/deploy/ansible/inventory/hosts b/devops/deploy/ansible/inventory/hosts new file mode 100644 index 00000000000..b1d4848208e --- /dev/null +++ b/devops/deploy/ansible/inventory/hosts @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[coordinator] +cdw ansible_host=192.168.1.x + +[standby] +scdw ansible_host=192.168.1.x + +[segments] +sdw1 ansible_host=192.168.1.x +sdw2 ansible_host=192.168.1.x +sdw3 ansible_host=192.168.1.x + +[cloudberry:children] +coordinator +standby +segments + +[cloudberry:vars] +ansible_user=root +ansible_password=your_root_password_here +ansible_ssh_common_args='-o StrictHostKeyChecking=no' diff --git a/devops/deploy/ansible/site.yml b/devops/deploy/ansible/site.yml new file mode 100644 index 00000000000..cbf08100bf3 --- /dev/null +++ b/devops/deploy/ansible/site.yml @@ -0,0 +1,570 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
#
# Apache Cloudberry - Bare Metal Deployment Playbook
#
# Usage:
#   ansible-playbook site.yml -i inventory/hosts \
#     -e package_path=./apache-cloudberry-db-incubating-2.1.0.el8.x86_64.rpm
#
# Plays, in order:
#   1. Configure all hosts      - OS tuning, data disk, gpadmin account
#   2. Install the package      - copy + install the RPM/DEB on every host
#   3. Passwordless SSH         - key generation/distribution from the coordinator
#   4. Standby directories      - coordinator data directory on the standby
#   5. Segment directories      - primary/mirror data directories on segments
#   6. Initialize the cluster   - gpinitsystem, environment, standby check
#
# All tunables (admin user, data disk, ports, directories, ...) are read from
# group_vars/all.yml. No play overrides them locally, so a value changed there
# applies consistently to every play.

---
- name: Configure all hosts
  hosts: cloudberry
  become: yes
  tasks:

    # --- SELinux ---
    - name: Disable SELinux
      selinux:
        state: disabled
      when: ansible_os_family == "RedHat"

    # --- Firewall ---
    # Best effort: hosts without firewalld installed must not abort the play.
    - name: Stop and disable firewalld
      systemd:
        name: firewalld
        state: stopped
        enabled: no
      ignore_errors: yes

    # --- Hosts file ---
    - name: Set hostname
      hostname:
        name: "{{ inventory_hostname }}"

    - name: Add cluster hosts to /etc/hosts
      lineinfile:
        path: /etc/hosts
        line: "{{ hostvars[item].ansible_host }} {{ item }}"
        state: present
      loop: "{{ groups['cloudberry'] }}"

    # --- Kernel parameters ---
    # Dynamically calculate memory-dependent sysctl values based on each host's
    # actual physical memory and swap, following the official documentation formulas:
    #   kernel.shmall       = _PHYS_PAGES / 2
    #   kernel.shmmax       = (_PHYS_PAGES / 2) * PAGE_SIZE
    #   vm.overcommit_ratio = (RAM - 0.026 * gp_vmem) / RAM * 100
    #   vm.dirty_*          uses bytes mode for >64GB RAM, ratio mode for <=64GB RAM
    #   vm.min_free_kbytes  = MemTotal * 3%

    - name: Get system PAGE_SIZE
      command: getconf PAGE_SIZE
      register: _page_size_result
      changed_when: false

    # _PHYS_PAGES is read directly from the host instead of being derived from
    # ansible_memtotal_mb: that fact is in MiB, and converting it to pages needs
    # a bytes value (MiB * 1024 * 1024). Multiplying by 1024 only yields kB and
    # makes shmall/shmmax 1024 times too small.
    - name: Get system _PHYS_PAGES
      command: getconf _PHYS_PAGES
      register: _phys_pages_result
      changed_when: false

    - name: Calculate dynamic sysctl values
      set_fact:
        _ram_mb: "{{ ansible_memtotal_mb }}"
        _swap_mb: "{{ ansible_swaptotal_mb }}"
        _page_size: "{{ _page_size_result.stdout | int }}"
        _phys_pages: "{{ _phys_pages_result.stdout | int }}"

    # Integer (floor) division keeps both values exact whole numbers.
    - name: Calculate kernel.shmall and kernel.shmmax
      set_fact:
        _shmall: "{{ (_phys_pages | int) // 2 }}"
        _shmmax: "{{ ((_phys_pages | int) // 2) * (_page_size | int) }}"

    # RAM and swap expressed in GB, the unit used by the gp_vmem formula below.
    - name: Calculate vm.overcommit_ratio
      set_fact:
        _ram_gb: "{{ (_ram_mb | int / 1024) | float }}"
        _swap_gb: "{{ (_swap_mb | int / 1024) | float }}"

    # gp_vmem = ((SWAP + RAM) - (7.5GB + 0.05 * RAM)) / divisor,
    # where the divisor is 1.17 for hosts with >= 256GB RAM and 1.7 otherwise.
    - name: Calculate gp_vmem and overcommit_ratio
      set_fact:
        _gp_vmem: >-
          {{ ((_swap_gb | float + _ram_gb | float) - (7.5 + 0.05 * (_ram_gb | float)))
             / ((_ram_gb | float >= 256) | ternary(1.17, 1.7)) }}

    - name: Calculate final overcommit_ratio
      set_fact:
        _overcommit_ratio: "{{ ((_ram_gb | float - 0.026 * (_gp_vmem | float)) / (_ram_gb | float) * 100) | int }}"

    # ansible_memtotal_mb * 1024 converts MiB to kB, the unit of this sysctl.
    - name: Calculate vm.min_free_kbytes (3% of MemTotal in kB)
      set_fact:
        _min_free_kbytes: "{{ (_ram_mb | int * 1024 * 0.03) | round | int }}"

    - name: Set sysctl parameters (fixed values)
      sysctl:
        name: "{{ item.key }}"
        value: "{{ item.value }}"
        sysctl_set: yes
        reload: yes
      loop: "{{ sysctl_fixed | dict2items }}"
      vars:
        sysctl_fixed:
          kernel.shmmni: "4096"
          vm.overcommit_memory: "2"
          net.ipv4.ip_local_port_range: "10000 65535"
          kernel.sem: "250 2048000 200 8192"
          kernel.sysrq: "1"
          kernel.core_uses_pid: "1"
          kernel.msgmnb: "65536"
          kernel.msgmax: "65536"
          kernel.msgmni: "2048"
          net.ipv4.tcp_syncookies: "1"
          net.ipv4.conf.default.accept_source_route: "0"
          net.ipv4.tcp_max_syn_backlog: "4096"
          net.ipv4.conf.all.arp_filter: "1"
          net.ipv4.ipfrag_high_thresh: "41943040"
          net.ipv4.ipfrag_low_thresh: "31457280"
          net.ipv4.ipfrag_time: "60"
          net.core.netdev_max_backlog: "10000"
          net.core.rmem_max: "2097152"
          net.core.wmem_max: "2097152"
          vm.swappiness: "10"
          vm.zone_reclaim_mode: "0"
          vm.dirty_expire_centisecs: "500"
          vm.dirty_writeback_centisecs: "100"
          kernel.core_pattern: "/var/core/core.%h.%t"

    - name: Set sysctl parameters (memory-dependent)
      sysctl:
        name: "{{ item.key }}"
        value: "{{ item.value }}"
        sysctl_set: yes
        reload: yes
      loop:
        - { key: "kernel.shmall", value: "{{ _shmall }}" }
        - { key: "kernel.shmmax", value: "{{ _shmmax }}" }
        - { key: "vm.overcommit_ratio", value: "{{ _overcommit_ratio }}" }
        - { key: "vm.min_free_kbytes", value: "{{ _min_free_kbytes }}" }

    - name: Set sysctl dirty parameters for hosts with more than 64GB RAM (bytes mode)
      sysctl:
        name: "{{ item.key }}"
        value: "{{ item.value }}"
        sysctl_set: yes
        reload: yes
      loop:
        - { key: "vm.dirty_background_ratio", value: "0" }
        - { key: "vm.dirty_ratio", value: "0" }
        - { key: "vm.dirty_background_bytes", value: "1610612736" }
        - { key: "vm.dirty_bytes", value: "4294967296" }
      when: _ram_mb | int > 65536

    # Per official docs: for systems with 64GB RAM or less, only set the ratio
    # parameters. vm.dirty_background_bytes and vm.dirty_bytes should be omitted
    # entirely, not set to 0.
    - name: Set sysctl dirty parameters for hosts with 64GB RAM or less (ratio mode)
      sysctl:
        name: "{{ item.key }}"
        value: "{{ item.value }}"
        sysctl_set: yes
        reload: yes
      loop:
        - { key: "vm.dirty_background_ratio", value: "3" }
        - { key: "vm.dirty_ratio", value: "10" }
      when: _ram_mb | int <= 65536

    # Target directory of kernel.core_pattern set above.
    - name: Create core dump directory
      file:
        path: /var/core
        state: directory
        mode: "1777"

    # --- Resource limits ---
    - name: Set PAM limits
      pam_limits:
        domain: "*"
        limit_type: "{{ item.type }}"
        limit_item: "{{ item.item }}"
        value: "{{ item.value }}"
      loop:
        - { type: soft, item: nofile, value: "524288" }
        - { type: hard, item: nofile, value: "524288" }
        - { type: soft, item: nproc, value: "131072" }
        - { type: hard, item: nproc, value: "131072" }
        - { type: soft, item: core, value: unlimited }

    # --- XFS mount and disk I/O ---
    # group_vars/all.yml documents that leaving data_disk empty skips these
    # steps (single-disk cloud VMs). The guard makes that true: without it the
    # mount and blockdev tasks run against an empty device name and fail.
    - name: Prepare dedicated data disk
      when: data_disk | default('', true) | length > 0
      block:
        # force: no never reformats a device that already carries a filesystem.
        - name: Create XFS filesystem on data disk
          filesystem:
            fstype: xfs
            dev: "{{ data_disk }}"
            force: no
          ignore_errors: yes

        - name: Mount data disk
          mount:
            path: "{{ data_mount }}"
            src: "{{ data_disk }}"
            fstype: xfs
            opts: rw,nodev,noatime,inode64
            state: mounted

        - name: Set blockdev read-ahead
          command: "/sbin/blockdev --setra 16384 {{ data_disk }}"
          changed_when: false

    # grubby is not available on every distribution, hence best effort.
    - name: Set I/O scheduler via grubby
      command: grubby --update-kernel=ALL --args="elevator=mq-deadline"
      ignore_errors: yes
      changed_when: false

    # --- THP ---
    - name: Disable Transparent Huge Pages via grubby
      command: grubby --update-kernel=ALL --args="transparent_hugepage=never"
      ignore_errors: yes
      changed_when: false

    # --- IPC ---
    - name: Disable IPC object removal
      lineinfile:
        path: /etc/systemd/logind.conf
        regexp: "^#?RemoveIPC="
        line: "RemoveIPC=no"
      notify: restart systemd-logind

    # --- SSH threshold ---
    - name: Set SSH MaxStartups
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: "^#?MaxStartups"
        line: "MaxStartups 10:30:200"
      notify: restart sshd

    - name: Set SSH MaxSessions
      lineinfile:
        path: /etc/ssh/sshd_config
        regexp: "^#?MaxSessions"
        line: "MaxSessions 200"
      notify: restart sshd

    # --- Clock sync ---
    - name: Enable and start chronyd
      systemd:
        name: chronyd
        state: started
        enabled: yes

    # --- gpadmin user ---
    - name: Create gpadmin group
      group:
        name: "{{ cloudberry_admin_user }}"
        state: present

    # password_hash() without a fixed salt yields a different hash on every
    # run. update_password: on_create keeps re-runs from reporting a change and
    # from resetting a password that was changed after deployment.
    - name: Create gpadmin user
      user:
        name: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"
        system: yes
        create_home: yes
        password: "{{ cloudberry_admin_password | password_hash('sha512') }}"
        update_password: on_create

    # Debian-family hosts do not ship a wheel group by default; create it so the
    # membership task below works on the DEB install path as well.
    - name: Ensure wheel group exists
      group:
        name: wheel
        state: present

    - name: Add gpadmin to wheel group
      user:
        name: "{{ cloudberry_admin_user }}"
        groups: wheel
        append: yes

    # validate runs visudo on the candidate file so a broken sudoers is never
    # installed.
    - name: Ensure wheel group has NOPASSWD sudo
      lineinfile:
        path: /etc/sudoers
        regexp: "^%wheel"
        line: "%wheel ALL=(ALL) NOPASSWD: ALL"
        validate: "visudo -cf %s"

    - name: Set data directory ownership
      file:
        path: "{{ data_mount }}"
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"
        recurse: yes

  handlers:
    - name: restart systemd-logind
      service:
        name: systemd-logind
        state: restarted

    - name: restart sshd
      service:
        name: sshd
        state: restarted

# --- Install Apache Cloudberry ---
# package_path must be supplied on the command line (-e package_path=...); it is
# a path on the control machine.
- name: Install Apache Cloudberry package
  hosts: cloudberry
  become: yes
  tasks:
    - name: Copy package to host
      copy:
        src: "{{ package_path }}"
        dest: "/tmp/{{ package_path | basename }}"

    - name: Install package (RPM)
      yum:
        name: "/tmp/{{ package_path | basename }}"
        state: present
        disable_gpg_check: yes
      when: ansible_os_family == "RedHat"

    - name: Install package (DEB)
      apt:
        deb: "/tmp/{{ package_path | basename }}"
        state: present
      when: ansible_os_family == "Debian"

    - name: Cleanup package file
      file:
        path: "/tmp/{{ package_path | basename }}"
        state: absent

    # shell (not command) is required for the glob expansion.
    - name: Set installation directory ownership
      shell: chown -R {{ cloudberry_admin_user }}:{{ cloudberry_admin_user }} /usr/local/cloudberry*
      changed_when: false

# --- Configure SSH and initialize (coordinator only) ---
# Tasks run as the admin user from group_vars/all.yml. The play deliberately
# does not redefine cloudberry_admin_user: a play-level value would take
# precedence over group_vars and diverge from the user created in the first play.
- name: Configure passwordless SSH and initialize cluster
  hosts: coordinator
  become: yes
  become_user: "{{ cloudberry_admin_user }}"
  tasks:
    - name: Create .ssh directory for gpadmin
      file:
        path: "/home/{{ cloudberry_admin_user }}/.ssh"
        state: directory
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"
        mode: "0700"

    - name: Generate SSH key for gpadmin
      openssh_keypair:
        path: "/home/{{ cloudberry_admin_user }}/.ssh/id_rsa"
        type: rsa
        size: 4096
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"

    - name: Fetch gpadmin public key from coordinator
      slurp:
        src: "/home/{{ cloudberry_admin_user }}/.ssh/id_rsa.pub"
      register: gpadmin_pubkey

    # slurp returns base64 content; the key is installed on every cluster host
    # (including the coordinator itself) through delegation.
    - name: Distribute SSH public key to all hosts
      authorized_key:
        user: "{{ cloudberry_admin_user }}"
        key: "{{ gpadmin_pubkey.content | b64decode }}"
      delegate_to: "{{ item }}"
      loop: "{{ groups['cloudberry'] }}"

    - name: Create hostfile_exkeys
      copy:
        content: "{{ groups['cloudberry'] | join('\n') }}\n"
        dest: "/home/{{ cloudberry_admin_user }}/hostfile_exkeys"

    - name: Create hostfile_gpinitsystem
      copy:
        content: "{{ groups['segments'] | join('\n') }}\n"
        dest: "/home/{{ cloudberry_admin_user }}/hostfile_gpinitsystem"

    - name: Scan and trust host keys for all cluster nodes
      shell: |
        ssh-keyscan -H {{ groups['cloudberry'] | join(' ') }} >> /home/{{ cloudberry_admin_user }}/.ssh/known_hosts
        chown {{ cloudberry_admin_user }}:{{ cloudberry_admin_user }} /home/{{ cloudberry_admin_user }}/.ssh/known_hosts
        chmod 600 /home/{{ cloudberry_admin_user }}/.ssh/known_hosts
      args:
        executable: /bin/bash

    - name: Run gpssh-exkeys
      shell: |
        source /usr/local/cloudberry-db/cloudberry-env.sh
        gpssh-exkeys -f /home/{{ cloudberry_admin_user }}/hostfile_exkeys
      args:
        executable: /bin/bash

    # Runs as root because the parent of the coordinator directory (the data
    # mount point by default) may not be writable by the admin user yet.
    - name: Create data root directory as root
      file:
        path: "{{ coordinator_data_dir | dirname }}"
        state: directory
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"
        mode: "0755"
      become: yes
      become_user: root

    - name: Create coordinator data directory
      file:
        path: "{{ coordinator_data_dir }}"
        state: directory
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"

- name: Create data directories on standby
  hosts: standby
  become: yes
  tasks:
    - name: Create data root directory on standby
      file:
        path: "{{ coordinator_data_dir | dirname }}"
        state: directory
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"
        mode: "0755"

    - name: Create coordinator data directory on standby
      file:
        path: "{{ coordinator_data_dir }}"
        state: directory
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"

    - name: Create .ssh directory for gpadmin on standby
      file:
        path: "/home/{{ cloudberry_admin_user }}/.ssh"
        state: directory
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"
        mode: "0700"

- name: Create data directories on segments
  hosts: segments
  become: yes
  tasks:
    - name: Create data root directory on segments
      file:
        path: "{{ primary_data_dir | dirname }}"
        state: directory
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"
        mode: "0755"

    - name: Create primary data directory
      file:
        path: "{{ primary_data_dir }}"
        state: directory
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"

    - name: Create mirror data directory
      file:
        path: "{{ mirror_data_dir }}"
        state: directory
        owner: "{{ cloudberry_admin_user }}"
        group: "{{ cloudberry_admin_user }}"

# Runs as the admin user from group_vars/all.yml (no play-level override, for
# the same reason as in the SSH play above).
- name: Initialize Apache Cloudberry cluster
  hosts: coordinator
  become: yes
  become_user: "{{ cloudberry_admin_user }}"
  tasks:
    - name: Create gpconfigs directory
      file:
        path: "/home/{{ cloudberry_admin_user }}/gpconfigs"
        state: directory

    # DATA_DIRECTORY / MIRROR_DATA_DIRECTORY list the same directory once per
    # segment instance. "| int" keeps range() working when segments_per_host is
    # passed as a string (for example via -e on the command line).
    - name: Create gpinitsystem config
      copy:
        content: |
          SEG_PREFIX=gpseg
          PORT_BASE={{ port_base }}
          declare -a DATA_DIRECTORY=({% for i in range(segments_per_host | int) %}{{ primary_data_dir }} {% endfor %})
          COORDINATOR_HOSTNAME={{ coordinator_hostname }}
          COORDINATOR_DIRECTORY={{ coordinator_data_dir }}
          COORDINATOR_PORT={{ coordinator_port }}
          TRUSTED_SHELL=ssh
          CHECK_POINT_SEGMENTS=8
          ENCODING=UNICODE
          MIRROR_PORT_BASE={{ mirror_port_base }}
          declare -a MIRROR_DATA_DIRECTORY=({% for i in range(segments_per_host | int) %}{{ mirror_data_dir }} {% endfor %})
          DATABASE_NAME={{ database_name }}
        dest: "/home/{{ cloudberry_admin_user }}/gpconfigs/gpinitsystem_config"

    # gpseg-1 is the coordinator instance directory (SEG_PREFIX "gpseg"; the
    # same path is exported as COORDINATOR_DATA_DIRECTORY below). If it exists
    # the cluster was initialized by an earlier run, and running gpinitsystem
    # again would fail the whole playbook.
    # NOTE(review): a gpinitsystem run that aborted midway may leave this
    # directory behind; remove it manually before retrying in that case.
    - name: Check whether the coordinator instance already exists
      stat:
        path: "{{ coordinator_data_dir }}/gpseg-1"
      register: _coordinator_instance

    # Success is judged by the output text rather than the exit status.
    - name: Run gpinitsystem
      shell: |
        source /usr/local/cloudberry-db/cloudberry-env.sh
        yes | gpinitsystem -c /home/{{ cloudberry_admin_user }}/gpconfigs/gpinitsystem_config \
          -h /home/{{ cloudberry_admin_user }}/hostfile_gpinitsystem \
          -s {{ standby_hostname }} --mirror-mode=spread
      args:
        executable: /bin/bash
      register: gpinitsystem_result
      failed_when: "'successfully created' not in gpinitsystem_result.stdout"
      when: not _coordinator_instance.stat.exists

    - name: Set environment variables in .bashrc
      blockinfile:
        path: "/home/{{ cloudberry_admin_user }}/.bashrc"
        block: |
          source /usr/local/cloudberry-db/cloudberry-env.sh
          export COORDINATOR_DATA_DIRECTORY={{ coordinator_data_dir }}/gpseg-1
          export PGPORT={{ coordinator_port }}
          export PGUSER={{ cloudberry_admin_user }}
          export PGDATABASE={{ database_name }}
        marker: "# {mark} CLOUDBERRY ENVIRONMENT"

    - name: Sync .bashrc to standby coordinator
      shell: |
        scp /home/{{ cloudberry_admin_user }}/.bashrc \
          {{ standby_hostname }}:/home/{{ cloudberry_admin_user }}/.bashrc
      args:
        executable: /bin/bash

    # Informational only: never fails the play (failed_when: false).
    - name: Verify standby coordinator is synchronized
      shell: |
        source /usr/local/cloudberry-db/cloudberry-env.sh
        source /home/{{ cloudberry_admin_user }}/.bashrc
        gpstate -f
      args:
        executable: /bin/bash
      register: gpstate_result
      changed_when: false
      failed_when: false

    # Fallback for the -s option of gpinitsystem: only runs gpinitstandby when
    # gpstate does not report the standby as "Sync state: sync".
    - name: Initialize standby coordinator if not already active
      shell: |
        source /usr/local/cloudberry-db/cloudberry-env.sh
        source /home/{{ cloudberry_admin_user }}/.bashrc
        if ! gpstate -f 2>&1 | grep -q "Sync state: sync"; then
          yes | gpinitstandby -s {{ standby_hostname }}
        else
          echo "Standby already synchronized, skipping gpinitstandby"
        fi
      args:
        executable: /bin/bash
      register: gpinitstandby_result
      changed_when: "'skipping' not in gpinitstandby_result.stdout"

    - name: Display success message
      debug:
        msg:
          - "=========================================="
          - " Apache Cloudberry cluster initialized successfully!"
          - "=========================================="
          - " Connect to the database:"
          - " su - gpadmin"
          - " psql -d {{ database_name }}"
          - "=========================================="

    - name: Check postgres --gp-version
      command: /usr/local/cloudberry-db/bin/postgres --gp-version
      register: gp_version
      changed_when: false

    - name: Check postgres --version
      command: /usr/local/cloudberry-db/bin/postgres --version
      register: pg_version
      changed_when: false

    - name: Show version info
      debug:
        msg:
          - "{{ gp_version.stdout }}"
          - "{{ pg_version.stdout }}"