commit 943d390dd7d7d1b65f5beabd5c5f28c9221470ff Author: Ovidiu Poncea Date: Wed Oct 7 19:18:38 2020 +0300 Fix ceph ansible restore failure when backup is taken on controller-1 The Ansible playbook starts ceph through '/etc/init.d/ceph start'. This script has two detection mechanisms for OSDs: 1. it looks in the ceph.conf for \[osd\.[0-9]*\] sections, grabs the OSD id, mounts the corresponding devices and starts ceph-osd daemons; 2. it lists all folders in /var/lib/ceph/osd/*, grabs the OSD id and starts the corresponding daemon. When a backup is taken on controller-1 it contains /etc/ceph/ceph.conf with the OSDs of this node. Restore is always done from controller-0 where Ansible extracts ceph.conf from the backup to /etc/ceph/ceph.conf. This leads to OSDs from controller-1 trying to start on controller-0 and to an Ansible failure. To fix it we remove the OSD configuration from ceph.conf. This works as we have code in the restore playbook that scans the disks for OSDs and mounts them in /var/lib/ceph/osd/* allowing '/etc/init.d/ceph start' to initialize the correct ceph-osd daemons for controller-0.
Closes-Bug: 1899444 Change-Id: I10672613fc26807e0cf28ac8df5a08287d80c17a diff --git a/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml b/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml index e7cdee4..6dc75b9 100644 --- a/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml @@ -33,10 +33,24 @@ - name: Restore ceph.conf file command: >- tar -C / -xpf {{ restore_data_file }} - 'etc/ceph/ceph.conf' + '{{ ceph_conf[1:] }}' args: warn: false + - name: Get list of OSDs defined in {{ ceph_conf }} + shell: grep "\[osd\.[0-9]*\]" {{ ceph_conf }} | grep -oh "osd\.[0-9]*" + register: ceph_conf_osd_list + failed_when: false + + # Allow starting ceph with a ceph.conf from a backup done on controller-1 + # w/o this it will try to initialize OSDs that are not configured on controller-0. + - name: Remove "[osd.*]" sections from {{ ceph_conf }} + ini_file: + path: "{{ ceph_conf }}" + section: "{{ item }}" + state: absent + with_items: "{{ ceph_conf_osd_list.stdout_lines }}" + - name: Set initial ceph-mon name set_fact: mon_name: 'controller-0' @@ -91,7 +105,7 @@ - name: Allow Ceph to start with a single monitor on a Standard deployment ini_file: - path: "/etc/ceph/ceph.conf" + path: "{{ ceph_conf }}" section: global option: mon_initial_members value: controller-0 @@ -177,7 +191,7 @@ - name: Remove Ceph option to start with a single monitor on a Standard deployment ini_file: - path: "/etc/ceph/ceph.conf" + path: "{{ ceph_conf }}" section: global option: mon_initial_members state: absent diff --git a/playbookconfig/src/playbooks/roles/recover-ceph-data/vars/main.yml b/playbookconfig/src/playbooks/roles/recover-ceph-data/vars/main.yml index 1d9a6ea..9b19a21 100644 --- a/playbookconfig/src/playbooks/roles/recover-ceph-data/vars/main.yml +++ b/playbookconfig/src/playbooks/roles/recover-ceph-data/vars/main.yml @@ -3,3 +3,4 @@ ceph_mon_manifest_apply_log: 
/tmp/ceph_mon_apply_manifest.log ceph_wait_time: 60 drbd_cephmon_res: /etc/drbd.d/drbd-cephmon.res tmp_drbd_cephmon_res: /tmp/drbd-cephmon.res +ceph_conf: /etc/ceph/ceph.conf