Debian 11 Bullseye
Sponsored Link

Ceph Nautilus : Add or Remove OSDs (2021/08/26)

 
This is how to add OSDs to, or remove OSDs from, an existing cluster.
                                         |
        +--------------------+           |           +----------------------+
        |   [dlp.srv.world]  |10.0.0.30  |  10.0.0.31|    [www.srv.world]   |
        |     Ceph Client    +-----------+-----------+        RADOSGW       |
        |                    |           |           |                      |
        +--------------------+           |           +----------------------+
            +----------------------------+----------------------------+
            |                            |                            |
            |10.0.0.51                   |10.0.0.52                   |10.0.0.53 
+-----------+-----------+    +-----------+-----------+    +-----------+-----------+
|   [node01.srv.world]  |    |   [node02.srv.world]  |    |   [node03.srv.world]  |
|     Object Storage    +----+     Object Storage    +----+     Object Storage    |
|     Monitor Daemon    |    |                       |    |                       |
|     Manager Daemon    |    |                       |    |                       |
+-----------------------+    +-----------------------+    +-----------------------+

[1] For example, add a new node [node04] as an OSD node, working from the Admin Node.
For the block device on the new [node04] node, [/dev/sdb] is used in this example.
# transfer public key

root@node01:~#
ssh-copy-id node04

# install required packages

root@node01:~#
ssh node04 "apt update; apt -y install ceph"
# transfer required files

root@node01:~#
scp /etc/ceph/ceph.conf node04:/etc/ceph/ceph.conf

root@node01:~#
scp /etc/ceph/ceph.client.admin.keyring node04:/etc/ceph

root@node01:~#
scp /var/lib/ceph/bootstrap-osd/ceph.keyring node04:/var/lib/ceph/bootstrap-osd
# configure OSD

root@node01:~# ssh node04 \
"chown ceph. /etc/ceph/ceph.* /var/lib/ceph/bootstrap-osd/*; \
parted --script /dev/sdb 'mklabel gpt'; \
parted --script /dev/sdb "mkpart primary 0% 100%"; \
ceph-volume lvm create --data /dev/sdb1" 
Running command: /usr/bin/ceph-authtool --gen-print-key
Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new a8a4862e-ed22-4fff-85d1-937eb56ce510
Running command: /usr/sbin/vgcreate --force --yes ceph-5a2cec6e-f514-4376-9aa6-19c2b8987562 /dev/sdb1
 stdout: Physical volume "/dev/sdb1" successfully created.
 stdout: Volume group "ceph-5a2cec6e-f514-4376-9aa6-19c2b8987562" successfully created
Running command: /usr/sbin/lvcreate --yes -l 20479 -n osd-block-a8a4862e-ed22-4fff-85d1-937eb56ce510 ceph-5a2cec6e-f514-4376-9aa6-19c2b8987562
 stdout: Logical volume "osd-block-a8a4862e-ed22-4fff-85d1-937eb56ce510" created.
Running command: /usr/bin/ceph-authtool --gen-print-key
Running command: /usr/bin/mount -t tmpfs tmpfs /var/lib/ceph/osd/ceph-3
--> Executable selinuxenabled not in PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
Running command: /usr/bin/chown -h ceph:ceph /dev/ceph-5a2cec6e-f514-4376-9aa6-19c2b8987562/osd-block-a8a4862e-ed22-4fff-85d1-937eb56ce510
Running command: /usr/bin/chown -R ceph:ceph /dev/dm-2
Running command: /usr/bin/ln -s /dev/ceph-5a2cec6e-f514-4376-9aa6-19c2b8987562/osd-block-a8a4862e-ed22-4fff-85d1-937eb56ce510 /var/lib/ceph/osd/ceph-3/block
Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o /var/lib/ceph/osd/ceph-3/activate.monmap
 stderr: got monmap epoch 2
Running command: /usr/bin/ceph-authtool /var/lib/ceph/osd/ceph-3/keyring --create-keyring --name osd.3 --add-key AQAG8CZhH3q7MhAAEmrrlkKpZFTaE5AhASHVQA==
 stdout: creating /var/lib/ceph/osd/ceph-3/keyring
added entity osd.3 auth(key=AQAG8CZhH3q7MhAAEmrrlkKpZFTaE5AhASHVQA==)
Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-3/keyring
Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-3/
Running command: /usr/bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 3 --monmap /var/lib/ceph/osd/ceph-3/activate.monmap --keyfile - --osd-data /var/lib/ceph/osd/ceph-3/ --osd-uuid a8a4862e-ed22-4fff-85d1-937eb56ce510 --setuser ceph --setgroup ceph
 stderr: 2021-08-26 10:36:08.965 7f8599a31c00 -1 bluestore(/var/lib/ceph/osd/ceph-3/) _read_fsid unparsable uuid
--> ceph-volume lvm prepare successful for: /dev/sdb1
Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-3
Running command: /usr/bin/ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/ceph-5a2cec6e-f514-4376-9aa6-19c2b8987562/osd-block-a8a4862e-ed22-4fff-85d1-937eb56ce510 --path /var/lib/ceph/osd/ceph-3 --no-mon-config
Running command: /usr/bin/ln -snf /dev/ceph-5a2cec6e-f514-4376-9aa6-19c2b8987562/osd-block-a8a4862e-ed22-4fff-85d1-937eb56ce510 /var/lib/ceph/osd/ceph-3/block
Running command: /usr/bin/chown -h ceph:ceph /var/lib/ceph/osd/ceph-3/block
Running command: /usr/bin/chown -R ceph:ceph /dev/dm-2
Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/osd/ceph-3
Running command: /usr/bin/systemctl enable ceph-volume@lvm-3-a8a4862e-ed22-4fff-85d1-937eb56ce510
 stderr: Created symlink /etc/systemd/system/multi-user.target.wants/ceph-volume@lvm-3-a8a4862e-ed22-4fff-85d1-937eb56ce510.service → /lib/systemd/system/ceph-volume@.service.
Running command: /usr/bin/systemctl enable --runtime ceph-osd@3
 stderr: Created symlink /run/systemd/system/ceph-osd.target.wants/ceph-osd@3.service → /lib/systemd/system/ceph-osd@.service.
Running command: /usr/bin/systemctl start ceph-osd@3
--> ceph-volume lvm activate successful for osd ID: 3
--> ceph-volume lvm create successful for: /dev/sdb1

root@node01:~# ceph -s 
  cluster:
    id:     92749530-d9af-4226-bfe0-ccc79a689a66
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum node01 (age 2h)
    mgr: node01(active, since 30m)
    mds: cephfs:1 {0=node01=up:active}
    osd: 4 osds: 4 up (since 67s), 4 in (since 67s)
    rgw: 1 daemon active (www)

  task status:
    scrub status:
        mds.node01: idle

  data:
    pools:   8 pools, 248 pgs
    objects: 237 objects, 6.7 MiB
    usage:   4.0 GiB used, 316 GiB / 320 GiB avail
    pgs:     248 active+clean
[2] To remove an OSD node from an existing cluster, run commands as follows.
For example, remove the [node04] node.
root@node01:~#
ceph -s

  cluster:
    id:     92749530-d9af-4226-bfe0-ccc79a689a66
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum node01 (age 2h)
    mgr: node01(active, since 30m)
    mds: cephfs:1 {0=node01=up:active}
    osd: 4 osds: 4 up (since 98s), 4 in (since 98s)
    rgw: 1 daemon active (www)

  task status:
    scrub status:
        mds.node01: idle

  data:
    pools:   8 pools, 248 pgs
    objects: 237 objects, 6.7 MiB
    usage:   4.0 GiB used, 316 GiB / 320 GiB avail
    pgs:     248 active+clean

root@node01:~#
ceph osd tree

ID CLASS WEIGHT  TYPE NAME       STATUS REWEIGHT PRI-AFF
-1       0.31238 root default
-3       0.07809     host node01
 0   hdd 0.07809         osd.0       up  1.00000 1.00000
-5       0.07809     host node02
 1   hdd 0.07809         osd.1       up  1.00000 1.00000
-7       0.07809     host node03
 2   hdd 0.07809         osd.2       up  1.00000 1.00000
-9       0.07809     host node04
 3   hdd 0.07809         osd.3       up  1.00000 1.00000

# specify OSD ID of a node you'd like to remove

root@node01:~#
ceph osd out 3

marked out osd.3.
# live watch cluster status

# after running [ceph osd out ***], rebalancing is executed automatically

# to quit the live watch, press [Ctrl + c]

root@node01:~#
ceph -w

  cluster:
    id:     92749530-d9af-4226-bfe0-ccc79a689a66
    health: HEALTH_WARN
            Degraded data redundancy: 200/711 objects degraded (28.129%), 56 pgs degraded

  services:
    mon: 1 daemons, quorum node01 (age 2h)
    mgr: node01(active, since 31m)
    mds: cephfs:1 {0=node01=up:active}
    osd: 4 osds: 4 up (since 2m), 3 in (since 9s)
    rgw: 1 daemon active (www)

  task status:
    scrub status:
        mds.node01: idle

  data:
    pools:   8 pools, 248 pgs
    objects: 237 objects, 6.7 MiB
    usage:   3.0 GiB used, 237 GiB / 240 GiB avail
    pgs:     200/711 objects degraded (28.129%)
             2/711 objects misplaced (0.281%)
             184 active+clean
             56  active+recovery_wait+degraded
             7   active+recovery_wait
             1   active+recovering

  io:
    recovery: 0 B/s, 1 keys/s, 0 objects/s

  progress:
    Rebalancing after osd.3 marked out
      [===================...........]


2021-08-26 10:38:41.585492 mon.node01 [WRN] Health check failed: Degraded data redundancy: 200/711 objects degraded (28.129%), 56 pgs degraded (PG_DEGRADED)
2021-08-26 10:38:41.585543 mon.node01 [INF] Health check cleared: PG_AVAILABILITY (was: Reduced data availability: 67 pgs inactive, 194 pgs peering)
2021-08-26 10:38:47.451400 mon.node01 [WRN] Health check update: Degraded data redundancy: 175/711 objects degraded (24.613%), 50 pgs degraded (PG_DEGRADED)
.....
.....

# after status turns to [HEALTH_OK], disable OSD service on the target node

root@node01:~#
ssh node04 "systemctl disable --now ceph-osd@3.service"

# remove the OSD from the cluster by specifying the target OSD ID

root@node01:~#
ceph osd purge 3 --yes-i-really-mean-it

purged osd.3
root@node01:~#
ceph -s

  cluster:
    id:     92749530-d9af-4226-bfe0-ccc79a689a66
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum node01 (age 2h)
    mgr: node01(active, since 33m)
    mds: cephfs:1 {0=node01=up:active}
    osd: 3 osds: 3 up (since 13s), 3 in (since 97s)
    rgw: 1 daemon active (www)

  task status:
    scrub status:
        mds.node01: idle

  data:
    pools:   8 pools, 248 pgs
    objects: 237 objects, 6.7 MiB
    usage:   3.0 GiB used, 237 GiB / 240 GiB avail
    pgs:     248 active+clean

  io:
    client:   34 KiB/s rd, 19 op/s rd, 0 op/s wr
Matched Content