CentOS7 安裝 lustre server with zfs via dkms

之前有寫一篇「安裝 lustre with dkms」給 lustre client 的筆記。這次則是要分享如何用 yum rpm 安裝 lustre server zfs 版本。

安裝 kernel-devel 套件

# yum install kernel-devel -y

先直接對系統整個更新並重新開機。(這不是必要的,但我喜歡這樣做。)

# yum update -y ; reboot

先手動建立 lustre.repo 檔案,我指定安裝 lustre-2.12.8-ib 版本

# vi  /etc/yum.repos.d/lustre.repo
[lustre-server]
name=lustre-server
#baseurl=https://downloads.whamcloud.com/public/lustre/latest-release/el7/server
baseurl=https://downloads.whamcloud.com/public/lustre/lustre-2.12.8-ib/MOFED-4.9-4.1.7.0/el7/server
# exclude=*debuginfo*
gpgcheck=0

[lustre-client]
name=lustre-client
baseurl=https://downloads.whamcloud.com/public/lustre/latest-release/el7/client
# exclude=*debuginfo*
gpgcheck=0

[e2fsprogs-wc]
name=e2fsprogs-wc
baseurl=https://downloads.whamcloud.com/public/e2fsprogs/latest/el7
# exclude=*debuginfo*
gpgcheck=0

安裝 e2fsprogs 套件

# yum --nogpgcheck --disablerepo=* --enablerepo=e2fsprogs-wc install e2fsprogs e2fsprogs-devel -y

安裝 epel-release 套件

# yum -y install epel-release

還有安裝一些雜七雜八套件

#  yum install asciidoc audit-libs-devel autoconf automake bc binutils-devel bison createrepo device-mapper-devel elfutils-devel elfutils-libelf-devel expect flex gcc gcc-c++ git glib2 glib2-devel hmaccalc keyutils-libs-devel krb5-devel ksh libaio-devel libattr-devel libblkid-devel libcurl-devel libselinux-devel libtirpc-devel libtool libuuid-devel libudev-devel libyaml-devel lsscsi make ncurses-devel net-snmp-devel net-tools newt-devel numactl-devel openssl-devel parted patchutils pciutils-devel perl-ExtUtils-Embed pesign python python-devel python2-devel python-setuptools python-cffi redhat-rpm-config rpm-build systemd-devel tcl tcl-devel tk tk-devel wget xmlto yum-utils zlib-devel -y  

然後安裝 lustre 專用的 kernel,並重新開機

# yum --nogpgcheck --disablerepo=base,extras,updates --enablerepo=lustre-server install kernel kernel-devel kernel-headers kernel-tools kernel-tools-libs
# reboot

安裝 python-packaging dkms 套件

# yum install --enablerepo=epel python-packaging dkms -y

下面這幾個指令請不要執行,只是我自己留作筆記用:

# yum install libuutil1 libnvpair1 libzpool2 libzfs2 libzfs2-devel spl-dkms spl zfs-dkms zfs lustre-osd-zfs-mount kmod-lustre lustre lustre-resource-agents perf -y

# yum install libuutil1 libnvpair1 libzpool2 libzfs2 libzfs2-devel lustre-osd-zfs-mount kmod-lustre lustre lustre-resource-agents perf -y

# yum --nogpgcheck --enablerepo=lustre-server install kmod-lustre-osd-ldiskfs lustre-dkms lustre-osd-ldiskfs-mount lustre-osd-zfs-mount lustre lustre-resource-agents zfs -y

因為上面的指令會失敗,錯誤訊息如下:

[ 4917.164449] LustreError: 158-c: Can't load module 'osd-zfs'
[ 4917.164487] LustreError: 15001:0:(genops.c:397:class_newdev()) OBD: unknown type: osd-zfs
[ 4917.164512] LustreError: 15001:0:(obd_config.c:403:class_attach()) Cannot create device lustre-MDT0000-osd of type osd-zfs : -19
[ 4917.164545] LustreError: 15001:0:(obd_mount.c:197:lustre_start_simple()) lustre-MDT0000-osd attach error -19
[ 4917.164577] LustreError: 15001:0:(obd_mount_server.c:1958:server_fill_super()) Unable to start osd on lustre-mdt/mgs_mdt: -19
[ 4917.164612] LustreError: 15001:0:(obd_mount.c:1608:lustre_fill_super()) Unable to mount  (-19)

要用下面方式安裝:

# yum install zfs lustre lustre-all-dkms lustre-osd-ldiskfs-mount lustre-osd-zfs-mount libzfs2  -y

但是也會出現錯誤 XD,所以要修正下列檔案的第 23 行 (把原本那行註解掉,改成下面那一行):

# vi /usr/src/lustre-all-2.12.8_6_g5457c37/lustre-dkms_pre-build.sh
        #ZFS_VERSION=$(dkms status -m zfs -k $3 -a $5 | awk -F', ' '{print $2; exit 0}' | grep -v ': added$')
        ZFS_VERSION=$(dkms status -m zfs -k $3 -a $5 | awk ' { print $1 } ' | sed -e 's/zfs\///' -e 's/,//')

並且重新編譯 dkms

# dkms build lustre-all/2.12.8_6_g5457c37
# dkms install lustre-all/2.12.8_6_g5457c37

如果上面的 yum 安裝有問題,也許要先移除一些相關的套件,比方說:

# yum remove libibverbs rdma-core -y
or
# yum remove mlnx-ofa_kernel-modules -y

如果有 IB 卡記得重新安裝 OFED

#  ./mlnxofedinstall --without-fw-update --add-kernel-support

也記得要編輯一下 lnet.conf

# vi /etc/modprobe.d/lnet.conf
options lnet networks=tcp1(ens1f0),tcp2(ens1f1),o2ib1(ib0)

載入 lustre kernel module ,確認是否可以正常載入,沒有訊息就是正常。

# modprobe lustre

再來建立 MGS 與 MDT

# mkfs.lustre --mgs --mdt --backfstype=zfs --fsname=lustre --mgsnode=192.168.100.49@o2ib1 --mgsnode=192.168.101.49@tcp1 --mgsnode=192.168.102.49@tcp2 --index=0 lustre-mdt/mgs_mdt --reformat /dev/sda

並且掛載,確認是否安裝好 lustre server

# mkdir -p /lustre/mdt
# mount -t lustre lustre-mdt/mgs_mdt /lustre/mdt
# df -h -t lustre
Filesystem Size Used Avail Use% Mounted on
lustre-mdt/mgs_mdt 1.4T 3.3M 1.4T 1% /lustre/mdt

然後切換 kernel 看是否 dkms 能生效,先列出目前的 kernels

# awk -F\' '$1=="menuentry " {print $2}' /boot/efi/EFI/centos/grub.cfg
CentOS Linux (3.10.0-1160.49.1.el7_lustre.x86_64) 7 (Core)
CentOS Linux (3.10.0-1160.53.1.el7.x86_64) 7 (Core)
CentOS Linux (3.10.0-1160.el7.x86_64) 7 (Core)
CentOS Linux (0-rescue-7dc00e9e6ebd459087f6cef1fe1f737b) 7 (Core)

設定 1 為預設的 kernel (3.10.0-1160.53.1.el7.x86_64),0 則為 3.10.0-1160.49.1.el7_lustre.x86_64。並檢查是否為所設定的 kernel。

# grub2-set-default 1
# grep saved /boot/grub2/grubenv
saved_entry=1

更新 grub 設定檔案,並且重新開機 (若系統是以 UEFI 開機,grub2-mkconfig 的輸出路徑應改為 /boot/efi/EFI/centos/grub.cfg)

# grub2-mkconfig -o /boot/grub2/grub.cfg
Generating grub configuration file …
Found linux image: /boot/vmlinuz-3.10.0-1160.53.1.el7.x86_64
Found initrd image: /boot/initramfs-3.10.0-1160.53.1.el7.x86_64.img
Found linux image: /boot/vmlinuz-3.10.0-1160.49.1.el7_lustre.x86_64
Found initrd image: /boot/initramfs-3.10.0-1160.49.1.el7_lustre.x86_64.img
Found linux image: /boot/vmlinuz-3.10.0-1160.el7.x86_64
Found initrd image: /boot/initramfs-3.10.0-1160.el7.x86_64.img
Found linux image: /boot/vmlinuz-0-rescue-7dc00e9e6ebd459087f6cef1fe1f737b
Found initrd image: /boot/initramfs-0-rescue-7dc00e9e6ebd459087f6cef1fe1f737b.img
Found CentOS Linux release 7.9.2009 (Core) on /dev/mapper/centos-root
done
# reboot

然後確認是否能自動建立 dkms kernel modules

# dkms status
Deprecated feature: REMAKE_INITRD
lustre-all/2.12.8_6_g5457c37, 3.10.0-1160.49.1.el7_lustre.x86_64, x86_64: installedDeprecated feature: REMAKE_INITRD
Deprecated feature: REMAKE_INITRD
Deprecated feature: REMAKE_INITRD
Deprecated feature: REMAKE_INITRD
Deprecated feature: REMAKE_INITRD

spl/0.7.13, 3.10.0-1160.49.1.el7_lustre.x86_64, x86_64: installedDeprecated feature: REMAKE_INITRD

spl/0.7.13, 3.10.0-1160.53.1.el7.x86_64, x86_64: installedDeprecated feature: REMAKE_INITRD

zfs/0.7.13, 3.10.0-1160.49.1.el7_lustre.x86_64, x86_64: installedDeprecated feature: REMAKE_INITRD

如果沒辦法就手動建立 dkms kernel modules ,例如用下列指令:

# dkms build zfs/0.7.13
# dkms install zfs/0.7.13
# dkms build lustre-all/2.12.8_6_g5457c37
# dkms install lustre-all/2.12.8_6_g5457c37

然後發現缺少 ext4 原始碼 XD

checking ext4 source directory…
configure: WARNING:

Disabling ldiskfs support because complete ext4 source does not exist.

If you are building using kernel-devel packages and require ldiskfs
server support then ensure that the matching kernel-debuginfo-common
and kernel-debuginfo-common- packages are installed.

那就下載當前使用的 kernel 原始碼給它,並安裝 kernel 原始碼

# yumdownloader --source kernel-`uname -r`
# rpm -ivh kernel-3.10.0-1160.53.1.el7.src.rpm

解開 ext3 & ext4 原始碼並複製到 kernel 原始碼內

# tar -Jxf ~/rpmbuild/SOURCES/linux-3.10.0-1160.53.1.el7.tar.xz linux-*/fs/ext{3,4}
# cp -an ~/linux-3.10.0-1160.53.1.el7/fs/ext{3,4} /usr/src/kernels/3.10.0-1160.53.1.el7.x86_64/fs/.

如果有使用 IB 介面,記得重新安裝 OFED

# ./mlnxofedinstall --without-fw-update --add-kernel-support

接下來就可以順利使用 dkms 編譯 kernel module

# dkms remove lustre-all/2.12.8_6_g5457c37
# dkms build lustre-all/2.12.8_6_g5457c37
# dkms install lustre-all/2.12.8_6_g5457c37

再一次驗證安裝狀態

# modprobe -v lustre
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/libcfs.ko.xz
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/lnet.ko.xz networks=tcp1(ens1f0),tcp2(ens1f1),o2ib1(ib0)
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/obdclass.ko.xz
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/ptlrpc.ko.xz
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/fld.ko.xz
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/fid.ko.xz
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/lov.ko.xz
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/osc.ko.xz
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/mdc.ko.xz
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/lmv.ko.xz
insmod /lib/modules/3.10.0-1160.53.1.el7.x86_64/extra/lustre.ko.xz
# mount -t lustre lustre-mdt/mgs_mdt /lustre/mdt
# df -h -t lustre
Filesystem Size Used Avail Use% Mounted on
lustre-mdt/mgs_mdt 1.4T 3.3M 1.4T 1% /lustre/mdt

如果遇到不可預期的錯誤,重新開機就能解決了 XD