core168: Add script to automatically repair MDRAID arrays

Please see the header of the script for more details.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
This commit is contained in:
Michael Tremer
2022-05-19 08:56:34 +00:00
committed by Peter Müller
parent 69aac83da9
commit 71d53192d3
5 changed files with 175 additions and 0 deletions

View File

@@ -99,6 +99,7 @@ usr/local/bin/ipsec-interfaces
usr/local/bin/makegraphs
usr/local/bin/qosd
usr/local/bin/readhash
usr/local/bin/repair-mdraid
usr/local/bin/run-parts
usr/local/bin/scanhd
usr/local/bin/settime

View File

@@ -97,6 +97,7 @@ usr/local/bin/ipsec-interfaces
usr/local/bin/makegraphs
usr/local/bin/qosd
usr/local/bin/readhash
usr/local/bin/repair-mdraid
usr/local/bin/run-parts
usr/local/bin/scanhd
usr/local/bin/settime

View File

@@ -99,6 +99,7 @@ usr/local/bin/ipsec-interfaces
usr/local/bin/makegraphs
usr/local/bin/qosd
usr/local/bin/readhash
usr/local/bin/repair-mdraid
usr/local/bin/run-parts
usr/local/bin/scanhd
usr/local/bin/settime

View File

@@ -125,6 +125,9 @@ if ! grep -q rd.auto /etc/default/grub; then
sed -e "s/panic=10/& rd.auto/" -i /etc/default/grub
fi
# Repair any broken MDRAID arrays
/usr/local/bin/repair-mdraid
# Start services
/etc/init.d/fcron restart
/etc/init.d/sshd restart

169
src/scripts/repair-mdraid Normal file
View File

@@ -0,0 +1,169 @@
#!/bin/bash
###############################################################################
# #
# IPFire.org - A linux based firewall #
# Copyright (C) 2022 IPFire Team <info@ipfire.org> #
# #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
# #
###############################################################################
#
# This script is supposed to repair any broken RAID installations
# where the system has been booted from only one of the RAID devices
# without the software RAID being activated first.
#
# This script does as follows:
#
# * It tries to find an inactive RAID called "ipfire:0"
# * It will then destroy any devices that are still part of this RAID.
# This is required because if the RAID is being assembled correctly,
# data from the disk that has NOT been mounted will be replicated
# back to the device that has been changed. That causes that any
# data that has been written to the mounted disk will be lost.
# To avoid this, we will partially destroy the RAID.
# * We will then erase any partition tables and destroy any filesystems
# on the devices so that they do not get accidentially mounted again.
# * The system will then need to be rebooted where the RAID will be
# mounted again in a degraded state which might take some extra
# time at boot (the system stands still for about a minute).
# * After the system has been booted up correctly, we will re-add
# the devices back to the RAID which will resync and the system
# will be back to its intended configuration.
find_inactive_raid() {
local status
local device
local arg
local args
while read -r status device args; do
if [ "${status}" = "INACTIVE-ARRAY" ]; then
for arg in ${args}; do
case "${arg}" in
name=ipfire:0)
echo "${device}"
return 0
;;
esac
done
fi
done <<< "$(mdadm --detail --scan)"
return 1
}
find_root() {
local device
local mp
local fs
local args
while read -r device mp fs args; do
if [ "${mp}" = "/" ]; then
echo "${device:0:-1}"
return 0
fi
done < /proc/mounts
return 1
}
find_raid_devices() {
local raid="${1}"
local IFS=,
local device
for device in $(mdadm -v --detail --scan "${raid}" | awk -F= '/^[ ]+devices/ { print $2 }'); do
echo "${device}"
done
return 0
}
destroy_everything() {
local device="${1}"
local part
# Destroy the RAID superblock
mdadm --zero-superblock "${device}"
# Wipe the partition table
wipefs -a "${device}"
# Wipe any partition signatures
for part in ${device}*; do
wipefs -a "${part}"
done
}
raid_rebuild() {
local devices=( "$@" )
cat > /etc/rc.d/rcsysinit.d/S99fix-raid <<EOF
#!/bin/bash
case "\${1}" in
start)
if [ -e "/dev/md/ipfire:0" ]; then
for device in ${devices[@]}; do
mdadm --add "/dev/md/ipfire:0" "\${device}"
done
# Delete this script
rm "\${0}"
fi
;;
esac
EOF
chmod a+x /etc/rc.d/rcsysinit.d/S99fix-raid
}
main() {
local raid="$(find_inactive_raid)"
# Nothing to do if no RAID device found
if [ -z "${raid}" ]; then
return 0
fi
echo "Fixing RAID ${raid}..."
local root="$(find_root)"
# Finding any devices in this RAID
local devices=(
$(find_raid_devices "${raid}")
)
# Stop the RAID
mdadm --stop "${raid}" &>/dev/null
# Destroy any useful data on all remaining RAID devices
local device
for device in ${devices[@]}; do
# Skip root
[ "${device}" = "${root}" ] && continue
destroy_everything "${device}"
done &>/dev/null
# Re-add devices to the RAID
raid_rebuild "${device}"
return 0
}
main "$@" || return $?