Linux系统硬件老化测试脚本:自动化负载与监控
这篇文章介绍了一款用于Linux系统的硬件老化测试脚本。脚本通过对CPU、内存、硬盘和GPU进行高强度负载测试,持续运行设定时长,模拟长时间高负荷环境,验证硬件稳定性与可靠性。测试过程中,脚本实时监控CPU温度、频率、内存使用情况等,并将结果记录到日志文件中。测试完成后,提供详细反馈,帮助用户评估设备性能和健康状况。
·
简介:
这篇文章介绍了一款用于Linux系统的自动化硬件老化测试脚本。该脚本能够通过对CPU、内存、硬盘和GPU进行高强度负载测试,持续运行设定的时长(如1小时),以模拟长时间高负荷运行的环境,从而验证硬件的稳定性与可靠性。脚本还包括了系统资源监控,实时显示CPU温度、频率、内存使用情况等信息,并将测试结果记录到日志文件中。测试完成后,脚本会提供详细的反馈,并允许用户选择是否重新执行测试。通过这种方式,用户可以轻松地评估设备的性能和健康状况。
#!/bin/bash
# Burn-in (aging) test configuration and working-directory setup.
#
# Aging duration in hours. Whole numbers and decimals (e.g. 0.5) are accepted.
set_aging_time=1
# Reject anything that is not a non-negative integer or decimal number.
if [[ ! "$set_aging_time" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
  echo "set_aging_time Parameter error"
  exit 1
fi
# Resolve the absolute path of the directory that contains this script.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# If the lookup failed SCRIPT_DIR is empty and every path derived from it
# would silently resolve under "/", so stop instead of continuing.
if [[ -z "${SCRIPT_DIR}" ]]; then
  echo "Unable to determine the script directory" >&2
  exit 1
fi
echo "脚本所在的绝对路径是: ${SCRIPT_DIR}"
# Create the working directory when it does not exist yet. The path is quoted
# so that a script location containing spaces still works.
mkdir -p -- "${SCRIPT_DIR}/stress" || exit 1
# Log file locations: the status/monitoring log and the raw stress-ng output.
log_file="${SCRIPT_DIR}/stress/log_file.log"
stress_ng="${SCRIPT_DIR}/stress/stress_ng.log"
# Make sure stress-ng, figlet and glmark2-es2 are available. The install
# commands target Ubuntu (apt); replace them for other distributions.
# Exits the script with status 1 if a tool is still missing after installing.
install_packages() {
  local tool
  local all_present=1

  # A single missing tool is enough to trigger the install path.
  for tool in stress-ng figlet glmark2-es2; do
    if ! command -v "${tool}" > /dev/null 2>&1; then
      all_present=0
      break
    fi
  done

  if [ "${all_present}" -eq 1 ]; then
    echo "All software has been installed."
    return
  fi

  sudo apt update
  for tool in stress-ng glmark2-es2 figlet; do
    sudo apt install -y "${tool}"
  done

  # Re-check each tool in a fixed order; the first one still missing aborts.
  for tool in stress-ng figlet glmark2-es2; do
    if ! command -v "${tool}" > /dev/null 2>&1; then
      echo "${tool} is not installed"
      exit 1
    fi
  done
}
# CPU stress test.
# Starts one stress-ng CPU worker per processing unit reported by nproc and
# runs for ${aging_time} seconds. stdout and stderr are appended to the file
# named by ${stress_ng}.
# Globals: aging_time (read), stress_ng (read)
run_cpu_test() {
  # CPU details can be inspected with lscpu.
  local workers
  workers=$(nproc)
  # The redirect target is quoted: an unquoted path containing whitespace makes
  # bash fail with "ambiguous redirect" and the stressor never starts.
  stress-ng --cpu "${workers}" --metrics-brief --timeout "${aging_time}s" >> "${stress_ng}" 2>&1
}
# Memory stress test.
# Splits the "available" column of `free -m` evenly across one stress-ng vm
# worker per processing unit and runs for ${aging_time} seconds. Output is
# appended to the file named by ${stress_ng}.
# Globals: aging_time (read), stress_ng (read)
# Returns: stress-ng's exit status, or 1 when the available memory is unknown.
run_memory_test() {
  local available_memory per_worker_memory workers
  workers=$(nproc)
  # Column 7 of the memory row is "available" (in MiB because of -m). The row
  # label is matched in both English and Chinese locales.
  available_memory=$(free -m | awk '/Mem|内存/ {print $7; exit}')
  if [[ ! "${available_memory}" =~ ^[0-9]+$ || ! "${workers}" =~ ^[1-9][0-9]*$ ]]; then
    # Without valid numbers the --vm-bytes argument would be malformed.
    echo "run_memory_test: unable to determine available memory or worker count" >> "${stress_ng}"
    return 1
  fi
  # Integer division in the shell; no dependency on bc.
  per_worker_memory=$((available_memory / workers))
  # Allocate/release mode.
  stress-ng --vm "${workers}" --vm-bytes "${per_worker_memory}M" --metrics-brief --timeout "${aging_time}s" >> "${stress_ng}" 2>&1
  # Alternative: keep the memory allocated for the whole run.
  # stress-ng --vm 1 --vm-bytes ${available_memory}M --metrics-brief --vm-keep --timeout ${aging_time}s >> ${stress_ng} 2>&1
}
# Disk stress test.
# Passes the nproc count to both stress-ng's --hdd and -i options and runs for
# ${aging_time} seconds. Output is appended to the file named by ${stress_ng}.
# Globals: aging_time (read), stress_ng (read)
run_disk_stress_test() {
  local workers
  workers=$(nproc)
  # Quoted redirect target: an unquoted path with whitespace is rejected by
  # bash as an "ambiguous redirect".
  stress-ng --hdd "${workers}" -i "${workers}" --metrics-brief --timeout "${aging_time}s" >> "${stress_ng}" 2>&1
}
# GPU stress test.
# Runs glmark2-es2 in an endless loop and lets `timeout` stop it after
# ${aging_time} seconds. The benchmark's own output is discarded.
# Globals: aging_time (read), log_file (appended to on early exit)
# Returns: the exit status of `timeout` (124 when the time limit was reached).
run_gpu_test() {
  local status
  timeout "${aging_time}" glmark2-es2 --run-forever --annotate > /dev/null 2>&1
  status=$?
  # With --run-forever the only normal way out is being stopped by timeout,
  # which reports 124. Any other status means the GPU load ended early (for
  # example it could not start), so record that instead of hiding it.
  if [ "${status}" -ne 124 ]; then
    echo "GPU test ended before the time limit (exit status ${status}); GPU may not have been loaded" >> "${log_file:-/dev/null}"
  fi
  # If glmark2-es2 reports:
  #   error: XDG_RUNTIME_DIR is invalid or not set in the environment.
  #   Error: main: Could not initialize canvas
  # run it as the logged-in desktop user instead (example user: admin):
  # user_id=$(sudo -u admin id -u)
  # su admin -c "export XDG_RUNTIME_DIR=/run/user/${user_id} && timeout ${aging_time} glmark2-es2 --run-forever --annotate > /dev/null 2>&1"
  return "${status}"
}
# Print an integer number of thousandths as a decimal with 1-3 fraction
# digits, truncating (not rounding), e.g. "45567 1" -> "45.5".
# Arguments: $1 - integer, optionally negative; $2 - fraction digits (1..3)
_scale_thousandths() {
  local raw=$1 decimals=$2 sign=""
  if [[ "${raw}" == -* ]]; then
    sign="-"
    raw=${raw#-}
  fi
  # Force base 10 so a value with leading zeros is not parsed as octal.
  raw=$((10#${raw}))
  printf '%s%d.%0*d' "${sign}" "$((raw / 1000))" "${decimals}" \
    "$(((raw % 1000) / 10 ** (3 - decimals)))"
}

# Sample CPU temperature, current frequency and usage.
# Globals written: cpu_temp_path, cpu_temp, cpu_cur_freq_path, cpu_cur_freq,
#                  cpu_usage
# Environment (optional): CPU_TEMP_PATH and CPU_FREQ_PATH override the sysfs
#   files that are read; the defaults are the paths used originally.
# A value that cannot be read, or is not an integer, is replaced by a "---"
# placeholder so the status line keeps its layout.
get_cpu_info() {
  local raw

  # Temperature file; the raw value is divided by 1000 for display.
  cpu_temp_path="${CPU_TEMP_PATH:-/sys/class/thermal/thermal_zone1/temp}"
  if raw=$(cat "${cpu_temp_path}" 2> /dev/null) && [[ "${raw}" =~ ^-?[0-9]+$ ]]; then
    cpu_temp=$(_scale_thousandths "${raw}" 1)
  else
    cpu_temp=" ---"
  fi

  # Current frequency file; the raw value is divided by 1000 for display.
  cpu_cur_freq_path="${CPU_FREQ_PATH:-/sys/devices/system/cpu/cpufreq/policy0/cpuinfo_cur_freq}"
  if raw=$(cat "${cpu_cur_freq_path}" 2> /dev/null) && [[ "${raw}" =~ ^[0-9]+$ ]]; then
    cpu_cur_freq=$(_scale_thousandths "${raw}" 2)
  else
    cpu_cur_freq=" --- "
  fi

  # CPU usage = 100 - idle percentage from top's summary line. top runs in
  # the C locale so the "Cpu(s)" / "id" labels and the decimal point have the
  # form the sed expression expects.
  cpu_usage=$(LC_ALL=C top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}')
}
# Sample memory usage from `free -m`.
# Globals written: memory_info (raw `free -m` output), total_mem, used_mem,
#                  used_percent (one decimal, truncated; " ---" if unknown)
get_memory_info() {
  local tenths
  memory_info=$(free -m)
  # Columns 2 and 3 of the memory row are total and used. The row label is
  # matched in both English and Chinese locales.
  read -r total_mem used_mem < <(awk '/Mem|内存/ {print $2, $3; exit}' <<< "${memory_info}")
  if [[ "${total_mem}" =~ ^[0-9]+$ && "${used_mem}" =~ ^[0-9]+$ ]] \
    && ((10#${total_mem} > 0)); then
    # Integer arithmetic in tenths of a percent: truncates to one decimal,
    # needs no bc, and always prints a leading zero (0.3 rather than .3).
    tenths=$((10#${used_mem} * 1000 / 10#${total_mem}))
    used_percent="$((tenths / 10)).$((tenths % 10))"
  else
    # Missing or zero total: avoid a division error and show a placeholder.
    used_percent=" ---"
  fi
}
# Make the terminal cursor visible again (it is hidden while run_test runs).
_show_cursor() {
  printf '\e[?25h\n'
}

# Run the complete aging test: start the CPU, memory, disk and GPU workloads
# in the background, refresh a status display once per second until the
# configured duration has elapsed, then wait for the workloads and report.
# Globals read:    SCRIPT_DIR, log_file, set_aging_time
# Globals written: aging_time (seconds, read by the workload functions),
#                  start_time, last_write_time, write_interval, current_time,
#                  elapsed_time, hours, minutes, seconds; plus whatever
#                  get_cpu_info and get_memory_info set
# Files: creates start_state.zz at the beginning and end_state.zz at the end
#        inside ${SCRIPT_DIR}/stress; appends progress to ${log_file}.
run_test() {
  local -a job_names=("CPU" "Memory" "Disk" "GPU")
  local -a job_pids=()
  local idx job_status
  local whole frac scale

  touch "${SCRIPT_DIR}/stress/start_state.zz"
  # Take the address that follows "link/ether" on the eth0 line rather than a
  # fixed column, because the column shifts when ip prints extra attributes.
  echo "Device MAC: $(ip -o link show up | awk '$2 == "eth0:" { for (i = 3; i < NF; i++) if ($i == "link/ether") { print $(i + 1); exit } }')" >> "${log_file}"

  # Convert hours (integer or decimal) to whole seconds, truncating any
  # fraction of a second. Exact integer arithmetic, no bc required.
  whole=${set_aging_time%%.*}
  frac=""
  if [[ "${set_aging_time}" == *.* ]]; then
    frac=${set_aging_time#*.}
  fi
  scale=$((10 ** ${#frac}))
  aging_time=$(((10#${whole} * scale + 10#${frac:-0}) * 3600 / scale))
  echo "Aging duration: ${aging_time}S" >> "${log_file}"

  run_cpu_test &
  job_pids+=("$!")
  run_memory_test &
  job_pids+=("$!")
  run_disk_stress_test &
  job_pids+=("$!")
  run_gpu_test &
  job_pids+=("$!")

  echo "start time: $(date)" >> "${log_file}"
  start_time=$(date +%s)
  # CPU statistics are appended to the log every write_interval seconds.
  last_write_time=$start_time
  write_interval=300

  clear
  # The cursor is hidden below; make sure an interrupted run gives it back.
  trap '_show_cursor; exit 130' INT
  trap '_show_cursor; exit 143' TERM
  # Hide the cursor (ANSI escape sequence).
  echo -e "\e[?25l"

  # Refresh the display until the aging duration has elapsed.
  while true; do
    current_time=$(date +%s)
    elapsed_time=$((current_time - start_time))
    hours=$((elapsed_time / 3600))
    minutes=$(((elapsed_time % 3600) / 60))
    seconds=$((elapsed_time % 60))

    tput cup 0 0
    echo -n "Aging time: $(printf "%02d:%02d:%02d" "$hours" "$minutes" "$seconds") "
    get_cpu_info
    tput cup 2 0
    echo -n "CPU Usage: ${cpu_usage}% | CPU Temp: ${cpu_temp} °C | CPU Cur Freq: ${cpu_cur_freq} MHz "
    get_memory_info
    tput cup 4 0
    echo -n "Total Mem: ${total_mem}M | Used Mem: ${used_mem}M | Mem Usage: ${used_percent}% "

    # Periodic log entry.
    if [ $((current_time - last_write_time)) -ge "$write_interval" ]; then
      echo "$(date '+%Y-%m-%d %H:%M:%S') | CPU Usage: ${cpu_usage}% | CPU Temp: ${cpu_temp}°C | CPU Freq: ${cpu_cur_freq} MHz" >> "${log_file}"
      last_write_time=$current_time
    fi

    # Stop once the configured duration has been reached.
    if [ "$elapsed_time" -ge "$aging_time" ]; then
      echo "stop time: $(date)" >> "${log_file}"
      break
    fi

    sleep 1
  done

  # Wait for every workload and record how it ended, so a workload that
  # failed to run is visible in the log. Each status is whatever the matching
  # run_*_test function returned.
  for idx in "${!job_pids[@]}"; do
    wait "${job_pids[idx]}"
    job_status=$?
    echo "${job_names[idx]} test exit status: ${job_status}" >> "${log_file}"
  done

  # Show the cursor again (ANSI escape sequence) and drop the handlers.
  echo -e "\e[?25h"
  trap - INT TERM
  echo ""
  # The trailing newline keeps the log line terminated.
  printf "Aging test passed, aging duration: %02d:%02d:%02d\n" "$hours" "$minutes" "$seconds" 2>&1 | tee -a "${log_file}"
  echo -e "\033[32m$(figlet "PASS")\033[0m"
  touch "${SCRIPT_DIR}/stress/end_state.zz"
}
install_packages
# Marker files written by run_test: start_state.zz when a run begins and
# end_state.zz when it completes.
start_state="${SCRIPT_DIR}/stress/start_state.zz"
end_state="${SCRIPT_DIR}/stress/end_state.zz"

# Ask whether the aging test should be run again. On "Y" or "y" the previous
# results are removed and run_test is started; any other answer exits with 0.
# Arguments: $1 - prompt text shown to the user
_confirm_rerun() {
  local prompt=$1
  local answer
  read -r -p "${prompt}" answer
  if [[ "${answer}" == "Y" || "${answer}" == "y" ]]; then
    # ${SCRIPT_DIR:?} aborts if the variable is empty, so the glob can never
    # expand to /stress/*; quoting keeps paths with spaces intact.
    rm -rf -- "${SCRIPT_DIR:?}/stress/"*
    run_test
  else
    exit 0
  fi
}

if [[ -e "$start_state" && ! -e "$end_state" ]]; then
  # A start marker without an end marker: the previous run did not finish.
  echo -e "\033[31m$(figlet "FAIL")\033[0m"
  _confirm_rerun "Aging test failed, please choose whether to re-execute aging test? (y/n):"
elif [[ -e "$start_state" && -e "$end_state" ]]; then
  # Both markers present: the previous run completed.
  echo -e "\033[32m$(figlet "PASS")\033[0m"
  _confirm_rerun "The equipment has completed the aging test and passed. Would you like to re-execute the aging test? (y/n):"
else
  # No start marker: run the test without asking.
  run_test
fi
更多推荐




所有评论(0)