Linux运维实战
运维环境搭建
1. LAMP环境部署
Ubuntu/Debian系统
# Refresh the package index and upgrade the installed packages
sudo apt update && sudo apt upgrade -y
# Install Apache, enable it at boot and start it now
sudo apt install apache2 -y
sudo systemctl enable apache2
sudo systemctl start apache2
# Install MySQL, then run the bundled mysql_secure_installation helper
sudo apt install mysql-server -y
sudo mysql_secure_installation
# Install PHP, the Apache PHP module and a set of PHP extensions
sudo apt install php libapache2-mod-php php-mysql php-curl php-gd php-mbstring php-xml php-zip -y
# Restart Apache after the PHP installation
sudo systemctl restart apache2
# Test PHP: write a phpinfo() page into the web root
# NOTE(review): info.php exposes server configuration details; remove it after testing
echo "<?php phpinfo(); ?>" | sudo tee /var/www/html/info.php
CentOS/RHEL系统(CentOS 8+/RHEL 8+,使用dnf)
# Install Apache (httpd), enable it at boot and start it now
sudo dnf install httpd -y
sudo systemctl enable httpd
sudo systemctl start httpd
# Install MySQL (MariaDB server and client), enable and start it,
# then run the bundled mysql_secure_installation helper
sudo dnf install mariadb-server mariadb -y
sudo systemctl enable mariadb
sudo systemctl start mariadb
sudo mysql_secure_installation
# Install PHP and a set of PHP extensions
sudo dnf install php php-mysqlnd php-curl php-gd php-mbstring php-xml php-zip -y
# Restart Apache after the PHP installation
sudo systemctl restart httpd
# Configure the firewall: permanently allow the http and https services,
# then reload firewalld so the permanent rules take effect
sudo firewall-cmd --permanent --add-service=http
sudo firewall-cmd --permanent --add-service=https
sudo firewall-cmd --reload
2. LNMP环境部署
安装Nginx
# Ubuntu/Debian (run only the install command that matches your distribution)
sudo apt install nginx -y
# CentOS/RHEL
sudo dnf install nginx -y
# Enable Nginx at boot and start it now
sudo systemctl enable nginx
sudo systemctl start nginx
配置Nginx虚拟主机(以下以Debian/Ubuntu的sites-available目录结构为例)
# Create the site's document root
sudo mkdir -p /var/www/example.com/html
# Hand the directory to the web server account. The two commands name different
# accounts; the trailing comments mark which distribution each one is meant for
sudo chown -R www-data:www-data /var/www/example.com/html # Ubuntu
sudo chown -R nginx:nginx /var/www/example.com/html # CentOS
# Write the virtual host definition. The heredoc delimiter is unquoted, so the
# shell expands variables in the body; \$uri is escaped to reach the file as $uri
sudo tee /etc/nginx/sites-available/example.com << EOF
server {
listen 80;
server_name example.com www.example.com;
root /var/www/example.com/html;
index index.php index.html index.htm;
location / {
try_files \$uri \$uri/ =404;
}
location ~ \.php$ {
include snippets/fastcgi-php.conf;
fastcgi_pass unix:/var/run/php/php8.1-fpm.sock;
}
location ~ /\.ht {
deny all;
}
}
EOF
# Enable the site by linking it into sites-enabled
sudo ln -s /etc/nginx/sites-available/example.com /etc/nginx/sites-enabled/
# Check the configuration syntax, then reload Nginx
sudo nginx -t
sudo systemctl reload nginx
系统监控和告警
1. 系统监控脚本
综合监控脚本
#!/bin/bash
# System monitoring script: settings shared by the check functions and the
# alert step further down.
# Host name, used in the subject of the alert mail
HOSTNAME=$(hostname)
# Timestamp taken once at start-up; every log line of this run carries it
DATE=$(date '+%Y-%m-%d %H:%M:%S')
# File that warnings and the run summary are appended to
LOG_FILE="/var/log/system_monitor.log"
# Recipient of the alert mail
ALERT_EMAIL="admin@example.com"
# Monitoring functions
#######################################
# Warn when total CPU utilisation exceeds the threshold.
# Utilisation is computed as 100 minus the idle percentage on top's summary
# line, so system, iowait and steal time are counted too (the second field
# of that line is only the user-space share).
# Globals:   DATE, LOG_FILE (read); CPU_USAGE, CPU_THRESHOLD (written)
# Outputs:   warning line on stdout, also appended to LOG_FILE
# Returns:   1 when usage is above the threshold; 0 otherwise, including
#            when no usage figure could be parsed
#######################################
check_cpu() {
  CPU_THRESHOLD=80
  # The C locale keeps the decimal separator a dot so the number parses.
  CPU_USAGE=$(LC_ALL=C top -bn1 | LC_ALL=C awk '
    /Cpu\(s\)/ {
      # Accept both the "98.0 id" and the older "98.0%id" layout.
      if (match($0, /[0-9.]+[ %]*id/)) {
        idle = substr($0, RSTART, RLENGTH)
        sub(/[ %]*id$/, "", idle)
        printf "%.1f", 100 - idle
      }
      exit
    }')
  if [[ -z "$CPU_USAGE" ]]; then
    echo "check_cpu: could not parse CPU usage from top output" >&2
    return 0
  fi
  # awk performs the floating-point comparison; exit status 0 means "above".
  if awk -v usage="$CPU_USAGE" -v limit="$CPU_THRESHOLD" \
    'BEGIN { exit !(usage > limit) }'; then
    echo "[$DATE] WARNING: CPU使用率过高: ${CPU_USAGE}%" | tee -a "$LOG_FILE"
    return 1
  fi
  return 0
}
#######################################
# Warn when memory utilisation (used / total, from `free`) exceeds the
# threshold.
# Globals:   DATE, LOG_FILE (read); MEM_USAGE, MEM_THRESHOLD (written)
# Outputs:   warning line on stdout, also appended to LOG_FILE
# Returns:   1 when usage is above the threshold; 0 otherwise, including
#            when no usage figure could be parsed
#######################################
check_memory() {
  MEM_THRESHOLD=85
  # Run free in the C locale so the "Mem:" row label is not translated;
  # skip the row when total is 0 to avoid a division by zero.
  MEM_USAGE=$(LC_ALL=C free | LC_ALL=C awk \
    '/^Mem:/ && $2 > 0 { printf "%.2f", $3 / $2 * 100.0; exit }')
  if [[ -z "$MEM_USAGE" ]]; then
    echo "check_memory: could not parse memory usage from free output" >&2
    return 0
  fi
  # awk performs the floating-point comparison; exit status 0 means "above".
  if awk -v usage="$MEM_USAGE" -v limit="$MEM_THRESHOLD" \
    'BEGIN { exit !(usage > limit) }'; then
    echo "[$DATE] WARNING: 内存使用率过高: ${MEM_USAGE}%" | tee -a "$LOG_FILE"
    return 1
  fi
  return 0
}
#######################################
# Warn when the root filesystem's used percentage exceeds the threshold.
# Globals:   DATE, LOG_FILE (read); DISK_USAGE, DISK_THRESHOLD (written)
# Outputs:   warning line on stdout, also appended to LOG_FILE
# Returns:   1 when usage is above the threshold; 0 otherwise, including
#            when no usage figure could be parsed
#######################################
check_disk() {
  DISK_THRESHOLD=90
  # -P selects the POSIX output format: one line per filesystem with the
  # percentage in column 5.
  DISK_USAGE=$(df -P / | awk 'NR == 2 { sub(/%$/, "", $5); print $5 }')
  # Guard the integer test below against empty or non-numeric input.
  if ! [[ "$DISK_USAGE" =~ ^[0-9]+$ ]]; then
    echo "check_disk: could not parse disk usage from df output" >&2
    return 0
  fi
  if [ "$DISK_USAGE" -gt "$DISK_THRESHOLD" ]; then
    echo "[$DATE] WARNING: 磁盘使用率过高: ${DISK_USAGE}%" | tee -a "$LOG_FILE"
    return 1
  fi
  return 0
}
#######################################
# Warn when the 1-minute load average exceeds 0.8 per CPU core.
# Globals:   DATE, LOG_FILE (read);
#            LOAD_AVG, CPU_CORES, LOAD_THRESHOLD (written)
# Outputs:   warning line on stdout, also appended to LOG_FILE
# Returns:   1 when the load is above the threshold; 0 otherwise, including
#            when no load figure could be parsed
#######################################
check_load() {
  CPU_CORES=$(nproc)
  # Run uptime in the C locale: the "load average:" label stays untranslated
  # and the values use a dot, so splitting on "," isolates the first figure.
  LOAD_AVG=$(LC_ALL=C uptime | LC_ALL=C awk -F'load average:' '
    NF > 1 { split($2, avg, ","); gsub(/[ \t]/, "", avg[1]); print avg[1]; exit }')
  if [[ -z "$LOAD_AVG" || -z "$CPU_CORES" ]]; then
    echo "check_load: could not determine load average or core count" >&2
    return 0
  fi
  LOAD_THRESHOLD=$(LC_ALL=C awk -v cores="$CPU_CORES" \
    'BEGIN { printf "%.1f", cores * 0.8 }')
  # awk performs the floating-point comparison; exit status 0 means "above".
  if awk -v load="$LOAD_AVG" -v limit="$LOAD_THRESHOLD" \
    'BEGIN { exit !(load > limit) }'; then
    echo "[$DATE] WARNING: 系统负载过高: $LOAD_AVG" | tee -a "$LOG_FILE"
    return 1
  fi
  return 0
}
# Run every check in turn; each check that reports a problem adds one alert
ALERTS=0
for check in check_cpu check_memory check_disk check_load; do
  "$check" || ALERTS=$((ALERTS + 1))
done
# Mail the tail of the log when at least one check raised an alert
if (( ALERTS > 0 )); then
  tail -20 "$LOG_FILE" | mail -s "[$HOSTNAME] 系统告警" "$ALERT_EMAIL"
fi
# Always record the run summary
echo "[$DATE] 监控完成,发现 $ALERTS 个告警" >> "$LOG_FILE"
2. 服务监控脚本
服务状态监控
#!/bin/bash
# Service monitoring script: settings used by monitor_service and the loop below.
# Units to check
SERVICES=("nginx" "mysql" "ssh" "cron")
# File that status lines are appended to
LOG_FILE="/var/log/service_monitor.log"
# Timestamp taken once at start-up; every log line of this run carries it
DATE=$(date '+%Y-%m-%d %H:%M:%S')
#######################################
# Check one systemd unit and attempt a single restart when it is not active.
# All expansions are quoted so that a log path or unit name containing
# whitespace is passed through as one word.
# Globals:   DATE, LOG_FILE (read)
# Arguments: $1 - unit name
# Outputs:   status lines appended to LOG_FILE; problem lines are also
#            printed to stdout, and a mail is sent to admin@example.com when
#            the unit is still inactive after the restart
#######################################
monitor_service() {
  local service=$1
  if systemctl is-active --quiet "$service"; then
    echo "[$DATE] $service: 运行正常" >> "$LOG_FILE"
    return
  fi

  echo "[$DATE] $service: 服务异常,尝试重启" | tee -a "$LOG_FILE"
  systemctl restart "$service"
  # Wait before re-checking; the restart result is judged by is-active below.
  sleep 5
  if systemctl is-active --quiet "$service"; then
    echo "[$DATE] $service: 重启成功" | tee -a "$LOG_FILE"
  else
    echo "[$DATE] $service: 重启失败,需要人工干预" | tee -a "$LOG_FILE"
    # Send the alert mail
    echo "$service 服务重启失败,请检查" | mail -s "服务告警" admin@example.com
  fi
}

# Check every configured service
for service in "${SERVICES[@]}"; do
  monitor_service "$service"
done
日志管理
1. 日志轮转配置
logrotate配置
# Create a custom logrotate policy for /var/log/myapp/*.log
# (the heredoc body is the policy file written to /etc/logrotate.d/myapp)
sudo tee /etc/logrotate.d/myapp << EOF
/var/log/myapp/*.log {
daily
missingok
rotate 30
compress
delaycompress
notifempty
create 644 myapp myapp
postrotate
systemctl reload myapp
endscript
}
EOF
# Test the configuration: first with -d (debug run), then with -f (force a rotation)
sudo logrotate -d /etc/logrotate.d/myapp
sudo logrotate -f /etc/logrotate.d/myapp
2. 日志分析脚本
Web访问日志分析
#!/bin/bash
# Web access-log analysis: summarises today's requests from an access log in
# the combined format and writes the report to a dated file under /tmp.
LOG_FILE="/var/log/nginx/access.log"
REPORT_FILE="/tmp/web_report_$(date +%Y%m%d).txt"
DATE=$(date '+%Y-%m-%d')

#######################################
# Print the report for today's entries of one access log.
# Globals:   DATE (read)
# Arguments: $1 - path of the access log
# Outputs:   the report on stdout; grep's error on stderr when the log
#            cannot be read (the report then shows zero entries)
# Returns:   1 when no temporary file could be created
#######################################
web_report() {
  local log_file=$1
  local today entries
  # Access-log timestamps use English month abbreviations, so build the
  # search key in the C locale; a localized %b would never match.
  today=$(LC_ALL=C date '+%d/%b/%Y')
  # Filter today's lines once instead of re-scanning the whole log for
  # every section of the report.
  entries=$(mktemp) || return 1
  grep -F -- "$today" "$log_file" > "$entries"

  echo "=== Web访问日志分析报告 ==="
  echo "日期: $DATE"
  echo "日志文件: $log_file"
  echo
  echo "=== 今日访问统计 ==="
  echo "总访问量: $(wc -l < "$entries")"
  echo "独立IP数: $(awk '{print $1}' "$entries" | sort -u | wc -l)"
  echo
  echo "=== 状态码分布 ==="
  awk '{print $9}' "$entries" | sort | uniq -c | sort -nr
  echo
  echo "=== 访问量最高的IP (Top 10) ==="
  awk '{print $1}' "$entries" | sort | uniq -c | sort -nr | head -10
  echo
  echo "=== 访问量最高的页面 (Top 10) ==="
  awk '{print $7}' "$entries" | sort | uniq -c | sort -nr | head -10
  echo
  echo "=== 错误请求 (4xx, 5xx) ==="
  awk '$9 ~ /^[45]/ {print $9, $7}' "$entries" | sort | uniq -c | sort -nr

  rm -f -- "$entries"
}

# Generate the report
web_report "$LOG_FILE" > "$REPORT_FILE"
echo "报告已生成: $REPORT_FILE"
备份和恢复
1. 自动化备份脚本
数据库备份脚本
#!/bin/bash
# MySQL backup script: dumps every user database into a gzip-compressed file
# under BACKUP_DIR and deletes archives older than RETENTION_DAYS days.
# NOTE: the credentials below are tutorial placeholders; in production keep
# them in a protected option file rather than in the script.
DB_USER="backup_user"
DB_PASS="backup_password"
BACKUP_DIR="/backup/mysql"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=7

# Create the backup directory; nothing below can work without it
mkdir -p "$BACKUP_DIR" || {
  echo "无法创建备份目录: $BACKUP_DIR" >&2
  exit 1
}

# Hand the credentials to the client tools through a private option file:
# a -p<password> argument would be visible in the process list.
CRED_FILE=$(mktemp) || exit 1
trap 'rm -f -- "$CRED_FILE"' EXIT
chmod 600 "$CRED_FILE"
printf '[client]\nuser=%s\npassword=%s\n' "$DB_USER" "$DB_PASS" > "$CRED_FILE"

# List all databases. -N drops the column header; grep -x matches whole
# names only, so a user database such as "mysystem" or "mysql_app" is no
# longer skipped just because it contains "sys" or "mysql".
DATABASES=$(mysql --defaults-extra-file="$CRED_FILE" -N -B -e "SHOW DATABASES;" \
  | grep -Evx "(information_schema|performance_schema|mysql|sys)")

# Back up each database
while IFS= read -r db; do
  [[ -n "$db" ]] || continue
  echo "备份数据库: $db"
  dump_file="$BACKUP_DIR/${db}_$DATE.sql"
  if mysqldump --defaults-extra-file="$CRED_FILE" \
    --single-transaction --routines --triggers "$db" > "$dump_file"; then
    gzip -- "$dump_file"
    echo "数据库 $db 备份成功"
  else
    # Do not leave a truncated dump behind
    rm -f -- "$dump_file"
    echo "数据库 $db 备份失败"
  fi
done <<< "$DATABASES"

# Remove old backups
find "$BACKUP_DIR" -name "*.sql.gz" -mtime +"$RETENTION_DAYS" -delete
echo "已清理 $RETENTION_DAYS 天前的备份文件"

# Verify: list the archives produced by this run
echo "当前备份文件:"
ls -lh "$BACKUP_DIR"/*_"$DATE".sql.gz 2>/dev/null || echo "没有找到今日备份文件"
文件系统备份脚本
#!/bin/bash
# Filesystem backup: one dated tar.gz archive per source directory, followed
# by retention clean-up and a short report file.
SOURCE_DIRS=("/etc" "/home" "/var/www")
BACKUP_DIR="/backup/files"
DATE=$(date +%Y%m%d)
RETENTION_DAYS=30

# Make sure the destination exists
mkdir -p "$BACKUP_DIR"

# Archive each source directory, skipping the ones that are missing
for dir in "${SOURCE_DIRS[@]}"; do
  if [ ! -d "$dir" ]; then
    echo "目录 $dir 不存在,跳过"
    continue
  fi
  backup_name="$(basename "$dir")_$DATE.tar.gz"
  echo "备份目录: $dir -> $backup_name"
  # Archive relative to / so the stored member names carry no leading slash
  if tar -czf "$BACKUP_DIR/$backup_name" -C / "${dir#/}" 2>/dev/null; then
    echo "目录 $dir 备份成功"
  else
    echo "目录 $dir 备份失败"
  fi
done

# Drop archives older than the retention period
find "$BACKUP_DIR" -name "*.tar.gz" -mtime +"$RETENTION_DAYS" -delete
echo "已清理 $RETENTION_DAYS 天前的备份文件"

# Write the backup report in one go
{
  echo "=== 备份报告 ==="
  echo "备份时间: $(date)"
  echo "备份文件:"
  ls -lh "$BACKUP_DIR"/*_"$DATE".tar.gz 2>/dev/null
} > "$BACKUP_DIR/backup_report_$DATE.txt"
2. 恢复脚本
数据库恢复脚本
#!/bin/bash
# MySQL restore script: loads one dump file (plain or gzip-compressed) into
# the database named in the file name.
# Usage: restore.sh <backup file>
# NOTE: the credentials below are tutorial placeholders; in production keep
# them in a protected option file rather than in the script.
DB_USER="root"
DB_PASS="password"
BACKUP_FILE="$1"

# A failing decompressor must fail the whole restore pipeline
set -o pipefail

if [ -z "$BACKUP_FILE" ]; then
  echo "用法: $0 <备份文件>"
  echo "示例: $0 /backup/mysql/mydb_20241220_120000.sql.gz"
  exit 1
fi
if [ ! -f "$BACKUP_FILE" ]; then
  echo "备份文件不存在: $BACKUP_FILE"
  exit 1
fi

# Derive the database name by stripping the .gz/.sql extensions and the
# trailing _YYYYMMDD_HHMMSS stamp. Cutting at the first "_" would truncate
# names such as "my_db".
backup_base=$(basename -- "$BACKUP_FILE")
backup_base=${backup_base%.gz}
backup_base=${backup_base%.sql}
if [[ "$backup_base" =~ ^(.+)_[0-9]{8}_[0-9]{6}$ ]]; then
  DB_NAME=${BASH_REMATCH[1]}
else
  # Unknown naming scheme: fall back to the text before the first "_"
  DB_NAME=${backup_base%%_*}
fi

echo "准备恢复数据库: $DB_NAME"
echo "备份文件: $BACKUP_FILE"
read -r -p "确认恢复? (y/N): " confirm
if [ "$confirm" != "y" ]; then
  echo "恢复已取消"
  exit 0
fi

# Hand the credentials to the client through a private option file:
# a -p<password> argument would be visible in the process list.
CRED_FILE=$(mktemp) || exit 1
trap 'rm -f -- "$CRED_FILE"' EXIT
chmod 600 "$CRED_FILE"
printf '[client]\nuser=%s\npassword=%s\n' "$DB_USER" "$DB_PASS" > "$CRED_FILE"

# Create the database; the identifier is backtick-quoted (embedded backticks
# doubled) so unusual names cannot break out of the statement
bt='`'
quoted_name="${bt}${DB_NAME//$bt/$bt$bt}${bt}"
restore_status=0
mysql --defaults-extra-file="$CRED_FILE" \
  -e "CREATE DATABASE IF NOT EXISTS ${quoted_name};" || restore_status=1

# Restore the data
if [ "$restore_status" -eq 0 ]; then
  if [[ "$BACKUP_FILE" == *.gz ]]; then
    gzip -dc -- "$BACKUP_FILE" \
      | mysql --defaults-extra-file="$CRED_FILE" "$DB_NAME" || restore_status=1
  else
    mysql --defaults-extra-file="$CRED_FILE" "$DB_NAME" < "$BACKUP_FILE" \
      || restore_status=1
  fi
fi

if [ "$restore_status" -eq 0 ]; then
  echo "数据库 $DB_NAME 恢复成功"
else
  echo "数据库 $DB_NAME 恢复失败"
  exit 1
fi
性能优化
1. 系统性能调优
内核参数优化
#!/bin/bash
# System performance tuning script: writes a sysctl settings file and applies it
SYSCTL_CONF="/etc/sysctl.d/99-performance.conf"
# Back up the original configuration
# NOTE(review): this copies /etc/sysctl.conf, while the settings below are written to $SYSCTL_CONF
cp /etc/sysctl.conf /etc/sysctl.conf.backup
# Create the tuning configuration; the heredoc body, including its "#" lines,
# is the content written to $SYSCTL_CONF
# NOTE(review): tcp_congestion_control = bbr presumably needs BBR support in the running kernel — confirm
cat > $SYSCTL_CONF << EOF
# 网络优化
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 87380 16777216
net.ipv4.tcp_wmem = 4096 65536 16777216
net.ipv4.tcp_congestion_control = bbr
net.core.netdev_max_backlog = 5000
# 文件系统优化
fs.file-max = 2097152
vm.swappiness = 10
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5
# 安全优化
net.ipv4.conf.all.rp_filter = 1
net.ipv4.conf.default.rp_filter = 1
net.ipv4.icmp_echo_ignore_broadcasts = 1
net.ipv4.conf.all.accept_source_route = 0
net.ipv4.conf.default.accept_source_route = 0
EOF
# Apply the settings from the file just written
sysctl -p $SYSCTL_CONF
echo "系统性能优化完成"
2. Web服务器优化
Nginx性能优化
# Nginx performance tuning (http-level settings only).
# The stock nginx.conf includes /etc/nginx/conf.d/*.conf from inside its
# http { } block, so this file must not contain its own http { } wrapper or
# main-context directives; nginx -t rejects those there.
# Set the main-context values directly in /etc/nginx/nginx.conf instead:
#   worker_processes auto;        # number of worker processes
#   events {
#       worker_connections 1024;  # maximum connections per worker
#       use epoll;
#       multi_accept on;
#   }
# NOTE: nginx also rejects a directive that is set twice at the same level.
# If nginx -t reports e.g. '"gzip" directive is duplicate', drop that line
# here or remove it from nginx.conf.
sudo tee /etc/nginx/conf.d/performance.conf << 'EOF'
# 开启gzip压缩
gzip on;
gzip_vary on;
gzip_min_length 1024;
gzip_types text/plain text/css text/xml text/javascript application/javascript application/xml+rss application/json;
# 缓存设置
open_file_cache max=1000 inactive=20s;
open_file_cache_valid 30s;
open_file_cache_min_uses 2;
open_file_cache_errors on;
# 连接超时
keepalive_timeout 65;
keepalive_requests 100;
# 缓冲区大小
client_body_buffer_size 128k;
client_max_body_size 10m;
client_header_buffer_size 1k;
large_client_header_buffers 4 4k;
EOF
# Validate the configuration and reload Nginx only when it is valid
sudo nginx -t && sudo systemctl reload nginx
安全加固
1. 系统安全检查脚本
安全审计脚本
#!/bin/bash
# System security audit: collects account, network, file-permission and
# auth-log findings into a dated report under /tmp.
REPORT_FILE="/tmp/security_audit_$(date +%Y%m%d).txt"

#######################################
# Print the last 10 lines of a file that match a pattern, or a fallback
# message when there are none (file missing, unreadable, or no match).
# The decision is made on the captured output: in `grep ... | tail || echo`
# the pipeline's status is tail's, so the fallback would never be printed.
# Arguments: $1 - grep pattern, $2 - file to search, $3 - fallback message
#######################################
print_recent_matches() {
  local pattern=$1 file=$2 fallback=$3
  local matches
  matches=$(grep -- "$pattern" "$file" 2>/dev/null | tail -10)
  if [[ -n "$matches" ]]; then
    printf '%s\n' "$matches"
  else
    echo "$fallback"
  fi
}

{
  echo "=== 系统安全审计报告 ==="
  echo "检查时间: $(date)"
  echo "主机名: $(hostname)"
  echo
  echo "=== 用户账户检查 ==="
  echo "UID为0的用户:"
  awk -F: '$3 == 0 {print $1}' /etc/passwd
  echo
  echo "空密码用户:"
  # awk exits non-zero when it cannot open /etc/shadow (e.g. not root)
  awk -F: '$2 == "" {print $1}' /etc/shadow 2>/dev/null || echo "无法访问shadow文件"
  echo
  echo "最近登录用户:"
  last -n 10
  echo
  echo "=== 网络服务检查 ==="
  echo "监听端口:"
  ss -tuln
  echo
  echo "=== 文件权限检查 ==="
  echo "SUID文件:"
  find / -perm -4000 -type f 2>/dev/null | head -20
  echo
  echo "SGID文件:"
  find / -perm -2000 -type f 2>/dev/null | head -20
  echo
  echo "全局可写文件:"
  find / -perm -002 -type f 2>/dev/null | head -20
  echo
  echo "=== 系统日志检查 ==="
  echo "最近的认证失败:"
  print_recent_matches "authentication failure" /var/log/auth.log "无认证日志"
  echo
  echo "最近的sudo使用:"
  print_recent_matches "sudo:" /var/log/auth.log "无sudo日志"
} > "$REPORT_FILE"
echo "安全审计报告已生成: $REPORT_FILE"
2. 自动化安全加固
基础安全加固脚本
#!/bin/bash
# Baseline hardening for Debian/Ubuntu hosts (uses apt and ufw).
# Must run as root.
# WARNING: this disables SSH password logins and moves sshd to SSH_PORT.
# Make sure key-based login works and keep the current session open until a
# new connection on the new port has been verified.
SSH_PORT=2222
SSHD_CONFIG=/etc/ssh/sshd_config

#######################################
# Force one sshd_config keyword to a value.
# Every active line for the keyword is commented out and the new setting is
# inserted as the first line of the file, so it is the first value sshd
# reads, whatever the previous value or comment state was.
# Globals:   SSHD_CONFIG (read)
# Arguments: $1 - keyword, $2 - value
#######################################
set_sshd_option() {
  local key=$1 value=$2
  sed -i -E \
    -e "s/^[[:space:]]*(${key}[[:space:]].*)\$/#\\1/I" \
    -e "1i ${key} ${value}" \
    "$SSHD_CONFIG"
}

if [[ $EUID -ne 0 ]]; then
  echo "请以root身份运行此脚本" >&2
  exit 1
fi

echo "开始系统安全加固..."
# Update the system
echo "更新系统包..."
apt update && apt upgrade -y

# Configure the firewall: add every allow rule first and enable ufw last,
# so the default-deny policy never applies without the SSH rules in place
echo "配置防火墙..."
ufw default deny incoming
ufw default allow outgoing
ufw allow ssh
# sshd is moved to SSH_PORT below; without this rule the host would become
# unreachable over SSH after the restart
ufw allow "${SSH_PORT}/tcp"
ufw allow http
ufw allow https
ufw --force enable

# SSH hardening
echo "配置SSH安全..."
cp "$SSHD_CONFIG" "${SSHD_CONFIG}.backup"
set_sshd_option PermitRootLogin no
set_sshd_option PasswordAuthentication no
set_sshd_option Port "$SSH_PORT"
# Validate before restarting; a broken config would take sshd down
if sshd -t; then
  # The unit name differs between systems, so try both
  systemctl restart sshd || systemctl restart ssh
else
  echo "sshd配置校验失败,已恢复原配置" >&2
  cp "${SSHD_CONFIG}.backup" "$SSHD_CONFIG"
fi

# Install fail2ban
echo "安装fail2ban..."
apt install fail2ban -y
systemctl enable fail2ban
systemctl start fail2ban

# Set up automatic security updates
echo "配置自动安全更新..."
apt install unattended-upgrades -y
dpkg-reconfigure -plow unattended-upgrades
echo "安全加固完成"
故障排除
1. 系统故障诊断脚本
综合故障诊断
#!/bin/bash
# System troubleshooting snapshot: prints host details, resource usage,
# network state, failed units and recent log lines to stdout.

# Print a "=== title ===" section banner.
heading() {
  printf '=== %s ===\n' "$1"
}

# Print a label line, then run the command given after it.
report() {
  local label=$1
  shift
  printf '%s\n' "$label"
  "$@"
}

# Summary line of CPU usage from one batch-mode top iteration.
cpu_summary() {
  top -bn1 | grep "Cpu(s)"
}

# Last 20 kernel ring-buffer messages.
kernel_tail() {
  dmesg | tail -20
}

heading "系统故障诊断"
printf '诊断时间: %s\n\n' "$(date)"

heading "系统基本信息"
printf '主机名: %s\n' "$(hostname)"
printf '内核版本: %s\n' "$(uname -r)"
printf '系统运行时间: %s\n\n' "$(uptime)"

heading "资源使用情况"
report "CPU使用率:" cpu_summary
echo
report "内存使用:" free -h
echo
report "磁盘使用:" df -h
echo

heading "网络连接"
report "网络接口:" ip addr show
echo
report "路由表:" ip route show
echo
report "DNS配置:" cat /etc/resolv.conf
echo

heading "服务状态"
report "失败的服务:" systemctl --failed
echo

heading "最近的系统日志"
report "内核消息:" kernel_tail
echo
report "系统日志:" journalctl -n 20 --no-pager
运维最佳实践
1. 定时任务配置
运维定时任务
# Open the crontab for editing
crontab -e
# Entries to add to the crontab:
# Run system monitoring at minute 0 of every hour
0 * * * * /usr/local/bin/system_monitor.sh
# Run the backup every day at 02:00
0 2 * * * /usr/local/bin/backup.sh
# Run log cleanup every Sunday at 03:00
0 3 * * 0 /usr/local/bin/log_cleanup.sh
# Run the security check at 04:00 on the 1st of every month
0 4 1 * * /usr/local/bin/security_audit.sh
2. 运维文档模板
故障处理记录
# Create the incident-record template: the heredoc body is the Markdown
# document written to /tmp/incident_template.md
cat > /tmp/incident_template.md << EOF
# 故障处理记录
## 基本信息
- 故障时间:
- 影响范围:
- 严重级别:
- 处理人员:
## 故障描述
- 故障现象:
- 用户反馈:
- 监控告警:
## 故障分析
- 可能原因:
- 排查过程:
- 根本原因:
## 解决方案
- 临时措施:
- 永久方案:
- 验证结果:
## 预防措施
- 监控改进:
- 流程优化:
- 技术改进:
## 经验总结
- 经验教训:
- 改进建议:
EOF
总结
Linux运维实战要点:
- 环境搭建:熟练部署LAMP/LNMP等Web环境
- 监控告警:建立完善的系统和服务监控体系
- 备份恢复:制定可靠的备份策略和恢复流程
- 性能优化:持续优化系统和应用性能
- 安全加固:实施全面的安全防护措施
- 故障处理:快速诊断和解决系统故障
- 文档管理:建立完善的运维文档体系
课程总结
通过本Linux教程系列的学习,你已经掌握了:
- 基础知识:Linux系统概念、发行版选择、系统安装
- 命令操作:文件管理、文本处理、系统监控等基础命令
- 系统管理:用户权限、进程服务、网络配置等管理技能
- 高级技能:Shell脚本编程、系统调优、安全配置
- 运维实战:监控告警、备份恢复、故障处理等实际应用
继续学习建议:
- 多实践,在实际环境中应用所学知识
- 关注Linux社区动态,学习新技术
- 深入学习容器、云计算等现代技术
- 参与开源项目,提升技术水平
💡 小贴士:Linux运维是一个需要持续学习和实践的领域。建议建立自己的实验环境,多动手操作,积累实战经验。记住:实践是最好的老师!
📚 文章对你有帮助?请关注我的公众号,万分感谢!
获取更多优质技术文章,第一时间掌握最新技术动态

关注公众号
第一时间获取最新技术文章

添加微信
技术交流 · 问题答疑 · 学习指导
评论讨论
欢迎留下你的想法和建议