第15课:Linux运维实战

【腾讯云】语音识别准确率高,支持多语种,多场景,限时特惠,最低14.9元起

推广

【腾讯云】语音识别准确率高,支持多语种,多场景,限时特惠,最低14.9元起

Linux运维实战

运维环境搭建

1. LAMP环境部署

Ubuntu/Debian系统

# Refresh the package index, then apply pending upgrades
sudo apt update && sudo apt upgrade -y

# Apache: install, register for boot, start now
sudo apt install -y apache2
for action in enable start; do
    sudo systemctl "$action" apache2
done

# MySQL: install the server, then run the interactive hardening wizard
sudo apt install -y mysql-server
sudo mysql_secure_installation

# PHP plus the Apache module and common extensions
php_packages=(
    php libapache2-mod-php php-mysql php-curl
    php-gd php-mbstring php-xml php-zip
)
sudo apt install -y "${php_packages[@]}"

# Restart Apache so it loads the PHP module
sudo systemctl restart apache2

# Drop a phpinfo() page into the default docroot to verify PHP works.
# NOTE(review): this page exposes the PHP configuration to anyone who can
# reach the server - presumably it should be deleted after verification.
printf '%s\n' '<?php phpinfo(); ?>' | sudo tee /var/www/html/info.php

CentOS/RHEL系统

# Apache (httpd): install, register for boot, start now
sudo dnf install -y httpd
for action in enable start; do
    sudo systemctl "$action" httpd
done

# MySQL-compatible server (MariaDB): install, enable, start, then harden
sudo dnf install -y mariadb-server mariadb
for action in enable start; do
    sudo systemctl "$action" mariadb
done
sudo mysql_secure_installation

# PHP and common extensions
php_packages=(php php-mysqlnd php-curl php-gd php-mbstring php-xml php-zip)
sudo dnf install -y "${php_packages[@]}"

# Restart Apache so it picks up PHP
sudo systemctl restart httpd

# Open HTTP and HTTPS permanently in firewalld, then activate the change
for svc in http https; do
    sudo firewall-cmd --permanent --add-service="$svc"
done
sudo firewall-cmd --reload

2. LNMP环境部署

安装Nginx

# Install Nginx with whichever package manager this host provides, then
# enable it at boot and start it. Running both install commands blindly
# (as a copy-paste of the two variants would) always fails on one of them.
if command -v apt-get >/dev/null 2>&1; then
    # Ubuntu/Debian
    sudo apt install nginx -y
elif command -v dnf >/dev/null 2>&1; then
    # CentOS/RHEL
    sudo dnf install nginx -y
else
    echo "未找到 apt 或 dnf,无法安装 Nginx" >&2
    false
fi &&
    # Only reached when the install step succeeded
    sudo systemctl enable nginx &&
    sudo systemctl start nginx

配置Nginx虚拟主机

# Create the site's document root
sudo mkdir -p /var/www/example.com/html

# Hand the tree to whichever web-server account exists on this host, instead
# of running both chown variants and letting one of them fail.
if id -u www-data >/dev/null 2>&1; then
    sudo chown -R www-data:www-data /var/www/example.com/html  # Ubuntu
elif id -u nginx >/dev/null 2>&1; then
    sudo chown -R nginx:nginx /var/www/example.com/html        # CentOS
fi

# Write the virtual-host definition. The quoted 'EOF' delimiter disables shell
# expansion, so nginx variables such as $uri are written literally without
# needing backslash escapes.
# NOTE(review): the PHP-FPM socket path is pinned to PHP 8.1 - adjust it to
# the installed version.
sudo tee /etc/nginx/sites-available/example.com << 'EOF'
server {
    listen 80;
    server_name example.com www.example.com;
    root /var/www/example.com/html;
    index index.php index.html index.htm;

    location / {
        try_files $uri $uri/ =404;
    }

    location ~ \.php$ {
        include snippets/fastcgi-php.conf;
        fastcgi_pass unix:/var/run/php/php8.1-fpm.sock;
    }

    location ~ /\.ht {
        deny all;
    }
}
EOF

# Enable the site. -f/-n make the step repeatable: an existing link is
# replaced instead of aborting with "File exists".
sudo ln -sfn /etc/nginx/sites-available/example.com /etc/nginx/sites-enabled/

# Reload only when the configuration test passes
sudo nginx -t && sudo systemctl reload nginx

系统监控和告警

1. 系统监控脚本

综合监控脚本

#!/bin/bash

# System monitoring script - shared settings read by the check functions:
# where warnings are logged, who gets the alert mail, and the host name and
# timestamp (captured once per run) used in every message.
LOG_FILE='/var/log/system_monitor.log'
ALERT_EMAIL='admin@example.com'
HOSTNAME="$(hostname)"
DATE="$(date +'%Y-%m-%d %H:%M:%S')"

# Monitoring functions

#######################################
# Warn when overall CPU utilisation exceeds CPU_THRESHOLD percent.
# Utilisation is computed as 100 minus the idle percentage reported by top,
# so system/iowait/steal time counts too (not just the "us" column).
# Globals:   DATE, LOG_FILE (read); CPU_USAGE, CPU_THRESHOLD (written)
# Outputs:   warning line to stdout and appended to LOG_FILE
# Returns:   1 when the threshold is exceeded, 0 otherwise
#######################################
check_cpu() {
    CPU_THRESHOLD=80

    # LC_ALL=C keeps the decimal separator a dot. The pattern accepts both
    # "91.8 id" and "98.4%id", including an idle value glued to the previous
    # column as in "ni,100.0 id".
    CPU_USAGE=$(LC_ALL=C top -bn1 | awk '
        /Cpu\(s\)/ {
            if (match($0, /[0-9]+(\.[0-9]+)?[ %]*id/)) {
                idle = substr($0, RSTART, RLENGTH)
                sub(/[ %]*id$/, "", idle)
                printf "%.1f", 100 - idle
            }
            exit
        }')

    if [ -z "$CPU_USAGE" ]; then
        # Make a broken probe visible instead of silently reporting "healthy"
        echo "[$DATE] ERROR: 无法获取CPU使用率" >&2
        return 0
    fi

    # Floating-point comparison done in awk, so bc is not required
    if awk -v usage="$CPU_USAGE" -v limit="$CPU_THRESHOLD" \
        'BEGIN { exit !(usage + 0 > limit + 0) }'; then
        echo "[$DATE] WARNING: CPU使用率过高: ${CPU_USAGE}%" | tee -a "$LOG_FILE"
        return 1
    fi
    return 0
}

#######################################
# Warn when used memory exceeds MEM_THRESHOLD percent of total memory.
# The percentage is used/total taken from the "Mem:" row of free.
# Globals:   DATE, LOG_FILE (read); MEM_USAGE, MEM_THRESHOLD (written)
# Outputs:   warning line to stdout and appended to LOG_FILE
# Returns:   1 when the threshold is exceeded, 0 otherwise
#######################################
check_memory() {
    MEM_THRESHOLD=85

    # LC_ALL=C pins the row label to "Mem:"; the $2 > 0 guard avoids a
    # division by zero on malformed output.
    MEM_USAGE=$(LC_ALL=C free | awk '/^Mem:/ { if ($2 > 0) printf("%.2f", $3 / $2 * 100.0) }')

    if [ -z "$MEM_USAGE" ]; then
        # Make a broken probe visible instead of silently reporting "healthy"
        echo "[$DATE] ERROR: 无法获取内存使用率" >&2
        return 0
    fi

    # Floating-point comparison done in awk, so bc is not required
    if awk -v usage="$MEM_USAGE" -v limit="$MEM_THRESHOLD" \
        'BEGIN { exit !(usage + 0 > limit + 0) }'; then
        echo "[$DATE] WARNING: 内存使用率过高: ${MEM_USAGE}%" | tee -a "$LOG_FILE"
        return 1
    fi
    return 0
}

# Warn when the root filesystem's use percentage is above DISK_THRESHOLD.
# Sets the globals DISK_USAGE and DISK_THRESHOLD; reads DATE and LOG_FILE.
# Returns 1 after logging a warning, 0 otherwise.
check_disk() {
    DISK_THRESHOLD=90
    # Column 5 of df's second line is the use percentage; drop the "%" suffix
    DISK_USAGE=$(df -h / | awk 'NR == 2 { sub(/%.*/, "", $5); print $5 }')

    # Guard clause: nothing to report unless usage is above the threshold
    [ "$DISK_USAGE" -gt "$DISK_THRESHOLD" ] || return 0

    echo "[$DATE] WARNING: 磁盘使用率过高: ${DISK_USAGE}%" | tee -a "$LOG_FILE"
    return 1
}

#######################################
# Warn when the 1-minute load average exceeds 0.8 x the number of CPU cores.
# Globals:   DATE, LOG_FILE (read);
#            LOAD_AVG, CPU_CORES, LOAD_THRESHOLD (written)
# Outputs:   warning line to stdout and appended to LOG_FILE
# Returns:   1 when the threshold is exceeded, 0 otherwise
#######################################
check_load() {
    CPU_CORES=$(nproc)

    # LC_ALL=C keeps the decimal separator a dot, so splitting the averages
    # on commas cannot cut a number in half.
    LOAD_AVG=$(LC_ALL=C uptime |
        awk -F'load average: *' '{ split($2, parts, /[, ]+/); print parts[1] }')
    LOAD_THRESHOLD=$(awk -v cores="$CPU_CORES" 'BEGIN { printf "%.1f", cores * 0.8 }')

    if [ -z "$LOAD_AVG" ]; then
        # Make a broken probe visible instead of silently reporting "healthy"
        echo "[$DATE] ERROR: 无法获取系统负载" >&2
        return 0
    fi

    # Floating-point comparison done in awk, so bc is not required
    if awk -v load="$LOAD_AVG" -v limit="$LOAD_THRESHOLD" \
        'BEGIN { exit !(load + 0 > limit + 0) }'; then
        echo "[$DATE] WARNING: 系统负载过高: $LOAD_AVG" | tee -a "$LOG_FILE"
        return 1
    fi
    return 0
}

# Run every check and count how many of them raised an alert
ALERTS=0
for check in check_cpu check_memory check_disk check_load; do
    if ! "$check"; then
        ALERTS=$((ALERTS + 1))
    fi
done

# Mail the last 20 log lines when at least one check fired
if ((ALERTS > 0)); then
    tail -20 "$LOG_FILE" | mail -s "[$HOSTNAME] 系统告警" "$ALERT_EMAIL"
fi

# Always record that the run finished, with the alert count
echo "[$DATE] 监控完成,发现 $ALERTS 个告警" >> "$LOG_FILE"

2. 服务监控脚本

服务状态监控

#!/bin/bash

# Service monitoring script - settings: run timestamp (captured once), log
# destination, and the systemd units to watch.
DATE="$(date +'%Y-%m-%d %H:%M:%S')"
LOG_FILE='/var/log/service_monitor.log'
SERVICES=(nginx mysql ssh cron)

# Check one systemd unit and try a single restart when it is not active.
#   $1 - unit name
# Reads DATE and LOG_FILE. A healthy unit is only logged; a failed restart
# is logged and also reported by mail.
monitor_service() {
    local unit="$1"

    # Healthy: note it in the log and stop here
    if systemctl is-active --quiet "$unit"; then
        echo "[$DATE] $unit: 运行正常" >> "$LOG_FILE"
        return
    fi

    # Not active: announce it, restart, and give the unit time to come up
    echo "[$DATE] $unit: 服务异常,尝试重启" | tee -a "$LOG_FILE"
    systemctl restart "$unit"
    sleep 5

    # Still down after the restart: escalate to a human
    if ! systemctl is-active --quiet "$unit"; then
        echo "[$DATE] $unit: 重启失败,需要人工干预" | tee -a "$LOG_FILE"
        # Send the alert mail
        echo "$unit 服务重启失败,请检查" | mail -s "服务告警" admin@example.com
        return
    fi

    echo "[$DATE] $unit: 重启成功" | tee -a "$LOG_FILE"
}

# Walk the configured unit list and check each entry in turn
for unit_name in "${SERVICES[@]}"; do monitor_service "$unit_name"; done

日志管理

1. 日志轮转配置

logrotate配置

# Install a rotation policy for the application's logs. The quoted delimiter
# writes the heredoc body verbatim.
sudo tee /etc/logrotate.d/myapp << 'EOF'
/var/log/myapp/*.log {
    daily
    missingok
    rotate 30
    compress
    delaycompress
    notifempty
    create 644 myapp myapp
    postrotate
        systemctl reload myapp
    endscript
}
EOF

# Verify the policy: a debug dry-run first (-d), then one forced rotation (-f)
for mode in -d -f; do
    sudo logrotate "$mode" /etc/logrotate.d/myapp
done

2. 日志分析脚本

Web访问日志分析

#!/bin/bash

# Web access-log analysis script.
# Summarises today's requests from an access log whose fields follow the
# layout this script indexes: $1 client IP, $7 request path, $9 status code.
LOG_FILE="/var/log/nginx/access.log"
REPORT_FILE="/tmp/web_report_$(date +%Y%m%d).txt"
DATE=$(date '+%Y-%m-%d')

#######################################
# Write today's access summary for one log file.
# Globals:   DATE (read)
# Arguments: $1 - access log to analyse
#            $2 - report file to (over)write
# Returns:   0 on success, 1 when the log is unreadable or no scratch file
#            could be created
#######################################
generate_web_report() {
    local log_file=$1 report_file=$2
    local today todays_lines

    if [ ! -r "$log_file" ]; then
        echo "无法读取日志文件: $log_file" >&2
        return 1
    fi

    # The log carries English month abbreviations, so format the date in the
    # C locale; computing it once also keeps every section on the same day
    # when the script runs across midnight.
    today=$(LC_ALL=C date '+%d/%b/%Y')

    # Filter the log once and reuse the result for every section
    todays_lines=$(mktemp) || return 1
    grep -F -- "$today" "$log_file" > "$todays_lines"

    {
        echo "=== Web访问日志分析报告 ==="
        echo "日期: $DATE"
        echo "日志文件: $log_file"
        echo

        echo "=== 今日访问统计 ==="
        echo "总访问量: $(awk 'END { print NR }' "$todays_lines")"
        echo "独立IP数: $(awk '!seen[$1]++ { n++ } END { print n + 0 }' "$todays_lines")"
        echo

        echo "=== 状态码分布 ==="
        awk '{print $9}' "$todays_lines" | sort | uniq -c | sort -nr
        echo

        echo "=== 访问量最高的IP (Top 10) ==="
        awk '{print $1}' "$todays_lines" | sort | uniq -c | sort -nr | head -10
        echo

        echo "=== 访问量最高的页面 (Top 10) ==="
        awk '{print $7}' "$todays_lines" | sort | uniq -c | sort -nr | head -10
        echo

        echo "=== 错误请求 (4xx, 5xx) ==="
        awk '$9 ~ /^[45]/ {print $9, $7}' "$todays_lines" | sort | uniq -c | sort -nr
    } > "$report_file"

    rm -f -- "$todays_lines"
}

# Generate the report; announce it only when it was actually written
generate_web_report "$LOG_FILE" "$REPORT_FILE" && echo "报告已生成: $REPORT_FILE"

备份和恢复

1. 自动化备份脚本

数据库备份脚本

#!/bin/bash

# MySQL database backup script.
# Dumps every non-system database to $BACKUP_DIR/<db>_<timestamp>.sql.gz and
# deletes archives older than RETENTION_DAYS days. Exits non-zero when the
# database list cannot be obtained or any dump fails.
DB_USER="backup_user"
DB_PASS="backup_password"
BACKUP_DIR="/backup/mysql"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=7

# Print the names of all non-system databases, one per line.
#   $1 - client option file holding the credentials
list_user_databases() {
    # -x anchors the pattern to the whole line, so only the system schemas
    # (and a stray "Database" header) are dropped; an unanchored match would
    # also skip user databases such as "mysystem" or "sysadmin".
    mysql --defaults-extra-file="$1" -N -e "SHOW DATABASES;" |
        grep -Evx 'Database|information_schema|performance_schema|mysql|sys'
}

# Dump one database and compress the dump.
#   $1 - client option file, $2 - database name
# Returns non-zero when the dump fails; a partial dump file is removed.
backup_database() {
    local dump_file="$BACKUP_DIR/${2}_$DATE.sql"

    echo "备份数据库: $2"
    if mysqldump --defaults-extra-file="$1" --single-transaction --routines --triggers "$2" > "$dump_file"; then
        gzip -f "$dump_file"
        echo "数据库 $2 备份成功"
    else
        rm -f -- "$dump_file"
        echo "数据库 $2 备份失败"
        return 1
    fi
}

# Entry point: back up all user databases, prune old archives, list results.
main() {
    local cnf databases db failed=0

    # Create the backup directory
    mkdir -p "$BACKUP_DIR" || return 1

    # Pass credentials through a private option file (mktemp creates it
    # readable by the owner only) so the password never shows up in argv.
    # NOTE: values containing a double quote or backslash need escaping here.
    cnf=$(mktemp) || return 1
    printf '[client]\nuser="%s"\npassword="%s"\n' "$DB_USER" "$DB_PASS" > "$cnf"

    # Fetch the database list; an empty result means the query failed or
    # there is nothing to back up.
    if ! databases=$(list_user_databases "$cnf"); then
        echo "未获取到需要备份的数据库" >&2
        rm -f -- "$cnf"
        return 1
    fi

    # Back up each database; remember failures but keep going
    while IFS= read -r db; do
        backup_database "$cnf" "$db" || failed=1
    done <<< "$databases"
    rm -f -- "$cnf"

    # Remove old backups
    find "$BACKUP_DIR" -name "*.sql.gz" -mtime +"$RETENTION_DAYS" -delete
    echo "已清理 $RETENTION_DAYS 天前的备份文件"

    # Show what this run produced
    echo "当前备份文件:"
    ls -lh "$BACKUP_DIR"/*_"$DATE".sql.gz 2>/dev/null || echo "没有找到今日备份文件"

    return "$failed"
}

main

文件系统备份脚本

#!/bin/bash

# Filesystem backup script: archives each source directory into BACKUP_DIR as
# <basename>_<date>.tar.gz, prunes archives older than RETENTION_DAYS days,
# then writes a short report next to the archives.
RETENTION_DAYS=30
DATE=$(date +%Y%m%d)
BACKUP_DIR="/backup/files"
SOURCE_DIRS=("/etc" "/home" "/var/www")
report="$BACKUP_DIR/backup_report_$DATE.txt"

# Make sure the destination exists
mkdir -p "$BACKUP_DIR"

# Archive every source directory that is present
for dir in "${SOURCE_DIRS[@]}"; do
    if [ ! -d "$dir" ]; then
        echo "目录 $dir 不存在,跳过"
        continue
    fi

    backup_name="$(basename "$dir")_$DATE.tar.gz"
    echo "备份目录: $dir -> $backup_name"

    # Members are stored relative to / (leading slash stripped); tar's own
    # diagnostics are discarded
    if tar -czf "$BACKUP_DIR/$backup_name" -C / "${dir#/}" 2>/dev/null; then
        echo "目录 $dir 备份成功"
    else
        echo "目录 $dir 备份失败"
    fi
done

# Prune archives past the retention window
find "$BACKUP_DIR" -name "*.tar.gz" -mtime +"$RETENTION_DAYS" -delete
echo "已清理 $RETENTION_DAYS 天前的备份文件"

# Write the report: header, timestamp, and a listing of today's archives
{
    echo "=== 备份报告 ==="
    echo "备份时间: $(date)"
    echo "备份文件:"
    ls -lh "$BACKUP_DIR"/*_"$DATE".tar.gz 2>/dev/null
} > "$report"

2. 恢复脚本

数据库恢复脚本

#!/bin/bash

# MySQL database restore script.
# Usage: <script> <backup file>   (plain .sql or gzip-compressed .sql.gz)
# The target database name is derived from the backup file name.
DB_USER="root"
DB_PASS="password"
BACKUP_FILE="$1"

# Derive the database name from a backup file name of the form
# <db>_<YYYYMMDD>_<HHMMSS>.sql[.gz] (or <db>_<YYYYMMDD>.sql[.gz]).
# Only the trailing timestamp is stripped, so names that themselves contain
# underscores (e.g. "my_app_db") survive intact.
#   $1 - path to the backup file
derive_db_name() {
    local name=${1##*/}
    name=${name%.gz}
    name=${name%.sql}
    if [[ $name =~ ^(.+)_[0-9]{8}_[0-9]{6}$ || $name =~ ^(.+)_[0-9]{8}$ ]]; then
        name=${BASH_REMATCH[1]}
    fi
    printf '%s\n' "$name"
}

# Entry point: validate input, confirm with the operator, then import.
# Returns 0 on success or cancellation, 1 on any error.
main() {
    local cnf confirm status

    if [ -z "$BACKUP_FILE" ]; then
        echo "用法: $0 <备份文件>"
        echo "示例: $0 /backup/mysql/mydb_20241220_120000.sql.gz"
        return 1
    fi

    if [ ! -f "$BACKUP_FILE" ]; then
        echo "备份文件不存在: $BACKUP_FILE"
        return 1
    fi

    # Extract the database name; a backtick would break out of the quoted
    # identifier in the CREATE DATABASE statement below.
    DB_NAME=$(derive_db_name "$BACKUP_FILE")
    if [[ -z "$DB_NAME" || "$DB_NAME" == *'`'* ]]; then
        echo "无法从文件名确定数据库名: $BACKUP_FILE" >&2
        return 1
    fi

    # Check archive integrity up front so a corrupt file is rejected before
    # anything is imported.
    if [[ "$BACKUP_FILE" == *.gz ]] && ! gzip -t "$BACKUP_FILE"; then
        echo "备份文件已损坏: $BACKUP_FILE" >&2
        return 1
    fi

    echo "准备恢复数据库: $DB_NAME"
    echo "备份文件: $BACKUP_FILE"
    read -r -p "确认恢复? (y/N): " confirm

    if [ "$confirm" != "y" ]; then
        echo "恢复已取消"
        return 0
    fi

    # Pass credentials through a private option file (mktemp creates it
    # readable by the owner only) so the password never shows up in argv.
    # NOTE: values containing a double quote or backslash need escaping here.
    cnf=$(mktemp) || return 1
    printf '[client]\nuser="%s"\npassword="%s"\n' "$DB_USER" "$DB_PASS" > "$cnf"

    # Create the database if needed, then load the data. pipefail (scoped to
    # a subshell) makes a decompression error fail the whole pipeline.
    status=0
    if ! mysql --defaults-extra-file="$cnf" -e "CREATE DATABASE IF NOT EXISTS \`$DB_NAME\`;"; then
        status=1
    elif [[ "$BACKUP_FILE" == *.gz ]]; then
        (
            set -o pipefail
            zcat "$BACKUP_FILE" | mysql --defaults-extra-file="$cnf" "$DB_NAME"
        ) || status=1
    else
        mysql --defaults-extra-file="$cnf" "$DB_NAME" < "$BACKUP_FILE" || status=1
    fi
    rm -f -- "$cnf"

    if [ "$status" -eq 0 ]; then
        echo "数据库 $DB_NAME 恢复成功"
    else
        echo "数据库 $DB_NAME 恢复失败"
        return 1
    fi
}

main

性能优化

1. 系统性能调优

内核参数优化

#!/bin/bash

# Kernel parameter tuning script: writes a sysctl drop-in file and applies
# it. Must run as root.
SYSCTL_CONF="/etc/sysctl.d/99-performance.conf"

if [ "$(id -u)" -ne 0 ]; then
    echo "请以root身份运行此脚本" >&2
    exit 1
fi

# Back up the file this script is about to overwrite (if it already exists)
if [ -f "$SYSCTL_CONF" ]; then
    cp -p "$SYSCTL_CONF" "$SYSCTL_CONF.backup"
fi

# Write the tuning drop-in. The quoted delimiter writes the body verbatim.
cat > "$SYSCTL_CONF" << 'EOF'
# 网络优化
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 87380 16777216
net.ipv4.tcp_wmem = 4096 65536 16777216
net.core.netdev_max_backlog = 5000

# 文件系统优化
fs.file-max = 2097152
vm.swappiness = 10
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5

# 安全优化
net.ipv4.conf.all.rp_filter = 1
net.ipv4.conf.default.rp_filter = 1
net.ipv4.icmp_echo_ignore_broadcasts = 1
net.ipv4.conf.all.accept_source_route = 0
net.ipv4.conf.default.accept_source_route = 0
EOF

# Select BBR only when the kernel offers it: setting an unavailable
# congestion-control algorithm makes sysctl reject the key.
modprobe tcp_bbr 2>/dev/null
if grep -qw bbr /proc/sys/net/ipv4/tcp_available_congestion_control 2>/dev/null; then
    echo "net.ipv4.tcp_congestion_control = bbr" >> "$SYSCTL_CONF"
else
    echo "内核不支持BBR,已跳过 tcp_congestion_control 设置" >&2
fi

# Apply the settings and report the real outcome
if sysctl -p "$SYSCTL_CONF"; then
    echo "系统性能优化完成"
else
    echo "应用内核参数失败,请检查 $SYSCTL_CONF" >&2
    exit 1
fi

2. Web服务器优化

Nginx性能优化

# Nginx performance tuning.
#
# The stock nginx.conf includes conf.d/*.conf from inside its http {} block,
# so a file in conf.d may contain http-level directives only. Writing
# worker_processes, events {} or a nested http {} there makes `nginx -t` fail.
# Main-context settings therefore belong in /etc/nginx/nginx.conf itself:
#
#   worker_processes auto;
#   events {
#       worker_connections 1024;
#       use epoll;
#       multi_accept on;
#   }
#
# If `nginx -t` reports a duplicate directive (for example gzip or
# keepalive_timeout already set in nginx.conf), keep it in one place only.
sudo tee /etc/nginx/conf.d/performance.conf << 'EOF'
# 开启gzip压缩
gzip on;
gzip_vary on;
gzip_min_length 1024;
gzip_types text/plain text/css text/xml text/javascript application/javascript application/xml+rss application/json;

# 缓存设置
open_file_cache max=1000 inactive=20s;
open_file_cache_valid 30s;
open_file_cache_min_uses 2;
open_file_cache_errors on;

# 连接超时
keepalive_timeout 65;
keepalive_requests 100;

# 缓冲区大小
client_body_buffer_size 128k;
client_max_body_size 10m;
client_header_buffer_size 1k;
large_client_header_buffers 4 4k;
EOF

# Validate the configuration and reload Nginx only if the test passes
sudo nginx -t && sudo systemctl reload nginx

安全加固

1. 系统安全检查脚本

安全审计脚本

#!/bin/bash

# System security audit script: collects account, network, file-permission
# and auth-log findings into a dated report file.
REPORT_FILE="/tmp/security_audit_$(date +%Y%m%d).txt"

# The report lists accounts, listening ports and auth-log excerpts, so a
# newly created report should be readable by the invoking user only.
umask 077

# Print the last 10 auth-log lines containing a pattern, or a fallback
# message when no log is readable or nothing matches.
#   $1 - grep pattern, $2 - fallback message
# Checks /var/log/auth.log first, then /var/log/secure.
show_auth_log_matches() {
    local pattern=$1 fallback=$2 log_file matches=""

    for log_file in /var/log/auth.log /var/log/secure; do
        if [ -r "$log_file" ]; then
            matches=$(grep -- "$pattern" "$log_file" | tail -10)
            break
        fi
    done

    # Decide on the captured text: the exit status of a "grep | tail"
    # pipeline is tail's, which is 0 even when grep found nothing.
    if [ -n "$matches" ]; then
        printf '%s\n' "$matches"
    else
        echo "$fallback"
    fi
}

# List up to 20 regular files carrying the given permission bits, skipping
# the /proc and /sys pseudo-filesystems.
#   $1 - mode argument for find -perm (e.g. -4000)
find_by_perm() {
    find / \( -path /proc -o -path /sys \) -prune -o \
        -perm "$1" -type f -print 2>/dev/null | head -20
}

{
    echo "=== 系统安全审计报告 ==="
    echo "检查时间: $(date)"
    echo "主机名: $(hostname)"
    echo

    echo "=== 用户账户检查 ==="
    echo "UID为0的用户:"
    awk -F: '$3 == 0 {print $1}' /etc/passwd
    echo
    echo "空密码用户:"
    awk -F: '$2 == "" {print $1}' /etc/shadow 2>/dev/null || echo "无法访问shadow文件"
    echo
    echo "最近登录用户:"
    last -n 10
    echo

    echo "=== 网络服务检查 ==="
    echo "监听端口:"
    ss -tuln
    echo

    echo "=== 文件权限检查 ==="
    echo "SUID文件:"
    find_by_perm -4000
    echo
    echo "SGID文件:"
    find_by_perm -2000
    echo
    echo "全局可写文件:"
    find_by_perm -002
    echo

    echo "=== 系统日志检查 ==="
    echo "最近的认证失败:"
    show_auth_log_matches "authentication failure" "无认证日志"
    echo
    echo "最近的sudo使用:"
    show_auth_log_matches "sudo:" "无sudo日志"

} > "$REPORT_FILE"

echo "安全审计报告已生成: $REPORT_FILE"

2. 自动化安全加固

基础安全加固脚本

#!/bin/bash

# Baseline hardening script for hosts that use apt and ufw. Must run as root.
#
# WARNING: this disables SSH password logins and root logins and moves sshd
# to SSH_PORT. Make sure key-based login works for a non-root account first,
# and keep the current session open until a new login has been verified.
SSH_PORT=2222

echo "开始系统安全加固..."

# Update the system
echo "更新系统包..."
apt update && apt upgrade -y

# Configure the firewall. All rules are added before ufw is enabled, and the
# new SSH port is opened as well: with only "ssh" (22) allowed, the port
# change below would lock every remote session out.
echo "配置防火墙..."
ufw default deny incoming
ufw default allow outgoing
ufw allow ssh # keep 22 open until the new port is confirmed to work
ufw allow "${SSH_PORT}/tcp"
ufw allow http
ufw allow https
ufw --force enable

# Harden sshd. Each pattern matches the directive whether or not it is
# commented out and whatever its current value is, not just one specific
# commented default.
echo "配置SSH安全..."
cp /etc/ssh/sshd_config /etc/ssh/sshd_config.backup
sed -i -E \
    -e 's/^#?[[:space:]]*PermitRootLogin[[:space:]].*/PermitRootLogin no/' \
    -e 's/^#?[[:space:]]*PasswordAuthentication[[:space:]].*/PasswordAuthentication no/' \
    -e "s/^#?[[:space:]]*Port[[:space:]].*/Port ${SSH_PORT}/" \
    /etc/ssh/sshd_config

# Restart sshd only when the edited file passes its own syntax check;
# otherwise put the backup back so the daemon keeps a valid configuration.
if sshd -t; then
    systemctl restart sshd
else
    echo "sshd 配置校验失败,已恢复备份" >&2
    cp /etc/ssh/sshd_config.backup /etc/ssh/sshd_config
fi

# Install fail2ban
echo "安装fail2ban..."
apt install fail2ban -y
systemctl enable fail2ban
systemctl start fail2ban

# Set up automatic security updates
echo "配置自动安全更新..."
apt install unattended-upgrades -y
dpkg-reconfigure -plow unattended-upgrades

echo "安全加固完成"

故障排除

1. 系统故障诊断脚本

综合故障诊断

#!/bin/bash

# System troubleshooting script: prints a point-in-time snapshot of host
# identity, resource usage, networking, failed units and recent logs.

# Print a "=== title ===" section banner
banner() {
    echo "=== $1 ==="
}

banner "系统故障诊断"
echo "诊断时间: $(date)"
echo

banner "系统基本信息"
echo "主机名: $(hostname)"
echo "内核版本: $(uname -r)"
echo "系统运行时间: $(uptime)"
echo

banner "资源使用情况"
echo "CPU使用率:"
top -bn1 | grep "Cpu(s)"
echo
echo "内存使用:"
free -h
echo
echo "磁盘使用:"
df -h
echo

banner "网络连接"
echo "网络接口:"
ip addr show
echo
echo "路由表:"
ip route show
echo
echo "DNS配置:"
cat /etc/resolv.conf
echo

banner "服务状态"
echo "失败的服务:"
systemctl --failed
echo

banner "最近的系统日志"
echo "内核消息:"
dmesg | tail -20
echo
echo "系统日志:"
journalctl -n 20 --no-pager

运维最佳实践

1. 定时任务配置

运维定时任务

# Open the current user's crontab in an editor
crontab -e

# Entries to add (fields: minute hour day-of-month month day-of-week command)
# Every hour, on the hour: run the system monitor
0 * * * * /usr/local/bin/system_monitor.sh

# Every day at 02:00: run the backup
0 2 * * * /usr/local/bin/backup.sh

# Every Sunday at 03:00: run the log cleanup
0 3 * * 0 /usr/local/bin/log_cleanup.sh

# On the 1st of each month at 04:00: run the security audit
0 4 1 * * /usr/local/bin/security_audit.sh

2. 运维文档模板

故障处理记录

# Write a blank incident-report template (Markdown). The quoted delimiter
# writes the heredoc body verbatim.
template=/tmp/incident_template.md
cat << 'EOF' > "$template"
# 故障处理记录

## 基本信息
- 故障时间: 
- 影响范围: 
- 严重级别: 
- 处理人员: 

## 故障描述
- 故障现象: 
- 用户反馈: 
- 监控告警: 

## 故障分析
- 可能原因: 
- 排查过程: 
- 根本原因: 

## 解决方案
- 临时措施: 
- 永久方案: 
- 验证结果: 

## 预防措施
- 监控改进: 
- 流程优化: 
- 技术改进: 

## 经验总结
- 经验教训: 
- 改进建议: 
EOF

总结

Linux运维实战要点:

  1. 环境搭建:熟练部署LAMP/LNMP等Web环境
  2. 监控告警:建立完善的系统和服务监控体系
  3. 备份恢复:制定可靠的备份策略和恢复流程
  4. 性能优化:持续优化系统和应用性能
  5. 安全加固:实施全面的安全防护措施
  6. 故障处理:快速诊断和解决系统故障
  7. 文档管理:建立完善的运维文档体系

课程总结

通过本Linux教程系列的学习,你已经掌握了:

  • 基础知识:Linux系统概念、发行版选择、系统安装
  • 命令操作:文件管理、文本处理、系统监控等基础命令
  • 系统管理:用户权限、进程服务、网络配置等管理技能
  • 高级技能:Shell脚本编程、系统调优、安全配置
  • 运维实战:监控告警、备份恢复、故障处理等实际应用

继续学习建议:

  1. 多实践,在实际环境中应用所学知识
  2. 关注Linux社区动态,学习新技术
  3. 深入学习容器、云计算等现代技术
  4. 参与开源项目,提升技术水平

💡 小贴士:Linux运维是一个需要持续学习和实践的领域。建议建立自己的实验环境,多动手操作,积累实战经验。记住:实践是最好的老师!

Vue3 + TypeScript 企业级项目实战

课程推荐

Vue3 + TypeScript 企业级项目实战
Python 全栈开发工程师培训

热门课程

Python 全栈开发工程师培训

📚 文章对你有帮助?请关注我的公众号,万分感谢!

获取更多优质技术文章,第一时间掌握最新技术动态

关注公众号

关注公众号

第一时间获取最新技术文章

添加微信

添加微信

技术交流 · 问题答疑 · 学习指导

评论讨论

欢迎留下你的想法和建议