Performance Tuning an Nginx Reverse Proxy

Nginx plays a central role in our architecture as a reverse proxy. Going from a few thousand QPS on a single machine to a cluster handling hundreds of thousands of concurrent connections, I have accumulated a fair amount of Nginx tuning experience. This post shares optimization strategies that have proven effective in production.

Basic Performance Tuning

Worker process configuration

# nginx.conf core settings
user nginx;
worker_processes auto;              # one worker per CPU core

# pin worker processes to specific CPU cores
worker_cpu_affinity auto;

events {
    # maximum connections per worker
    worker_connections 65535;
    use epoll;                      # use epoll on Linux
    multi_accept on;                # accept multiple new connections at once
}

# per-worker open file descriptor limit
worker_rlimit_nofile 100000;
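
worker_rlimit_nofile only helps if the operating system actually allows that many open files for the nginx processes. A minimal sketch of how to check and raise the OS limit, assuming a systemd-managed nginx service (paths and the PID lookup are illustrative):

# check the current limit of a running worker process
cat /proc/$(pgrep -f 'nginx: worker' | head -1)/limits | grep 'open files'

# raise the limit via a systemd drop-in, then restart nginx
mkdir -p /etc/systemd/system/nginx.service.d
cat > /etc/systemd/system/nginx.service.d/limits.conf << 'EOF'
[Service]
LimitNOFILE=100000
EOF
systemctl daemon-reload && systemctl restart nginx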

Connection handling optimization

http {
    # keep-alive settings
    keepalive_timeout 60s;          # how long idle client connections are kept open
    keepalive_requests 10000;       # maximum requests per keep-alive connection

    # client settings
    client_max_body_size 10m;       # maximum request body size
    client_body_timeout 10s;        # request body read timeout
    client_header_timeout 10s;      # request header read timeout

    # response settings
    send_timeout 10s;               # timeout for sending the response to the client
    sendfile on;                    # zero-copy file transfer
    tcp_nopush on;                  # fill packets before sending (with sendfile)
    tcp_nodelay on;                 # disable Nagle's algorithm
}
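
A quick way to confirm client keep-alive is working is to request the same URL twice in one curl invocation and look for connection reuse in the verbose output (the URL reuses the test endpoint from the benchmark script later in this post; curl's exact wording varies by version):

# the second request should report a re-used connection rather than a new connect
curl -sv -o /dev/null -o /dev/null \
    http://localhost/api/test http://localhost/api/test 2>&1 \
    | grep -iE 'connected to|re-us'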

Reverse Proxy Optimization

upstream configuration

# backend server pool
upstream backend_pool {
    # load-balancing strategy
    least_conn;                     # pick the server with the fewest active connections

    # backend servers
    server 192.168.1.10:8080 weight=3 max_fails=2 fail_timeout=10s;
    server 192.168.1.11:8080 weight=3 max_fails=2 fail_timeout=10s;
    server 192.168.1.12:8080 weight=2 max_fails=2 fail_timeout=10s backup;

    # connection pool to the upstreams
    keepalive 300;                  # keep up to 300 idle connections to upstream servers
    keepalive_requests 1000;        # maximum requests per upstream connection
    keepalive_timeout 60s;          # how long idle upstream connections are kept
}

server {
    listen 80;
    server_name api.example.com;

    location /api/ {
        proxy_pass http://backend_pool;

        # connection reuse
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_connect_timeout 5s;   # upstream connect timeout
        proxy_read_timeout 30s;     # upstream read timeout
        proxy_send_timeout 30s;     # upstream send timeout

        # forwarded headers
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # buffering
        proxy_buffering on;
        proxy_buffer_size 8k;        # buffer for the first part of the response (headers)
        proxy_buffers 8 8k;          # number and size of response buffers
        proxy_busy_buffers_size 16k; # buffers that may be busy sending to the client
    }
}
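
To verify that the upstream keepalive pool is actually being reused, count established connections from the proxy host to the backends: under steady load the count should stay roughly flat instead of growing with every request. A small sketch matching the pool above (backend IPs and port taken from the config):

# total established connections to the backend port
ss -tn state established '( dport = :8080 )' | tail -n +2 | wc -l

# break the count down per backend address
ss -tn state established '( dport = :8080 )' \
    | grep -oE '192\.168\.1\.[0-9]+:8080' | sort | uniq -c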

Case Study: API Gateway Optimization

Symptoms: during peak hours the API's P99 latency exceeded 3 seconds, and the Nginx error log filled with upstream timeout messages.

Analysis

# 1. Check Nginx status
curl http://localhost/nginx_status
# Active connections: 15000
# server accepts handled requests: 1000000 1000000 2000000
# Reading: 100 Writing: 200 Waiting: 14700

# 2. Inspect the error log
tail -f /var/log/nginx/error.log | grep timeout
# upstream timed out (110: Connection timed out) while connecting to upstream
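
To see whether the timeouts were concentrated on one backend or spread across the pool, the error log can be aggregated per upstream address. A rough sketch, assuming the standard error-log phrasing shown above, where the failing backend appears as upstream: "http://ip:port/...":

# count "upstream timed out" errors per backend address
grep 'upstream timed out' /var/log/nginx/error.log \
    | grep -oE 'upstream: "http://[0-9.]+:[0-9]+' \
    | sort | uniq -c | sort -rn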

Optimization

# tuned upstream configuration
upstream api_servers {
    # add more backend servers
    server 10.0.1.10:8080 weight=5 max_fails=3 fail_timeout=30s;
    server 10.0.1.11:8080 weight=5 max_fails=3 fail_timeout=30s;
    server 10.0.1.12:8080 weight=5 max_fails=3 fail_timeout=30s;
    server 10.0.1.13:8080 weight=3 max_fails=3 fail_timeout=30s;

    # larger connection pool
    keepalive 500;                  # more idle upstream connections
    keepalive_requests 10000;       # more requests per upstream connection
}

# tuned proxy configuration
location /api/ {
    proxy_pass http://api_servers;

    # adjusted timeouts
    proxy_connect_timeout 3s;       # fail fast on connect
    proxy_read_timeout 60s;         # allow slower responses
    proxy_send_timeout 60s;

    # enable connection reuse
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # larger buffers
    proxy_buffering on;
    proxy_buffer_size 16k;
    proxy_buffers 16 16k;
    proxy_busy_buffers_size 32k;
}

Result: P99 latency dropped to 500 ms and the error rate fell from 5% to 0.1%.

Caching Strategy Optimization

Static file caching

# static asset caching
location ~* \.(jpg|jpeg|png|gif|ico|css|js)$ {
    expires 1y;                     # cache for one year
    add_header Cache-Control "public, immutable";
    add_header Vary "Accept-Encoding";

    # compression
    gzip on;
    gzip_vary on;
    gzip_comp_level 6;
    gzip_types
        text/plain
        text/css
        text/xml
        text/javascript
        application/javascript
        application/json
        application/xml+rss;
}
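
A simple sanity check is to fetch a static asset with an Accept-Encoding header and confirm the expected caching and compression headers come back (the URL and file name below are placeholders):

# expect Cache-Control: public, immutable; an Expires a year out; Content-Encoding: gzip
curl -s -o /dev/null -D - -H 'Accept-Encoding: gzip' http://api.example.com/assets/app.js \
    | grep -iE 'cache-control|expires|content-encoding|vary'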

API response caching

# cache path and zone
proxy_cache_path /var/cache/nginx/api
    levels=1:2
    keys_zone=api_cache:100m
    max_size=10g
    inactive=60m
    use_temp_path=off;

server {
    location /api/static/ {
        proxy_pass http://backend_pool;

        # cache settings
        proxy_cache api_cache;
        proxy_cache_key "$scheme$request_method$host$request_uri";
        proxy_cache_valid 200 302 10m;  # cache successful responses for 10 minutes
        proxy_cache_valid 404 1m;       # cache 404s for 1 minute
        proxy_cache_valid any 5m;       # cache everything else for 5 minutes

        # cache behavior
        proxy_cache_use_stale error timeout invalid_header updating;
        proxy_cache_lock on;            # collapse concurrent misses for the same key
        proxy_cache_lock_timeout 3s;

        # expose cache status to clients
        add_header X-Cache-Status $upstream_cache_status;
    }
}
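
Because the config above exposes $upstream_cache_status, cache behavior is easy to verify from the command line: the first request should report MISS and an immediate repeat of the same URL should report HIT (the path below is illustrative):

# first request populates the cache, the second should be served from it
for i in 1 2; do
    curl -s -o /dev/null -D - http://api.example.com/api/static/catalog.json \
        | grep -i 'x-cache-status'
done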

Rate Limiting and Security Hardening

Request limiting configuration

http {
    # rate-limit zones
    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
    limit_req_zone $server_name zone=perserver:10m rate=1000r/s;

    # connection-limit zone
    limit_conn_zone $binary_remote_addr zone=addr:10m;

    server {
        # apply the limits
        limit_req zone=api burst=20 nodelay;    # per-client API limit: 10 r/s, burst of 20
        limit_req zone=perserver burst=100;     # per-server limit
        limit_conn addr 10;                     # at most 10 connections per client IP

        # request size and bandwidth limits
        client_body_timeout 10s;
        client_max_body_size 10m;
        limit_rate_after 1m;                    # start throttling after the first 1 MB
        limit_rate 500k;                        # cap download speed at 500 KB/s

        location /api/ {
            # stricter per-endpoint limit
            limit_req zone=api burst=5 nodelay;
            proxy_pass http://backend_pool;
        }
    }
}
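
The easiest way to confirm the limits are enforced is to fire a short burst of requests and tally the status codes; by default nginx rejects rate-limited requests with 503 unless limit_req_status says otherwise (the endpoint below is a placeholder):

# 30 back-to-back requests against the /api/ location (rate=10r/s, burst=5 nodelay):
# only the first handful should return 200, the rest 503
for i in $(seq 1 30); do
    curl -s -o /dev/null -w '%{http_code}\n' http://api.example.com/api/ping
done | sort | uniq -c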

Security header configuration

server {
    # security headers
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header X-XSS-Protection "1; mode=block" always;
    add_header Referrer-Policy "no-referrer-when-downgrade" always;
    add_header Content-Security-Policy "default-src 'self'" always;

    # hide the nginx version
    server_tokens off;

    # reject unexpected HTTP methods
    if ($request_method !~ ^(GET|HEAD|POST)$) {
        return 405;
    }

    # block access to dotfiles
    location ~ /\. {
        deny all;
    }
}

Monitoring and Logging Optimization

Access log format

# custom log format with upstream timing
log_format main_ext '$remote_addr - $remote_user [$time_local] '
                    '"$request" $status $body_bytes_sent '
                    '"$http_referer" "$http_user_agent" '
                    '$request_time $upstream_response_time '
                    '$upstream_addr $upstream_status';

# apply the format with buffered writes
access_log /var/log/nginx/access.log main_ext buffer=64k flush=1m;

# error log
error_log /var/log/nginx/error.log warn;
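
With request and upstream timings in the access log, rough latency percentiles can be pulled straight out of it. A minimal sketch, assuming the main_ext format above and a single upstream entry per line (no retries), so that $request_time is the fourth field from the end:

# approximate P50 / P95 / P99 of $request_time over the last 100k requests
tail -n 100000 /var/log/nginx/access.log \
    | awk '{ print $(NF-3) }' \
    | sort -n \
    | awk '{ a[NR] = $1 }
           END { print "p50=" a[int(NR*0.50)], "p95=" a[int(NR*0.95)], "p99=" a[int(NR*0.99)] }'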

Performance monitoring configuration

# enable the status endpoints
server {
    listen 127.0.0.1:80;
    server_name localhost;

    location /nginx_status {
        stub_status on;
        access_log off;
        allow 127.0.0.1;
        deny all;
    }

    # detailed per-vhost metrics (requires nginx-module-vts)
    location /status {
        vhost_traffic_status_display;
        vhost_traffic_status_display_format html;
        access_log off;
    }
}
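
stub_status only exposes counters, but sampling it twice gives a quick requests-per-second figure without extra tooling. A small sketch against the local endpoint configured above (the third line of stub_status output holds the accepts/handled/requests counters):

# sample the total request counter twice, 5 seconds apart
prev=$(curl -s http://127.0.0.1/nginx_status | awk 'NR==3 {print $3}')
sleep 5
curr=$(curl -s http://127.0.0.1/nginx_status | awk 'NR==3 {print $3}')
echo "requests/s: $(( (curr - prev) / 5 ))"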

Advanced Tuning Techniques

SSL optimization

server {
    listen 443 ssl http2;           # enable HTTP/2

    # certificates
    ssl_certificate /path/to/cert.pem;
    ssl_certificate_key /path/to/key.pem;

    # TLS settings
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES256-GCM-SHA384;
    ssl_prefer_server_ciphers off;
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;
    ssl_session_tickets off;

    # OCSP stapling
    ssl_stapling on;
    ssl_stapling_verify on;
    resolver 8.8.8.8 8.8.4.4 valid=300s;
    resolver_timeout 5s;
}
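
After reloading, the negotiated protocol, cipher, and OCSP stapling can be checked with openssl s_client (the host name is a placeholder, and the exact output wording varies between OpenSSL versions):

# confirm the TLS version in use and that a stapled OCSP response is returned
echo | openssl s_client -connect api.example.com:443 -servername api.example.com -status 2>/dev/null \
    | grep -E 'Protocol|Cipher|OCSP Response Status'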

Dynamic upstream

# dynamic upstreams require NGINX Plus or a third-party module
upstream dynamic_backend {
    zone backend 64k;

    # pull the server list from service discovery
    # (NGINX Plus syntax; also needs a resolver and the "resolve" parameter)
    server backend1.example.com service=backend weight=5;
    server backend2.example.com service=backend weight=5;

    # active health checks (NGINX Plus; in stock Plus configs this goes in the proxied location)
    health_check interval=5s fails=3 passes=2 uri=/health;
}

Automated configuration management

#!/bin/bash
# nginx_config_update.sh - regenerate the nginx upstream config from service discovery

# fetch the list of healthy backend servers from Consul
BACKEND_SERVERS=$(curl -s http://consul:8500/v1/health/service/api-server | jq -r '.[] | select(.Checks[].Status == "passing") | .Service.Address + ":" + (.Service.Port | tostring)')

# write the new upstream block
cat > /etc/nginx/conf.d/upstream.conf << EOF
upstream backend_pool {
    least_conn;
    keepalive 300;
EOF

for server in $BACKEND_SERVERS; do
    echo "    server $server weight=1 max_fails=2 fail_timeout=10s;" >> /etc/nginx/conf.d/upstream.conf
done

echo "}" >> /etc/nginx/conf.d/upstream.conf

# test the config and reload
if nginx -t; then
    nginx -s reload
    echo "Nginx configuration updated successfully"
else
    echo "Nginx configuration test failed"
    exit 1
fi
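
One way to run this is on a short interval from cron, so the upstream membership tracks service discovery with at most about a minute of lag (the script path and schedule below are illustrative):

# crontab entry: run the updater every minute and keep a log of its output
* * * * * /usr/local/bin/nginx_config_update.sh >> /var/log/nginx_config_update.log 2>&1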

Performance Test Validation

Benchmark script

#!/bin/bash
# nginx_benchmark.sh

URL="http://localhost/api/test"
CONCURRENCY=100
REQUESTS=10000

echo "Testing Nginx performance..."

# benchmark with ab (keep-alive enabled)
ab -n $REQUESTS -c $CONCURRENCY -k $URL

# benchmark with wrk
wrk -t12 -c400 -d30s --latency $URL

# poll Nginx status (best run in a separate terminal while the benchmarks are running)
while true; do
    curl -s http://localhost/nginx_status
    sleep 5
done

With systematic tuning, an Nginx reverse proxy can comfortably handle tens of thousands of concurrent connections. The key is to adjust the configuration to your actual traffic patterns and to build solid monitoring so you can keep improving. Remember that optimization is an iterative process: test, measure, and adjust continuously.