DevOps作为现代软件开发的核心理念,打破了开发与运维之间的壁垒。本文将基于实际项目经验,详细介绍DevOps的完整实践链路,从代码提交到生产部署的全流程自动化。
# .github/workflows/ci-cd.yml
name: CI/CD Pipeline
on:
push:
branches: [main, develop]
pull_request:
branches: [main]
env:
NODE_VERSION: '18'
DOCKER_IMAGE: myapp
KUBE_NAMESPACE: production
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [16, 18, 20]
steps:
- uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Run linting
run: npm run lint
- name: Run unit tests
run: npm run test:unit
- name: Run integration tests
run: npm run test:integration
- name: Generate coverage report
run: npm run test:coverage
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
security-scan:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Run security audit
run: npm audit --audit-level=moderate
- name: Run Snyk vulnerability scan
uses: snyk/actions/node@master
env:
SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
build:
needs: [test, security-scan]
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@v4
- name: Setup Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: .
push: true
tags: |
${{ env.DOCKER_IMAGE }}:latest
${{ env.DOCKER_IMAGE }}:${{ github.sha }}
cache-from: type=gha
cache-to: type=gha,mode=max
deploy:
needs: build
runs-on: ubuntu-latest
environment: production
steps:
- uses: actions/checkout@v4
- name: Setup kubectl
uses: azure/setup-kubectl@v3
with:
version: 'latest'
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-west-2
- name: Update kubeconfig
run: aws eks update-kubeconfig --name production-cluster
- name: Deploy to Kubernetes
run: |
kubectl set image deployment/myapp-deployment \
myapp=${{ env.DOCKER_IMAGE }}:${{ github.sha }} \
-n ${{ env.KUBE_NAMESPACE }}
kubectl rollout status deployment/myapp-deployment -n ${{ env.KUBE_NAMESPACE }}
# 环境特定的配置管理
# environments/development.yml
apiVersion: v1
kind: ConfigMap
metadata:
name: app-config
namespace: development
data:
NODE_ENV: "development"
LOG_LEVEL: "debug"
DATABASE_URL: "postgresql://dev-db:5432/myapp"
REDIS_URL: "redis://dev-redis:6379"
---
# environments/production.yml
apiVersion: v1
kind: ConfigMap
metadata:
name: app-config
namespace: production
data:
NODE_ENV: "production"
LOG_LEVEL: "warn"
DATABASE_URL: ${{ secrets.PROD_DATABASE_URL }}
REDIS_URL: ${{ secrets.PROD_REDIS_URL }}
# Dockerfile
# 构建阶段
FROM node:18-alpine AS builder
WORKDIR /app
# 复制依赖文件
COPY package*.json ./
RUN npm ci --only=production && npm cache clean --force
# 复制源代码并构建
COPY . .
RUN npm run build
# 运行时阶段
FROM node:18-alpine AS runtime
# 创建非root用户
RUN addgroup -g 1001 -S nodejs
RUN adduser -S nextjs -u 1001
WORKDIR /app
# 复制构建产物
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/package.json ./package.json
# 设置权限
USER nextjs
# 健康检查
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD curl -f http://localhost:3000/health || exit 1
EXPOSE 3000
CMD ["npm", "start"]
# docker-compose.dev.yml
version: '3.8'
services:
app:
build:
context: .
target: builder
ports:
- "3000:3000"
volumes:
- .:/app
- /app/node_modules
environment:
- NODE_ENV=development
- WATCH_CHANGES=true
depends_on:
- postgres
- redis
networks:
- app-network
postgres:
image: postgres:15-alpine
environment:
POSTGRES_DB: myapp_dev
POSTGRES_USER: developer
POSTGRES_PASSWORD: dev_password
volumes:
- postgres_data:/var/lib/postgresql/data
- ./database/init:/docker-entrypoint-initdb.d
ports:
- "5432:5432"
networks:
- app-network
redis:
image: redis:7-alpine
command: redis-server --requirepass dev_password
volumes:
- redis_data:/data
ports:
- "6379:6379"
networks:
- app-network
nginx:
image: nginx:alpine
ports:
- "80:80"
volumes:
- ./nginx/dev.conf:/etc/nginx/nginx.conf
depends_on:
- app
networks:
- app-network
volumes:
postgres_data:
redis_data:
networks:
app-network:
driver: bridge
# k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: myapp-deployment
namespace: production
labels:
app: myapp
version: v1
spec:
replicas: 3
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
maxUnavailable: 0
selector:
matchLabels:
app: myapp
template:
metadata:
labels:
app: myapp
version: v1
spec:
containers:
- name: myapp
image: myapp:latest
imagePullPolicy: Always
ports:
- containerPort: 3000
name: http
env:
- name: NODE_ENV
value: "production"
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: app-secrets
key: database-url
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
livenessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /ready
port: 3000
initialDelaySeconds: 5
periodSeconds: 5
lifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 15"]
---
apiVersion: v1
kind: Service
metadata:
name: myapp-service
namespace: production
spec:
selector:
app: myapp
ports:
- port: 80
targetPort: 3000
protocol: TCP
type: ClusterIP
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: myapp-ingress
namespace: production
annotations:
kubernetes.io/ingress.class: "nginx"
cert-manager.io/cluster-issuer: "letsencrypt-prod"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
tls:
- hosts:
- myapp.example.com
secretName: myapp-tls
rules:
- host: myapp.example.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: myapp-service
port:
number: 80
# helm/myapp/values.yaml
replicaCount: 3
image:
repository: myapp
tag: latest
pullPolicy: IfNotPresent
service:
type: ClusterIP
port: 80
targetPort: 3000
ingress:
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
hosts:
- host: myapp.example.com
paths:
- path: /
pathType: ImplementationSpecific
tls:
- secretName: myapp-tls
hosts:
- myapp.example.com
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 250m
memory: 256Mi
autoscaling:
enabled: true
minReplicas: 3
maxReplicas: 10
targetCPUUtilizationPercentage: 80
# helm/myapp/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "myapp.fullname" . }}
labels:
{{- include "myapp.labels" . | nindent 4 }}
spec:
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "myapp.selectorLabels" . | nindent 6 }}
template:
metadata:
labels:
{{- include "myapp.selectorLabels" . | nindent 8 }}
spec:
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
containerPort: {{ .Values.service.targetPort }}
protocol: TCP
resources:
{{- toYaml .Values.resources | nindent 12 }}
// 应用监控指标
const prometheus = require('prom-client');
// 创建指标收集器
const httpDuration = new prometheus.Histogram({
name: 'http_request_duration_seconds',
help: 'Duration of HTTP requests in seconds',
labelNames: ['method', 'route', 'status_code'],
buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10]
});
const httpRequestsTotal = new prometheus.Counter({
name: 'http_requests_total',
help: 'Total number of HTTP requests',
labelNames: ['method', 'route', 'status_code']
});
const activeConnections = new prometheus.Gauge({
name: 'active_connections',
help: 'Number of active connections'
});
// 中间件集成
function prometheusMiddleware(req, res, next) {
const start = Date.now();
res.on('finish', () => {
const duration = (Date.now() - start) / 1000;
const route = req.route ? req.route.path : req.path;
httpDuration
.labels(req.method, route, res.statusCode)
.observe(duration);
httpRequestsTotal
.labels(req.method, route, res.statusCode)
.inc();
});
next();
}
// 健康检查端点
app.get('/metrics', (req, res) => {
res.set('Content-Type', prometheus.register.contentType);
res.end(prometheus.register.metrics());
});
app.get('/health', (req, res) => {
res.json({
status: 'healthy',
timestamp: new Date().toISOString(),
uptime: process.uptime(),
memory: process.memoryUsage()
});
});
// 日志配置
const winston = require('winston');
const { ElasticsearchTransport } = require('winston-elasticsearch');
const logger = winston.createLogger({
level: process.env.LOG_LEVEL || 'info',
format: winston.format.combine(
winston.format.timestamp(),
winston.format.errors({ stack: true }),
winston.format.json()
),
defaultMeta: {
service: 'myapp',
version: process.env.APP_VERSION,
environment: process.env.NODE_ENV
},
transports: [
new winston.transports.Console({
format: winston.format.combine(
winston.format.colorize(),
winston.format.simple()
)
}),
new ElasticsearchTransport({
level: 'info',
clientOpts: {
node: process.env.ELASTICSEARCH_URL
},
index: 'app-logs'
})
]
});
// 请求日志中间件
function requestLogger(req, res, next) {
const start = Date.now();
res.on('finish', () => {
const duration = Date.now() - start;
logger.info('HTTP Request', {
method: req.method,
url: req.url,
statusCode: res.statusCode,
duration,
userAgent: req.get('User-Agent'),
ip: req.ip,
userId: req.user?.id
});
});
next();
}
// 错误日志
function errorHandler(error, req, res, next) {
logger.error('Application Error', {
error: error.message,
stack: error.stack,
method: req.method,
url: req.url,
userId: req.user?.id,
requestId: req.id
});
res.status(500).json({
error: 'Internal Server Error',
requestId: req.id
});
}
# 安全的Dockerfile
FROM node:18-alpine AS base
# 安装安全更新
RUN apk update && apk upgrade && \
apk add --no-cache dumb-init && \
rm -rf /var/cache/apk/*
# 创建应用用户
RUN addgroup -g 1001 -S nodejs && \
adduser -S myapp -u 1001 -G nodejs
# 设置工作目录
WORKDIR /app
# 复制文件并设置权限
COPY --chown=myapp:nodejs package*.json ./
RUN npm ci --only=production && npm cache clean --force
COPY --chown=myapp:nodejs . .
# 移除不必要的文件
RUN rm -rf .git .github .gitignore README.md
# 切换到非root用户
USER myapp
# 使用dumb-init作为入口点
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "server.js"]
# 扫描漏洞
# docker run --rm -v /var/run/docker.sock:/var/run/docker.sock \
# -v $(pwd):/src aquasec/trivy fs /src
# k8s/security.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: myapp-sa
namespace: production
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: production
name: myapp-role
rules:
- apiGroups: [""]
resources: ["configmaps", "secrets"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: myapp-rolebinding
namespace: production
subjects:
- kind: ServiceAccount
name: myapp-sa
namespace: production
roleRef:
kind: Role
name: myapp-role
apiGroup: rbac.authorization.k8s.io
---
apiVersion: v1
kind: Pod
metadata:
name: myapp-pod
spec:
serviceAccountName: myapp-sa
securityContext:
runAsNonRoot: true
runAsUser: 1001
fsGroup: 1001
containers:
- name: myapp
image: myapp:latest
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
volumeMounts:
- name: temp-volume
mountPath: /tmp
volumes:
- name: temp-volume
emptyDir: {}
// 单元测试 (Jest)
// __tests__/user.service.test.js
const UserService = require('../src/services/user.service');
const { User } = require('../src/models');
jest.mock('../src/models');
describe('UserService', () => {
beforeEach(() => {
jest.clearAllMocks();
});
describe('createUser', () => {
it('should create a new user successfully', async () => {
const userData = {
name: 'John Doe',
email: 'john@example.com'
};
User.create.mockResolvedValue({ id: 1, ...userData });
const result = await UserService.createUser(userData);
expect(User.create).toHaveBeenCalledWith(userData);
expect(result).toEqual({ id: 1, ...userData });
});
it('should throw error for invalid email', async () => {
const userData = {
name: 'John Doe',
email: 'invalid-email'
};
await expect(UserService.createUser(userData))
.rejects.toThrow('Invalid email format');
});
});
});
// 集成测试 (Supertest)
// __tests__/integration/auth.test.js
const request = require('supertest');
const app = require('../../src/app');
describe('Authentication API', () => {
describe('POST /api/auth/login', () => {
it('should login successfully with valid credentials', async () => {
const response = await request(app)
.post('/api/auth/login')
.send({
email: 'test@example.com',
password: 'password123'
})
.expect(200);
expect(response.body).toHaveProperty('token');
expect(response.body.user.email).toBe('test@example.com');
});
it('should return 401 for invalid credentials', async () => {
await request(app)
.post('/api/auth/login')
.send({
email: 'test@example.com',
password: 'wrongpassword'
})
.expect(401);
});
});
});
// e2e/login.spec.js
const { test, expect } = require('@playwright/test');
test.describe('User Login Flow', () => {
test('should login successfully', async ({ page }) => {
await page.goto('/login');
// 填写表单
await page.fill('[data-testid="email"]', 'test@example.com');
await page.fill('[data-testid="password"]', 'password123');
// 点击登录按钮
await page.click('[data-testid="login-button"]');
// 验证重定向到仪表板
await expect(page).toHaveURL('/dashboard');
await expect(page.locator('[data-testid="welcome-message"]'))
.toContainText('Welcome, Test User');
});
test('should show error for invalid credentials', async ({ page }) => {
await page.goto('/login');
await page.fill('[data-testid="email"]', 'test@example.com');
await page.fill('[data-testid="password"]', 'wrongpassword');
await page.click('[data-testid="login-button"]');
await expect(page.locator('[data-testid="error-message"]'))
.toContainText('Invalid credentials');
});
});
DevOps实践的核心在于自动化、协作和持续改进。通过本文介绍的实践,我们可以: