chore(deploy): add backend ECR deployment flow
This commit is contained in:
@@ -0,0 +1,21 @@
|
||||
.git
|
||||
.gitea
|
||||
.github
|
||||
.trellis
|
||||
.venv
|
||||
.env
|
||||
.env.*
|
||||
__pycache__
|
||||
*.py[cod]
|
||||
.pytest_cache
|
||||
.ruff_cache
|
||||
.mypy_cache
|
||||
.pyright
|
||||
logs
|
||||
midscene_run
|
||||
apps/.dart_tool
|
||||
apps/build
|
||||
apps/.pub
|
||||
apps/.gradle
|
||||
backend/.ruff_cache
|
||||
infra/docker/supabase/volumes
|
||||
@@ -0,0 +1,94 @@
|
||||
name: Build production Docker image

# Runs on every push to main and can also be started manually.
on:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  # Builds the backend image, checks its size, smoke-tests it, and pushes it
  # to ECR under both the commit SHA tag and `latest`.
  build-backend-image:
    runs-on: wsl2-docker-host
    env:
      # Local (pre-push) image repository name used by the build/test steps.
      IMAGE_NAME: eryao-backend
      # Maximum allowed image size in bytes; the size-budget step fails above it.
      IMAGE_SIZE_LIMIT_BYTES: "500000000"
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      # Fail before building when any secret required by the push step is
      # missing. Secrets are handed to the script as environment variables
      # rather than being expanded into the script text, so a secret value
      # containing quotes or shell syntax cannot alter the script and the
      # rendered script file never contains the secret itself.
      - name: Validate ECR configuration
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
          AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
          ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }}
        run: |
          set -euo pipefail
          # `${VAR:?msg}` aborts with a non-zero status when VAR is unset or empty.
          : "${AWS_ACCESS_KEY_ID:?AWS_ACCESS_KEY_ID secret is required}"
          : "${AWS_SECRET_ACCESS_KEY:?AWS_SECRET_ACCESS_KEY secret is required}"
          : "${AWS_REGION:?AWS_REGION secret is required}"
          : "${AWS_ACCOUNT_ID:?AWS_ACCOUNT_ID secret is required}"
          : "${ECR_REPOSITORY:?ECR_REPOSITORY secret is required}"

      # `--load` places the built image in the local Docker image store so the
      # following steps can inspect, run, and re-tag it.
      - name: Build backend production image
        run: |
          set -euo pipefail
          docker buildx build \
            --provenance=false \
            --load \
            --file backend/Dockerfile \
            --tag "${IMAGE_NAME}:prod-${GITHUB_SHA}" \
            --tag "${IMAGE_NAME}:prod-latest" \
            .

      # Reject images larger than IMAGE_SIZE_LIMIT_BYTES.
      - name: Check image size budget
        run: |
          set -euo pipefail
          image_size_bytes="$(docker image inspect "${IMAGE_NAME}:prod-${GITHUB_SHA}" --format '{{.Size}}')"
          echo "Image size: ${image_size_bytes} bytes"
          if [ "${image_size_bytes}" -gt "${IMAGE_SIZE_LIMIT_BYTES}" ]; then
            echo "Image exceeds ${IMAGE_SIZE_LIMIT_BYTES} bytes" >&2
            exit 1
          fi

      # Import the application module inside the freshly built image and print
      # its title; the step fails if the import raises.
      - name: Smoke test backend image
        run: |
          set -euo pipefail
          docker run --rm \
            -e ERYAO_RUNTIME__ENVIRONMENT=prod \
            -e ERYAO_SUPABASE__PUBLIC_URL=http://localhost:8001 \
            -e ERYAO_POINTS_POLICY__REGISTER_BONUS_HMAC_KEY=ci-smoke-test-key \
            --entrypoint python \
            "${IMAGE_NAME}:prod-${GITHUB_SHA}" \
            -c "import app; print(app.app.title)"

      - name: Push backend image to ECR
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
          AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
          ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }}
        run: |
          set -euo pipefail

          # Guard against pushing with credentials that belong to a different
          # account than the one the registry URL is built from.
          caller_account_id="$(aws sts get-caller-identity --query Account --output text)"
          if [ "${caller_account_id}" != "${AWS_ACCOUNT_ID}" ]; then
            echo "AWS_ACCOUNT_ID does not match caller identity" >&2
            exit 1
          fi

          ecr_registry="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
          ecr_image="${ecr_registry}/${ECR_REPOSITORY}"

          # Create the repository when the describe call does not succeed.
          aws ecr describe-repositories \
            --region "${AWS_REGION}" \
            --repository-names "${ECR_REPOSITORY}" >/dev/null 2>&1 \
            || aws ecr create-repository \
              --region "${AWS_REGION}" \
              --repository-name "${ECR_REPOSITORY}" \
              --image-scanning-configuration scanOnPush=true \
              --encryption-configuration encryptionType=AES256 >/dev/null

          aws ecr get-login-password --region "${AWS_REGION}" \
            | docker login --username AWS --password-stdin "${ecr_registry}"

          docker tag "${IMAGE_NAME}:prod-${GITHUB_SHA}" "${ecr_image}:${GITHUB_SHA}"
          docker tag "${IMAGE_NAME}:prod-${GITHUB_SHA}" "${ecr_image}:latest"
          docker push "${ecr_image}:${GITHUB_SHA}"
          docker push "${ecr_image}:latest"
|
||||
@@ -0,0 +1,27 @@
|
||||
# CI/CD ECR Deployment Flow Completion
|
||||
|
||||
## Completed
|
||||
|
||||
- Production backend Docker image workflow exists at `.gitea/workflows/build-production-docker.yml`.
|
||||
- Workflow trigger is configured for push to `main` and manual `workflow_dispatch`.
|
||||
- Workflow builds `backend/Dockerfile` with Docker Buildx, validates image size, and runs a smoke test.
|
||||
- Workflow logs in to ECR, creates the repository if missing, and pushes both `${GITHUB_SHA}` and `latest` tags.
|
||||
- Production Docker Compose file exists at `deploy/docker-compose.prod.yml` and pulls images from ECR instead of building locally.
|
||||
- Production deploy guide exists at `deploy/README.md` with EC2-side ECR login, Compose pull/up, health check, logs, and stop commands.
|
||||
- Cloudflare IPv4 ingress rules were added to AWS security group `sg-064bf6675c881fde3` for `tcp/80` and `tcp/443`.
|
||||
|
||||
## Deferred Intentionally
|
||||
|
||||
- EC2 will not auto-pull and restart yet. The operator will log in to the single EC2 host and start Docker Compose manually after ECR image confirmation.
|
||||
- Public `0.0.0.0/0` ingress for `tcp/80` and `tcp/443` remains until `https://api.meeyao.com` or the agreed health endpoint is confirmed healthy.
|
||||
- Gitea workflow does not yet include SSH or SSM deployment steps.
|
||||
|
||||
## Verification To Perform After PR Merge
|
||||
|
||||
1. Confirm the changes have reached `main`, either by merging the PR or by pushing to `main` directly.
|
||||
2. Confirm Gitea Actions runs the production Docker workflow successfully.
|
||||
3. Confirm ECR contains the backend image tagged with the commit SHA and `latest`.
|
||||
4. Operator manually logs in to EC2 and runs the documented Compose deployment commands.
|
||||
5. Confirm local EC2 health check returns `{"status":"ok"}`.
|
||||
6. Confirm external API health through Cloudflare.
|
||||
7. Remove `0.0.0.0/0` ingress for `tcp/80` and `tcp/443` only after external health is confirmed.
|
||||
@@ -0,0 +1,27 @@
|
||||
# CI/CD ECR Deployment Flow Record
|
||||
|
||||
## Goal
|
||||
|
||||
Record the current production CI/CD state for the backend Docker deployment path and preserve the handoff point before EC2 manual service startup.
|
||||
|
||||
## Scope
|
||||
|
||||
- Document that pushes to `main` trigger the Gitea workflow to build the backend Docker image.
|
||||
- Document that the workflow validates the image and pushes `${GITHUB_SHA}` and `latest` tags to AWS ECR.
|
||||
- Document that Cloudflare IPv4 CIDR ingress rules were added for `tcp/80` and `tcp/443` on security group `sg-064bf6675c881fde3` in `us-east-2`.
|
||||
- Document that the open `0.0.0.0/0` ingress rules for `tcp/80` and `tcp/443` remain in place until the API is healthy.
|
||||
- Document that final EC2 service startup is intentionally manual: the operator will log in to the single EC2 host and run Docker Compose after confirming the image exists in ECR.
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- Automated SSH or SSM deployment to EC2.
|
||||
- ECS task definition or service deployment.
|
||||
- Removing the public `0.0.0.0/0` security group rules before API health is confirmed.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- Trellis task records the completed CI/CD preparation work.
|
||||
- The task is archived after recording completion.
|
||||
- The temporary root-level `DEPLOYMENT_REPORT.md` is removed.
|
||||
- Current repository changes are committed on `dev`, pushed, and proposed for merge to `main`.
|
||||
- After merge or main push triggers CI, ECR is checked for the uploaded backend image.
|
||||
@@ -0,0 +1,49 @@
|
||||
{
|
||||
"id": "cicd-ecr-deployment-flow",
|
||||
"name": "cicd-ecr-deployment-flow",
|
||||
"title": "Record CI/CD ECR deployment flow",
|
||||
"description": "Record completed backend Docker CI/CD preparation through ECR push and the remaining manual EC2 Docker Compose startup step.",
|
||||
"status": "completed",
|
||||
"dev_type": "docs",
|
||||
"scope": "deployment",
|
||||
"priority": "P2",
|
||||
"creator": "zl-q",
|
||||
"assignee": "zl-q",
|
||||
"createdAt": "2026-04-29",
|
||||
"completedAt": "2026-04-29",
|
||||
"branch": null,
|
||||
"base_branch": "dev",
|
||||
"worktree_path": null,
|
||||
"current_phase": 0,
|
||||
"next_action": [
|
||||
{
|
||||
"phase": 1,
|
||||
"action": "implement"
|
||||
},
|
||||
{
|
||||
"phase": 2,
|
||||
"action": "check"
|
||||
},
|
||||
{
|
||||
"phase": 3,
|
||||
"action": "finish"
|
||||
},
|
||||
{
|
||||
"phase": 4,
|
||||
"action": "create-pr"
|
||||
}
|
||||
],
|
||||
"commit": null,
|
||||
"pr_url": null,
|
||||
"subtasks": [],
|
||||
"children": [],
|
||||
"parent": null,
|
||||
"relatedFiles": [
|
||||
".gitea/workflows/build-production-docker.yml",
|
||||
"backend/Dockerfile",
|
||||
"deploy/docker-compose.prod.yml",
|
||||
"deploy/README.md"
|
||||
],
|
||||
"notes": "CI/CD is complete through ECR image push. EC2 remains single-host Docker Compose and will be started manually after ECR image confirmation. Cloudflare IPv4 ingress was added; public 0.0.0.0/0 ingress remains until API health is confirmed.",
|
||||
"meta": {}
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
# ---- Build stage: install the locked dependencies into /app/.venv ----
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV UV_LINK_MODE=copy

WORKDIR /app

# Only the dependency manifests are copied here; the project itself is not
# installed (--no-install-project), and dev dependencies are skipped.
COPY pyproject.toml uv.lock ./
RUN uv sync --frozen --no-dev --no-install-project --no-cache

# Shrink the virtual environment: drop bytecode caches and bundled test
# directories, then strip shared objects when a `strip` binary is available.
RUN find /app/.venv -type d \( -name tests -o -name test -o -name __pycache__ \) -prune -exec rm -rf {} + \
    && if command -v strip >/dev/null 2>&1; then \
         find /app/.venv -type f -name "*.so" -exec strip --strip-unneeded {} +; \
       fi

# ---- Runtime stage: prepared virtual environment plus the backend sources ----
# NOTE(review): no USER instruction is set, so the process runs as the base
# image's default user — confirm whether a non-root user is feasible.
FROM python:3.12-slim-bookworm

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/app/backend/src
# Put the copied virtual environment's executables first on PATH.
ENV PATH="/app/.venv/bin:$PATH"

WORKDIR /app

COPY --from=builder /app/.venv ./.venv
COPY backend ./backend

EXPOSE 5775

# Host, port, worker count, and log level come from ERYAO_* variables with
# defaults; the log level is lower-cased before it is passed to uvicorn.
# `exec` replaces the shell with the uvicorn process.
CMD ["sh", "-c", "exec uvicorn app:app --host ${ERYAO_WEB__HOST:-0.0.0.0} --port ${ERYAO_WEB__PORT:-5775} --workers ${ERYAO_WEB__WORKERS:-2} --log-level $(printf '%s' ${ERYAO_RUNTIME__LOG_LEVEL:-info} | tr '[:upper:]' '[:lower:]')"]
|
||||
@@ -0,0 +1,159 @@
|
||||
# 觅爻生产部署指南
|
||||
|
||||
## 目录说明
|
||||
|
||||
`deploy/` 用于存放生产环境启动所需文件:
|
||||
|
||||
- `docker-compose.prod.yml`:生产 Docker Compose 启动配置,只拉取已有镜像,不负责构建。
|
||||
- `.env`:生产环境变量文件。该文件包含敏感信息,不应提交到 Git。
|
||||
|
||||
## 前置条件
|
||||
|
||||
生产机器需要安装:
|
||||
|
||||
- Docker
|
||||
- Docker Compose v2
|
||||
- AWS CLI v2
|
||||
|
||||
确认命令:
|
||||
|
||||
```bash
|
||||
docker --version
|
||||
docker compose version
|
||||
aws --version
|
||||
```
|
||||
|
||||
## 环境变量
|
||||
|
||||
`docker-compose.prod.yml` 默认从当前目录读取 `.env`:
|
||||
|
||||
```bash
|
||||
deploy/.env
|
||||
```
|
||||
|
||||
必须包含 AWS ECR 镜像定位变量:
|
||||
|
||||
```text
|
||||
AWS_ACCOUNT_ID=<你的 AWS 账号 ID>
|
||||
AWS_REGION=<ECR 所在区域>
|
||||
ECR_REPOSITORY=<ECR 仓库名>
|
||||
```
|
||||
|
||||
如果本目录下的 `.env` 是从项目根目录 `.env` 复制过来的,通常还需要手动追加以上三个变量。
|
||||
|
||||
默认镜像地址会拼接为:
|
||||
|
||||
```text
|
||||
${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPOSITORY}:latest
|
||||
```
|
||||
|
||||
如果要手动指定完整镜像地址,可以在 `.env` 中设置:
|
||||
|
||||
```text
|
||||
ERYAO_BACKEND_IMAGE=<完整镜像地址>
|
||||
```
|
||||
|
||||
Web 服务端口使用项目环境变量:
|
||||
|
||||
```text
|
||||
ERYAO_WEB__PORT=5775
|
||||
```
|
||||
|
||||
默认只绑定本机回环地址:
|
||||
|
||||
```text
|
||||
ERYAO_DEPLOY_BIND_HOST=127.0.0.1
|
||||
```
|
||||
|
||||
如果生产机器没有 Nginx、ALB 或其他反向代理,需要直接对外暴露端口,可改为:
|
||||
|
||||
```text
|
||||
ERYAO_DEPLOY_BIND_HOST=0.0.0.0
|
||||
```
|
||||
|
||||
## 登录 ECR
|
||||
|
||||
进入部署目录,并把 `.env` 加载到当前 shell:
|
||||
|
||||
```bash
|
||||
cd deploy
|
||||
set -a
|
||||
. ./.env
|
||||
set +a
|
||||
```
|
||||
|
||||
在生产机器上配置好 AWS 凭据后执行:
|
||||
|
||||
```bash
|
||||
aws ecr get-login-password --region "$AWS_REGION" \
|
||||
| docker login --username AWS --password-stdin \
|
||||
"${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
|
||||
```
|
||||
|
||||
## 启动服务
|
||||
|
||||
启动 Web、Redis 和 worker:
|
||||
|
||||
```bash
|
||||
cd deploy
|
||||
docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers pull
|
||||
docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers up -d
|
||||
```
|
||||
|
||||
只启动 Web 和 Redis:
|
||||
|
||||
```bash
|
||||
cd deploy
|
||||
docker compose --env-file ./.env -f docker-compose.prod.yml up -d
|
||||
```
|
||||
|
||||
## 健康检查
|
||||
|
||||
如果 `ERYAO_WEB__PORT=5775`:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:5775/health
|
||||
```
|
||||
|
||||
期望返回:
|
||||
|
||||
```json
|
||||
{"status":"ok"}
|
||||
```
|
||||
|
||||
## 查看状态和日志
|
||||
|
||||
```bash
|
||||
cd deploy
|
||||
docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers ps
|
||||
docker logs -f eryao-prod-backend
|
||||
docker logs -f eryao-prod-worker-agent
|
||||
docker logs -f eryao-prod-worker-general
|
||||
docker logs -f eryao-prod-redis
|
||||
```
|
||||
|
||||
## 更新版本
|
||||
|
||||
CI 推送新镜像到 ECR 后,在生产机器执行:
|
||||
|
||||
```bash
|
||||
cd deploy
|
||||
docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers pull
|
||||
docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers up -d
|
||||
```
|
||||
|
||||
## 停止服务
|
||||
|
||||
```bash
|
||||
cd deploy
|
||||
docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers down
|
||||
```
|
||||
|
||||
如需连 Redis 数据卷一起删除:
|
||||
|
||||
```bash
|
||||
cd deploy
|
||||
docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers down -v
|
||||
```
|
||||
|
||||
谨慎使用 `down -v`,它会删除 Redis 持久化数据。
|
||||
@@ -0,0 +1,79 @@
|
||||
name: eryao-prod

# Settings shared by every service that runs the backend image (web and
# workers). The image defaults to the `latest` tag in ECR unless
# ERYAO_BACKEND_IMAGE supplies a full image reference.
x-backend-common: &backend-common
  image: ${ERYAO_BACKEND_IMAGE:-${AWS_ACCOUNT_ID:?AWS_ACCOUNT_ID is required}.dkr.ecr.${AWS_REGION:?AWS_REGION is required}.amazonaws.com/${ECR_REPOSITORY:?ECR_REPOSITORY is required}:latest}
  env_file:
    - path: ./.env
      required: true
  depends_on:
    redis:
      condition: service_healthy
  restart: unless-stopped

services:
  # Web service; uses the image's default command.
  backend:
    <<: *backend-common
    container_name: eryao-prod-backend
    environment:
      ERYAO_RUNTIME__ENVIRONMENT: prod
      ERYAO_RUNTIME__SERVICE_NAME: web
      ERYAO_REDIS__HOST: redis
      ERYAO_REDIS__PORT: "6379"
    ports:
      # Published on the loopback address unless ERYAO_DEPLOY_BIND_HOST overrides it.
      - "${ERYAO_DEPLOY_BIND_HOST:-127.0.0.1}:${ERYAO_WEB__PORT:-5775}:${ERYAO_WEB__PORT:-5775}"

  # Taskiq worker for the agent broker; started only with the `workers` profile.
  worker-agent:
    <<: *backend-common
    container_name: eryao-prod-worker-agent
    profiles: ["workers"]
    environment:
      ERYAO_RUNTIME__ENVIRONMENT: prod
      ERYAO_RUNTIME__SERVICE_NAME: worker-agent
      ERYAO_REDIS__HOST: redis
      ERYAO_REDIS__PORT: "6379"
    command:
      - sh
      - -c
      - exec taskiq worker core.taskiq.app:worker_agent_broker core.agentscope.runtime.tasks --workers ${ERYAO_WORKER__GROUPS__AGENT__CONCURRENCY:-2}

  # Taskiq worker for the general broker; started only with the `workers` profile.
  worker-general:
    <<: *backend-common
    container_name: eryao-prod-worker-general
    profiles: ["workers"]
    environment:
      ERYAO_RUNTIME__ENVIRONMENT: prod
      ERYAO_RUNTIME__SERVICE_NAME: worker-general
      ERYAO_REDIS__HOST: redis
      ERYAO_REDIS__PORT: "6379"
    command:
      - sh
      - -c
      - exec taskiq worker core.taskiq.app:worker_general_broker core.agentscope.runtime.tasks v1.feedback.tasks --workers ${ERYAO_WORKER__GROUPS__GENERAL__CONCURRENCY:-1}

  redis:
    image: redis:7.4.2-alpine
    container_name: eryao-prod-redis
    # NOTE(review): this passes every variable in .env to the Redis container,
    # although only REDIS_PASSWORD below is read by the command and the
    # healthcheck — confirm whether env_file is needed here.
    env_file:
      - path: ./.env
        required: true
    environment:
      REDIS_PASSWORD: ${ERYAO_REDIS__PASSWORD:-}
    # Enable password auth only when a password is configured. `exec` replaces
    # the shell wrapper with redis-server so that the stop signal sent by
    # Docker reaches Redis directly and it can shut down cleanly instead of
    # being killed after the stop timeout. `$$` is Compose's escape for a
    # literal `$`, deferring expansion to the container shell.
    command:
      - sh
      - -c
      - |
        if [ -n "$$REDIS_PASSWORD" ]; then
          exec redis-server --appendonly yes --requirepass "$$REDIS_PASSWORD"
        else
          exec redis-server --appendonly yes
        fi
    volumes:
      - redis_data:/data
    healthcheck:
      # Ping Redis, authenticating when a password is configured.
      test:
        - CMD
        - sh
        - -c
        - 'if [ -n "$$REDIS_PASSWORD" ]; then redis-cli -a "$$REDIS_PASSWORD" ping; else redis-cli ping; fi'
      interval: 5s
      timeout: 3s
      retries: 5
    restart: unless-stopped

volumes:
  redis_data:
|
||||
Reference in New Issue
Block a user