diff --git a/.gitea/workflows/build-production-docker.yml b/.gitea/workflows/build-production-docker.yml
index 264bbbb..05ca271 100644
--- a/.gitea/workflows/build-production-docker.yml
+++ b/.gitea/workflows/build-production-docker.yml
@@ -24,6 +24,7 @@ jobs:
           test -n "${{ secrets.AWS_REGION }}"
           test -n "${{ secrets.AWS_ACCOUNT_ID }}"
           test -n "${{ secrets.ECR_REPOSITORY }}"
+          test "${{ secrets.DEPLOY_SSH_KEY != '' }}" = "true"
 
       - name: Build backend production image
         run: |
@@ -33,7 +34,6 @@ jobs:
             --load \
             --file backend/Dockerfile \
             --tag ${IMAGE_NAME}:prod-${GITHUB_SHA} \
-            --tag ${IMAGE_NAME}:prod-latest \
             .
 
       - name: Check image size budget
@@ -88,7 +88,99 @@ jobs:
           aws ecr get-login-password --region "${AWS_REGION}" \
             | docker login --username AWS --password-stdin "${ecr_registry}"
 
-          docker tag "${IMAGE_NAME}:prod-${GITHUB_SHA}" "${ecr_image}:${GITHUB_SHA}"
           docker tag "${IMAGE_NAME}:prod-${GITHUB_SHA}" "${ecr_image}:latest"
-          docker push "${ecr_image}:${GITHUB_SHA}"
           docker push "${ecr_image}:latest"
+
+          # Prune only after the new image is in ECR, so a failed push never
+          # leaves the repository empty. Everything not tagged "latest" goes.
+          stale_image_ids="$(aws ecr list-images \
+            --region "${AWS_REGION}" \
+            --repository-name "${ECR_REPOSITORY}" \
+            --query "imageIds[?imageTag!='latest']" \
+            --output json)"
+          if [ "${stale_image_ids}" != "[]" ]; then
+            aws ecr batch-delete-image \
+              --region "${AWS_REGION}" \
+              --repository-name "${ECR_REPOSITORY}" \
+              --image-ids "${stale_image_ids}" >/dev/null
+          fi
+
+  deploy-production:
+    needs: build-backend-image
+    runs-on: wsl2-docker-host
+    steps:
+      - name: Validate deploy configuration
+        run: |
+          set -euo pipefail
+          # Compare inside the expression so the private key itself is never
+          # written into the generated step script.
+          test "${{ secrets.DEPLOY_SSH_KEY != '' }}" = "true"
+          test -n "${{ secrets.DEPLOY_HOST }}"
+          test -n "${{ secrets.DEPLOY_USER }}"
+          test -n "${{ secrets.AWS_ACCESS_KEY_ID }}"
+          test -n "${{ secrets.AWS_SECRET_ACCESS_KEY }}"
+          test -n "${{ secrets.AWS_REGION }}"
+
+      - name: Deploy production server
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
+          AWS_REGION: ${{ secrets.AWS_REGION }}
+          DEPLOY_HOST: ${{ secrets.DEPLOY_HOST }}
+          DEPLOY_USER: ${{ secrets.DEPLOY_USER }}
+          DEPLOY_SSH_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
+        run: |
+          set -euo pipefail
+
+          key_file=~/.ssh/eryao_deploy_key
+          install -m 700 -d ~/.ssh
+          # Do not leave the private key behind on the runner after the job.
+          trap 'rm -f "${key_file}"' EXIT
+          printf '%s\n' "${DEPLOY_SSH_KEY}" > "${key_file}"
+          chmod 600 "${key_file}"
+          # NOTE(review): ssh-keyscan trusts whatever key the host presents on
+          # each run; pin the expected host key in a secret to verify it.
+          ssh-keyscan -H "${DEPLOY_HOST}" >> ~/.ssh/known_hosts
+
+          # Credentials are sent on stdin ahead of the script instead of being
+          # embedded in the remote command string, which is visible in the
+          # server's process list. %q shell-quotes each value for bash.
+          {
+            printf 'export AWS_ACCESS_KEY_ID=%q\n' "${AWS_ACCESS_KEY_ID}"
+            printf 'export AWS_SECRET_ACCESS_KEY=%q\n' "${AWS_SECRET_ACCESS_KEY}"
+            printf 'export AWS_DEFAULT_REGION=%q\n' "${AWS_REGION}"
+            printf 'export AWS_REGION=%q\n' "${AWS_REGION}"
+            cat <<'REMOTE'
+          set -euo pipefail
+
+          cd ~/deploy
+          set -a
+          . ./.env
+          set +a
+
+          ecr_registry="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
+          aws ecr get-login-password --region "${AWS_REGION}" \
+            | sudo docker login --username AWS --password-stdin "${ecr_registry}"
+
+          sudo docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers pull
+          sudo docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers up -d --remove-orphans
+
+          for attempt in $(seq 1 12); do
+            if curl -fsS "http://127.0.0.1:${ERYAO_WEB__PORT:-5775}/health"; then
+              break
+            fi
+            if [ "${attempt}" -eq 12 ]; then
+              sudo docker compose --env-file ./.env -f docker-compose.prod.yml --profile workers ps
+              sudo docker logs --tail 200 eryao-prod-backend || true
+              exit 1
+            fi
+            sleep 5
+          done
+
+          sudo docker image prune -af --filter "until=168h"
+          REMOTE
+          } | ssh -i "${key_file}" \
+            -o IdentitiesOnly=yes \
+            "${DEPLOY_USER}@${DEPLOY_HOST}" \
+            'bash -se'
diff --git a/.gitignore b/.gitignore
index e1fd418..b6898b1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -137,6 +137,11 @@ ENV/
 env.bak/
 venv.bak/
 
+# Local deployment secrets
+*.pem
+deploy_eryao_ci
+deploy_eryao_ci.pub
+
 # Spyder project settings
 .spyderproject
 .spyproject
diff --git a/deploy/README.md b/deploy/README.md
index 5811027..a637e7a 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -151,7 +151,30 @@ docker logs -f eryao-prod-redis
 
 ## 更新版本
 
-CI 推送新镜像到 ECR 后,在生产机器执行:
+当前 CI/CD 会在 `main` 分支构建后自动部署到生产机器:
+
+- 新镜像推送成功后清理 ECR 仓库旧镜像,只保留新推送的 `latest`。
+- 通过 `DEPLOY_SSH_KEY` 登录生产机器。
+- 在生产机器执行 ECR 登录、`docker compose pull`、`docker compose up -d`。
+- 健康检查通过后清理 7 天前未使用的本地 Docker 镜像。
+
+Gitea Secrets 需要包含:
+
+```text
+AWS_ACCESS_KEY_ID
+AWS_SECRET_ACCESS_KEY
+AWS_REGION
+AWS_ACCOUNT_ID
+ECR_REPOSITORY
+DEPLOY_SSH_KEY
+DEPLOY_HOST
+DEPLOY_USER
+```
+
+`DEPLOY_SSH_KEY` 是已加入生产机器 `ubuntu` 用户 `~/.ssh/authorized_keys` 的部署专用私钥。
+当前生产机器对应:`DEPLOY_HOST=18.218.38.213`,`DEPLOY_USER=ubuntu`。
+
+如需手动更新,在生产机器执行:
 
 ```bash
 cd deploy