From a36095d99e307fc4f6fb78fb62b19c9c2c2e51d4 Mon Sep 17 00:00:00 2001
From: Vlada Dusek
Date: Wed, 17 Jun 2026 13:37:34 +0200
Subject: [PATCH 1/2] ci: add Lychee link checker for the docs

Adds a daily (and manually dispatchable) Lychee workflow that builds the
docs and checks the generated HTML for broken links, and flips
onBrokenLinks/onBrokenMarkdownLinks to 'throw' so broken internal links
fail the docs build. Mirrors the apify-docs setup.
---
Reviewer note: line breaks and indentation of this series were restored
from a whitespace-collapsed copy. The indentation of the context lines in
the docusaurus.config.js hunk assumes the file's 4-space style (TODO
confirm); use `git apply --ignore-whitespace` if it does not match.

 .github/workflows/lychee.yml | 71 ++++++++++++++++++++++++++++++++++++
 .lycheeignore                | 12 ++++++
 website/docusaurus.config.js |  4 +-
 3 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/lychee.yml
 create mode 100644 .lycheeignore

diff --git a/.github/workflows/lychee.yml b/.github/workflows/lychee.yml
new file mode 100644
index 00000000..258e74b4
--- /dev/null
+++ b/.github/workflows/lychee.yml
@@ -0,0 +1,71 @@
+name: Periodic link checker
+
+on:
+  # Run daily at midnight UTC.
+  schedule:
+    - cron: '0 0 * * *'
+  # Allow manual triggering from the GitHub UI.
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+env:
+  NODE_VERSION: 22
+  PYTHON_VERSION: 3.14
+
+jobs:
+  link_check:
+    name: Link check
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Set up Node
+        uses: actions/setup-node@v6
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Set up uv package manager
+        uses: astral-sh/setup-uv@v8.2.0
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install Python dependencies
+        run: uv run poe install-dev
+
+      - name: Install pnpm and website dependencies
+        uses: apify/actions/pnpm-install@v1.2.0
+        with:
+          working-directory: website
+
+      - name: Build docs
+        run: uv run poe build-docs
+        env:
+          APIFY_SIGNING_TOKEN: ${{ secrets.APIFY_SIGNING_TOKEN }}
+          SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }}
+
+      - name: Run Lychee link checker
+        id: lychee
+        uses: lycheeverse/lychee-action@v2.8.0
+        env:
+          GITHUB_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
+        with:
+          fail: true
+          args: >
+            --base https://docs.apify.com
+            --max-retries 6
+            --verbose
+            --no-progress
+            --timeout '60'
+            --accept '100..=103,200..=299,429'
+            --max-redirects 5
+            --format markdown
+            './website/build/**/*.html'
diff --git a/.lycheeignore b/.lycheeignore
new file mode 100644
index 00000000..ca8949f4
--- /dev/null
+++ b/.lycheeignore
@@ -0,0 +1,12 @@
+http:\/\/localhost:3000.*
+https:\/\/www\.youtube.*
+\.(jpg|jpeg|png|gif|bmp|webp|svg)$
+https:\/\/github\.com\/apify\/apify-sdk-python\/edit\/[^ ]*
+https:\/\/docs\.apify\.com\/assets\/[^ ]*
+https:\/\/api\.apify\.com/.*
+file:\/\/\/.*
+https://chrome\.google\.com/webstore/.*
+https?:\/\/(www\.)?npmjs\.com\/.*
+^https://apify\.com/og-image.*
+https://console-backend.apify.com/
+https://mcp.apify.com/
diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js
index d6ef5fd6..f08f3d87 100644
--- a/website/docusaurus.config.js
+++ b/website/docusaurus.config.js
@@ -60,9 +60,9 @@ module.exports = {
         },
     },
     onBrokenLinks:
-    /** @type {import('@docusaurus/types').ReportingSeverity} */ ('warn'),
+    /** @type {import('@docusaurus/types').ReportingSeverity} */ ('throw'),
     onBrokenMarkdownLinks:
-    /** @type {import('@docusaurus/types').ReportingSeverity} */ ('warn'),
+    /** @type {import('@docusaurus/types').ReportingSeverity} */ ('throw'),
     themes: [
         [
             '@apify/docs-theme',

From 8a271730cabb56d77f592081c9397d6868db7694 Mon Sep 17 00:00:00 2001
From: Vlada Dusek
Date: Wed, 17 Jun 2026 14:18:46 +0200
Subject: [PATCH 2/2] ci: run link check on a weekday schedule with Slack failure alerts

Mirrors crawlee-python's scheduled-tests workflow: runs weekdays at
01:00 UTC (same as crawlee) and pings the team Slack channel via
SLACK_WEBHOOK_URL when a scheduled run fails (skipped on manual
dispatch). Renames lychee.yml to on_schedule_lychee.yaml to match the
on_schedule_* convention.
---
Reviewer note: NODE_VERSION and PYTHON_VERSION are quoted in the new
workflow so YAML keeps them as strings. The blob id on that file's
"index" line predates this edit; re-run `git format-patch` after applying
to refresh it.

 .github/workflows/lychee.yml              |  71 -----------
 .github/workflows/on_schedule_lychee.yaml | 148 ++++++++++++++++++++++
 2 files changed, 148 insertions(+), 71 deletions(-)
 delete mode 100644 .github/workflows/lychee.yml
 create mode 100644 .github/workflows/on_schedule_lychee.yaml

diff --git a/.github/workflows/lychee.yml b/.github/workflows/lychee.yml
deleted file mode 100644
index 258e74b4..00000000
--- a/.github/workflows/lychee.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: Periodic link checker
-
-on:
-  # Run daily at midnight UTC.
-  schedule:
-    - cron: '0 0 * * *'
-  # Allow manual triggering from the GitHub UI.
-  workflow_dispatch:
-
-permissions:
-  contents: read
-
-env:
-  NODE_VERSION: 22
-  PYTHON_VERSION: 3.14
-
-jobs:
-  link_check:
-    name: Link check
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Set up Node
-        uses: actions/setup-node@v6
-        with:
-          node-version: ${{ env.NODE_VERSION }}
-
-      - name: Set up Python
-        uses: actions/setup-python@v6
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
-      - name: Set up uv package manager
-        uses: astral-sh/setup-uv@v8.2.0
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
-      - name: Install Python dependencies
-        run: uv run poe install-dev
-
-      - name: Install pnpm and website dependencies
-        uses: apify/actions/pnpm-install@v1.2.0
-        with:
-          working-directory: website
-
-      - name: Build docs
-        run: uv run poe build-docs
-        env:
-          APIFY_SIGNING_TOKEN: ${{ secrets.APIFY_SIGNING_TOKEN }}
-          SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }}
-
-      - name: Run Lychee link checker
-        id: lychee
-        uses: lycheeverse/lychee-action@v2.8.0
-        env:
-          GITHUB_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
-        with:
-          fail: true
-          args: >
-            --base https://docs.apify.com
-            --max-retries 6
-            --verbose
-            --no-progress
-            --timeout '60'
-            --accept '100..=103,200..=299,429'
-            --max-redirects 5
-            --format markdown
-            './website/build/**/*.html'
diff --git a/.github/workflows/on_schedule_lychee.yaml b/.github/workflows/on_schedule_lychee.yaml
new file mode 100644
index 00000000..1230ae56
--- /dev/null
+++ b/.github/workflows/on_schedule_lychee.yaml
@@ -0,0 +1,148 @@
+name: Scheduled link check
+
+on:
+  # Runs when manually triggered from the GitHub UI.
+  workflow_dispatch:
+
+  # Runs on weekdays at 01:00 UTC.
+  schedule:
+    - cron: '0 1 * * 1-5'
+
+concurrency:
+  group: scheduled-link-check
+  cancel-in-progress: false  # a new run waits for the in-flight one instead of cancelling it
+
+permissions:
+  contents: read
+
+env:
+  NODE_VERSION: '22'  # quoted: version identifiers are strings, not numbers
+  PYTHON_VERSION: '3.14'  # quoted: an unquoted bump to 3.10 would load as the float 3.1
+
+jobs:
+  link_check:
+    name: Link check
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Set up Node
+        uses: actions/setup-node@v6
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Set up uv package manager
+        uses: astral-sh/setup-uv@v8.2.0
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install Python dependencies
+        run: uv run poe install-dev
+
+      - name: Install pnpm and website dependencies
+        uses: apify/actions/pnpm-install@v1.2.0
+        with:
+          working-directory: website
+
+      - name: Build docs
+        run: uv run poe build-docs
+        env:
+          APIFY_SIGNING_TOKEN: ${{ secrets.APIFY_SIGNING_TOKEN }}
+          SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }}
+
+      - name: Run Lychee link checker
+        id: lychee
+        uses: lycheeverse/lychee-action@v2.8.0
+        env:
+          GITHUB_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
+        with:
+          fail: true
+          args: >
+            --base https://docs.apify.com
+            --max-retries 6
+            --verbose
+            --no-progress
+            --timeout '60'
+            --accept '100..=103,200..=299,429'
+            --max-redirects 5
+            --format markdown
+            './website/build/**/*.html'
+
+  # Send a Slack notification to the team alerting channel when the scheduled link check fails.
+  # Skipped on workflow_dispatch (manual runs) so that ad-hoc triggers don't spam the channel.
+  notify_on_failure:
+    name: Notify Slack on failure
+    needs: link_check  # evaluated only after the link_check job has finished
+    if: failure() && github.event_name == 'schedule'  # a needed job failed AND the run was cron-triggered
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      actions: read  # lets the `gh api` call below list this run's jobs
+
+    steps:
+      - name: Build Slack payload
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # credential read by the gh CLI
+          REPO: ${{ github.repository }}
+          RUN_ID: ${{ github.run_id }}
+          RUN_ATTEMPT: ${{ github.run_attempt }}
+          WORKFLOW_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          HEADING: ':red_circle: Scheduled link check failed'
+        run: |
+          # Retry the API call (up to 5 tries, sleeping 5s/10s/15s/20s in between) to tolerate transient 5xx from GitHub.
+          max_attempts=5
+          fetched=0
+          for attempt in $(seq 1 "${max_attempts}"); do
+            if failed_jobs=$(gh api \
+              "repos/${REPO}/actions/runs/${RUN_ID}/attempts/${RUN_ATTEMPT}/jobs?per_page=100" \
+              --jq '[.jobs[] | select(.conclusion == "failure") | "• \(.name)"] | join("\n")'); then
+              fetched=1
+              break
+            fi
+            if [[ "${attempt}" -lt "${max_attempts}" ]]; then
+              sleep "$((attempt * 5))"
+            fi
+          done
+          if [[ "${fetched}" -eq 0 ]]; then
+            echo "Failed to fetch job list after ${max_attempts} attempts; sending notification without it." >&2
+            failed_jobs="(unable to fetch job list — see workflow run)"
+          fi
+          jq -n \
+            --arg repo "${REPO}" \
+            --arg url "${WORKFLOW_URL}" \
+            --arg heading "${HEADING}" \
+            --arg failed "${failed_jobs}" \
+            '{
+              text: "\($heading) in \($repo)",
+              blocks: [
+                {
+                  type: "header",
+                  text: { type: "plain_text", text: $heading, emoji: true }
+                },
+                {
+                  type: "section",
+                  fields: [
+                    { type: "mrkdwn", text: "*Repository:*\n\($repo)" },
+                    { type: "mrkdwn", text: "*Workflow run:*\n<\($url)|View on GitHub>" }
+                  ]
+                },
+                {
+                  type: "section",
+                  text: { type: "mrkdwn", text: "*Failed jobs:*\n\($failed)" }
+                }
+              ]
+            }' > slack-payload.json
+
+      - name: Send Slack notification
+        uses: slackapi/slack-github-action@v3.0.3
+        with:
+          webhook: ${{ secrets.SLACK_WEBHOOK_URL }}
+          webhook-type: incoming-webhook
+          payload-file-path: slack-payload.json  # the file written by the previous step