diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index d5653c12fa..47d6af5eb4 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -21,11 +21,12 @@ jobs: runs-on: [runs-on, runner=16cpu-linux-x64, "run-id=${{ github.run_id }}", "extras=s3-cache"] container: image: ghcr.io/px4/px4-dev:v1.17.0-rc2 - outputs: - has_findings: ${{ steps.clang_tidy.outputs.has_findings }} + permissions: + contents: read + pull-requests: read steps: - uses: runs-on/action@v2 - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 fetch-tags: true @@ -47,96 +48,50 @@ jobs: run: | if [ "${{ github.event_name }}" != "pull_request" ]; then make -j$(nproc) clang-tidy - echo "has_findings=false" >> $GITHUB_OUTPUT else - output=$(python3 Tools/ci/run-clang-tidy-pr.py origin/${{ github.base_ref }} 2>&1) - exit_code=$? - echo "$output" - # Helper prints this message on both early-exit paths - # (no changed C++ files, or no matching TUs in the compile DB) - if echo "$output" | grep -q "skipping clang-tidy"; then - echo "has_findings=false" >> $GITHUB_OUTPUT - else - echo "has_findings=true" >> $GITHUB_OUTPUT - fi - exit $exit_code + python3 Tools/ci/run-clang-tidy-pr.py origin/${{ github.base_ref }} fi - - name: Upload compile_commands.json + # On PRs, also produce a `pr-review` artifact for the PR Review Poster + # workflow to consume. clang-tidy-diff-18 emits a unified fixes.yml that + # the producer script translates into line-anchored review comments. + # Running this inside the same container as the build means there is no + # workspace-path rewriting and no cross-runner artifact handoff. 
+ - name: Export clang-tidy fixes for PR review if: always() && github.event_name == 'pull_request' - uses: actions/upload-artifact@v4 + run: | + mkdir -p pr-review + git diff -U0 origin/${{ github.base_ref }}...HEAD \ + | clang-tidy-diff-18.py -p1 \ + -path build/px4_sitl_default-clang \ + -export-fixes pr-review/fixes.yml \ + -j0 || true + + - name: Build pr-review artifact + if: always() && github.event_name == 'pull_request' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + python3 Tools/ci/clang-tidy-fixes-to-review.py \ + --fixes pr-review/fixes.yml \ + --repo-root "$GITHUB_WORKSPACE" \ + --repo "$GITHUB_REPOSITORY" \ + --pr-number "${{ github.event.pull_request.number }}" \ + --commit-sha "${{ github.event.pull_request.head.sha }}" \ + --out-dir pr-review \ + --event REQUEST_CHANGES + + - name: Upload pr-review artifact + if: always() && github.event_name == 'pull_request' + uses: actions/upload-artifact@v7 with: - name: compile-commands - path: build/px4_sitl_default-clang/compile_commands.json + name: pr-review + path: | + pr-review/manifest.json + pr-review/comments.json retention-days: 1 - uses: ./.github/actions/save-ccache if: always() with: cache-primary-key: ${{ steps.ccache.outputs.cache-primary-key }} - - post_clang_tidy_comments: - name: Clang-Tidy PR Annotations - needs: [clang_tidy] - runs-on: ubuntu-latest - permissions: - pull-requests: write - contents: read - if: >- - always() - && github.event.pull_request - && github.event.pull_request.head.repo.full_name == github.repository - && (needs.clang_tidy.result == 'success' || needs.clang_tidy.result == 'failure') - && needs.clang_tidy.outputs.has_findings == 'true' - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install clang-tools (for clang-tidy-diff) - run: sudo apt-get install -y clang-tools - - - name: Download compile_commands.json - uses: actions/download-artifact@v4 - with: - name: compile-commands - path: build/px4_sitl_default-clang - - - name: 
Run clang-tidy-diff and export fixes - run: | - # WHY WE REWRITE compile_commands.json PATHS - # - # The clang_tidy job runs on a RunsOn/AWS runner where the workspace - # root is "/__w/PX4-Autopilot/PX4-Autopilot". All absolute paths baked - # into compile_commands.json (the "file" and "directory" fields, and - # every -I include path in "command") use that prefix. - # - # This annotation job runs on a GitHub-hosted runner where the - # workspace root is "/home/runner/work/PX4-Autopilot/PX4-Autopilot". - # When clang-tidy-diff invokes clang-tidy, it reads the "directory" - # field from compile_commands.json and calls chdir() on it. Since - # the AWS-style path does not exist here, clang-tidy crashes with: - # LLVM ERROR: Cannot chdir into "/__w/.../build/px4_sitl_default-clang" - # and silently produces an empty fixes.yml, so no annotations are posted. - # - # Fix: rewrite all occurrences of the AWS workspace prefix to the - # current runner workspace ($GITHUB_WORKSPACE) before invoking - # clang-tidy-diff. Safe because compile_commands.json is a local - # scratch file pulled from the artifact; no source file is modified. 
- sed -i "s|/__w/PX4-Autopilot/PX4-Autopilot|${GITHUB_WORKSPACE}|g" \ - build/px4_sitl_default-clang/compile_commands.json - - mkdir -p clang-tidy-result - git diff -U0 origin/${{ github.base_ref }}...HEAD \ - | clang-tidy-diff-18.py -p1 \ - -path build/px4_sitl_default-clang \ - -export-fixes clang-tidy-result/fixes.yml \ - -j0 || true - - - name: Annotate PR with clang-tidy findings - uses: platisd/clang-tidy-pr-comments@v1 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - clang_tidy_fixes: clang-tidy-result/fixes.yml - request_changes: true - suggestions_per_comment: 10 diff --git a/.github/workflows/pr-comment-poster.yml b/.github/workflows/pr-comment-poster.yml index 5635267505..9775dc7a23 100644 --- a/.github/workflows/pr-comment-poster.yml +++ b/.github/workflows/pr-comment-poster.yml @@ -40,12 +40,13 @@ name: PR Comment Poster # manifest and body itself, NOT copied fork-controlled content into # them. Producer workflows are responsible for that. # -# 6. `actions/checkout@v4` below uses NO ref (so it pulls the base branch, +# 6. `actions/checkout@v6` below uses NO ref (so it pulls the base branch, # the default-branch commit this workflow file was loaded from) AND uses -# sparse-checkout to materialize ONLY Tools/ci/pr-comment-poster.py. The -# rest of the repo never touches the workspace. This is safe: the only -# file the job executes is a base-repo Python script that was reviewed -# through normal code review, never anything from the PR. +# sparse-checkout to materialize ONLY Tools/ci/pr-comment-poster.py and +# its stdlib-only helper module Tools/ci/_github_helpers.py. The rest of +# the repo never touches the workspace. This is safe: the only files the +# job executes are base-repo Python scripts that were reviewed through +# normal code review, never anything from the PR. 
# # ============================================================================== # ARTIFACT CONTRACT @@ -91,22 +92,29 @@ jobs: post: name: Post PR Comment runs-on: ubuntu-latest - if: github.event.workflow_run.conclusion != 'cancelled' + # Only run for pull_request producer runs. Push-to-main and other + # non-PR triggers would have no comment to post, and silently no-oping + # inside the script made it look like the poster was broken. Gating at + # the job level surfaces those as a clean "Skipped" in the UI instead. + if: >- + github.event.workflow_run.conclusion != 'cancelled' + && github.event.workflow_run.event == 'pull_request' steps: # Checkout runs first so the poster script is available AND so that - # actions/checkout@v4's default clean step does not delete the artifact + # actions/checkout@v6's default clean step does not delete the artifact # zip that the next step writes into the workspace. Sparse-checkout # restricts the materialized tree to just the poster script. - name: Checkout poster script only - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: sparse-checkout: | Tools/ci/pr-comment-poster.py + Tools/ci/_github_helpers.py sparse-checkout-cone-mode: false - name: Download pr-comment artifact id: download - uses: actions/github-script@v7 + uses: actions/github-script@v8 with: script: | const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ diff --git a/.github/workflows/pr-review-poster.yml b/.github/workflows/pr-review-poster.yml new file mode 100644 index 0000000000..27c1f5a709 --- /dev/null +++ b/.github/workflows/pr-review-poster.yml @@ -0,0 +1,177 @@ +name: PR Review Poster + +# Generic PR review-comment poster. Sibling of "PR Comment Poster": that +# workflow posts sticky issue-style comments, this one posts line-anchored +# review comments on the "Files changed" tab. 
Any analysis workflow that +# wants to flag specific lines can produce a `pr-review` artifact and this +# workflow will dismiss any stale matching review and post a fresh one. +# Designed so analysis jobs running on untrusted fork PRs can still get +# their inline annotations posted back to the PR. +# +# ============================================================================== +# SECURITY INVARIANTS +# ============================================================================== +# This workflow runs on `workflow_run` which means it runs in the BASE REPO +# context with a WRITE token, even when the triggering PR comes from a fork. +# That is the entire reason it exists, and also the reason it is a loaded +# footgun. Anyone modifying this file MUST preserve the following invariants: +# +# 1. NEVER check out PR code. No `actions/checkout` with a ref. No git clone +# of a fork branch. No execution of scripts from the downloaded artifact. +# The ONLY things read from the artifact are `manifest.json` and +# `comments.json`, and both are treated as opaque data (JSON parsed by +# the poster script and the comment fields posted via the GitHub API). +# +# 2. `pr_number` is validated to be a positive integer before use. +# `marker` is validated to be printable ASCII only before use. +# `commit_sha` is validated to be 40 lowercase hex characters. +# `event` is validated against an allowlist of `COMMENT` and +# `REQUEST_CHANGES`. `APPROVE` is intentionally forbidden so a bot +# cannot approve a pull request. Validation happens inside +# Tools/ci/pr-review-poster.py which is checked out from the base +# branch, not from the artifact. +# +# 3. Comment bodies and the optional summary are passed to the GitHub API +# as JSON fields, never interpolated into a shell command string. +# +# 4. This workflow file lives on the default branch. `workflow_run` only +# loads workflow files from the default branch, so a fork cannot modify +# THIS file as part of a PR. 
The fork CAN cause this workflow to fire +# by triggering a producer workflow that uploads a `pr-review` +# artifact. That is intended. +# +# 5. The artifact-name filter (`pr-review`) is the only gate on which +# workflow runs get processed. Any workflow in this repo that uploads +# an artifact named `pr-review` is trusted to have written the +# manifest and comments itself, NOT copied fork-controlled content +# into them. Producer workflows are responsible for that. +# +# 6. `actions/checkout@v6` below uses NO ref (so it pulls the base branch, +# the default-branch commit this workflow file was loaded from) AND +# uses sparse-checkout to materialize ONLY +# Tools/ci/pr-review-poster.py and its stdlib-only helper module +# Tools/ci/_github_helpers.py. The rest of the repo never touches the +# workspace. This is safe: the only files the job executes are +# base-repo Python scripts that were reviewed through normal code +# review, never anything from the PR. +# +# 7. Stale-review dismissal is restricted to reviews whose AUTHOR is +# `github-actions[bot]` AND whose body contains the producer's +# marker. A fork PR cannot impersonate the bot login, and cannot +# inject the marker into a human reviewer's body without API +# access. Both filters together prevent the poster from ever +# dismissing a human review. 
+# +# ============================================================================== +# ARTIFACT CONTRACT +# ============================================================================== +# Producers upload an artifact named exactly `pr-review` containing: +# +# manifest.json: +# { +# "pr_number": 12345, // required, int > 0 +# "marker": "", // required, printable ASCII +# "event": "REQUEST_CHANGES", // required, "COMMENT" | "REQUEST_CHANGES" +# "commit_sha": "0123456789abcdef0123456789abcdef01234567", // required, 40 hex chars +# "summary": "Optional review summary text" // optional +# } +# +# comments.json: JSON array of line-anchored review comment objects: +# [ +# {"path": "src/foo.cpp", "line": 42, "side": "RIGHT", "body": "..."}, +# {"path": "src/bar.hpp", "start_line": 10, "line": 15, +# "side": "RIGHT", "start_side": "RIGHT", "body": "..."} +# ] +# +# The `marker` string is used to find an existing matching review to +# dismiss before posting a new one. It MUST be unique per producer (e.g. +# include the producer name). +# +# Producers MUST write `pr_number` and `commit_sha` from their own +# workflow context (`github.event.pull_request.number` and +# `github.event.pull_request.head.sha`) and MUST NOT read either from any +# fork-controlled source. + +on: + workflow_run: + # Producers that may upload a `pr-review` artifact. When a new + # producer is wired up, add its workflow name here. Runs of workflows + # not in this list will never trigger the poster. Every run of a + # listed workflow will trigger the poster, which will no-op if no + # `pr-review` artifact exists. + workflows: + - "Static Analysis" + types: + - completed + +permissions: + pull-requests: write + actions: read + contents: read + +jobs: + post: + name: Post PR Review + runs-on: ubuntu-latest + # Only run for pull_request producer runs. 
Push-to-main and other + # non-PR triggers have no review to post, so gating at the job level + # surfaces those as a clean "Skipped" in the UI instead of a + # silent no-op buried inside the script. + if: >- + github.event.workflow_run.conclusion != 'cancelled' + && github.event.workflow_run.event == 'pull_request' + steps: + # Checkout runs first so the poster scripts are available AND so + # that actions/checkout@v6's default clean step does not delete the + # artifact zip that the next step writes into the workspace. + # Sparse-checkout restricts the materialized tree to just the + # poster script and its stdlib helper module. + - name: Checkout poster script only + uses: actions/checkout@v6 + with: + sparse-checkout: | + Tools/ci/pr-review-poster.py + Tools/ci/_github_helpers.py + sparse-checkout-cone-mode: false + + - name: Download pr-review artifact + id: download + uses: actions/github-script@v8 + with: + script: | + const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.payload.workflow_run.id, + }); + const match = artifacts.data.artifacts.find(a => a.name === 'pr-review'); + if (!match) { + core.info('No pr-review artifact on this run; nothing to post.'); + core.setOutput('found', 'false'); + return; + } + const download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: match.id, + archive_format: 'zip', + }); + const fs = require('fs'); + fs.writeFileSync('pr-review.zip', Buffer.from(download.data)); + core.setOutput('found', 'true'); + + - name: Unpack artifact + if: steps.download.outputs.found == 'true' + run: | + mkdir -p pr-review + unzip -q pr-review.zip -d pr-review + + - name: Validate artifact + if: steps.download.outputs.found == 'true' + run: python3 Tools/ci/pr-review-poster.py validate pr-review + + - name: Post PR review + if: steps.download.outputs.found == 'true' + env: + 
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: python3 Tools/ci/pr-review-poster.py post pr-review diff --git a/Tools/ci/_github_helpers.py b/Tools/ci/_github_helpers.py new file mode 100644 index 0000000000..628e4dc4b4 --- /dev/null +++ b/Tools/ci/_github_helpers.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +""" +Shared GitHub REST helpers for PX4 CI scripts. + +This module is imported by the PR poster scripts under Tools/ci/. It is +NOT an executable entry point; do not run it directly. + +Provides: + - fail(msg) terminates the caller with a clear error + - GitHubClient(token) thin stdlib-only GitHub REST client with + single-request and paginated helpers + +Python stdlib only. No third-party dependencies. + +History: extracted from Tools/ci/pr-comment-poster.py so that +pr-comment-poster.py and pr-review-poster.py share the same HTTP plumbing +without duplicating ~100 lines of request/pagination/error-handling code. +""" + +import json +import sys +import typing +import urllib.error +import urllib.request + + +GITHUB_API = 'https://api.github.com' +DEFAULT_USER_AGENT = 'px4-ci' +API_VERSION = '2022-11-28' + + +def fail(msg: str) -> typing.NoReturn: + """Print an error to stderr and exit with status 1. + + Annotated NoReturn so static checkers understand control does not + continue past a fail() call. + """ + print('error: {}'.format(msg), file=sys.stderr) + sys.exit(1) + + +def _parse_next_link(link_header): + """Return the URL for rel="next" from an RFC 5988 Link header, or None. + + The Link header is comma-separated entries of the form: + ; rel="next", ; rel="last" + We walk each entry and return the URL of the one whose rel attribute is + "next". Accept single-quoted rel values for robustness even though + GitHub always emits double quotes. 
+ """ + if not link_header: + return None + for part in link_header.split(','): + segs = part.strip().split(';') + if len(segs) < 2: + continue + url_seg = segs[0].strip() + if not (url_seg.startswith('<') and url_seg.endswith('>')): + continue + url = url_seg[1:-1] + for attr in segs[1:]: + attr = attr.strip() + if attr == 'rel="next"' or attr == "rel='next'": + return url + return None + + +class GitHubClient: + """Minimal GitHub REST client backed by the Python stdlib. + + Each instance holds a token and a user-agent so callers do not have to + thread them through every call. Methods return parsed JSON (or None for + empty responses) and raise RuntimeError with the server response body on + HTTP errors, so CI logs show what the API actually objected to. + + Usage: + client = GitHubClient(token, user_agent='px4-pr-comment-poster') + body, headers = client.request('GET', 'repos/{o}/{r}/pulls/123') + for item in client.paginated('repos/{o}/{r}/pulls/123/reviews'): + ... + """ + + def __init__(self, token, user_agent=DEFAULT_USER_AGENT): + if not token: + raise ValueError('GitHub token is required') + self._token = token + self._user_agent = user_agent + + def request(self, method, path_or_url, json_body=None): + """GET/POST/PATCH/PUT/DELETE a single API path or absolute URL. + + `path_or_url` may be either a relative API path (e.g. + "repos/PX4/PX4-Autopilot/pulls/123") or an absolute URL such as the + next-page URL returned from paginated results. Relative paths are + prefixed with the GitHub API base. + + Returns (parsed_json_or_none, headers_dict). Raises RuntimeError + on HTTP or transport errors. + """ + url = self._resolve(path_or_url) + return self._do_request(method, url, json_body) + + def paginated(self, path, per_page=100): + """GET a path and follow rel="next" Link headers. + + Yields items from each page's JSON array. Bumps per_page to 100 + (GitHub's max) so large result sets take fewer round-trips. 
+ Raises RuntimeError if any page response is not a JSON array. + """ + url = self._resolve(path) + sep = '&' if '?' in url else '?' + url = '{}{}per_page={}'.format(url, sep, per_page) + while url is not None: + body, headers = self._do_request('GET', url, None) + if body is None: + return + if not isinstance(body, list): + raise RuntimeError( + 'expected JSON array from {}, got {}'.format( + url, type(body).__name__)) + for item in body: + yield item + url = _parse_next_link(headers.get('Link')) + + def _resolve(self, path_or_url): + if path_or_url.startswith('http://') or path_or_url.startswith('https://'): + return path_or_url + return '{}/{}'.format(GITHUB_API.rstrip('/'), path_or_url.lstrip('/')) + + def _do_request(self, method, url, json_body): + data = None + headers = { + 'Authorization': 'Bearer {}'.format(self._token), + 'Accept': 'application/vnd.github+json', + # Pin the API version so GitHub deprecations don't silently + # change the response shape under us. + 'X-GitHub-Api-Version': API_VERSION, + 'User-Agent': self._user_agent, + } + if json_body is not None: + data = json.dumps(json_body).encode('utf-8') + headers['Content-Type'] = 'application/json; charset=utf-8' + + req = urllib.request.Request( + url, data=data, method=method, headers=headers) + try: + with urllib.request.urlopen(req) as resp: + raw = resp.read() + # HTTPMessage is case-insensitive on lookup but its items() + # preserves the original case. GitHub sends "Link" with a + # capital L, which is what _parse_next_link expects. + resp_headers = dict(resp.headers.items()) + if not raw: + return None, resp_headers + return json.loads(raw.decode('utf-8')), resp_headers + except urllib.error.HTTPError as e: + # GitHub error bodies are JSON with a "message" field and often + # a "documentation_url". Dump the raw body into the exception so + # the CI log shows exactly what the API objected to. A bare + # "HTTP 422" tells us nothing useful. 
+ try: + err_body = e.read().decode('utf-8', errors='replace') + except Exception: + err_body = '(no body)' + raise RuntimeError( + 'GitHub API {} {} failed: HTTP {} {}\n{}'.format( + method, url, e.code, e.reason, err_body)) + except urllib.error.URLError as e: + # Network layer failure (DNS, TLS, connection reset). No HTTP + # response to parse; just surface the transport reason. + raise RuntimeError( + 'GitHub API {} {} failed: {}'.format(method, url, e.reason)) diff --git a/Tools/ci/clang-tidy-fixes-to-review.py b/Tools/ci/clang-tidy-fixes-to-review.py new file mode 100644 index 0000000000..eb95621a41 --- /dev/null +++ b/Tools/ci/clang-tidy-fixes-to-review.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python3 +# +# clang-tidy-fixes-to-review.py +# +# Producer-side helper that converts a clang-tidy fixes.yml file into a +# pr-review artifact (manifest.json + comments.json) suitable for +# Tools/ci/pr-review-poster.py. +# +# This script runs inside the clang-tidy job's px4-dev container so it can +# read the source tree directly and look up byte offsets in the original +# files. The output it writes is a fully-baked array of review comments; +# the poster never reads source files or fixes.yml. +# +# ---------------------------------------------------------------------------- +# ATTRIBUTION +# ---------------------------------------------------------------------------- +# This script reuses the diagnostic-to-review-comment translation logic +# from platisd/clang-tidy-pr-comments. 
The original work is: +# +# MIT License +# +# Copyright (c) 2021 Dimitris Platis +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Adapted parts: +# - get_diff_line_ranges_per_file() and its inner change_to_line_range() +# - generate_review_comments() and its nested helpers +# (get_line_by_offset, validate_warning_applicability, +# calculate_replacements_diff, markdown, markdown_url, +# diagnostic_name_visual, generate_single_comment) +# - reorder_diagnostics() +# +# Removed parts (handled by Tools/ci/pr-review-poster.py instead): +# - post_review_comments / dismiss_change_requests / resolve_conversations +# - the original argparse main and the requests-based HTTP layer +# +# Adaptation notes: +# - The HTTP layer is rewritten on top of Tools/ci/_github_helpers.py so +# this script does not depend on the third-party `requests` package. 
+# - Conversation resolution (the GraphQL path) is intentionally dropped +# for v1; revisit if it turns out to be missed. +# - Clang-Tidy 8 upconvert is preserved verbatim. +# +# ---------------------------------------------------------------------------- +# Bounded assumptions (documented for future maintainers): +# - Source files are UTF-8 (we read them as latin_1, matching clang-tidy's +# own byte-offset model, and the offsets we surface are line counts) +# - Source files use LF line endings +# - Malformed entries in fixes.yml are skipped with a warning rather than +# crashing the job +# +# Dependencies: pyyaml + Tools/ci/_github_helpers.py. +# pyyaml is preinstalled in the px4-dev container; this script is intended +# to run there, not on bare ubuntu-latest. +"""Convert a clang-tidy fixes.yml into a pr-review artifact.""" + +import argparse +import difflib +import json +import os +import posixpath +import re +import sys +import urllib.parse + +import yaml + +import _github_helpers +from _github_helpers import fail as _fail + + +# Markers used inside the per-comment body to call out severity. Plain +# strings rather than emojis to keep the file emoji-free per project +# preferences; the rendered Markdown is unaffected. 
+SINGLE_COMMENT_MARKERS = { + 'Error': '**[error]**', + 'Warning': '**[warning]**', + 'Remark': '**[remark]**', + 'fallback': '**[note]**', +} + + +# --------------------------------------------------------------------------- +# Diff-range parsing (adapted from platisd) +# --------------------------------------------------------------------------- + +def get_diff_line_ranges_per_file(pr_files): + """Return a dict mapping each PR file path to a list of line ranges + (the +new-side hunks) parsed from its patch.""" + + def change_to_line_range(change): + split_change = change.split(',') + start = int(split_change[0]) + size = int(split_change[1]) if len(split_change) > 1 else 1 + return range(start, start + size) + + result = {} + for pr_file in pr_files: + # Removed binary files etc. have no patch section. + if 'patch' not in pr_file: + continue + file_name = pr_file['filename'] + # Match lines like '@@ -101,8 +102,11 @@' + git_line_tags = re.findall( + r'^@@ -.*? +.*? @@', pr_file['patch'], re.MULTILINE) + changes = [ + tag.replace('@@', '').strip().split()[1].replace('+', '') + for tag in git_line_tags + ] + result[file_name] = [ + change_to_line_range(change) for change in changes + ] + return result + + +def fetch_pull_request_files(client, repo, pr_number): + """Yield file metadata objects for each file modified by the PR.""" + path = 'repos/{}/pulls/{}/files'.format(repo, pr_number) + for entry in client.paginated(path): + yield entry + + +# --------------------------------------------------------------------------- +# Diagnostic ordering (adapted from platisd) +# --------------------------------------------------------------------------- + +def reorder_diagnostics(diags): + """Return diagnostics ordered Error -> Warning -> Remark -> other.""" + errors = [d for d in diags if d.get('Level') == 'Error'] + warnings = [d for d in diags if d.get('Level') == 'Warning'] + remarks = [d for d in diags if d.get('Level') == 'Remark'] + others = [ + d for d in diags + if 
d.get('Level') not in {'Error', 'Warning', 'Remark'} + ] + if others: + print( + 'warning: some fixes have an unexpected Level (not Error, ' + 'Warning, or Remark)', file=sys.stderr) + return errors + warnings + remarks + others + + +# --------------------------------------------------------------------------- +# Comment generation (adapted from platisd) +# --------------------------------------------------------------------------- + +def generate_review_comments(clang_tidy_fixes, repository_root, + diff_line_ranges_per_file, + single_comment_markers): + """Yield review comment dicts for each clang-tidy diagnostic that + intersects the PR diff.""" + + def get_line_by_offset(file_path, offset): + # Clang-Tidy doesn't support multibyte encodings and measures + # offsets in bytes; latin_1 makes byte offsets and string offsets + # equivalent. + with open(repository_root + file_path, encoding='latin_1') as fh: + source = fh.read() + return source[:offset].count('\n') + 1 + + def validate_warning_applicability(file_path, start_line_num, end_line_num): + assert end_line_num >= start_line_num + for line_range in diff_line_ranges_per_file[file_path]: + assert line_range.step == 1 + if (line_range.start <= start_line_num + and end_line_num < line_range.stop): + return True + return False + + def calculate_replacements_diff(file_path, replacements): + # Apply replacements in reverse order so subsequent offsets do not + # shift. 
+ replacements.sort(key=lambda item: (-item['Offset'])) + with open(repository_root + file_path, encoding='latin_1') as fh: + source = fh.read() + changed = source + for replacement in replacements: + changed = ( + changed[:replacement['Offset']] + + replacement['ReplacementText'] + + changed[replacement['Offset'] + replacement['Length']:] + ) + return difflib.Differ().compare( + source.splitlines(keepends=True), + changed.splitlines(keepends=True), + ) + + def markdown(s): + md_chars = '\\`*_{}[]<>()#+-.!|' + + def escape_chars(s): + for ch in md_chars: + s = s.replace(ch, '\\' + ch) + return s + + def unescape_chars(s): + for ch in md_chars: + s = s.replace('\\' + ch, ch) + return s + + s = escape_chars(s) + s = re.sub( + "'([^']*)'", + lambda m: '`` ' + unescape_chars(m.group(1)) + ' ``', + s, + ) + return s + + def markdown_url(label, url): + return '[{}]({})'.format(label, url) + + def diagnostic_name_visual(diagnostic_name): + visual = '**{}**'.format(markdown(diagnostic_name)) + try: + first_dash_idx = diagnostic_name.index('-') + except ValueError: + return visual + namespace = urllib.parse.quote_plus(diagnostic_name[:first_dash_idx]) + check_name = urllib.parse.quote_plus( + diagnostic_name[first_dash_idx + 1:]) + return markdown_url( + visual, + 'https://clang.llvm.org/extra/clang-tidy/checks/{}/{}.html'.format( + namespace, check_name), + ) + + def generate_single_comment(file_path, start_line_num, end_line_num, + name, message, single_comment_marker, + replacement_text=None): + result = { + 'path': file_path, + 'line': end_line_num, + 'side': 'RIGHT', + 'body': '{} {} {}\n{}'.format( + single_comment_marker, + diagnostic_name_visual(name), + single_comment_marker, + markdown(message), + ), + } + if start_line_num != end_line_num: + result['start_line'] = start_line_num + result['start_side'] = 'RIGHT' + if replacement_text is not None: + if not replacement_text or replacement_text[-1] != '\n': + replacement_text += '\n' + result['body'] += 
'\n```suggestion\n{}```'.format(replacement_text) + return result + + for diag in clang_tidy_fixes['Diagnostics']: + # Upconvert clang-tidy 8 format to 9+ + if 'DiagnosticMessage' not in diag: + diag['DiagnosticMessage'] = { + 'FileOffset': diag.get('FileOffset'), + 'FilePath': diag.get('FilePath'), + 'Message': diag.get('Message'), + 'Replacements': diag.get('Replacements', []), + } + + diag_message = diag['DiagnosticMessage'] + diag_message['FilePath'] = posixpath.normpath( + (diag_message.get('FilePath') or '').replace(repository_root, '')) + for replacement in diag_message.get('Replacements') or []: + replacement['FilePath'] = posixpath.normpath( + replacement['FilePath'].replace(repository_root, '')) + + diag_name = diag.get('DiagnosticName', '') + diag_message_msg = diag_message.get('Message', '') + level = diag.get('Level', 'Warning') + single_comment_marker = single_comment_markers.get( + level, single_comment_markers['fallback']) + + replacements = diag_message.get('Replacements') or [] + if not replacements: + file_path = diag_message['FilePath'] + offset = diag_message.get('FileOffset') + if offset is None: + print('warning: skipping {!r}: missing FileOffset'.format( + diag_name), file=sys.stderr) + continue + if file_path not in diff_line_ranges_per_file: + print( + "'{}' for {} does not apply to the files changed in " + 'this PR'.format(diag_name, file_path)) + continue + try: + line_num = get_line_by_offset(file_path, offset) + except (OSError, ValueError) as e: + print('warning: skipping {!r} on {}: {}'.format( + diag_name, file_path, e), file=sys.stderr) + continue + + print("Processing '{}' at line {} of {}...".format( + diag_name, line_num, file_path)) + if validate_warning_applicability(file_path, line_num, line_num): + yield generate_single_comment( + file_path, + line_num, + line_num, + diag_name, + diag_message_msg, + single_comment_marker=single_comment_marker, + ) + else: + print('This warning does not apply to the lines changed ' + 'in this 
PR') + else: + for file_path in {item['FilePath'] for item in replacements}: + if file_path not in diff_line_ranges_per_file: + print( + "'{}' for {} does not apply to the files changed " + 'in this PR'.format(diag_name, file_path)) + continue + + line_num = 1 + start_line_num = None + end_line_num = None + replacement_text = None + + try: + diff_iter = calculate_replacements_diff( + file_path, + [r for r in replacements if r['FilePath'] == file_path], + ) + except (OSError, ValueError) as e: + print('warning: skipping {!r} on {}: {}'.format( + diag_name, file_path, e), file=sys.stderr) + continue + + for line in diff_iter: + # Comment line, ignore. + if line.startswith('? '): + continue + # A '-' line is the start or continuation of a region + # to replace. + if line.startswith('- '): + if start_line_num is None: + start_line_num = line_num + end_line_num = line_num + else: + end_line_num = line_num + if replacement_text is None: + replacement_text = '' + line_num += 1 + # A '+' line is part of the replacement text. + elif line.startswith('+ '): + if replacement_text is None: + replacement_text = line[2:] + else: + replacement_text += line[2:] + # A context line marks the end of a replacement region. + elif line.startswith(' '): + if replacement_text is not None: + if start_line_num is None: + # Pure addition: synthesize a one-line + # range and append the context line to + # the replacement. 
+ start_line_num = line_num + end_line_num = line_num + replacement_text += line[2:] + + print("Processing '{}' at lines {}-{} of {}...".format( + diag_name, start_line_num, end_line_num, file_path)) + + if validate_warning_applicability( + file_path, start_line_num, end_line_num): + yield generate_single_comment( + file_path, + start_line_num, + end_line_num, + diag_name, + diag_message_msg, + single_comment_marker=single_comment_marker, + replacement_text=replacement_text, + ) + else: + print( + 'This warning does not apply to the ' + 'lines changed in this PR') + + start_line_num = None + end_line_num = None + replacement_text = None + + line_num += 1 + else: + # Unknown difflib prefix; skip rather than abort. + print('warning: unexpected diff prefix {!r}; ' + 'skipping diagnostic'.format(line[:2]), + file=sys.stderr) + break + + # End of file with a pending replacement region. + if replacement_text is not None and start_line_num is not None: + print("Processing '{}' at lines {}-{} of {}...".format( + diag_name, start_line_num, end_line_num, file_path)) + if validate_warning_applicability( + file_path, start_line_num, end_line_num): + yield generate_single_comment( + file_path, + start_line_num, + end_line_num, + diag_name, + diag_message_msg, + single_comment_marker=single_comment_marker, + replacement_text=replacement_text, + ) + else: + print('This warning does not apply to the lines ' + 'changed in this PR') + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +def main(argv=None): + parser = argparse.ArgumentParser( + description='Convert a clang-tidy fixes.yml into a pr-review ' + 'artifact (manifest.json + comments.json).', + ) + parser.add_argument('--fixes', required=True, + help='Path to fixes.yml from clang-tidy') + parser.add_argument('--repo-root', required=True, + help='Path to the repository root containing the ' + 'source 
files referenced by fixes.yml') + parser.add_argument('--repo', required=True, + help='owner/name of the repository') + parser.add_argument('--pr-number', required=True, type=int, + help='Pull request number') + parser.add_argument('--commit-sha', required=True, + help='40-char hex commit SHA the review will pin to') + parser.add_argument('--out-dir', required=True, + help='Directory to write manifest.json and ' + 'comments.json') + parser.add_argument( + '--marker', + default='', + help='Marker string embedded in the review body so the poster ' + 'can find and dismiss stale runs') + parser.add_argument( + '--event', + default='REQUEST_CHANGES', + choices=('COMMENT', 'REQUEST_CHANGES'), + help='GitHub review event type') + parser.add_argument( + '--summary', default='', + help='Optional review summary text appended to the review body') + args = parser.parse_args(argv) + + if args.pr_number <= 0: + _fail('--pr-number must be > 0') + if not re.match(r'^[0-9a-f]{40}$', args.commit_sha): + _fail('--commit-sha must be a 40-char lowercase hex string') + + token = os.environ.get('GITHUB_TOKEN') + if not token: + _fail('GITHUB_TOKEN is not set') + + # Normalize the repo root with a trailing slash so the platisd-style + # str.replace() trick still strips it cleanly. + repo_root = args.repo_root + if not repo_root.endswith(os.sep): + repo_root = repo_root + os.sep + + os.makedirs(args.out_dir, exist_ok=True) + + client = _github_helpers.GitHubClient(token, user_agent='px4-clang-tidy-fixes-to-review') + + print('Fetching PR file list from GitHub...') + pr_files = list(fetch_pull_request_files(client, args.repo, args.pr_number)) + diff_line_ranges_per_file = get_diff_line_ranges_per_file(pr_files) + + print('Loading clang-tidy fixes from {}...'.format(args.fixes)) + if not os.path.isfile(args.fixes): + # No fixes file means clang-tidy ran cleanly. Emit an empty + # comments.json so the poster can short-circuit. 
+ comments = [] + else: + with open(args.fixes, encoding='utf-8') as fh: + clang_tidy_fixes = yaml.safe_load(fh) + if (not clang_tidy_fixes + or 'Diagnostics' not in clang_tidy_fixes + or not clang_tidy_fixes['Diagnostics']): + comments = [] + else: + clang_tidy_fixes['Diagnostics'] = reorder_diagnostics( + clang_tidy_fixes['Diagnostics']) + comments = list(generate_review_comments( + clang_tidy_fixes, + repo_root, + diff_line_ranges_per_file, + single_comment_markers=SINGLE_COMMENT_MARKERS, + )) + + print('Generated {} review comment(s)'.format(len(comments))) + + manifest = { + 'pr_number': args.pr_number, + 'marker': args.marker, + 'event': args.event, + 'commit_sha': args.commit_sha, + } + if args.summary: + manifest['summary'] = args.summary + + manifest_path = os.path.join(args.out_dir, 'manifest.json') + comments_path = os.path.join(args.out_dir, 'comments.json') + with open(manifest_path, 'w', encoding='utf-8') as fh: + json.dump(manifest, fh, indent=2) + fh.write('\n') + with open(comments_path, 'w', encoding='utf-8') as fh: + json.dump(comments, fh, indent=2) + fh.write('\n') + + print('Wrote {} and {}'.format(manifest_path, comments_path)) + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/Tools/ci/pr-comment-poster.py b/Tools/ci/pr-comment-poster.py index ddd205c8d4..c28b127568 100755 --- a/Tools/ci/pr-comment-poster.py +++ b/Tools/ci/pr-comment-poster.py @@ -37,9 +37,9 @@ import argparse import json import os import sys -import typing -import urllib.error -import urllib.request + +import _github_helpers +from _github_helpers import fail as _fail # GitHub hard limit is 65535 bytes. 
Cap well under to leave headroom for @@ -53,7 +53,6 @@ MARKER_MAX_LEN = 200 ACCEPTED_MODES = ('upsert',) -GITHUB_API = 'https://api.github.com' USER_AGENT = 'px4-pr-comment-poster' @@ -61,11 +60,6 @@ USER_AGENT = 'px4-pr-comment-poster' # Validation # --------------------------------------------------------------------------- -def _fail(msg: str) -> typing.NoReturn: - print('error: {}'.format(msg), file=sys.stderr) - sys.exit(1) - - def _is_printable_ascii(s): # Space (0x20) through tilde (0x7E) inclusive. return all(0x20 <= ord(ch) <= 0x7E for ch in s) @@ -161,121 +155,11 @@ def validate_manifest(directory): } -# --------------------------------------------------------------------------- -# GitHub HTTP helpers -# --------------------------------------------------------------------------- - -def _github_request(method, url, token, json_body=None): - """Perform a single GitHub REST request. - - Returns a tuple (parsed_json_or_none, headers_dict). Raises RuntimeError - with the server response body on HTTP errors so CI logs show what - GitHub complained about. - """ - data = None - headers = { - 'Authorization': 'Bearer {}'.format(token), - 'Accept': 'application/vnd.github+json', - # Pin the API version so GitHub deprecations don't silently change - # the response shape under us. - 'X-GitHub-Api-Version': '2022-11-28', - 'User-Agent': USER_AGENT, - } - if json_body is not None: - data = json.dumps(json_body).encode('utf-8') - headers['Content-Type'] = 'application/json; charset=utf-8' - - req = urllib.request.Request(url, data=data, method=method, headers=headers) - try: - with urllib.request.urlopen(req) as resp: - raw = resp.read() - # HTTPMessage is case-insensitive on lookup but its items() preserves - # the original case. GitHub sends "Link" with a capital L, which is - # what _parse_next_link expects. 
- resp_headers = dict(resp.headers.items()) - if not raw: - return None, resp_headers - return json.loads(raw.decode('utf-8')), resp_headers - except urllib.error.HTTPError as e: - # GitHub error bodies are JSON with a "message" field and often a - # "documentation_url". Dump the raw body into the exception so the CI - # log shows exactly what the API objected to. A bare "HTTP 422" - # tells us nothing useful. - try: - err_body = e.read().decode('utf-8', errors='replace') - except Exception: - err_body = '(no body)' - raise RuntimeError( - 'GitHub API {} {} failed: HTTP {} {}\n{}'.format( - method, url, e.code, e.reason, err_body)) - except urllib.error.URLError as e: - # Network layer failure (DNS, TLS, connection reset). No HTTP response - # to parse; just surface the transport reason. - raise RuntimeError( - 'GitHub API {} {} failed: {}'.format(method, url, e.reason)) - - -def _parse_next_link(link_header): - """Return the URL for rel="next" from an RFC 5988 Link header, or None. - - The Link header is comma-separated entries of the form: - ; rel="next", ; rel="last" - We walk each entry and return the URL of the one whose rel attribute is - "next". Accept single-quoted rel values for robustness even though GitHub - always emits double quotes. - """ - if not link_header: - return None - for part in link_header.split(','): - segs = part.strip().split(';') - if len(segs) < 2: - continue - url_seg = segs[0].strip() - if not (url_seg.startswith('<') and url_seg.endswith('>')): - continue - url = url_seg[1:-1] - for attr in segs[1:]: - attr = attr.strip() - if attr == 'rel="next"' or attr == "rel='next'": - return url - return None - - -def github_api(method, path, token, json_body=None): - """GET/POST/PATCH a single GitHub API path. 
Non-paginated.""" - url = '{}/{}'.format(GITHUB_API.rstrip('/'), path.lstrip('/')) - body, _ = _github_request(method, url, token, json_body=json_body) - return body - - -def github_api_paginated(path, token): - """GET a GitHub API path and follow rel="next" Link headers. - - Yields items from each page's JSON array. - """ - url = '{}/{}'.format(GITHUB_API.rstrip('/'), path.lstrip('/')) - # GitHub defaults to per_page=30. Bump to 100 (the max) so a PR with a - # few hundred comments fetches in 3 or 4 round-trips instead of 10+. - sep = '&' if '?' in url else '?' - url = '{}{}per_page=100'.format(url, sep) - while url is not None: - body, headers = _github_request('GET', url, token) - if body is None: - return - if not isinstance(body, list): - raise RuntimeError( - 'expected JSON array from {}, got {}'.format( - url, type(body).__name__)) - for item in body: - yield item - url = _parse_next_link(headers.get('Link')) - - # --------------------------------------------------------------------------- # Comment upsert # --------------------------------------------------------------------------- -def find_existing_comment_id(token, repo, pr_number, marker): +def find_existing_comment_id(client, repo, pr_number, marker): """Return the id of the first PR comment whose body contains marker, or None. PR comments are issue comments in GitHub's data model, so we hit @@ -286,7 +170,7 @@ def find_existing_comment_id(token, repo, pr_number, marker): appear in user-written prose. 
""" path = 'repos/{}/issues/{}/comments'.format(repo, pr_number) - for comment in github_api_paginated(path, token): + for comment in client.paginated(path): body = comment.get('body') or '' if marker in body: return comment.get('id') @@ -308,24 +192,22 @@ def build_final_body(body, marker): return '{}\n\n{}\n'.format(body.rstrip('\n'), marker) -def upsert_comment(token, repo, pr_number, marker, body): +def upsert_comment(client, repo, pr_number, marker, body): final_body = build_final_body(body, marker) - existing_id = find_existing_comment_id(token, repo, pr_number, marker) + existing_id = find_existing_comment_id(client, repo, pr_number, marker) if existing_id is not None: print('Updating comment {} on PR #{}'.format(existing_id, pr_number)) - github_api( + client.request( 'PATCH', 'repos/{}/issues/comments/{}'.format(repo, existing_id), - token, json_body={'body': final_body}, ) else: print('Creating new comment on PR #{}'.format(pr_number)) - github_api( + client.request( 'POST', 'repos/{}/issues/{}/comments'.format(repo, pr_number), - token, json_body={'body': final_body}, ) @@ -364,8 +246,9 @@ def cmd_post(args): _fail('GITHUB_REPOSITORY must be "owner/name", got {!r}'.format(repo)) try: + client = _github_helpers.GitHubClient(token, user_agent=USER_AGENT) upsert_comment( - token=token, + client=client, repo=repo, pr_number=result['pr_number'], marker=result['marker'], diff --git a/Tools/ci/pr-review-poster.py b/Tools/ci/pr-review-poster.py new file mode 100644 index 0000000000..9e91a33eeb --- /dev/null +++ b/Tools/ci/pr-review-poster.py @@ -0,0 +1,468 @@ +#!/usr/bin/env python3 +""" +PR review-comment poster for analysis workflows. + +Sibling of Tools/ci/pr-comment-poster.py. Where pr-comment-poster.py posts +sticky issue-style PR comments, this script posts line-anchored review +comments on the "Files changed" tab. Use it for tools like clang-tidy that +want to flag specific lines instead of (or in addition to) a rollup +comment. 
+ +This script is invoked from the `PR Review Poster` workflow which runs on +`workflow_run` in the base repository context. It consumes a `pr-review` +artifact produced by an upstream analysis job and posts a fresh PR review +via the GitHub REST API, dismissing any stale review the same producer +left on a previous run. + +Artifact contract (directory passed on the command line): + + manifest.json + { + "pr_number": 12345, (required, int > 0) + "marker": "", (required, printable ASCII) + "event": "REQUEST_CHANGES", (required, "COMMENT" | "REQUEST_CHANGES") + "commit_sha": "0123456789abcdef0123456789abcdef01234567",(required, 40 hex chars) + "summary": "Optional review body text" (optional) + } + + comments.json + JSON array of line-anchored review comment objects: + [ + {"path": "src/foo.cpp", "line": 42, "side": "RIGHT", + "body": "..."}, + {"path": "src/bar.hpp", "start_line": 10, "line": 15, + "side": "RIGHT", "start_side": "RIGHT", "body": "..."} + ] + +Note: an `APPROVE` event is intentionally NOT supported. Bots should never +approve a pull request. + +Security: this script is run in a write-token context from a workflow that +MUST NOT check out PR code. Both manifest.json and comments.json are +treated as opaque data. The marker is validated to printable ASCII only +before use, and only reviews authored by github-actions[bot] whose body +contains the marker can be dismissed (a fork cannot spoof either). + +Subcommands: + + validate Validate that contains a conforming manifest + + comments file. + post Validate, then dismiss any stale matching review and + post a new review on the target PR. Requires env + GITHUB_TOKEN and GITHUB_REPOSITORY. + +Python stdlib only. No third-party dependencies. +""" + +import argparse +import json +import os +import re +import sys +import time + +import _github_helpers +from _github_helpers import fail as _fail + + +USER_AGENT = 'px4-pr-review-poster' + +# Marker length bounds. 
1..200 is plenty for an HTML comment tag such as
+# "<!-- pr-review-poster -->".
+MARKER_MIN_LEN = 1
+MARKER_MAX_LEN = 200
+
+# Cap per-comment body size well under GitHub's hard limit so we leave
+# headroom for the wrapping JSON envelope. Empirically GitHub allows ~65535
+# bytes per review comment body; 60000 is a safe ceiling.
+MAX_COMMENT_BODY_BYTES = 60000
+
+# Cap on number of comments per single review POST. platisd uses 10. The
+# value matters because GitHub's review-creation endpoint has a payload
+# size limit and review comments occasionally trip an abuse-detection
+# threshold when posted in very large batches. Smaller chunks also let us
+# spread the work across multiple reviews so a single bad entry only
+# fails its own chunk.
+COMMENTS_PER_REVIEW = 10
+
+# Sleep between successive review POSTs to stay clear of GitHub's
+# secondary rate limits. platisd uses 10s; 5s is enough for our volume
+# and cuts user-visible latency.
+SLEEP_BETWEEN_CHUNKS_SECONDS = 5
+
+ACCEPTED_EVENTS = ('COMMENT', 'REQUEST_CHANGES')
+ACCEPTED_SIDES = ('LEFT', 'RIGHT')
+COMMIT_SHA_RE = re.compile(r'^[0-9a-f]{40}$')
+
+# The login GitHub assigns to the built-in actions token. Used to filter
+# the list of existing reviews so we never touch a human reviewer's review.
+BOT_LOGIN = 'github-actions[bot]'
+
+
+# ---------------------------------------------------------------------------
+# Validation
+# ---------------------------------------------------------------------------
+
+def _is_printable_ascii(s):
+    return all(0x20 <= ord(ch) <= 0x7E for ch in s)
+
+
+def validate_marker(marker):
+    """Validate the marker string. 
See pr-comment-poster.py for rationale.""" + if not isinstance(marker, str): + _fail('marker must be a string') + n = len(marker) + if n < MARKER_MIN_LEN or n > MARKER_MAX_LEN: + _fail('marker length out of range ({}..{}): {}'.format( + MARKER_MIN_LEN, MARKER_MAX_LEN, n)) + if not _is_printable_ascii(marker): + _fail('marker contains non-printable or non-ASCII character') + + +def _validate_comment_entry(idx, entry): + """Validate a single review-comment entry. Raises via _fail on error.""" + if not isinstance(entry, dict): + _fail('comments[{}]: must be an object'.format(idx)) + + path = entry.get('path') + if not isinstance(path, str) or not path: + _fail('comments[{}].path: required non-empty string'.format(idx)) + + line = entry.get('line') + if not isinstance(line, int) or isinstance(line, bool) or line <= 0: + _fail('comments[{}].line: required positive integer'.format(idx)) + + side = entry.get('side', 'RIGHT') + if side not in ACCEPTED_SIDES: + _fail('comments[{}].side: must be one of {} (got {!r})'.format( + idx, ', '.join(ACCEPTED_SIDES), side)) + + if 'start_line' in entry: + start_line = entry['start_line'] + if (not isinstance(start_line, int) + or isinstance(start_line, bool) + or start_line <= 0): + _fail('comments[{}].start_line: must be positive integer'.format(idx)) + if start_line >= line: + _fail('comments[{}].start_line ({}) must be < line ({})'.format( + idx, start_line, line)) + start_side = entry.get('start_side', side) + if start_side not in ACCEPTED_SIDES: + _fail('comments[{}].start_side: must be one of {}'.format( + idx, ', '.join(ACCEPTED_SIDES))) + + body = entry.get('body') + if not isinstance(body, str) or not body: + _fail('comments[{}].body: required non-empty string'.format(idx)) + body_bytes = len(body.encode('utf-8')) + if body_bytes > MAX_COMMENT_BODY_BYTES: + _fail('comments[{}].body too large: {} bytes (max {})'.format( + idx, body_bytes, MAX_COMMENT_BODY_BYTES)) + + +def validate_manifest(directory): + """Validate 
<directory>/manifest.json and <directory>/comments.json.
+
+    Returns a dict with keys: pr_number, marker, event, commit_sha,
+    summary, comments (list of validated comment dicts).
+    """
+    manifest_path = os.path.join(directory, 'manifest.json')
+    comments_path = os.path.join(directory, 'comments.json')
+
+    if not os.path.isfile(manifest_path):
+        _fail('manifest.json missing at {}'.format(manifest_path))
+    if not os.path.isfile(comments_path):
+        _fail('comments.json missing at {}'.format(comments_path))
+
+    try:
+        with open(manifest_path, 'r', encoding='utf-8') as f:
+            manifest = json.load(f)
+    except (OSError, json.JSONDecodeError) as e:
+        _fail('manifest.json is not valid JSON: {}'.format(e))
+
+    if not isinstance(manifest, dict):
+        _fail('manifest.json must be a JSON object')
+
+    pr_number = manifest.get('pr_number')
+    if not isinstance(pr_number, int) or isinstance(pr_number, bool):
+        _fail('pr_number must be an integer')
+    if pr_number <= 0:
+        _fail('pr_number must be > 0 (got {})'.format(pr_number))
+
+    marker = manifest.get('marker')
+    validate_marker(marker)
+
+    event = manifest.get('event')
+    if event not in ACCEPTED_EVENTS:
+        _fail('event must be one of {} (got {!r}). 
APPROVE is intentionally ' + 'forbidden.'.format(', '.join(ACCEPTED_EVENTS), event)) + + commit_sha = manifest.get('commit_sha') + if not isinstance(commit_sha, str) or not COMMIT_SHA_RE.match(commit_sha): + _fail('commit_sha must be a 40-character lowercase hex string') + + summary = manifest.get('summary', '') + if summary is None: + summary = '' + if not isinstance(summary, str): + _fail('summary must be a string if present') + + try: + with open(comments_path, 'r', encoding='utf-8') as f: + comments = json.load(f) + except (OSError, json.JSONDecodeError) as e: + _fail('comments.json is not valid JSON: {}'.format(e)) + + if not isinstance(comments, list): + _fail('comments.json must be a JSON array') + + for idx, entry in enumerate(comments): + _validate_comment_entry(idx, entry) + + return { + 'pr_number': pr_number, + 'marker': marker, + 'event': event, + 'commit_sha': commit_sha, + 'summary': summary, + 'comments': comments, + } + + +# --------------------------------------------------------------------------- +# Stale-review dismissal +# --------------------------------------------------------------------------- + +def find_stale_reviews(client, repo, pr_number, marker): + """Yield (id, state) for each existing review owned by the bot AND + whose body contains the marker. + + Filtering on BOTH author == github-actions[bot] AND marker-in-body is + the security invariant: a fork PR cannot impersonate the bot login, + and a fork PR also cannot inject the marker into a human reviewer's + body without API access. 
+ """ + path = 'repos/{}/pulls/{}/reviews'.format(repo, pr_number) + for review in client.paginated(path): + user = review.get('user') or {} + if user.get('login') != BOT_LOGIN: + continue + body = review.get('body') or '' + if marker not in body: + continue + yield review.get('id'), review.get('state') + + +def dismiss_stale_reviews(client, repo, pr_number, marker): + """Dismiss (or, for PENDING reviews, delete) every stale matching review.""" + dismissal_message = 'Superseded by a newer run' + for review_id, state in find_stale_reviews(client, repo, pr_number, marker): + if review_id is None: + continue + if state == 'DISMISSED': + # Already inert; nothing to do. + continue + if state == 'PENDING': + # PENDING reviews cannot be dismissed; they must be deleted. + print('Deleting pending stale review {}'.format(review_id)) + try: + client.request( + 'DELETE', + 'repos/{}/pulls/{}/reviews/{}'.format( + repo, pr_number, review_id)) + except RuntimeError as e: + # Don't abort the run on dismissal failure: the new review + # will still be posted. + print('warning: failed to delete pending review {}: {}'.format( + review_id, e), file=sys.stderr) + continue + print('Dismissing stale review {} (state={})'.format(review_id, state)) + try: + client.request( + 'PUT', + 'repos/{}/pulls/{}/reviews/{}/dismissals'.format( + repo, pr_number, review_id), + json_body={ + 'message': dismissal_message, + 'event': 'DISMISS', + }, + ) + except RuntimeError as e: + print('warning: failed to dismiss review {}: {}'.format( + review_id, e), file=sys.stderr) + + +# --------------------------------------------------------------------------- +# Review posting +# --------------------------------------------------------------------------- + +def _chunk(lst, n): + """Yield successive n-sized slices of lst.""" + for i in range(0, len(lst), n): + yield lst[i:i + n] + + +def _build_review_body(marker, summary, chunk_index, chunk_total): + """Construct the review body text. 
+ + Always begins with the marker (so future runs can find and dismiss + this review). Appends a chunk index when the comment set is split + across multiple reviews, and the producer-supplied summary if any. + """ + parts = [marker] + if chunk_total > 1: + parts.append('({}/{})'.format(chunk_index + 1, chunk_total)) + if summary: + parts.append('') + parts.append(summary) + return '\n'.join(parts) + + +def _comment_to_api(entry): + """Project a validated comment dict to the GitHub API shape.""" + api = { + 'path': entry['path'], + 'line': entry['line'], + 'side': entry.get('side', 'RIGHT'), + 'body': entry['body'], + } + if 'start_line' in entry: + api['start_line'] = entry['start_line'] + api['start_side'] = entry.get('start_side', api['side']) + return api + + +def post_review(client, repo, pr_number, marker, event, commit_sha, summary, + comments): + """Post one or more reviews containing the validated comments. + + Comments are split into COMMENTS_PER_REVIEW-sized chunks. Each chunk + becomes its own review POST. A failed chunk logs a warning and the + loop continues to the next chunk. + """ + chunks = list(_chunk(comments, COMMENTS_PER_REVIEW)) + total = len(chunks) + if total == 0: + print('No comments to post; skipping review creation.') + return + + posted_any = False + for idx, chunk in enumerate(chunks): + if idx > 0: + time.sleep(SLEEP_BETWEEN_CHUNKS_SECONDS) + body = _build_review_body(marker, summary, idx, total) + payload = { + 'commit_id': commit_sha, + 'body': body, + 'event': event, + 'comments': [_comment_to_api(c) for c in chunk], + } + print('Posting review chunk {}/{} with {} comment(s)'.format( + idx + 1, total, len(chunk))) + try: + client.request( + 'POST', + 'repos/{}/pulls/{}/reviews'.format(repo, pr_number), + json_body=payload, + ) + posted_any = True + except RuntimeError as e: + # Most common cause is HTTP 422: a comment refers to a line + # GitHub does not consider part of the diff. 
Skip the bad + # chunk and keep going so other findings still get posted. + print('warning: review chunk {}/{} failed: {}'.format( + idx + 1, total, e), file=sys.stderr) + + if not posted_any: + # If every single chunk failed, surface that as a hard error so + # the workflow turns red and a human looks at it. + _fail('all review chunks failed to post; see warnings above') + + +# --------------------------------------------------------------------------- +# Entry points +# --------------------------------------------------------------------------- + +def cmd_validate(args): + result = validate_manifest(args.directory) + print(('ok: pr_number={} marker_len={} event={} commit_sha={} ' + 'comments={} summary_len={}').format( + result['pr_number'], + len(result['marker']), + result['event'], + result['commit_sha'], + len(result['comments']), + len(result['summary']), + )) + return 0 + + +def cmd_post(args): + result = validate_manifest(args.directory) + + # Empty comment lists short-circuit silently. A producer that ran but + # found nothing to flag should not generate noise on the PR. 
+ if len(result['comments']) == 0: + print('No comments in artifact; nothing to post.') + return 0 + + token = os.environ.get('GITHUB_TOKEN') + if not token: + _fail('GITHUB_TOKEN is not set') + repo = os.environ.get('GITHUB_REPOSITORY') + if not repo: + _fail('GITHUB_REPOSITORY is not set (expected "owner/name")') + if '/' not in repo: + _fail('GITHUB_REPOSITORY must be "owner/name", got {!r}'.format(repo)) + + try: + client = _github_helpers.GitHubClient(token, user_agent=USER_AGENT) + dismiss_stale_reviews( + client=client, + repo=repo, + pr_number=result['pr_number'], + marker=result['marker'], + ) + post_review( + client=client, + repo=repo, + pr_number=result['pr_number'], + marker=result['marker'], + event=result['event'], + commit_sha=result['commit_sha'], + summary=result['summary'], + comments=result['comments'], + ) + except RuntimeError as e: + _fail(str(e)) + return 0 + + +def main(argv=None): + parser = argparse.ArgumentParser( + description='Validate and post line-anchored PR review comments ' + 'from CI artifacts.', + ) + sub = parser.add_subparsers(dest='command', required=True) + + p_validate = sub.add_parser( + 'validate', + help='Validate manifest.json and comments.json in the given directory.', + ) + p_validate.add_argument('directory') + p_validate.set_defaults(func=cmd_validate) + + p_post = sub.add_parser( + 'post', + help='Validate, then dismiss any stale review and post a new one. ' + 'Requires env GITHUB_TOKEN and GITHUB_REPOSITORY.', + ) + p_post.add_argument('directory') + p_post.set_defaults(func=cmd_post) + + args = parser.parse_args(argv) + return args.func(args) + + +if __name__ == '__main__': + sys.exit(main())