PX4-Autopilot/Tools/ci/generate_sbom.py
Ramon Roche 685f9248e4 fix(sbom): fix false positives in monthly license audit
Three issues caused the monthly audit to report already-resolved submodules:

1. The audit workflow grepped for "NOASSERTION" anywhere in the output,
   matching the Detected column even when the Final column had a valid
   override (e.g. libtomcrypt detected as NOASSERTION but overridden to
   Unlicense). Changed to grep for "<-- UNRESOLVED" marker instead.

2. Submodules with an explicit NOASSERTION override in license-overrides.yaml
   (like libfc-sensor-api, which is proprietary) were still counted as
   failures. Now treated as "acknowledged" since someone intentionally
   added the override entry.

3. Added missing BSD-3-Clause override for sitl_gazebo-classic (PX4 org
   project with no LICENSE file in repo).

Fixes #26932

Signed-off-by: Ramon Roche <mrpollo@gmail.com>
2026-04-02 11:15:30 -06:00

608 lines
20 KiB
Python
Executable File

#!/usr/bin/env python3
"""Generate SPDX 2.3 JSON SBOM for a PX4 firmware build.
Produces one SBOM per board target containing:
- PX4 firmware as the primary package
- Git submodules as CONTAINS dependencies
- Python build requirements as BUILD_DEPENDENCY_OF packages
- Board-specific modules as CONTAINS packages
Requires PyYAML (pyyaml) for loading license overrides.
"""
import argparse
import configparser
import json
import re
import subprocess
import uuid
from datetime import datetime, timezone
from pathlib import Path
import yaml
# Ordered most-specific first: all keywords must appear for a match.
LICENSE_PATTERNS = [
# Copyleft licenses first (more specific keywords prevent false matches)
("GPL-3.0-only", ["GNU GENERAL PUBLIC LICENSE", "Version 3"]),
("GPL-2.0-only", ["GNU GENERAL PUBLIC LICENSE", "Version 2"]),
("LGPL-3.0-only", ["GNU LESSER GENERAL PUBLIC LICENSE", "Version 3"]),
("LGPL-2.1-only", ["GNU Lesser General Public License", "Version 2.1"]),
("AGPL-3.0-only", ["GNU AFFERO GENERAL PUBLIC LICENSE", "Version 3"]),
# Permissive licenses
("Apache-2.0", ["Apache License", "Version 2.0"]),
("MIT", ["Permission is hereby granted"]),
("BSD-3-Clause", ["Redistribution and use", "Neither the name"]),
("BSD-2-Clause", ["Redistribution and use", "THIS SOFTWARE IS PROVIDED"]),
("ISC", ["Permission to use, copy, modify, and/or distribute"]),
("EPL-2.0", ["Eclipse Public License", "2.0"]),
("Unlicense", ["The Unlicense", "unlicense.org"]),
]
COPYLEFT_LICENSES = {
"GPL-2.0-only", "GPL-3.0-only",
"LGPL-2.1-only", "LGPL-3.0-only",
"AGPL-3.0-only",
}
def load_license_overrides(source_dir):
"""Load license overrides and comments from YAML config file.
Returns (overrides, comments) dicts mapping submodule path to values.
Falls back to empty dicts if the file is missing.
"""
yaml_path = source_dir / "Tools" / "ci" / "license-overrides.yaml"
if not yaml_path.exists():
return {}, {}
with open(yaml_path) as f:
data = yaml.safe_load(f)
overrides = {}
comments = {}
for path, entry in (data.get("overrides") or {}).items():
overrides[path] = entry["license"]
if "comment" in entry:
comments[path] = entry["comment"]
return overrides, comments
LICENSE_FILENAMES = ["LICENSE", "LICENSE.md", "LICENSE.txt", "LICENCE", "LICENCE.md", "COPYING", "COPYING.md"]
def detect_license(submodule_dir):
"""Auto-detect SPDX license ID from LICENSE/COPYING file in a directory.
Reads the first 100 lines of the first license file found and matches
keywords against LICENSE_PATTERNS. Returns 'NOASSERTION' if no file
is found or no pattern matches.
"""
for fname in LICENSE_FILENAMES:
license_file = submodule_dir / fname
if license_file.is_file():
try:
lines = license_file.read_text(errors="replace").splitlines()[:100]
text = "\n".join(lines)
except OSError:
continue
text_upper = text.upper()
for spdx_id_val, keywords in LICENSE_PATTERNS:
if all(kw.upper() in text_upper for kw in keywords):
return spdx_id_val
return "NOASSERTION"
return "NOASSERTION"
def get_submodule_license(source_dir, sub_path, license_overrides):
"""Return the SPDX license for a submodule: override > auto-detect."""
if sub_path in license_overrides:
return license_overrides[sub_path]
return detect_license(source_dir / sub_path)
def spdx_id(name: str) -> str:
"""Convert a name to a valid SPDX identifier (letters, digits, dots, hyphens)."""
return re.sub(r"[^a-zA-Z0-9.\-]", "-", name)
def parse_gitmodules(source_dir):
"""Parse .gitmodules and return list of {name, path, url}."""
gitmodules_path = source_dir / ".gitmodules"
if not gitmodules_path.exists():
return []
config = configparser.ConfigParser()
config.read(str(gitmodules_path))
submodules = []
for section in config.sections():
if section.startswith("submodule "):
name = section.split('"')[1] if '"' in section else section.split(" ", 1)[1]
path = config.get(section, "path", fallback="")
url = config.get(section, "url", fallback="")
submodules.append({"name": name, "path": path, "url": url})
return submodules
def get_submodule_commits(source_dir):
"""Get commit hashes for all submodules via git ls-tree -r (works without init)."""
try:
result = subprocess.run(
["git", "ls-tree", "-r", "HEAD"],
cwd=str(source_dir),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=True,
)
except (subprocess.CalledProcessError, FileNotFoundError):
return {}
commits = {}
for line in result.stdout.splitlines():
parts = line.split()
if len(parts) >= 4 and parts[1] == "commit":
commits[parts[3]] = parts[2]
return commits
def get_git_info(source_dir: Path) -> dict:
"""Get PX4 git version and hash."""
info = {"version": "unknown", "hash": "unknown"}
try:
result = subprocess.run(
["git", "describe", "--always", "--tags", "--dirty"],
cwd=str(source_dir),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=True,
)
info["version"] = result.stdout.strip()
except (subprocess.CalledProcessError, FileNotFoundError):
pass
try:
result = subprocess.run(
["git", "rev-parse", "HEAD"],
cwd=str(source_dir),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=True,
)
info["hash"] = result.stdout.strip()
except (subprocess.CalledProcessError, FileNotFoundError):
pass
return info
def parse_requirements(requirements_path):
"""Parse pip requirements.txt into list of {name, version_spec}."""
if not requirements_path.exists():
return []
deps = []
for line in requirements_path.read_text().splitlines():
line = line.strip()
if not line or line.startswith("#") or line.startswith("-"):
continue
# Split on version specifiers
match = re.match(r"^([a-zA-Z0-9_\-]+)(.*)?$", line)
if match:
deps.append({
"name": match.group(1),
"version_spec": match.group(2).strip() if match.group(2) else "",
})
return deps
def read_module_list(modules_file, source_dir):
"""Read board-specific module list from file.
Paths may be absolute; they are converted to relative paths under src/.
Duplicates are removed while preserving order.
"""
if not modules_file or not modules_file.exists():
return []
seen = set()
modules = []
source_str = str(source_dir.resolve()) + "/"
for line in modules_file.read_text().splitlines():
path = line.strip()
if not path or path.startswith("#"):
continue
# Convert absolute path to relative
if path.startswith(source_str):
path = path[len(source_str):]
if path not in seen:
seen.add(path)
modules.append(path)
return modules
def make_purl(pkg_type: str, namespace: str, name: str, version: str = "") -> str:
"""Construct a Package URL (purl)."""
purl = f"pkg:{pkg_type}/{namespace}/{name}"
if version:
purl += f"@{version}"
return purl
def extract_git_host_org_repo(url):
"""Extract host type, org, and repo from a git URL.
Returns (host, org, repo) where host is 'github', 'gitlab', or ''.
"""
match = re.search(r"github\.com[:/]([^/]+)/([^/]+?)(?:\.git)?$", url)
if match:
return "github", match.group(1), match.group(2)
match = re.search(r"gitlab\.com[:/](.+?)/([^/]+?)(?:\.git)?$", url)
if match:
return "gitlab", match.group(1), match.group(2)
return "", "", ""
def generate_sbom(source_dir, board, modules_file, compiler, platform=""):
"""Generate a complete SPDX 2.3 JSON document."""
license_overrides, license_comments = load_license_overrides(source_dir)
git_info = get_git_info(source_dir)
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
# Deterministic namespace using UUID5 from git hash + board
ns_seed = f"{git_info['hash']}:{board}"
doc_namespace = f"https://spdx.org/spdxdocs/{board}-{uuid.uuid5(uuid.NAMESPACE_URL, ns_seed)}"
doc = {
"spdxVersion": "SPDX-2.3",
"dataLicense": "CC0-1.0",
"SPDXID": "SPDXRef-DOCUMENT",
"name": f"PX4 Firmware SBOM for {board}",
"documentNamespace": doc_namespace,
"creationInfo": {
"created": timestamp,
"creators": [
"Tool: px4-generate-sbom",
"Organization: Dronecode Foundation",
],
"licenseListVersion": "3.22",
},
"packages": [],
"relationships": [],
}
# Primary package: PX4 firmware
primary_spdx_id = f"SPDXRef-PX4-{spdx_id(board)}"
doc["packages"].append({
"SPDXID": primary_spdx_id,
"name": board,
"versionInfo": git_info["version"],
"packageFileName": f"{board}.px4",
"supplier": "Organization: Dronecode Foundation",
"downloadLocation": "https://github.com/PX4/PX4-Autopilot",
"filesAnalyzed": False,
"primaryPackagePurpose": "FIRMWARE",
"licenseConcluded": "BSD-3-Clause",
"licenseDeclared": "BSD-3-Clause",
"copyrightText": "Copyright (c) PX4 Development Team",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": make_purl(
"github", "PX4", "PX4-Autopilot", git_info["version"]
),
}
],
})
doc["relationships"].append({
"spdxElementId": "SPDXRef-DOCUMENT",
"relationshipType": "DESCRIBES",
"relatedSpdxElement": primary_spdx_id,
})
# Git submodules (filtered to those relevant to this board's modules)
submodules = parse_gitmodules(source_dir)
submodule_commits = get_submodule_commits(source_dir)
modules = read_module_list(modules_file, source_dir)
def submodule_is_relevant(sub_path):
"""A submodule is relevant if any board module path overlaps with it."""
# NuttX platform submodules are only relevant for NuttX builds
if sub_path.startswith("platforms/nuttx/"):
return platform in ("nuttx", "")
if not modules:
return True # no module list means include all
# Other platform submodules are always relevant
if sub_path.startswith("platforms/"):
return True
for mod in modules:
# Module is under this submodule, or submodule is under a module
if mod.startswith(sub_path + "/") or sub_path.startswith(mod + "/"):
return True
return False
for sub in submodules:
if not submodule_is_relevant(sub["path"]):
continue
sub_path = sub["path"]
sub_path_id = sub_path.replace("/", "-")
sub_spdx_id = f"SPDXRef-Submodule-{spdx_id(sub_path_id)}"
commit = submodule_commits.get(sub_path, "unknown")
license_id = get_submodule_license(source_dir, sub_path, license_overrides)
host, org, repo = extract_git_host_org_repo(sub["url"])
download = sub["url"] if sub["url"] else "NOASSERTION"
# Use repo name from URL for human-readable name, fall back to last path component
display_name = repo if repo else sub_path.rsplit("/", 1)[-1]
pkg = {
"SPDXID": sub_spdx_id,
"name": display_name,
"versionInfo": commit,
"supplier": f"Organization: {org}" if org else "NOASSERTION",
"downloadLocation": download,
"filesAnalyzed": False,
"licenseConcluded": license_id,
"licenseDeclared": license_id,
"copyrightText": "NOASSERTION",
}
comment = license_comments.get(sub_path)
if comment:
pkg["licenseComments"] = comment
if host and org and repo:
pkg["externalRefs"] = [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": make_purl(host, org, repo, commit),
}
]
doc["packages"].append(pkg)
doc["relationships"].append({
"spdxElementId": primary_spdx_id,
"relationshipType": "CONTAINS",
"relatedSpdxElement": sub_spdx_id,
})
# Python build dependencies
requirements_path = source_dir / "Tools" / "setup" / "requirements.txt"
py_deps = parse_requirements(requirements_path)
for dep in py_deps:
dep_name = dep["name"]
dep_spdx_id = f"SPDXRef-PyDep-{spdx_id(dep_name)}"
version_str = dep["version_spec"] if dep["version_spec"] else "NOASSERTION"
doc["packages"].append({
"SPDXID": dep_spdx_id,
"name": dep_name,
"versionInfo": version_str,
"supplier": "NOASSERTION",
"downloadLocation": f"https://pypi.org/project/{dep_name}/",
"filesAnalyzed": False,
"primaryPackagePurpose": "APPLICATION",
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "NOASSERTION",
"copyrightText": "NOASSERTION",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": f"pkg:pypi/{dep_name}",
}
],
})
doc["relationships"].append({
"spdxElementId": dep_spdx_id,
"relationshipType": "BUILD_DEPENDENCY_OF",
"relatedSpdxElement": primary_spdx_id,
})
# Board-specific modules (already read above for submodule filtering)
for mod in modules:
mod_path_id = mod.replace("/", "-")
mod_spdx_id = f"SPDXRef-Module-{spdx_id(mod_path_id)}"
# Derive short name: strip leading src/ for readability
display_name = mod
if display_name.startswith("src/"):
display_name = display_name[4:]
doc["packages"].append({
"SPDXID": mod_spdx_id,
"name": display_name,
"versionInfo": git_info["version"],
"supplier": "Organization: Dronecode Foundation",
"downloadLocation": "https://github.com/PX4/PX4-Autopilot",
"filesAnalyzed": False,
"licenseConcluded": "BSD-3-Clause",
"licenseDeclared": "BSD-3-Clause",
"copyrightText": "NOASSERTION",
})
doc["relationships"].append({
"spdxElementId": primary_spdx_id,
"relationshipType": "CONTAINS",
"relatedSpdxElement": mod_spdx_id,
})
# Compiler as a build tool
if compiler:
compiler_spdx_id = f"SPDXRef-Compiler-{spdx_id(compiler)}"
doc["packages"].append({
"SPDXID": compiler_spdx_id,
"name": compiler,
"versionInfo": "NOASSERTION",
"supplier": "NOASSERTION",
"downloadLocation": "NOASSERTION",
"filesAnalyzed": False,
"primaryPackagePurpose": "APPLICATION",
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "NOASSERTION",
"copyrightText": "NOASSERTION",
})
doc["relationships"].append({
"spdxElementId": compiler_spdx_id,
"relationshipType": "BUILD_TOOL_OF",
"relatedSpdxElement": primary_spdx_id,
})
return doc
def verify_licenses(source_dir):
"""Verify license detection for all submodules. Returns exit code."""
license_overrides, _ = load_license_overrides(source_dir)
submodules = parse_gitmodules(source_dir)
if not submodules:
print("No submodules found in .gitmodules")
return 1
has_noassertion = False
print(f"{'Submodule Path':<65} {'Detected':<16} {'Override':<16} {'Final'}")
print("-" * 115)
for sub in submodules:
sub_path = sub["path"]
sub_dir = source_dir / sub_path
checked_out = sub_dir.is_dir() and any(sub_dir.iterdir())
has_explicit_override = sub_path in license_overrides
if not checked_out:
detected = "(not checked out)"
override = license_overrides.get(sub_path, "")
final = override if override else "NOASSERTION"
else:
detected = detect_license(sub_dir)
override = license_overrides.get(sub_path, "")
final = override if override else detected
if final == "NOASSERTION" and has_explicit_override:
# Explicitly acknowledged in overrides file — not a failure
marker = " (acknowledged)"
elif final == "NOASSERTION" and checked_out:
has_noassertion = True
marker = " <-- UNRESOLVED"
elif final == "NOASSERTION" and not checked_out:
marker = " (skipped)"
else:
marker = ""
print(f"{sub_path:<65} {str(detected):<16} {str(override) if override else '':<16} {final}{marker}")
# Copyleft warning (informational, not a failure)
copyleft_found = []
for sub in submodules:
sub_path = sub["path"]
sub_dir = source_dir / sub_path
checked_out = sub_dir.is_dir() and any(sub_dir.iterdir())
override = license_overrides.get(sub_path, "")
if checked_out:
final_lic = override if override else detect_license(sub_dir)
else:
final_lic = override if override else "NOASSERTION"
for cl in COPYLEFT_LICENSES:
if cl in final_lic:
copyleft_found.append((sub_path, final_lic))
break
print()
if copyleft_found:
print("Copyleft licenses detected (informational):")
for path, lic in copyleft_found:
print(f" {path}: {lic}")
print()
if has_noassertion:
print("FAIL: Some submodules have unresolved licenses. "
"Add an entry to Tools/ci/license-overrides.yaml or check the LICENSE file.")
return 1
print("OK: All submodules have a resolved license.")
return 0
def main():
parser = argparse.ArgumentParser(
description="Generate SPDX 2.3 JSON SBOM for PX4 firmware"
)
parser.add_argument(
"--source-dir",
type=Path,
default=Path.cwd(),
help="PX4 source directory (default: cwd)",
)
parser.add_argument(
"--verify-licenses",
action="store_true",
help="Verify license detection for all submodules and exit",
)
parser.add_argument(
"--board",
default=None,
help="Board target name (e.g. px4_fmu-v5x_default)",
)
parser.add_argument(
"--modules-file",
type=Path,
default=None,
help="Path to config_module_list.txt",
)
parser.add_argument(
"--compiler",
default="",
help="Compiler identifier (e.g. arm-none-eabi-gcc)",
)
parser.add_argument(
"--platform",
default="",
help="PX4 platform (nuttx, posix, qurt). Filters platform-specific submodules.",
)
parser.add_argument(
"--output",
type=Path,
default=None,
help="Output SBOM file path",
)
args = parser.parse_args()
if args.verify_licenses:
raise SystemExit(verify_licenses(args.source_dir))
if not args.board:
parser.error("--board is required when not using --verify-licenses")
if not args.output:
parser.error("--output is required when not using --verify-licenses")
sbom = generate_sbom(
source_dir=args.source_dir,
board=args.board,
modules_file=args.modules_file,
compiler=args.compiler,
platform=args.platform,
)
args.output.parent.mkdir(parents=True, exist_ok=True)
with open(args.output, "w") as f:
json.dump(sbom, f, indent=2)
f.write("\n")
pkg_count = len(sbom["packages"])
print(f"SBOM generated: {args.output} ({pkg_count} packages)")
if __name__ == "__main__":
main()