On Tue, Mar 24, 2026 at 11:14 AM Stefano Tondo <[email protected]> wrote:
>
> Enrich Git source download packages in the SPDX 3.0 output with:
> - software_packageVersion set to the full SHA-1 commit hash
> - software_packageUrl set to a PURL for known Git hosting services
> - VCS external reference pointing to the repository URL
>
> The PURL generation recognizes github.com by default and supports
> additional hosting services via the SPDX_GIT_PURL_MAPPINGS variable
> (format: 'domain:purl_type', e.g. 'gitlab.example.com:pkg:gitlab').
>
> Only Git source downloads are enriched. Non-Git downloads are left
> unchanged since their ecosystem PURLs are already set on the recipe
> package by SPDX_PACKAGE_URLS from the previous patch.
>

LGTM, thanks

Reviewed-by: Joshua Watt <[email protected]>

> Signed-off-by: Stefano Tondo <[email protected]>
> ---
>  meta/classes/create-spdx-3.0.bbclass |   7 ++
>  meta/lib/oe/spdx30_tasks.py          | 102 +++++++++++++++++++++++++++
>  2 files changed, 109 insertions(+)
>
> diff --git a/meta/classes/create-spdx-3.0.bbclass 
> b/meta/classes/create-spdx-3.0.bbclass
> index 9a6606dce6..432adb14cd 100644
> --- a/meta/classes/create-spdx-3.0.bbclass
> +++ b/meta/classes/create-spdx-3.0.bbclass
> @@ -156,6 +156,13 @@ SPDX_RECIPE_SBOM_NAME ?= "${PN}-recipe-sbom"
>  SPDX_RECIPE_SBOM_NAME[doc] = "The name of output recipe SBoM when using \
>      create_recipe_sbom"
>
> +SPDX_GIT_PURL_MAPPINGS ??= ""
> +SPDX_GIT_PURL_MAPPINGS[doc] = "A space separated list of domain:purl_type \
> +    mappings to configure PURL generation for Git source downloads. \
> +    For example, 'gitlab.example.com:pkg:gitlab' maps repositories hosted \
> +    on gitlab.example.com to the pkg:gitlab PURL type. \
> +    github.com is always mapped to pkg:github by default."
> +
>  IMAGE_CLASSES:append = " create-spdx-image-3.0"
>  SDK_CLASSES += "create-spdx-sdk-3.0"
>
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index 51e10befba..cd9672c18e 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -14,6 +14,7 @@ import oe.spdx_common
>  import oe.sdk
>  import os
>  import re
> +import urllib.parse
>
>  from contextlib import contextmanager
>  from datetime import datetime, timezone
> @@ -384,6 +385,105 @@ def collect_dep_sources(dep_objsets, dest):
>              index_sources_by_hash(e.to, dest)
>
>
> +
> +def _generate_git_purl(d, download_location, srcrev):
> +    """Generate a Package URL for a Git source from its download location.
> +
> +    Parses the Git URL to identify the hosting service and generates the
> +    appropriate PURL type. Supports github.com by default and custom
> +    mappings via SPDX_GIT_PURL_MAPPINGS.
> +
> +    Returns the PURL string or None if no mapping matches.
> +    """
> +    if not download_location or not download_location.startswith('git+'):
> +        return None
> +
> +    git_url = download_location[4:]  # Remove 'git+' prefix
> +
> +    # Default handler: github.com
> +    git_purl_handlers = {
> +        'github.com': 'pkg:github',
> +    }
> +
> +    # Custom PURL mappings from SPDX_GIT_PURL_MAPPINGS
> +    # Format: "domain1:purl_type1 domain2:purl_type2"
> +    custom_mappings = d.getVar('SPDX_GIT_PURL_MAPPINGS')
> +    if custom_mappings:
> +        for mapping in custom_mappings.split():
> +            parts = mapping.split(':', 1)
> +            if len(parts) == 2:
> +                git_purl_handlers[parts[0]] = parts[1]
> +                bb.debug(2, f"Added custom Git PURL mapping: {parts[0]} -> 
> {parts[1]}")
> +            else:
> +                bb.warn(f"Invalid SPDX_GIT_PURL_MAPPINGS entry: {mapping} 
> (expected format: domain:purl_type)")
> +
> +    try:
> +        parsed = urllib.parse.urlparse(git_url)
> +    except Exception:
> +        return None
> +
> +    hostname = parsed.hostname
> +    if not hostname:
> +        return None
> +
> +    for domain, purl_type in git_purl_handlers.items():
> +        if hostname == domain:
> +            path = parsed.path.strip('/')
> +            path_parts = path.split('/')
> +            if len(path_parts) >= 2:
> +                owner = path_parts[0]
> +                repo = path_parts[1].replace('.git', '')
> +                return f"{purl_type}/{owner}/{repo}@{srcrev}"
> +            break
> +
> +    return None
> +
> +
> +def _enrich_source_package(d, dl, fd, file_name, primary_purpose):
> +    """Enrich a Git source download package with version, PURL, and external 
> refs.
> +
> +    For Git sources, extracts the full SHA-1 from SRCREV as the version,
> +    generates PURLs for known hosting services, and adds VCS external
> +    references.
> +    """
> +    version = None
> +    purl = None
> +
> +    if fd.type == "git":
> +        # Use full SHA-1 from fd.revision
> +        srcrev = getattr(fd, 'revision', None)
> +        if srcrev and srcrev not in {'${AUTOREV}', 'AUTOINC', 'INVALID'}:
> +            version = srcrev
> +
> +        # Generate PURL for Git hosting services
> +        download_location = getattr(dl, 'software_downloadLocation', None)
> +        if version and download_location:
> +            purl = _generate_git_purl(d, download_location, version)
> +
> +    if version:
> +        dl.software_packageVersion = version
> +
> +    if purl:
> +        dl.software_packageUrl = purl
> +
> +    # Add VCS external reference for Git repositories
> +    download_location = getattr(dl, 'software_downloadLocation', None)
> +    if download_location and isinstance(download_location, str):
> +        if download_location.startswith('git+'):
> +            git_url = download_location[4:]
> +            if '@' in git_url:
> +                git_url = git_url.split('@')[0]
> +
> +            dl.externalRef = dl.externalRef or []
> +            dl.externalRef.append(
> +                oe.spdx30.ExternalRef(
> +                    externalRefType=oe.spdx30.ExternalRefType.vcs,
> +                    locator=[git_url],
> +                )
> +            )
> +
> +
> +
>  def add_download_files(d, objset):
>      inputs = set()
>
> @@ -447,6 +547,8 @@ def add_download_files(d, objset):
>                  )
>              )
>
> +            _enrich_source_package(d, dl, fd, file_name, primary_purpose)
> +
>              if fd.method.supports_checksum(fd):
>                  # TODO Need something better than hard coding this
>                  for checksum_id in ["sha256", "sha1"]:
> --
> 2.53.0
>
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#234050): 
https://lists.openembedded.org/g/openembedded-core/message/234050
Mute This Topic: https://lists.openembedded.org/mt/118487340/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub 
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to