On Tue, Mar 24, 2026 at 7:30 AM <[email protected]> wrote:
>
> From: Stefano Tondo <[email protected]>
>
> Add SPDX_FILE_EXCLUDE_PATTERNS variable that allows filtering files from
> SPDX output by regex matching. The variable accepts a space-separated
> list of Python regular expressions; files whose paths match any pattern
> (via re.search) are excluded.
>
> When empty (the default), no filtering is applied and all files are
> included, preserving existing behavior.
>
> This enables users to reduce SBOM size by excluding files that are not
> relevant for compliance (e.g., test files, object files, patches).
>
> Excluded files are tracked in a set returned from add_package_files()
> and passed to get_package_sources_from_debug(), which uses the set for
> precise cross-checking rather than re-evaluating patterns.

LGTM, Thanks.

Reviewed-by: Joshua Watt <[email protected]>

>
> Signed-off-by: Stefano Tondo <[email protected]>
> ---
>  meta/classes/spdx-common.bbclass |  7 +++
>  meta/lib/oe/spdx30_tasks.py      | 80 +++++++++++++++++++++-----------
>  2 files changed, 60 insertions(+), 27 deletions(-)
>
> diff --git a/meta/classes/spdx-common.bbclass b/meta/classes/spdx-common.bbclass
> index 83f05579b6..40701730a6 100644
> --- a/meta/classes/spdx-common.bbclass
> +++ b/meta/classes/spdx-common.bbclass
> @@ -82,6 +82,13 @@ SPDX_MULTILIB_SSTATE_ARCHS[doc] = "The list of sstate architectures to consider
>      when collecting SPDX dependencies. This includes multilib architectures when \
>      multilib is enabled. Defaults to SSTATE_ARCHS."
>
> +SPDX_FILE_EXCLUDE_PATTERNS ??= ""
> +SPDX_FILE_EXCLUDE_PATTERNS[doc] = "Space-separated list of Python regular \
> +    expressions to exclude files from SPDX output. Files whose paths match \
> +    any pattern (via re.search) will be filtered out. Defaults to empty \
> +    (no filtering). Example: \
> +    SPDX_FILE_EXCLUDE_PATTERNS = '\\.patch$ \\.diff$ /test/ \\.pyc$ \\.o$'"
> +
>  python () {
>      from oe.cve_check import extend_cve_status
>      extend_cve_status(d)
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index 353d783fa2..68ed821a8c 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -13,6 +13,7 @@ import oe.spdx30
>  import oe.spdx_common
>  import oe.sdk
>  import os
> +import re
>
>  from contextlib import contextmanager
>  from datetime import datetime, timezone
> @@ -157,17 +158,27 @@ def add_package_files(
>      file_counter = 1
>      if not os.path.exists(topdir):
>          bb.note(f"Skip {topdir}")
> -        return spdx_files
> +        return spdx_files, set()
>
>      check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
>      if check_compiled_sources:
>          compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
>          bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
>
> +    exclude_patterns = [
> +        re.compile(pattern)
> +        for pattern in (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split()
> +    ]
> +    excluded_files = set()
> +
>      for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
> -        dirs[:] = [d for d in dirs if d not in ignore_dirs]
> +        dirs[:] = [directory for directory in dirs if directory not in ignore_dirs]
>          if subdir == str(topdir):
> -            dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs]
> +            dirs[:] = [
> +                directory
> +                for directory in dirs
> +                if directory not in ignore_top_level_dirs
> +            ]
>
>          dirs.sort()
>          files.sort()
> @@ -177,14 +188,19 @@ def add_package_files(
>                  continue
>
>              filename = str(filepath.relative_to(topdir))
> +
> +            if exclude_patterns and any(
> +                pattern.search(filename) for pattern in exclude_patterns
> +            ):
> +                excluded_files.add(filename)
> +                continue
> +
>              file_purposes = get_purposes(filepath)
>
> -            # Check if file is compiled
> -            if check_compiled_sources:
> -                if not oe.spdx_common.is_compiled_source(
> -                    filename, compiled_sources, types
> -                ):
> -                    continue
> +            if check_compiled_sources and not oe.spdx_common.is_compiled_source(
> +                filename, compiled_sources, types
> +            ):
> +                continue
>
>              spdx_file = objset.new_file(
>                  get_spdxid(file_counter),
> @@ -218,12 +234,15 @@ def add_package_files(
>
>      bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id))
>
> -    return spdx_files
> +    return spdx_files, excluded_files
>
>
>  def get_package_sources_from_debug(
> -    d, package, package_files, sources, source_hash_cache
> +    d, package, package_files, sources, source_hash_cache, excluded_files=None
>  ):
> +    if excluded_files is None:
> +        excluded_files = set()
> +
>      def file_path_match(file_path, pkg_file):
>          if file_path.lstrip("/") == pkg_file.name.lstrip("/"):
>              return True
> @@ -256,6 +275,12 @@ def get_package_sources_from_debug(
>              continue
>
>          if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
> +            if file_path.lstrip("/") in excluded_files:
> +                bb.debug(
> +                    1,
> +                    f"Skipping debug source lookup for excluded file {file_path} in {package}",
> +                )
> +                continue
>              bb.fatal(
>                  "No package file found for %s in %s; SPDX found: %s"
>                  % (str(file_path), package, " ".join(p.name for p in package_files))
> @@ -737,7 +762,7 @@ def create_spdx(d):
>          bb.debug(1, "Adding source files to SPDX")
>          oe.spdx_common.get_patched_src(d)
>
> -        files = add_package_files(
> +        files, _ = add_package_files(
>              d,
>              build_objset,
>              spdx_workdir,
> @@ -909,7 +934,7 @@ def create_spdx(d):
>                  )
>
>              bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
> -            package_files = add_package_files(
> +            package_files, excluded_files = add_package_files(
>                  d,
>                  pkg_objset,
>                  pkgdest / package,
> @@ -932,7 +957,8 @@ def create_spdx(d):
>
>              if include_sources:
>                  debug_sources = get_package_sources_from_debug(
> -                    d, package, package_files, dep_sources, source_hash_cache
> +                    d, package, package_files, dep_sources, source_hash_cache,
> +                    excluded_files=excluded_files,
>                  )
>                  debug_source_ids |= set(
>                      oe.sbom30.get_element_link_id(d) for d in debug_sources
> @@ -944,7 +970,7 @@ def create_spdx(d):
>
>      if include_sources:
>          bb.debug(1, "Adding sysroot files to SPDX")
> -        sysroot_files = add_package_files(
> +        sysroot_files, _ = add_package_files(
>              d,
>              build_objset,
>              d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
> @@ -1326,18 +1352,18 @@ def create_image_spdx(d):
>              image_filename = image["filename"]
>              image_path = image_deploy_dir / image_filename
>              if os.path.isdir(image_path):
> -                a = add_package_files(
> -                    d,
> -                    objset,
> -                    image_path,
> -                    lambda file_counter: objset.new_spdxid(
> -                        "imagefile", str(file_counter)
> -                    ),
> -                    lambda filepath: [],
> -                    license_data=None,
> -                    ignore_dirs=[],
> -                    ignore_top_level_dirs=[],
> -                    archive=None,
> +                a, _ = add_package_files(
> +                        d,
> +                        objset,
> +                        image_path,
> +                        lambda file_counter: objset.new_spdxid(
> +                            "imagefile", str(file_counter)
> +                        ),
> +                        lambda filepath: [],
> +                        license_data=None,
> +                        ignore_dirs=[],
> +                        ignore_top_level_dirs=[],
> +                        archive=None,
>                  )
>                  artifacts.extend(a)
>              else:
> --
> 2.53.0
>
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#233802): 
https://lists.openembedded.org/g/openembedded-core/message/233802
Mute This Topic: https://lists.openembedded.org/mt/118483004/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub 
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to