gemini-code-assist[bot] commented on code in PR #213:
URL: https://github.com/apache/tvm-ffi/pull/213#discussion_r2480969200
##########
python/tvm_ffi/cpp/load_inline.py:
##########
@@ -195,6 +206,52 @@ def _run_command_in_dev_prompt(
) from e
+def _resolve_linker(
+ with_cuda: bool,
+ build_dir: str,
+ cpp_files: Sequence[str] | None,
+ cuda_files: Sequence[str] | None,
+) -> tuple[list[str], list[str]]:
+ ninja: list[str] = []
+ link_files: list[str] = []
+ if cpp_files is None and cuda_files is None:
+ ninja.append(
+ "build main.o: compile {}".format(
+ str((Path(build_dir) / "main.cpp").resolve()).replace(":",
"$:")
+ )
+ )
+ link_files.append("main.o")
+ if with_cuda:
+ ninja.append(
+ "build cuda.o: compile_cuda {}".format(
+ str((Path(build_dir) / "cuda.cu").resolve()).replace(":",
"$:")
+ )
+ )
+ link_files.append("cuda.o")
+ else:
+ if cpp_files is not None:
+ for cpp_path in cpp_files:
+ obj_name = f"{Path(cpp_path).stem}.o"
+ ninja.append(
+ "build {}: compile {}".format(
+ obj_name, str(Path(cpp_path).resolve()).replace(":",
"$:")
+ )
Review Comment:

The current method of generating object file names using
`Path(cpp_path).stem` can lead to name collisions if multiple source files from
different directories have the same name (e.g., `dir1/foo.cpp` and
`dir2/foo.cpp`). This would cause one object file to overwrite another, leading
to incorrect builds. A more robust approach is to include a hash of the full,
resolved path in the object file name to ensure uniqueness. Additionally, using
f-strings would improve readability here. Note that `cpp_path` is already a
resolved path, so calling `.resolve()` again is unnecessary.
```python
path_hash =
hashlib.sha1(cpp_path.encode('utf-8')).hexdigest()[:8]
obj_name = f"{Path(cpp_path).stem}_{path_hash}.o"
ninja.append(f"build {obj_name}: compile
{cpp_path.replace(':', '$:')}")
```
##########
python/tvm_ffi/cpp/load_inline.py:
##########
@@ -45,23 +47,32 @@ def _hash_sources(
) -> str:
"""Generate a unique hash for the given sources and functions."""
m = hashlib.sha256()
- m.update(cpp_source.encode("utf-8"))
- m.update(cuda_source.encode("utf-8"))
+
+ def _maybe_hash_string(source: str | None) -> None:
+ if source is not None:
+ m.update(source.encode("utf-8"))
+
+ def _hash_sequence(seq: Sequence[str]) -> None:
+ for item in seq:
+ m.update(item.encode("utf-8"))
+
+ def _hash_mapping(mapping: Mapping[str, str]) -> None:
+ for key in sorted(mapping):
+ m.update(key.encode("utf-8"))
+ m.update(mapping[key].encode("utf-8"))
+
+ _maybe_hash_string(cpp_source)
+ _maybe_hash_string(cuda_source)
+ _hash_sequence(sorted(cpp_files or []))
+ _hash_sequence(sorted(cuda_files or []))
if isinstance(functions, Mapping):
- for name in sorted(functions):
- m.update(name.encode("utf-8"))
- m.update(functions[name].encode("utf-8"))
+ _hash_mapping(functions)
else:
- for name in sorted(functions):
- m.update(name.encode("utf-8"))
- for flag in extra_cflags:
- m.update(flag.encode("utf-8"))
- for flag in extra_cuda_cflags:
- m.update(flag.encode("utf-8"))
- for flag in extra_ldflags:
- m.update(flag.encode("utf-8"))
- for path in extra_include_paths:
- m.update(path.encode("utf-8"))
+ _hash_sequence(functions)
Review Comment:

The `else` branch for hashing `functions` does not sort the sequence. If
`functions` is a `Sequence` (as the type hint allows) and its order is not
guaranteed, this will result in a non-deterministic hash, potentially breaking
the caching mechanism. The previous implementation correctly sorted the
sequence.
```suggestion
_hash_sequence(sorted(functions))
```
##########
python/tvm_ffi/cpp/load_inline.py:
##########
@@ -195,6 +206,52 @@ def _run_command_in_dev_prompt(
) from e
+def _resolve_linker(
+ with_cuda: bool,
+ build_dir: str,
+ cpp_files: Sequence[str] | None,
+ cuda_files: Sequence[str] | None,
+) -> tuple[list[str], list[str]]:
+ ninja: list[str] = []
+ link_files: list[str] = []
+ if cpp_files is None and cuda_files is None:
+ ninja.append(
+ "build main.o: compile {}".format(
+ str((Path(build_dir) / "main.cpp").resolve()).replace(":",
"$:")
+ )
+ )
+ link_files.append("main.o")
+ if with_cuda:
+ ninja.append(
+ "build cuda.o: compile_cuda {}".format(
+ str((Path(build_dir) / "cuda.cu").resolve()).replace(":",
"$:")
+ )
+ )
+ link_files.append("cuda.o")
+ else:
+ if cpp_files is not None:
+ for cpp_path in cpp_files:
+ obj_name = f"{Path(cpp_path).stem}.o"
+ ninja.append(
+ "build {}: compile {}".format(
+ obj_name, str(Path(cpp_path).resolve()).replace(":",
"$:")
+ )
+ )
+ link_files.append(obj_name)
+ if cuda_files is not None:
+ assert with_cuda, "cuda_files is provided but with_cuda is False"
+ for cuda_path in cuda_files:
+ obj_name = f"{Path(cuda_path).stem}.o"
+ ninja.append(
+ "build {}: compile_cuda {}".format(
+ obj_name, str(Path(cuda_path).resolve()).replace(":",
"$:")
+ )
Review Comment:

Similar to the C++ files, CUDA object file names can collide if source files
from different directories share the same name. Using a hash of the full path
in the object file name will prevent this. Using f-strings here would also
improve readability. Note that `cuda_path` is already a resolved path, so
calling `.resolve()` again is unnecessary.
```python
path_hash =
hashlib.sha1(cuda_path.encode('utf-8')).hexdigest()[:8]
obj_name = f"{Path(cuda_path).stem}_{path_hash}.o"
ninja.append(f"build {obj_name}: compile_cuda
{cuda_path.replace(':', '$:')}")
```
##########
python/tvm_ffi/cpp/load_inline.py:
##########
@@ -702,3 +760,94 @@ def load_inline(
build_directory=build_directory,
)
)
+
+
+def build(
+ name: str,
+ *,
+ cpp_files: Sequence[str] | str | None = None,
+ cuda_files: Sequence[str] | str | None = None,
+ extra_cflags: Sequence[str] | None = None,
+ extra_cuda_cflags: Sequence[str] | None = None,
+ extra_ldflags: Sequence[str] | None = None,
+ extra_include_paths: Sequence[str] | None = None,
+ build_directory: str | None = None,
+) -> str:
+ """Compile and build a C++/CUDA module from source files."""
+ # need to resolve the path to make it unique
+ cpp_path_list = [str(Path(p).resolve()) for p in _str_seq2list(cpp_files)]
+ cuda_path_list = [str(Path(p).resolve()) for p in
_str_seq2list(cuda_files)]
+ with_cpp = bool(cpp_path_list)
+ with_cuda = bool(cuda_path_list)
+ assert with_cpp or with_cuda, "Either cpp_files or cuda_files must be
provided."
+
+ extra_ldflags_list = list(extra_ldflags) if extra_ldflags is not None else
[]
+ extra_cflags_list = list(extra_cflags) if extra_cflags is not None else []
+ extra_cuda_cflags_list = list(extra_cuda_cflags) if extra_cuda_cflags is
not None else []
+ extra_include_paths_list = list(extra_include_paths) if
extra_include_paths is not None else []
Review Comment:

These lines for converting optional `Sequence` arguments to lists are a bit
repetitive. You can make this more concise by using `or []`, which is a common
Python idiom for handling default empty lists.
```python
extra_ldflags_list = list(extra_ldflags or [])
extra_cflags_list = list(extra_cflags or [])
extra_cuda_cflags_list = list(extra_cuda_cflags or [])
extra_include_paths_list = list(extra_include_paths or [])
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]