Copilot commented on code in PR #63814:
URL: https://github.com/apache/airflow/pull/63814#discussion_r3066479418
##########
providers/git/src/airflow/providers/git/bundles/git.py:
##########
@@ -123,16 +123,41 @@ def _is_pruned_worktree(self) -> bool:
return False
return not (self.repo_path / ".git").exists()
+ def _local_repo_has_version(self) -> bool:
+ """Check if the local repo already has the correct version checked
out."""
+ if not self.version or not self.repo_path.is_dir() or not
(self.repo_path / ".git").exists():
+ return False
+ repo = None
+ try:
+ repo = Repo(self.repo_path)
+ expected_commit = repo.commit(self.version)
+ has_version = repo.head.commit.hexsha == expected_commit.hexsha
+ return has_version
+ except (InvalidGitRepositoryError, NoSuchPathError, BadName,
GitCommandError, ValueError):
+ return False
+ finally:
+ if repo is not None:
+ repo.close()
+
def _initialize(self):
with self.lock():
- # Avoids re-cloning on every task run when
prune_dotgit_folder=True.
+ # Avoids re-cloning on every task run when:
+ # 1. A pruned worktree already exists (prune_dotgit_folder=True)
+ # 2. The local repo already has the expected version
if self._is_pruned_worktree():
self._log.debug(
"Using existing pruned worktree",
repo_path=self.repo_path,
version=self.version,
)
return
+ if self._local_repo_has_version():
+ self._log.debug(
+ "Using existing local repo with correct version",
+ repo_path=self.repo_path,
+ version=self.version,
+ )
Review Comment:
When `_local_repo_has_version()` is true, `_initialize()` returns early
without setting `self.repo`. For versioned bundles with
`prune_dotgit_folder=False`, the normal path sets `self.repo =
Repo(self.repo_path)` and `get_current_version()` then returns `HEAD`’s full
SHA; with this early-return path `get_current_version()` will instead return
the raw `self.version` string (e.g. a tag/short SHA), which is an observable
behavior change compared to a first-time initialize. Consider opening/assigning
`self.repo` (and ensuring it’s closed) before returning, or adjusting
`get_current_version()` to resolve `HEAD` when `.git` exists even if
`self.repo` isn’t set; adding a regression test for the tag/short-SHA case
would prevent this from reappearing.
```suggestion
)
if not self.prune_dotgit_folder and self.repo is None:
try:
self.repo = Repo(self.repo_path)
except InvalidGitRepositoryError as e:
raise RuntimeError(f"Invalid git repository at
{self.repo_path}") from e
if self.repo is not None:
self.repo.close()
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]