diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 45f58eb8ac93cf..4f1f949f7a8c92 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2723,6 +2723,9 @@ def makelink_with_filter(self, tarinfo, targetpath, return else: if os.path.exists(tarinfo._link_target): + if os.path.lexists(targetpath): + # Avoid FileExistsError on following os.link. + os.unlink(targetpath) os.link(tarinfo._link_target, targetpath) return except symlink_exception: diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 28914df6b010d0..b23ff34bc327c6 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -841,6 +841,57 @@ def test_next_on_empty_tarfile(self): with tarfile.open(fileobj=fd, mode="r") as tf: self.assertEqual(tf.next(), None) + def _setup_symlink_to_target(self, temp_dirpath): + target_filepath = os.path.join(temp_dirpath, "target") + ustar_dirpath = os.path.join(temp_dirpath, "ustar") + hardlink_filepath = os.path.join(ustar_dirpath, "lnktype") + with open(target_filepath, "wb") as f: + f.write(b"target") + os.makedirs(ustar_dirpath) + os.symlink(target_filepath, hardlink_filepath) + return target_filepath, hardlink_filepath + + def _assert_on_file_content(self, filepath, digest): + with open(filepath, "rb") as f: + data = f.read() + self.assertEqual(sha256sum(data), digest) + + @unittest.skipUnless( + hasattr(os, "link"), "Missing hardlink implementation" + ) + @os_helper.skip_unless_symlink + def test_extract_hardlink_on_symlink(self): + """ + This test verifies that extracting a hardlink will not follow an + existing symlink after a FileExistsError on os.link. + """ + with os_helper.temp_dir() as DIR: + target_filepath, hardlink_filepath = self._setup_symlink_to_target(DIR) + with tarfile.open(tarname, encoding="iso8859-1") as tar: + tar.extract("ustar/regtype", DIR, filter="data") + tar.extract("ustar/lnktype", DIR, filter="data") + self._assert_on_file_content(target_filepath, sha256sum(b"target")) + self._assert_on_file_content(hardlink_filepath, sha256_regtype) + + @unittest.skipUnless( + hasattr(os, "link"), "Missing hardlink implementation" + ) + @os_helper.skip_unless_symlink + def test_extractall_hardlink_on_symlink(self): + """ + This test verifies that extracting a hardlink will not follow an + existing symlink after a FileExistsError on os.link. + """ + with os_helper.temp_dir() as DIR: + target_filepath, hardlink_filepath = self._setup_symlink_to_target(DIR) + with tarfile.open(tarname, encoding="iso8859-1") as tar: + tar.extractall( + DIR, members=["ustar/regtype", "ustar/lnktype"], filter="data", + ) + self._assert_on_file_content(target_filepath, sha256sum(b"target")) + self._assert_on_file_content(hardlink_filepath, sha256_regtype) + + class MiscReadTest(MiscReadTestBase, unittest.TestCase): test_fail_comp = None diff --git a/Misc/NEWS.d/next/Library/2025-08-01-23-52-49.gh-issue-75989.5aYXNJ.rst b/Misc/NEWS.d/next/Library/2025-08-01-23-52-49.gh-issue-75989.5aYXNJ.rst new file mode 100644 index 00000000000000..00b15503b50ba3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-01-23-52-49.gh-issue-75989.5aYXNJ.rst @@ -0,0 +1,3 @@ +:func:`tarfile.TarFile.extractall` and :func:`tarfile.TarFile.extract` now +overwrite symlinks when extracting hardlinks. +(Contributed by Alexander Enrique Urieles Nieto in :gh:`75989`.)