s3fs.exists incorrectly returns False after calling glob
bbtfr opened this issue · 0 comments
bbtfr commented
Sample code
import os
from s3fs import S3FileSystem
# Root S3 prefix under which this script creates and probes its test object.
base_path = "s3://moonshot-train-data/test_data/test_s3fs/"

# Filesystem handle built with no explicit options; every S3 call in this
# script goes through it.
fs = S3FileSystem()
def join_path(*args):
    """Return ``base_path`` joined with the given components.

    The components are appended to the module-level ``base_path`` using
    ``os.path.join``.
    """
    components = (base_path, *args)
    return os.path.join(*components)
def test_exists(path):
    """Print ``path`` followed by the result of ``fs.exists(path)``.

    ``path`` is passed to ``fs.exists`` unchanged: wrapping a single string
    in ``os.path.join`` returns that same string, so no join is needed here.
    """
    print(path, fs.exists(path))
# Create the single object that the existence checks below look for.
fs.touch(join_path("a/b/c.txt"))

# Relative paths probed in every phase, in the order they are reported:
# the object itself, then each parent level with and without a trailing slash.
probes = ("a/b/c.txt", "a/b/", "a/b", "a/", "a")


def report(header):
    """Print ``header``, then one ``test_exists`` line per entry in ``probes``."""
    print(header)
    for relative in probes:
        test_exists(join_path(relative))


# Phase 1: nothing but the touch() has happened yet.
report("=== before glob ===")

# Phase 2: run a recursive glob first, then repeat the same checks.
list(fs.glob(join_path("**/*.txt")))
report("=== after glob ===")

# Phase 3: drop the cached listings, then repeat the same checks once more.
fs.invalidate_cache()
report("=== invalidate_cache ===")
Actual output
=== before glob ===
s3://moonshot-train-data/test_data/test_s3fs/a/b/c.txt True
s3://moonshot-train-data/test_data/test_s3fs/a/b/ True
s3://moonshot-train-data/test_data/test_s3fs/a/b True
s3://moonshot-train-data/test_data/test_s3fs/a/ True
s3://moonshot-train-data/test_data/test_s3fs/a True
=== after glob ===
s3://moonshot-train-data/test_data/test_s3fs/a/b/c.txt True
s3://moonshot-train-data/test_data/test_s3fs/a/b/ True
s3://moonshot-train-data/test_data/test_s3fs/a/b True
s3://moonshot-train-data/test_data/test_s3fs/a/ False # <-- Here
s3://moonshot-train-data/test_data/test_s3fs/a False # <-- And here
=== invalidate_cache ===
s3://moonshot-train-data/test_data/test_s3fs/a/b/c.txt True
s3://moonshot-train-data/test_data/test_s3fs/a/b/ True
s3://moonshot-train-data/test_data/test_s3fs/a/b True
s3://moonshot-train-data/test_data/test_s3fs/a/ True
s3://moonshot-train-data/test_data/test_s3fs/a True
Something seems to be wrong with the `DirCache`. As a workaround, either create the `S3FileSystem` with `use_listings_cache=True` or call `invalidate_cache()`.
Python==3.11.9
fsspec==2024.5.0
s3fs==2024.5.0