MultiFileTileSource: specify backing source module
Closed this issue · 6 comments
Could we tell the `MultiFileTileSource` to use the `GDALFileTileSource` to open each of the images?
I have a lot of pyramidal TIFFs (non-geospatial), and I compose them in YAML to stitch them together for use with `MultiFileTileSource`. However, the images are all stored on S3, and I have hundreds of thousands of them. I want to be able to use GDAL's virtual file system and `/vsis3/` for performant reads directly from S3 (without downloading any data locally).
From my reading of the `MultiFileTileSource` code, I think this is feasible, but I would appreciate any clarifying details. Alternatively, is it possible to performantly load pyramidal TIFFs from cloud storage with another tile source?
You can specify the "sourceName" of each source, which I think lets you do this.
Ooh! This is a part of the YAML spec itself, I see now!
However, this is leading to an error with the `gdal` source:
TypeError: large_image.tilesource.base.TileSource.tileIterator() got multiple values for keyword argument 'resample'
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[14], line 1
----> 1 core.getThumbnail()[0]
File /python/lib/python3.11/site-packages/large_image/cache_util/cache.py:106, in methodcache.<locals>.decorator.<locals>.wrapper(self, *args, **kwargs)
103 except (ValueError, pickle.UnpicklingError):
104 # this can happen if a different version of python wrote the record
105 pass
--> 106 v = func(self, *args, **kwargs)
107 try:
108 if lock:
File /python/lib/python3.11/site-packages/large_image/tilesource/base.py:1599, in TileSource.getThumbnail(self, width, height, **kwargs)
1597 params['output'] = {'maxWidth': width, 'maxHeight': height}
1598 params.pop('region', None)
-> 1599 return self.getRegion(**params)
File /python/lib/python3.11/site-packages/large_image/tilesource/base.py:1748, in TileSource.getRegion(self, format, **kwargs)
1746 kwargs = kwargs.copy()
1747 resample = kwargs.pop('resample', None)
-> 1748 tileIter = TileIterator(self, format=TILE_FORMAT_NUMPY, resample=None, **kwargs)
1749 if tileIter.info is None:
1750 pilimage = PIL.Image.new('RGB', (0, 0))
File /python/lib/python3.11/site-packages/large_image/tilesource/tileiterator.py:33, in TileIterator.__init__(self, source, format, resample, **kwargs)
31 self.resample = resample
32 iterFormat = format if resample in (False, None) else (TILE_FORMAT_PIL, )
---> 33 self.info = self._tileIteratorInfo(format=iterFormat, resample=resample, **kwargs)
34 if self.info is None:
35 self._iter = None
File /python/lib/python3.11/site-packages/large_image/tilesource/tileiterator.py:212, in TileIterator._tileIteratorInfo(self, **kwargs)
210 return None
211 mag = source.getMagnificationForLevel(magLevel)
--> 212 metadata = source.metadata
213 left, top, right, bottom = source._getRegionBounds(
214 metadata, desiredMagnification=mag, **kwargs.get('region', {}))
215 regionWidth = right - left
File /python/lib/python3.11/site-packages/large_image/tilesource/base.py:1315, in TileSource.metadata(self)
1313 @property
1314 def metadata(self) -> JSONDict:
-> 1315 return self.getMetadata()
File /python/lib/python3.11/site-packages/large_image_source_multi/__init__.py:944, in MultiFileTileSource.getMetadata(self)
937 def getMetadata(self):
938 """
939 Return a dictionary of metadata containing levels, sizeX, sizeY,
940 tileWidth, tileHeight, magnification, mm_x, mm_y, and frames.
941
942 :returns: metadata dictionary.
943 """
--> 944 result = super().getMetadata()
945 if len(self._frames) > 1:
946 result['frames'] = [
947 {k: v for k, v in frame.items() if k.startswith('Index')}
948 for frame in self._frames]
File /python/lib/python3.11/site-packages/large_image/tilesource/base.py:1309, in TileSource.getMetadata(self)
1254 """
1255 Return metadata about this tile source. This contains
1256
(...)
1297 sources may do so.
1298 """
1299 mag = self.getNativeMagnification()
1300 return JSONDict({
1301 'levels': self.levels,
1302 'sizeX': self.sizeX,
1303 'sizeY': self.sizeY,
1304 'tileWidth': self.tileWidth,
1305 'tileHeight': self.tileHeight,
1306 'magnification': mag['magnification'],
1307 'mm_x': mag['mm_x'],
1308 'mm_y': mag['mm_y'],
-> 1309 'dtype': str(self.dtype),
1310 'bandCount': self.bandCount,
1311 })
File /python/lib/python3.11/site-packages/large_image/tilesource/base.py:290, in TileSource.dtype(self)
287 if not self._dtype:
288 self._dtype = 'check'
289 sample, _ = cast(Tuple[np.ndarray, Any], getattr(
--> 290 self, '_unstyledInstance', self).getRegion(
291 region=dict(left=0, top=0, width=1, height=1),
292 format=TILE_FORMAT_NUMPY))
293 self._dtype = sample.dtype
294 self._bandCount = len(
295 getattr(getattr(self, '_unstyledInstance', self), '_bandInfo', []))
File /python/lib/python3.11/site-packages/large_image/tilesource/base.py:1763, in TileSource.getRegion(self, format, **kwargs)
1760 tiledimage = None
1761 for tile in tileIter:
1762 # Add each tile to the image
-> 1763 subimage, _ = _imageToNumpy(tile['tile'])
1764 x0, y0 = tile['x'] - left, tile['y'] - top
1765 if tiled:
File /python/lib/python3.11/site-packages/large_image/tilesource/tiledict.py:185, in LazyTileDict.__getitem__(self, key, *args, **kwargs)
183 tileData = tileData[self.crop[1]:self.crop[3], self.crop[0]:self.crop[2]]
184 else:
--> 185 tileData = self._retileTile()
187 pilData = None
188 # resample if needed
File /python/lib/python3.11/site-packages/large_image/tilesource/tiledict.py:139, in LazyTileDict._retileTile(self)
137 for y in range(ymin, ymax):
138 for x in range(xmin, xmax):
--> 139 tileData = self.source.getTile(
140 x, y, level,
141 numpyAllowed='always', sparseFallback=True, frame=frame)
142 if not isinstance(tileData, np.ndarray) or len(tileData.shape) != 3:
143 tileData, _ = _imageToNumpy(tileData)
File /python/lib/python3.11/site-packages/large_image/cache_util/cache.py:106, in methodcache.<locals>.decorator.<locals>.wrapper(self, *args, **kwargs)
103 except (ValueError, pickle.UnpicklingError):
104 # this can happen if a different version of python wrote the record
105 pass
--> 106 v = func(self, *args, **kwargs)
107 try:
108 if lock:
File /python/lib/python3.11/site-packages/large_image_source_multi/__init__.py:1286, in MultiFileTileSource.getTile(self, x, y, z, pilImageAllowed, numpyAllowed, **kwargs)
1284 # Add each source to the tile
1285 for sourceEntry in sourceList:
-> 1286 tile = self._addSourceToTile(tile, sourceEntry, corners, scale)
1287 if tile is None:
1288 # TODO number of channels?
1289 colors = self._info.get('backgroundColor', [0])
File /python/lib/python3.11/site-packages/large_image_source_multi/__init__.py:1218, in MultiFileTileSource._addSourceToTile(self, tile, sourceEntry, corners, scale)
1216 region[key] = int(round(region[key]))
1217 self.logger.debug('getRegion: ts: %r, region: %r, output: %r', ts, region, output)
-> 1218 sourceTile, _ = ts.getRegion(
1219 region=region, output=output, frame=sourceEntry.get('frame', 0),
1220 resample=None, format=TILE_FORMAT_NUMPY)
1221 else:
1222 sourceTile, x, y = self._getTransformedTile(
1223 ts, transform, corners, scale, sourceEntry.get('frame', 0),
1224 source.get('position', {}).get('crop'))
File /python/lib/python3.11/site-packages/large_image_source_gdal/__init__.py:882, in GDALFileTileSource.getRegion(self, format, **kwargs)
880 format = (format, )
881 # The tile iterator handles determining the output region
--> 882 iterInfo = self.tileIterator(format=TILE_FORMAT_NUMPY, resample=None, **kwargs).info
883 # Only use gdal.Warp of the original image if the region has not been
884 # styled.
885 useGDALWarp = (
886 iterInfo and
887 not self._jsonstyle and
888 TILE_FORMAT_IMAGE in format and
889 kwargs.get('encoding') == 'TILED')
TypeError: large_image.tilesource.base.TileSource.tileIterator() got multiple values for keyword argument 'resample'
Hmm... The fix is probably:
diff --git a/sources/gdal/large_image_source_gdal/__init__.py b/sources/gdal/large_image_source_gdal/__init__.py
index ba484e9e..2033eeb5 100644
--- a/sources/gdal/large_image_source_gdal/__init__.py
+++ b/sources/gdal/large_image_source_gdal/__init__.py
@@ -879,6 +879,9 @@ class GDALFileTileSource(GDALBaseFileTileSource, metaclass=LruCacheMetaclass):
if not isinstance(format, (tuple, set, list)):
format = (format, )
# The tile iterator handles determining the output region
+ if 'resample' in kwargs:
+ kwargs = kwargs.copy()
+ kwargs.pop('resample')
iterInfo = self.tileIterator(format=TILE_FORMAT_NUMPY, resample=None, **kwargs).info
# Only use gdal.Warp of the original image if the region has not been
# styled.
#1640 fixes that issue, thanks!
However, there are some quite different results when using the `GDALFileTileSource` source vs. the `VipsFileTileSource` behind the `MultiFileTileSource`. I'll try to make a reproducible example.
> However, there are some quite different results when using the `GDALFileTileSource` source vs. the `VipsFileTileSource` behind the `MultiFileTileSource`. I'll try to make a reproducible example.
For probably bad historical reasons, our geospatial sources (even with non-geospatial data) have a default style. Try adding `style: {}` to the gdal sources.
> Try adding `style: {}` to the gdal sources.
Aha this did it! Thank you!!