meggart/DiskArrays.jl

With changes to chunking, `copyto!` now fails in places it used to succeed.

rafaqz opened this issue · 3 comments

Writing with GDAL to a non-chunked file like ".rst" from a chunked file like ".tif" gives the dreaded

Chunks do not align in dimension 2

error.

This is an unfortunate effect of the recent changes. I'm not sure of the exact reason.

An example stack trace. In this case this happens when the internal data in Rasters.jl is a regular array, and so returns Unchunked(). So the guessed eachchunk is just the size of the array - but this doesn't match the chunks of the "MEM" RasterDataset used in warp.

Can we just guess (in copyto?) that eachchunk matches the Chunked array when one of them is Unchunked?

Probably we need some tests of this with Unchunked as well. I assumed everything would just work with Unchunked() as it imposes no chunk structure of its own.

fieldnames(typeof(rds)) = (:ds, :size)
ERROR: Chunks do not align in dimension 2
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:33
  [2] #50
    @ ~/.julia/packages/DiskArrays/7xJDq/src/ops.jl:56 [inlined]
  [3] ntuple(f::DiskArrays.var"#50#55"{Tuple{Int64, Int64}, Vector{DiskArrays.GridChunks}}, n::Int64)
    @ Base ./ntuple.jl:19
  [4] common_chunks(::Tuple{Int64, Int64}, ::ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}, ::Vararg{Any})
    @ DiskArrays ~/.julia/packages/DiskArrays/7xJDq/src/ops.jl:52
  [5] copyto!(dest::ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}, bc::Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{2}, Nothing, typeof(identity), Tuple{DiskArrays.PermutedDiskArray{Int32, 2, PermutedDimsArray{Int32, 2, (1, 2), (1, 2), DiskArrays.SubDiskArray{Int32, 2}}}}})
    @ DiskArrays ~/.julia/packages/DiskArrays/7xJDq/src/ops.jl:34
  [6] copyto!(dest::ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}, bc::Base.Broadcast.Broadcasted{DimensionalData.DimensionalStyle{DiskArrays.ChunkStyle{2}}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.PermutedDiskArray{Int32, 2, PermutedDimsArray{Int32, 2, (1, 2), (1, 2), DiskArrays.SubDiskArray{Int32, 2}}}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32}}})
    @ DimensionalData ~/.julia/dev/DimensionalData/src/array/broadcast.jl:49
  [7] materialize!
    @ ./broadcast.jl:871 [inlined]
  [8] materialize!
    @ ./broadcast.jl:868 [inlined]
  [9] (::Rasters.var"#850#851"{ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}})(a::Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.PermutedDiskArray{Int32, 2, PermutedDimsArray{Int32, 2, (1, 2), (1, 2), DiskArrays.SubDiskArray{Int32, 2}}}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32})
    @ Rasters ~/.julia/dev/Rasters/src/sources/gdal.jl:374
 [10] (::Rasters.var"#28#29"{Rasters.var"#850#851"{ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}}, Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.PermutedDiskArray{Int32, 2, PermutedDimsArray{Int32, 2, (1, 2), (1, 2), DiskArrays.SubDiskArray{Int32, 2}}}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32}, Union, UnionAll})(x::ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset})
    @ Rasters ~/.julia/dev/Rasters/src/array.jl:164
 [11] (::ComposedFunction{typeof(Rasters.cleanreturn), Rasters.var"#28#29"{Rasters.var"#850#851"{ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}}, Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.PermutedDiskArray{Int32, 2, PermutedDimsArray{Int32, 2, (1, 2), (1, 2), DiskArrays.SubDiskArray{Int32, 2}}}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32}, Union, UnionAll}})(x::ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Base ./operators.jl:1085
 [12] (::ComposedFunction{typeof(Rasters.cleanreturn), Rasters.var"#28#29"{Rasters.var"#850#851"{ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}}, Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.PermutedDiskArray{Int32, 2, PermutedDimsArray{Int32, 2, (1, 2), (1, 2), DiskArrays.SubDiskArray{Int32, 2}}}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32}, Union, UnionAll}})(x::ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset})
    @ Base ./operators.jl:1085
 [13] readraster(f::ComposedFunction{typeof(Rasters.cleanreturn), Rasters.var"#28#29"{Rasters.var"#850#851"{ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}}, Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.PermutedDiskArray{Int32, 2, PermutedDimsArray{Int32, 2, (1, 2), (1, 2), DiskArrays.SubDiskArray{Int32, 2}}}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32}, Union, UnionAll}}, args::String; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ ArchGDAL ~/.julia/packages/ArchGDAL/kmLdS/src/context.jl:267
 [14] readraster(f::Function, args::String)
    @ ArchGDAL ~/.julia/packages/ArchGDAL/kmLdS/src/context.jl:265
 [15] _open(f::Function, ::Type{Rasters.GDALfile}, filename::String; write::Bool, kw::Base.Pairs{Symbol, Nothing, Tuple{Symbol}, NamedTuple{(:key,), Tuple{Nothing}}})
    @ Rasters ~/.julia/dev/Rasters/src/sources/gdal.jl:255
 [16] #open#19
    @ ~/.julia/dev/Rasters/src/filearray.jl:40 [inlined]
 [17] open(f::Function, A::Rasters.FileArray{Rasters.GDALfile, Int32, 3, Nothing, DiskArrays.GridChunks{3}, DiskArrays.Chunked})
    @ Rasters ~/.julia/dev/Rasters/src/filearray.jl:40
 [18] open(f::Rasters.var"#850#851"{ArchGDAL.RasterDataset{Int32, ArchGDAL.Dataset}}, A::Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.PermutedDiskArray{Int32, 2, PermutedDimsArray{Int32, 2, (1, 2), (1, 2), DiskArrays.SubDiskArray{Int32, 2}}}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Rasters ~/.julia/dev/Rasters/src/array.jl:161
 [19] open(f::Function, A::Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.PermutedDiskArray{Int32, 2, PermutedDimsArray{Int32, 2, (1, 2), (1, 2), DiskArrays.SubDiskArray{Int32, 2}}}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32})
    @ Rasters ~/.julia/dev/Rasters/src/array.jl:155
 [20] ArchGDAL.Dataset(f::Rasters.var"#559#562"{Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.SubDiskArray{Int32, 2}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32}, Tuple{}, Vector{Any}}, A::Raster{Int32, 2, Tuple{X{Projected{Float64, LinRange{Float64, Int64}, ForwardOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, X{Colon}}}, Y{Projected{Float64, LinRange{Float64, Int64}, ReverseOrdered, Regular{Float64}, Intervals{Start}, Metadata{Rasters.GDALfile, Dict{Any, Any}}, WellKnownText{GeoFormatTypes.CRS, String}, Nothing, Y{Colon}}}}, Tuple{Band{Categorical{Int64, UnitRange{Int64}, ForwardOrdered, NoMetadata}}}, DiskArrays.SubDiskArray{Int32, 2}, Symbol, Metadata{Rasters.GDALfile, Dict{Symbol, Any}}, Int32}; filename::Nothing)
    @ Rasters ~/.julia/dev/Rasters/src/sources/gdal.jl:372

Hi @meggart

This is an example of printing the chunks that are clashing in dimension 2 (where dim 1 is 1:514 and 3 is 1:1), in commonchunks line 58 of ops.jl:

s.chunks[n] = UnitRange[1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 10:10, 11:11, 12:12, 13:13, 14:14, 15:15, 16:16, 17:17, 18:18, 19:19, 20:20, 21:21, 22:22, 23:23, 24:24, 25:25, 26:26, 27:27, 28:28, 29:29, 30:30, 31:31, 32:32, 33:33, 34:34, 35:35, 36:36, 37:37, 38:38, 39:39, 40:40, 41:41, 42:42, 43:43, 44:44, 45:45, 46:46, 47:47, 48:48, 49:49, 50:50, 51:51, 52:52, 53:53, 54:54, 55:55, 56:56, 57:57, 58:58, 59:59, 60:60, 61:61, 62:62, 63:63, 64:64, 65:65, 66:66, 67:67, 68:68, 69:69, 70:70, 71:71, 72:72, 73:73, 74:74, 75:75, 76:76, 77:77, 78:78, 79:79, 80:80, 81:81, 82:82, 83:83, 84:84, 85:85, 86:86, 87:87, 88:88, 89:89, 90:90, 91:91, 92:92, 93:93, 94:94, 95:95, 96:96, 97:97, 98:98, 99:99, 100:100, 101:101, 102:102, 103:103, 104:104, 105:105, 106:106, 107:107, 108:108, 109:109, 110:110, 111:111, 112:112, 113:113, 114:114, 115:115, 116:116, 117:117, 118:118, 119:119, 120:120, 121:121, 122:122, 123:123, 124:124, 125:125, 126:126, 127:127, 128:128, 129:129, 130:130, 131:131, 132:132, 133:133, 134:134, 135:135, 136:136, 137:137, 138:138, 139:139, 140:140, 141:141, 142:142, 143:143, 144:144, 145:145, 146:146, 147:147, 148:148, 149:149, 150:150, 151:151, 152:152, 153:153, 154:154, 155:155, 156:156, 157:157, 158:158, 159:159, 160:160, 161:161, 162:162, 163:163, 164:164, 165:165, 166:166, 167:167, 168:168, 169:169, 170:170, 171:171, 172:172, 173:173, 174:174, 175:175, 176:176, 177:177, 178:178, 179:179, 180:180, 181:181, 182:182, 183:183, 184:184, 185:185, 186:186, 187:187, 188:188, 189:189, 190:190, 191:191, 192:192, 193:193, 194:194, 195:195, 196:196, 197:197, 198:198, 199:199, 200:200, 201:201, 202:202, 203:203, 204:204, 205:205, 206:206, 207:207, 208:208, 209:209, 210:210, 211:211, 212:212, 213:213, 214:214, 215:215, 216:216, 217:217, 218:218, 219:219, 220:220, 221:221, 222:222, 223:223, 224:224, 225:225, 226:226, 227:227, 228:228, 229:229, 230:230, 231:231, 232:232, 233:233, 234:234, 235:235, 236:236, 237:237, 238:238, 239:239, 240:240, 241:241, 242:242, 243:243, 244:244, 245:245, 246:246, 247:247, 248:248, 249:249, 250:250, 251:251, 252:252, 253:253, 254:254, 255:255, 256:256, 257:257, 258:258, 259:259, 260:260, 261:261, 262:262, 263:263, 264:264, 265:265, 266:266, 267:267, 268:268, 269:269, 270:270, 271:271, 272:272, 273:273, 274:274, 275:275, 276:276, 277:277, 278:278, 279:279, 280:280, 281:281, 282:282, 283:283, 284:284, 285:285, 286:286, 287:287, 288:288, 289:289, 290:290, 291:291, 292:292, 293:293, 294:294, 295:295, 296:296, 297:297, 298:298, 299:299, 300:300, 301:301, 302:302, 303:303, 304:304, 305:305, 306:306, 307:307, 308:308, 309:309, 310:310, 311:311, 312:312, 313:313, 314:314, 315:315, 316:316, 317:317, 318:318, 319:319, 320:320, 321:321, 322:322, 323:323, 324:324, 325:325, 326:326, 327:327, 328:328, 329:329, 330:330, 331:331, 332:332, 333:333, 334:334, 335:335, 336:336, 337:337, 338:338, 339:339, 340:340, 341:341, 342:342, 343:343, 344:344, 345:345, 346:346, 347:347, 348:348, 349:349, 350:350, 351:351, 352:352, 353:353, 354:354, 355:355, 356:356, 357:357, 358:358, 359:359, 360:360, 361:361, 362:362, 363:363, 364:364, 365:365, 366:366, 367:367, 368:368, 369:369, 370:370, 371:371, 372:372, 373:373, 374:374, 375:375, 376:376, 377:377, 378:378, 379:379, 380:380, 381:381, 382:382, 383:383, 384:384, 385:385, 386:386, 387:387, 388:388, 389:389, 390:390, 391:391, 392:392, 393:393, 394:394, 395:395, 396:396, 397:397, 398:398, 399:399, 400:400, 401:401, 402:402, 403:403, 404:404, 405:405, 406:406, 407:407, 408:408, 409:409, 410:410, 411:411, 412:412, 413:413, 414:414, 415:415, 416:416, 417:417, 418:418, 419:419, 420:420, 421:421, 422:422, 423:423, 424:424, 425:425, 426:426, 427:427, 428:428, 429:429, 430:430, 431:431, 432:432, 433:433, 434:434, 435:435, 436:436, 437:437, 438:438, 439:439, 440:440, 441:441, 442:442, 443:443, 444:444, 445:445, 446:446, 447:447, 448:448, 449:449, 450:450, 451:451, 452:452, 453:453, 454:454, 455:455, 456:456, 457:457, 458:458, 459:459, 460:460, 461:461, 462:462, 463:463, 464:464, 465:465, 466:466, 467:467, 468:468, 469:469, 470:470, 471:471, 472:472, 473:473, 474:474, 475:475, 476:476, 477:477, 478:478, 479:479, 480:480, 481:481, 482:482, 483:483, 484:484, 485:485, 486:486, 487:487, 488:488, 489:489, 490:490, 491:491, 492:492, 493:493, 494:494, 495:495, 496:496, 497:497, 498:498, 499:499, 500:500, 501:501, 502:502, 503:503, 504:504, 505:505, 506:506, 507:507, 508:508, 509:509, 510:510, 511:511, 512:512, 513:513, 514:514, 515:515]

s.chunks[n] = UnitRange[1:15, 16:30, 31:45, 46:60, 61:75, 76:90, 91:105, 106:120, 121:135, 136:150, 151:165, 166:180, 181:195, 196:210, 211:225, 226:240, 241:255, 256:270, 271:285, 286:300, 301:315, 316:330, 331:345, 346:360, 361:375, 376:390, 391:405, 406:420, 421:435, 436:450, 451:465, 466:480, 481:495, 496:510, 511:515]
ERROR: Chunks do not align in dimension 2

To me it seems like these should be compatible? The first is single columns, while the second is blocks of 15 columns. Don't we just load 15 of the first for each block of the second? Your input would be appreciated here, I don't totally understand your recent changes to chunking and this is holding up Rasters.jl switching to 0.3 - which will start to make some problems in the ecosystem with other changes happening elsewhere.

Is there a reason we don't check if one group of chunks is a subset of the other? or that's just not implemented yet?

Just using the second set of chunks for both seems to work fine in practice already.