childmindresearch/bids2table

Incorrect identification of BIDS files

clane9 opened this issue · 1 comments

The `is_bids_file()` function doesn't correctly detect all BIDS files, and only BIDS files. For example, the ome.zarr directories are not included in the generated table.

def is_bids_file(path: StrOrPath) -> bool:
    """
    Return whether `path` is a valid BIDS data file.

    A path is rejected when it is a directory, when its file name does not
    begin with "sub-", or when it is a JSON sidecar associated to another
    data file.
    """
    # TODO: other checks?
    #   - skip files matching patterns in .bidsignore?
    path = Path(path)

    # Guard clauses run in the same order as the checks they replace, so the
    # sidecar lookup is only reached for non-directory "sub-" entries.
    if path.is_dir():
        return False
    if not path.name.startswith("sub-"):
        return False
    return not is_associated_sidecar(path)

A possibly better way to identify BIDS files would be to use bidsschematools and match the extension against the known BIDS extensions.

cc @effigies.

@clane9, to test this further, I am initializing bids2table in a few different CPAC output directories and listing the files that are not included in the table.
It looks like, in most of my cases, it's the .json files that are not associated with the image files.

  1. abcd-options output dir
    'func/sub-0025428_ses-1_task-rest_run-1_atlas-DesikanKilliany_space-fsLR_den32k.dlabel.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_midthickness.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_midthickness.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_desc-summary_motion.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_desc-FS_strainR.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_desc-regReg_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_desc-FS_edgedistortion.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-FS_strainR.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_inflated.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-FS_arealdistortion.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_curv.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_white.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_curv.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-164k_white.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_wbSpec.spec.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_pial.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_pial.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_desc-MSMSulc_strainJ.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_white.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-atlasroi_mask.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_desc-FS_edgedistortion.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_veryinflated.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_midthickness.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-164k_midthickness.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_desc-FS_strainJ.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_veryinflated.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-FS_strainJ.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_atlas-DesikanKilliany_space-fsLR_den164k.dlabel.json'
    'func/sub-0025428_ses-1_task-rest_run-1_from-bold_to-T1w_mode-image_desc-linear_xfm.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_desc-MSMSulc_strainR.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_desc-MSMSulc_strainR.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_desc-movementParameters_motion.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_den-32k_pial.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-MSMSulc_edgedistortion.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-MSMSulc_strainR.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_desc-atlasroi_mask.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_desc-MSMSulc_edgedistortion.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_sulc.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_desc-FS_strainJ.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_curv.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_bold.func.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_thickness.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_veryinflated.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_surf-R_reho.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_desc-FS_arealdistortion.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-native_wbSpec.spec.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_desc-rot_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_white.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-MSMSulc_strainJ.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-FS_edgedistortion.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_desc-reg_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_den-32k_pial.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_desc-MSMSulc_arealdistortion.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_desc-regReg_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_inflated.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_desc-MSMSulc_arealdistortion.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_white.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-atlasroi_bold.func.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_desc-FS_strainR.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_flat.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_thickness.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_flat.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_thickness.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_desc-rot_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_motion.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_sulc.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-164k_midthickness.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_bold.func.json'
    'func/sub-0025428_ses-1_task-rest_run-1_atlas-Destrieux_space-fsLR_den164k.dlabel.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_desc-FS_arealdistortion.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_sulc.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_inflated.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_desc-MSMSulc_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-32k_veryinflated.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_atlas-Destrieux_space-fsLR_den32k.dlabel.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-fsLR_den-32k_desc-MSMSulc_arealdistortion.shape.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-fsLR_den-164k_white.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_desc-reg_sphere.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_desc-MSMSulc_edgedistortion.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_surf-L_reho.dscalar.json'
    'func/sub-0025428_ses-1_task-rest_run-1_space-fsLR_den-32k_bold.dtseries.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-L_space-native_inflated.surf.json'
    'func/sub-0025428_ses-1_task-rest_run-1_hemi-R_space-native_desc-MSMSulc_sphere.surf.json'

  2. Benchmark FNIRT
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-3_desc-PartialNilearn3_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-2_desc-boldSnr2_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-2_desc-Voxel5_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-2_desc-PearsonNilearn5_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-3_desc-Voxel6_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-3_desc-boldSnrAxial3_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-3_desc-boldSnr6_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-1_desc-Mean4_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-3_desc-PearsonNilearn3_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-framewiseDisplacementJenkinsonPlot_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-2_desc-Mean5_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-1_desc-PearsonNilearn4_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-3_desc-PearsonNilearn6_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-3_desc-PartialNilearn6_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-1_desc-Voxel4_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-2_desc-Voxel2_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-3_desc-boldSnrHist6_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-2_desc-boldSnr5_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-3_desc-boldSnrSagittal3_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-3_desc-boldSnr3_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-1_desc-ndmg1_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-1_desc-boldSnrHist1_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-2_desc-PearsonNilearn2_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-3_desc-boldSnrHist3_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-3_desc-confounds_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_motion.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-1_desc-confounds_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-2_desc-SpatReg2_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-1_desc-Mean1_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-1_desc-boldSnr1_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-1_desc-boldSnrSagittal4_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-2_desc-boldSnrAxial5_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-2_desc-ndmg5_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-2_desc-boldSnrAxial2_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-1_desc-PartialNilearn1_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-1_desc-PartialNilearn4_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-1_desc-boldSnr4_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-3_desc-SpatReg3_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-2_desc-boldSnrSagittal2_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-1_desc-SpatReg4_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-movementParametersTrans_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-movementParameters_motion.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-1_desc-Voxel1_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-movementParametersRot_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-1_desc-SpatReg1_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-2_desc-ndmg2_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-1_desc-ndmg4_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-2_desc-SpatReg5_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-2_desc-boldSnrHist5_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-2_desc-boldSnrSagittal5_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-3_desc-Mean3_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-3_desc-ndmg3_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-3_desc-Mean6_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-2_desc-Mean2_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-3_desc-ndmg6_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-2_desc-PartialNilearn5_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-summary_motion.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-3_desc-SpatReg6_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-1_desc-boldSnrSagittal1_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-1_desc-PearsonNilearn1_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-2_desc-confounds_timeseries.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-3_desc-boldSnrAxial6_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-1_desc-boldSnrAxial1_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_space-MNI152NLin6ASym_reg-2_desc-PartialNilearn2_correlations.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-3_desc-boldSnrSagittal6_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-1_desc-boldSnrHist4_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-2_desc-boldSnrHist2_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_reg-1_desc-boldSnrAxial4_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_from-bold_to-T1w_mode-image_desc-linear_xfm.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_atlas-rois_reg-3_desc-Voxel3_timeseries.json'

  3. ccs-options
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-boldSnrAxial_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-boldSnr_quality.json'
    'ses-1/anat/sub-0025428_ses-1_desc-dsegSagittal_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-boldSnrSagittal_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-movementParametersTrans_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-framewiseDisplacementJenkinsonPlot_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-movementParametersRot_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_motion.json'
    'ses-1/anat/sub-0025428_ses-1_desc-dsegAxial_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_from-bold_to-T1w_mode-image_desc-linear_xfm.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-movementParameters_motion.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-boldSnrHist_quality.json'
    'ses-1/func/sub-0025428_ses-1_task-rest_run-1_desc-summary_motion.json'