broadinstitute/fiss

BUG: config_get does not return valid json on the CLI for String and File variables

Closed this issue · 5 comments

String and File outputs and inputs should be delimited like so: "\"<value>\"" and are as such when accessed via the API via either the documentation or curl command.

config_get removes the escape characters from the inner double quotes in the CLI, resulting in JSON files that are invalid, and thus not usable by config_put.

e.g.
https://api.firecloud.org/api/workspaces/nci-dheiman-b-org/analyses_packaging/method_configs/broadgdac/mRNA_Preprocess

{
  "name": "CopyNumber_Gistic2",
  "methodRepoMethod": {
    "methodNamespace": "broadgdac",
    "methodName": "copy_number_gistic2",
    "methodVersion": 18
  },
  "outputs": {
    "copy_number_gistic2.report_gistic2.del_o": "this.gistic2__del",
    "copy_number_gistic2.report_gistic2.amp_o": "this.gistic2__amp",
    "copy_number_gistic2.tool_gistic2.broad_values_by_arm": "this.gistic2__broad_values_by_arm",
    "copy_number_gistic2.tool_gistic2.all_data_by_genes": "this.gistic2__all_data_by_genes",
    "copy_number_gistic2.tool_gistic2.all_lesions": "this.gistic2__all_lesions",
    "copy_number_gistic2.tool_gistic2.all_thresholded_by_genes": "this.gistic2__all_thresholded_by_genes",
    "copy_number_gistic2.report_gistic2.report_gistic2_pkg": "this.pkg__CopyNumber_Gistic2",
    "copy_number_gistic2.report_gistic2.broad_sig_res_o": "this.gistic2__broad_sig_res"
  },
  "inputs": {
    "copy_number_gistic2.tool_gistic2.seg_file": "this.CNV__snp6",
    "copy_number_gistic2.tool_gistic2.do_gene_gistic": "1",
    "copy_number_gistic2.tool_gistic2.remove_X": "0",
    "copy_number_gistic2.tool_gistic2.refgene_file": "\"gs://broad-institute-gdac/gdc/reference/gistic/gistic2.refgene.hg38.UCSC.add_miR.160920.mat\"",
    "copy_number_gistic2.tool_gistic2.amp_thresh": "0.1",
    "copy_number_gistic2.tool_gistic2.markers_file": "\"gs://broad-institute-gdac/gdc/reference/gistic/gistic2.markers.snp6.na35.liftoverhg38.txt\"",
    "copy_number_gistic2.report_gistic2.type": "",
    "copy_number_gistic2.tool_gistic2.cnv_files": "\"gs://broad-institute-gdac/gdc/reference/gistic/hg38_GDC_SNP6_CNV_list.161107.txt\"",
    "copy_number_gistic2.tool_gistic2.arm_peel": "1",
    "copy_number_gistic2.tool_gistic2.cap": "1.5",
    "copy_number_gistic2.report_gistic2.package": "true",
    "copy_number_gistic2.tool_gistic2.join_segment_size": "4",
    "copy_number_gistic2.tool_gistic2.max_sample_segs": "2000",
    "copy_number_gistic2.tool_gistic2.del_thresh": "0.1",
    "copy_number_gistic2.tool_gistic2.qv_thresh": "0.25",
    "copy_number_gistic2.tool_gistic2.conf_level": "0.99",
    "copy_number_gistic2.tool_gistic2.broad_length_cutoff": "0.98",
    "copy_number_gistic2.tool_gistic2.gene_collapse_method": "\"extreme\""
  },
  "rootEntityType": "sample_set",
  "prerequisites": {},
  "methodConfigVersion": 3,
  "deleted": false,
  "namespace": "broadgdac"
}

% fissfc config_get -w analyses_packaging -c CopyNumber_Gistic2

{
  "name": "CopyNumber_Gistic2",
  "methodRepoMethod": {
    "methodNamespace": "broadgdac",
    "methodName": "copy_number_gistic2",
    "methodVersion": 18
  },
  "outputs": {
    "copy_number_gistic2.report_gistic2.del_o": "this.gistic2__del",
    "copy_number_gistic2.report_gistic2.amp_o": "this.gistic2__amp",
    "copy_number_gistic2.tool_gistic2.broad_values_by_arm": "this.gistic2__broad_values_by_arm",
    "copy_number_gistic2.tool_gistic2.all_data_by_genes": "this.gistic2__all_data_by_genes",
    "copy_number_gistic2.tool_gistic2.all_lesions": "this.gistic2__all_lesions",
    "copy_number_gistic2.tool_gistic2.all_thresholded_by_genes": "this.gistic2__all_thresholded_by_genes",
    "copy_number_gistic2.report_gistic2.report_gistic2_pkg": "this.pkg__CopyNumber_Gistic2",
    "copy_number_gistic2.report_gistic2.broad_sig_res_o": "this.gistic2__broad_sig_res"
  },
  "inputs": {
    "copy_number_gistic2.tool_gistic2.seg_file": "this.CNV__snp6",
    "copy_number_gistic2.tool_gistic2.do_gene_gistic": "1",
    "copy_number_gistic2.tool_gistic2.remove_X": "0",
    "copy_number_gistic2.tool_gistic2.refgene_file": ""gs://broad-institute-gdac/gdc/reference/gistic/gistic2.refgene.hg38.UCSC.add_miR.160920.mat"",
    "copy_number_gistic2.tool_gistic2.amp_thresh": "0.1",
    "copy_number_gistic2.tool_gistic2.markers_file": ""gs://broad-institute-gdac/gdc/reference/gistic/gistic2.markers.snp6.na35.liftoverhg38.txt"",
    "copy_number_gistic2.report_gistic2.type": "",
    "copy_number_gistic2.tool_gistic2.cnv_files": ""gs://broad-institute-gdac/gdc/reference/gistic/hg38_GDC_SNP6_CNV_list.161107.txt"",
    "copy_number_gistic2.tool_gistic2.arm_peel": "1",
    "copy_number_gistic2.tool_gistic2.cap": "1.5",
    "copy_number_gistic2.report_gistic2.package": "true",
    "copy_number_gistic2.tool_gistic2.join_segment_size": "4",
    "copy_number_gistic2.tool_gistic2.max_sample_segs": "2000",
    "copy_number_gistic2.tool_gistic2.del_thresh": "0.1",
    "copy_number_gistic2.tool_gistic2.qv_thresh": "0.25",
    "copy_number_gistic2.tool_gistic2.conf_level": "0.99",
    "copy_number_gistic2.tool_gistic2.broad_length_cutoff": "0.98",
    "copy_number_gistic2.tool_gistic2.gene_collapse_method": ""extreme""
  },
  "rootEntityType": "sample_set",
  "prerequisites": {

  },
  "methodConfigVersion": 3,
  "deleted": false,
  "namespace": "broadgdac"
}

Good catch. Are you intending to address these ASAP, before I return, or wait until after?

Also, if you ARE intending to address this yourself ... please tweak the highlevel regression test to exercise this edge case and ensure it never reappears, ok?

Also, another way to play with this is to see if the low-level API does preserve the quote-escapes, which I haven't explored yet

I found the issue - printToCLI wipes out the escapes. The easiest solution seems to be adding a test for isinstance(value, six.text_type) (python version-agnostic unicode string check) and simply printing the unicode string rather than always running print(u("{0}".format(value))).

Since the .text aspect of requests' response objects are always in unicode, this should be a viable fix.

Love it: well contained and applicable to everything else that also makes implicit use of printToCLI()