YosefLab/ImpulseDE2

plotHeatmap Rownames

AQPH222 opened this issue · 6 comments

Hello,

When using the plotHeatmap function, I think it could be very helpful to see the gene names in each row for visualization. Looking through the function source code, I tried to add the correct row names from lsvecGeneGroups to the data matrix in the # 1. Plot raw data and #2 Plot fitted data sections.

Could you double check that my implementation is correct? I am assuming lsvecGeneGroups is in the correct order from top to bottom of the gene names.

Below is the modified plotHeatmap function that includes row names
(my added code was 3 lines with comments in all caps while the rest is the same):

plotHeatmap2 <- function(
  objectImpulseDE2, strCondition, boolIdentifyTransients, 
  scaQThres = 0.01) {
  
  dfAnnot <- get_dfAnnotationProc(obj=objectImpulseDE2)
  
  scaNGenes <- dim(get_matCountDataProc(obj=objectImpulseDE2))[1]
  # Order genes by time of extremum (peak/valley)
  vecSignificantIDs <- rownames(objectImpulseDE2$dfImpulseDE2Results[
    !is.na(objectImpulseDE2$dfImpulseDE2Results$padj) & 
      objectImpulseDE2$dfImpulseDE2Results$padj < scaQThres, ])
  vecTimePointsToEval <- sort(unique(dfAnnot$Time), 
                              decreasing = FALSE)
  scaNTPtoEvaluate <- length(vecTimePointsToEval)
  matImpulseValue <- do.call(rbind, lapply(
    vecSignificantIDs, function(x) {
      evalImpulse_comp(
        vecImpulseParam = 
          get_lsModelFits(obj=objectImpulseDE2)[[strCondition]][[x]]$
          lsImpulseFit$vecImpulseParam, 
        vecTimepoints = vecTimePointsToEval)
    }))
  rownames(matImpulseValue) <- vecSignificantIDs
  matidxMaxTimeSort <- t(apply(matImpulseValue, 1, function(genevalues) {
    sort(genevalues, decreasing = TRUE, index.return = TRUE)$ix
  }))
  vecMaxTime <- vecTimePointsToEval[matidxMaxTimeSort[, 1]]
  matidxMinTimeSort <- t(apply(matImpulseValue, 1, function(genevalues) {
    sort(genevalues, decreasing = FALSE, index.return = TRUE)$ix
  }))
  
  if (boolIdentifyTransients) {
    # Group into transients and monotonous
    vecidxTransient <- which(objectImpulseDE2$dfImpulseDE2Results[vecSignificantIDs, 
                                                                  ]$isTransient)
    vecidxMonotonous <- which(objectImpulseDE2$dfImpulseDE2Results[vecSignificantIDs, 
                                                                   ]$isMonotonous)
    
    # Fine sort montonous transition signals into up/down
    if (length(vecidxMonotonous) == 1) {
      matImpulseMonot <- t(matImpulseValue[vecidxMonotonous, ])
    } else { 
      matImpulseMonot <- matImpulseValue[vecidxMonotonous, ]
    }
    vecboolMonotonousUp <- apply(matImpulseMonot, 1, function(gene) {
      gene[1] < gene[scaNTPtoEvaluate]
    })
    vecboolMonotonousDown <- !vecboolMonotonousUp
    
    vecidxMonotonousUp <- vecidxMonotonous[vecboolMonotonousUp]
    vecidxMonotonousUpSort <- vecidxMonotonousUp[
      do.call(order, as.data.frame(
        matidxMinTimeSort[vecidxMonotonousUp,1]))]
    vecidxMonotonousDown <- vecidxMonotonous[vecboolMonotonousDown]
    vecidxMonotonousDownSort <- vecidxMonotonousDown[
      do.call(order, as.data.frame(
        matidxMinTimeSort[vecidxMonotonousDown, 1]))]
    
    # Fine sort transitive signals into off/on
    if (length(vecidxTransient) == 1) {
      matImpulseTransient <- t(matImpulseValue[vecidxTransient, ]) 
    } else {
      matImpulseTransient <- matImpulseValue[vecidxTransient, ]
    }
    vecboolTransientValley <- apply(
      matImpulseTransient, 1, function(genevalues) {
        boolValley <- any(
          genevalues[2:(scaNTPtoEvaluate - 1)] < genevalues[1] & 
            genevalues[2:(scaNTPtoEvaluate - 1)] < 
            genevalues[scaNTPtoEvaluate])
        return(boolValley)
      })
    vecboolTransientPeak <- !vecboolTransientValley
    
    vecidxTransientPeak <- vecidxTransient[vecboolTransientPeak]
    vecidxTransientPeakSort <- vecidxTransientPeak[
      do.call(order, as.data.frame(
        matidxMaxTimeSort[vecidxTransientPeak, 1]))]
    vecidxTransientValley <- vecidxTransient[vecboolTransientValley]
    vecidxTransientValleySort <- vecidxTransientValley[
      do.call(order, as.data.frame(
        matidxMinTimeSort[vecidxTransientValley,1]))]
    
    vecidxAllSort <- c(
      vecidxMonotonousUpSort, vecidxMonotonousDownSort, 
      vecidxTransientPeakSort, vecidxTransientValleySort)
    
    vecTrajectoryType <- c(
      rep("up", length(vecidxMonotonousUpSort)), 
      rep("down", length(vecidxMonotonousDownSort)), 
      rep("*up", length(vecidxTransientPeakSort)), 
      rep("*down", length(vecidxTransientValleySort)))
    lsvecGeneGroups <- list(
      transition_up = vecSignificantIDs[vecidxMonotonousUpSort], 
      transition_down =  vecSignificantIDs[vecidxMonotonousDownSort], 
      transient_up = vecSignificantIDs[vecidxTransientPeakSort], 
      transient_down = vecSignificantIDs[vecidxTransientValleySort])
  } else {
    vecidxAllSort <- sort(vecMaxTime, decreasing = FALSE, 
                          index.return = TRUE)$ix
    vecTrajectoryType <- rep(" ", length(vecidxAllSort))
    lsvecGeneGroups <- list(all = vecSignificantIDs[vecidxAllSort])
  }
  
  vecUniqueTP <- unique(dfAnnot$Time)
  vecSizeFactors <- computeNormConst(
    matCountDataProc = get_matCountDataProc(obj=objectImpulseDE2), 
    vecSizeFactorsExternal = get_vecSizeFactors(obj=objectImpulseDE2))
  matSizefactors <- matrix(
    vecSizeFactors, nrow = length(vecSignificantIDs), 
    ncol = dim(get_matCountDataProc(obj=objectImpulseDE2))[2], byrow = TRUE)
  
  # 1. Plot raw data
  matDataNorm <- get_matCountDataProc(obj=objectImpulseDE2)[vecSignificantIDs, ]/matSizefactors
  matDataHeat <- do.call(cbind, lapply(vecUniqueTP, function(tp) {
    vecidxCols <- which(dfAnnot$Time %in% tp)
    if (length(vecidxCols) > 1) {
      return(rowMeans(matDataNorm[, vecidxCols], na.rm = TRUE))
    } else {
      return(matDataNorm[, vecidxCols])
    }
  }))
  colnames(matDataHeat) <- vecUniqueTP
  rownames(matDataHeat) <- NULL
  # Row normalisation: z-scores
  matDataHeatZ <- do.call(rbind, lapply(
    seq(1, dim(matDataHeat)[1]), function(i) {
      (matDataHeat[i, ] - mean(matDataHeat[i, ], na.rm = TRUE))/
        sd(matDataHeat[i,], na.rm = TRUE)
    }))
  
  row_labs = unlist(lsvecGeneGroups) #CREATE A SEPARATE VARIABLE CONTAINING ROW LABELS

  # Create heatmap of raw data, ordered by fits
  complexHeatmapRaw <- Heatmap(
    matDataHeatZ[vecidxAllSort, ], 
    gap = unit(5, "mm"), split = vecTrajectoryType, 
    cluster_rows = FALSE, cluster_columns = FALSE, 
    row_labels = row_labs, row_names_side = "left", #ADD ROW LABELS AND FORMATTING
    heatmap_legend_param = list(title = "z-score"))
  
  # 2. Plot fitted data
  matDataHeat <- matImpulseValue
  colnames(matDataHeat) <- vecUniqueTP
  rownames(matDataHeat) <- NULL
  # Row normalisation: z-scores
  matDataHeatZ <- do.call(rbind, lapply(
    seq(1, dim(matDataHeat)[1]), function(i) {
      (matDataHeat[i, ] - mean(matDataHeat[i, ], na.rm = TRUE))/
        sd(matDataHeat[i,], na.rm = TRUE)
    }))
  
  # Create heatmap of raw data, ordered by fits
  complexHeatmapFit <- Heatmap(
    matDataHeatZ[vecidxAllSort, ], 
    gap = unit(5, "mm"), split = vecTrajectoryType, 
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_labels = row_labs, row_names_side = "left", #ADD ROW LABELS AND FORMATTING
    heatmap_legend_param = list(title = "z-score"))
  
  return(list(complexHeatmapRaw = complexHeatmapRaw, 
              complexHeatmapFit = complexHeatmapFit, 
              lsvecGeneGroups = lsvecGeneGroups))
}

Thank you for your time and dedication to improving RNA-seq DEG analysis. I hope this could help others as well if it is correct.

Best,
AP

I tried this function, but it had no change in my heatmap. T_T
I had change the code in line 17 "evalImpulse_comp" to "ImpulseDE2:::evalImpulse_comp",otherwise it will report this error.
Error in evalImpulse_comp(vecImpulseParam = get_lsModelFits(obj = objectImpulseDE2)[[strCondition]][[x]]$lsImpulseFit$vecImpulseParam, : could not find function "evalImpulse_comp

Have you sovled this problem? I am also wondering how to obtian the gene names in the heatmap?

Hey Sally, unfortunately no reply from the package developers. When I ran this in 2020, it worked for me. I didn't have the error that millersan saw (sorry just saw his reply from a year ago).

Did you show the gene names in the heatmap? I return the script of srcImpulseDE2_plotHeatmap.R, and then the gene comes out. But in fact, there are so many DEGs, I think it's not reality to show all the genes in the heatmap.

What you can do it save the image as a PDF and then edit it to show the gene names that you care about on Adobe Illustrator or another PDF editor. Through this you can also retrieve all of the gene names or print them out in the console by modifying the code. It depends on what you actually want to show or know.

JH-42 commented

new update

plotHeatmap2 <- function(
    objectImpulseDE2, strCondition, boolIdentifyTransients, 
    scaQThres = 0.01) {
  
  dfAnnot <- get_dfAnnotationProc(obj=objectImpulseDE2)
  
  scaNGenes <- dim(get_matCountDataProc(obj=objectImpulseDE2))[1]
  # Order genes by time of extremum (peak/valley)
  vecSignificantIDs <- rownames(objectImpulseDE2$dfImpulseDE2Results[
    !is.na(objectImpulseDE2$dfImpulseDE2Results$padj) & 
      objectImpulseDE2$dfImpulseDE2Results$padj < scaQThres, ])
  vecTimePointsToEval <- sort(unique(dfAnnot$Time), 
                              decreasing = FALSE)
  scaNTPtoEvaluate <- length(vecTimePointsToEval)
  matImpulseValue <- do.call(rbind, lapply(
    vecSignificantIDs, function(x) {
      evalImpulse(
        vecImpulseParam = 
          get_lsModelFits(obj=objectImpulseDE2)[[strCondition]][[x]]$
          lsImpulseFit$vecImpulseParam, 
        vecTimepoints = vecTimePointsToEval)
    }))
  rownames(matImpulseValue) <- vecSignificantIDs
  matidxMaxTimeSort <- t(apply(matImpulseValue, 1, function(genevalues) {
    sort(genevalues, decreasing = TRUE, index.return = TRUE)$ix
  }))
  vecMaxTime <- vecTimePointsToEval[matidxMaxTimeSort[, 1]]
  matidxMinTimeSort <- t(apply(matImpulseValue, 1, function(genevalues) {
    sort(genevalues, decreasing = FALSE, index.return = TRUE)$ix
  }))
  
  if (boolIdentifyTransients) {
    # Group into transients and monotonous
    vecidxTransient <- which(objectImpulseDE2$dfImpulseDE2Results[vecSignificantIDs, 
    ]$isTransient)
    vecidxMonotonous <- which(objectImpulseDE2$dfImpulseDE2Results[vecSignificantIDs, 
    ]$isMonotonous)
    
    # Fine sort montonous transition signals into up/down
    if (length(vecidxMonotonous) == 1) {
      matImpulseMonot <- t(matImpulseValue[vecidxMonotonous, ])
    } else { 
      matImpulseMonot <- matImpulseValue[vecidxMonotonous, ]
    }
    vecboolMonotonousUp <- apply(matImpulseMonot, 1, function(gene) {
      gene[1] < gene[scaNTPtoEvaluate]
    })
    vecboolMonotonousDown <- !vecboolMonotonousUp
    
    vecidxMonotonousUp <- vecidxMonotonous[vecboolMonotonousUp]
    vecidxMonotonousUpSort <- vecidxMonotonousUp[
      do.call(order, as.data.frame(
        matidxMinTimeSort[vecidxMonotonousUp,1]))]
    vecidxMonotonousDown <- vecidxMonotonous[vecboolMonotonousDown]
    vecidxMonotonousDownSort <- vecidxMonotonousDown[
      do.call(order, as.data.frame(
        matidxMinTimeSort[vecidxMonotonousDown, 1]))]
    
    # Fine sort transitive signals into off/on
    if (length(vecidxTransient) == 1) {
      matImpulseTransient <- t(matImpulseValue[vecidxTransient, ]) 
    } else {
      matImpulseTransient <- matImpulseValue[vecidxTransient, ]
    }
    vecboolTransientValley <- apply(
      matImpulseTransient, 1, function(genevalues) {
        boolValley <- any(
          genevalues[2:(scaNTPtoEvaluate - 1)] < genevalues[1] & 
            genevalues[2:(scaNTPtoEvaluate - 1)] < 
            genevalues[scaNTPtoEvaluate])
        return(boolValley)
      })
    vecboolTransientPeak <- !vecboolTransientValley
    
    vecidxTransientPeak <- vecidxTransient[vecboolTransientPeak]
    vecidxTransientPeakSort <- vecidxTransientPeak[
      do.call(order, as.data.frame(
        matidxMaxTimeSort[vecidxTransientPeak, 1]))]
    vecidxTransientValley <- vecidxTransient[vecboolTransientValley]
    vecidxTransientValleySort <- vecidxTransientValley[
      do.call(order, as.data.frame(
        matidxMinTimeSort[vecidxTransientValley,1]))]
    
    vecidxAllSort <- c(
      vecidxMonotonousUpSort, vecidxMonotonousDownSort, 
      vecidxTransientPeakSort, vecidxTransientValleySort)
    
    vecTrajectoryType <- c(
      rep("up", length(vecidxMonotonousUpSort)), 
      rep("down", length(vecidxMonotonousDownSort)), 
      rep("*up", length(vecidxTransientPeakSort)), 
      rep("*down", length(vecidxTransientValleySort)))
    lsvecGeneGroups <- list(
      transition_up = vecSignificantIDs[vecidxMonotonousUpSort], 
      transition_down =  vecSignificantIDs[vecidxMonotonousDownSort], 
      transient_up = vecSignificantIDs[vecidxTransientPeakSort], 
      transient_down = vecSignificantIDs[vecidxTransientValleySort])
  } else {
    vecidxAllSort <- sort(vecMaxTime, decreasing = FALSE, 
                          index.return = TRUE)$ix
    vecTrajectoryType <- rep(" ", length(vecidxAllSort))
    lsvecGeneGroups <- list(all = vecSignificantIDs[vecidxAllSort])
  }
  
  vecUniqueTP <- unique(dfAnnot$Time)
  vecSizeFactors <- computeNormConst(
    matCountDataProc = get_matCountDataProc(obj=objectImpulseDE2), 
    vecSizeFactorsExternal = get_vecSizeFactors(obj=objectImpulseDE2))
  matSizefactors <- matrix(
    vecSizeFactors, nrow = length(vecSignificantIDs), 
    ncol = dim(get_matCountDataProc(obj=objectImpulseDE2))[2], byrow = TRUE)
  
  # 1. Plot raw data
  matDataNorm <- get_matCountDataProc(obj=objectImpulseDE2)[vecSignificantIDs, ]/matSizefactors
  matDataHeat <- do.call(cbind, lapply(vecUniqueTP, function(tp) {
    vecidxCols <- which(dfAnnot$Time %in% tp)
    if (length(vecidxCols) > 1) {
      return(rowMeans(matDataNorm[, vecidxCols], na.rm = TRUE))
    } else {
      return(matDataNorm[, vecidxCols])
    }
  }))
  colnames(matDataHeat) <- vecUniqueTP
  rownames(matDataHeat) <- NULL
  # Row normalisation: z-scores
  matDataHeatZ <- do.call(rbind, lapply(
    seq(1, dim(matDataHeat)[1]), function(i) {
      (matDataHeat[i, ] - mean(matDataHeat[i, ], na.rm = TRUE))/
        sd(matDataHeat[i,], na.rm = TRUE)
    }))
  
  row_labs = unlist(lsvecGeneGroups) #CREATE A SEPARATE VARIABLE CONTAINING ROW LABELS
  
  # Create heatmap of raw data, ordered by fits
  complexHeatmapRaw <- Heatmap(
    matDataHeatZ[vecidxAllSort, ], 
    gap = unit(5, "mm"), split = vecTrajectoryType, 
    cluster_rows = FALSE, cluster_columns = FALSE, 
    row_labels = row_labs, row_names_side = "left", #ADD ROW LABELS AND FORMATTING
    heatmap_legend_param = list(title = "z-score"))
  
  # 2. Plot fitted data
  matDataHeat <- matImpulseValue
  colnames(matDataHeat) <- vecUniqueTP
  rownames(matDataHeat) <- NULL
  # Row normalisation: z-scores
  matDataHeatZ <- do.call(rbind, lapply(
    seq(1, dim(matDataHeat)[1]), function(i) {
      (matDataHeat[i, ] - mean(matDataHeat[i, ], na.rm = TRUE))/
        sd(matDataHeat[i,], na.rm = TRUE)
    }))
  
  # Create heatmap of raw data, ordered by fits
  complexHeatmapFit <- Heatmap(
    matDataHeatZ[vecidxAllSort, ], 
    gap = unit(5, "mm"), split = vecTrajectoryType, 
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_labels = row_labs, row_names_side = "left", #ADD ROW LABELS AND FORMATTING
    heatmap_legend_param = list(title = "z-score"))
  
  return(list(complexHeatmapRaw = complexHeatmapRaw, 
              complexHeatmapFit = complexHeatmapFit, 
              lsvecGeneGroups = lsvecGeneGroups))
}


evalImpulse <- function(vecImpulseParam, vecTimepoints) {
  
  # beta is vecImpulseParam[1] h0 is vecImpulseParam[2] h1 is
  # vecImpulseParam[3] h2 is vecImpulseParam[4] t1 is vecImpulseParam[5]
  # t2 is vecImpulseParam[6]
  
  vecImpulseValue <- sapply(vecTimepoints, function(t) {
    (1/vecImpulseParam[3]) * 
      (vecImpulseParam[2] + (vecImpulseParam[3] - vecImpulseParam[2]) *
         (1/(1 + exp(-vecImpulseParam[1] * (t - vecImpulseParam[5]))))) *
      (vecImpulseParam[4] + (vecImpulseParam[3] - vecImpulseParam[4]) *
         (1/(1 + exp(vecImpulseParam[1] * (t - vecImpulseParam[6])))))
  })
  vecImpulseValue[vecImpulseValue < 10^(-10)] <- 10^(-10)
  
  return(vecImpulseValue)
}