# run this every time you open R for this kind of analysis library(rnaseqWrapper) # Load your data # the folder chosen in setwd needs to be customized for user and project # need subfolders geneResult and annotations, copied from server setwd("/Users/jupreziosi/Desktop/Findinggenes") #####compare treatments within organ countData <- mergeCountFiles("intestineResult/") # Pull out counts, make them integers, and view them myCountData <- round( countData[,grep(".expected_count",names(countData))],0) names(myCountData) <- gsub(".expected_count","",names(myCountData)) #filtering to keep only those genes whose mean expression is > 10 #could play with this value to #see how the size of toSearch changes. myMeans <- apply(as.matrix(myCountData), 1, mean) toSearch <- myCountData[myMeans > 1 & !is.na(myMeans) & myMeans < 1000,] # Run DESeq - this saves files to folder given by outNamePrefix Small Intestine deOut <- DESeqWrapper(toSearch, # Our count data to use conditions=c("intestine_no","intestine_fed"), # conditions to compare outNamePrefix="DEseq/") # Where to save the outputs # Save the DESeq R object: Small Intestine save(deOut,file="DEseq/deOut_intestine_treatment.Rdata") ## Save qvals for easier use Small Intestine myQvals <- deOut$deOutputs$intestine_novsintestine_fed$pval normCounts <- deOut$normalizedReads ## Limit data to interesting genes toPlot <- as.matrix(toSearch[myQvals < 0.001 & !is.na(myQvals),]) toPlotnorm <- as.matrix(normCounts[myQvals < 0.001 & !is.na(myQvals),]) #find the genes in the heatmap (with and without scaling to zero and 1 stdev) heatstuff = heatmap.mark(toPlot, margins = c(10,10)) heatstuff_noscale = heatmap.mark(toPlot, scale="none", margins = c(10,10)) #genterates a table with rows and columns in tact (bottom to top) #gene names (columns) are associated with the values and tissue sources (rows) carp =heatstuff$carpet #saving a list of genes after heatmap write.csv(colnames(carp),file="Expected_fromcarp.csv") # I am interested in this gene: # Contig12414_Npffr1_Neuropeptide_FF_receptor_1_Rattus_norvegicus # from Expected_fromcarp.csv #Correleation distance code (z-score scale) heatcorr = heatmap.mark(toPlot, distfun=function(x) as.dist((1 - cor(t(x))/2)), margins = c(10,10)) heatcorrnorm = heatmap.mark(toPlotnorm, distfun=function(x) as.dist(( 1 - cor(t(x)))/2), margins = c(10,10)) #Correleation distance code (scale based on number of reads) heatcorr = heatmap.mark(toPlot, scale="none", distfun=function(x) as.dist((1 - cor(t(x))/2)), margins = c(10,10)) heatcorrnorm = heatmap.mark(toPlotnorm, scale="none", distfun=function(x) as.dist(( 1 - cor(t(x)))/2), margins = c(10,10)) ######supervised clustering by giving a gene to start it goi = gene of interest; start with the gene name goi = which(rownames(toSearch)=="Contig12414_Npffr1_Neuropeptide_FF_receptor_1_Rattus_norvegicus") # Compute distance from goi to every other gene using (1-corr)/2 goiData = toSearch[goi,] corrgoi = (1 - cor(t(goiData), t(toSearch)))/2 # Get count data for genes within threshold distance of goi thresh = 0.1 supClustCounts = toSearch[corrgoi < thresh ,] # ordered by gene number supClustOrder = order(corrgoi)[1:nrow(supClustCounts)] supClustCounts = toSearch[supClustOrder ,] # ordered by distance from goi # Save supervised cluster to CSV file in working directory write.csv(supClustCounts,file="Npffr1.csv") # could print/save just the gene names stored in variable genes ## Here are some commands that adjust the graphs. ## We need to figure out other parameters that will give us the full graph and not cut off the labels. ## Add color and a legend to show more of the options heatmap.mark(toPlot, cexCol=.9, # make column labels smaller ColSideColors = rep(c("red","blue"),each=3), # match column order scaleLabel="") # turn off label to leave room legend(x="topleft",inset=c(-.01,.13), # where the label should go bty="n", cex=.9, # no box around it, and set the size legend=c("intestine_no","intestine_fed"), # What should be there fill=c("red","blue"), # Colors to use title="Conditions") #Label for the legend