0a. Intro

Macrophages were derived from freshly isolated bone marrow from mice of various genotypes. These cells were then infected with WNV-Tx i.c. p3 at an MOI of 2.5 for 24 or 48 hours (plus a 48h mock). RNA was then isolated and sent to Expression Analysis, Inc. The targets file describes the replication strategy and layout of all samples. The next steps are differential expression analysis to determine gene signatures specifically induced (or lacking) in each knockout.

1. Set Locations

# Input files 

in.target <- "raw_data/RNA_seq/targets_filtered_BMM.txt"

in.matrix <- "raw_data/RNA_seq/voom_normalized_matrix_batch_RRG.csv"



# Output files

out.counts <- "tidy_data/RNA_seq/tidied_counts.txt"

out.normalized <- "tidy_data/RNA_seq/norm_data.txt"

out.expressMatrix <- "results/ExpressMatrix.txt"

out.sigMask <- "results/sig_dataMask.txt"

2. Load Libraries and supplemental files

# Install libraries if needed

#source("http://www.bioconductor.org/biocLite.R")

#biocLite(pkgs = c("limma" "doBy", "sva", "FactoMineR"))



# Load libraries

library(limma)

library(doBy)

library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-16. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
library(FactoMineR) 



# Print out session data

sessionInfo()
## R version 3.3.2 (2016-10-31)

## Platform: x86_64-w64-mingw32/x64 (64-bit)

## Running under: Windows 7 x64 (build 7601) Service Pack 1

## 

## locale:

## [1] LC_COLLATE=English_United States.1252 

## [2] LC_CTYPE=English_United States.1252   

## [3] LC_MONETARY=English_United States.1252

## [4] LC_NUMERIC=C                          

## [5] LC_TIME=English_United States.1252    

## 

## attached base packages:

## [1] stats     graphics  grDevices utils     datasets  methods   base     

## 

## other attached packages:

## [1] FactoMineR_1.34   sva_3.22.0        genefilter_1.56.0 mgcv_1.8-16      

## [5] nlme_3.1-128      doBy_4.5-15       limma_3.30.6     

## 

## loaded via a namespace (and not attached):

##  [1] Rcpp_0.12.8          bitops_1.0-6         tools_3.3.2         

##  [4] digest_0.6.10        annotate_1.52.0      evaluate_0.10       

##  [7] RSQLite_1.1          memoise_1.0.0        lattice_0.20-34     

## [10] Matrix_1.2-7.1       DBI_0.5-1            yaml_2.1.14         

## [13] parallel_3.3.2       cluster_2.0.5        stringr_1.1.0       

## [16] knitr_1.15.1         S4Vectors_0.12.1     IRanges_2.8.1       

## [19] flashClust_1.01-2    scatterplot3d_0.3-37 stats4_3.3.2        

## [22] rprojroot_1.1        grid_3.3.2           Biobase_2.34.0      

## [25] AnnotationDbi_1.36.0 XML_3.98-1.5         survival_2.40-1     

## [28] rmarkdown_1.2        magrittr_1.5         leaps_2.9           

## [31] backports_1.0.4      htmltools_0.3.5      MASS_7.3-45         

## [34] BiocGenerics_0.20.0  splines_3.3.2        xtable_1.8-2        

## [37] stringi_1.1.2        RCurl_1.95-4.8

3. Load Data and Correct Batch Effects

Load Target File

# Read in targets file

targets <- read.table(in.target, header = TRUE, stringsAsFactors = FALSE, row.names = 1)

Load Normalized Sequenced Data

# Read counts matrix

counts <- read.csv(in.matrix, header = TRUE, row.names = 1, stringsAsFactors = FALSE)



# Filter targets and counts matrix for matching samples, align columns

targets <- targets[which(row.names(targets) %in% colnames(counts)), ]

counts <- as.matrix(counts[, match(row.names(targets), colnames(counts))])

colnames(counts) <- targets$Sample # rename columns by Sample ID from targets file



# Write tidied data set to tidy_data folder

write.table(counts, out.counts, sep = "\t", quote = FALSE, col.names = NA)

Correct batch effects

# Run ComBat to remove batch effects

f <- factor(targets$Group)

modcombat = model.matrix(~1, data = f)

data_adj <- ComBat(counts, targets$Batch, mod = modcombat)
## Found 2 batches

## Adjusting for 0 covariate(s) or covariate level(s)

## Standardizing Data across genes

## Fitting L/S model and finding priors

## Finding parametric adjustments

## Adjusting the Data
# Write tidied data set to tidy_data folder

write.table(data_adj, out.normalized, sep = "\t", quote = FALSE, col.names = NA)

4. Differential Expression Analysis

Design

# Build design matrix

f <- factor(targets$Group)

design <- model.matrix(~0 + f)

colnames(design) <- levels(f)

Contrasts

# Build contrast matrix

contrasts <- makeContrasts(BMM_WT_24h = BMM_WT_24h-BMM_WT_Mock,

                           BMM_LGP2_24h = BMM_LGP2_24h-BMM_LGP2_Mock,

                           BMM_MDA5_24h = BMM_MDA5_24h-BMM_MDA5_Mock,

                           BMM_DKO_24h = BMM_DKO_24h-BMM_DKO_Mock,

                           BMM_RIGI.WT_24h = BMM_RIGI.WT_24h-BMM_RIGI.WT_Mock,

                           BMM_RIGI_24h = BMM_RIGI_24h-BMM_RIGI_Mock,

                           BMM_WT_48h = BMM_WT_48h-BMM_WT_Mock,

                           BMM_LGP2_48h = BMM_LGP2_48h-BMM_LGP2_Mock,

                           BMM_MDA5_48h = BMM_MDA5_48h-BMM_MDA5_Mock,

                           BMM_RIGI.WT_48h = BMM_RIGI.WT_48h-BMM_RIGI.WT_Mock,

                           BMM_RIGI_48h = BMM_RIGI_48h-BMM_RIGI_Mock,

                           levels = design)

Fit linear model

# Fit linear model

fit <- lmFit(data_adj, design)

fit2 <- contrasts.fit(fit, contrasts) # make comparisons as defined above

fit2 <- eBayes(fit2) # empirical Bayes testing



# decideTests for fitted results (threshold: 2 fold change, p-value < 0.05)

results <- decideTests(fit2, method = "global",  adjust.method = "BH", p.value = 0.05, lfc = log2(2))

summary(results)
##    BMM_WT_24h BMM_LGP2_24h BMM_MDA5_24h BMM_DKO_24h BMM_RIGI.WT_24h

## -1       3511         3757         3160         238            3562

## 0        8792         8612         9203       12605            8680

## 1         861          795          801         321             922

##    BMM_RIGI_24h BMM_WT_48h BMM_LGP2_48h BMM_MDA5_48h BMM_RIGI.WT_48h

## -1         3350       2783         2694         2886            2160

## 0          8974       9622         9708         9468           10273

## 1           840        759          762          810             731

##    BMM_RIGI_48h

## -1         2369

## 0         10049

## 1           746
# Compile expression matrix

dataMatrix <- fit2$coefficients # Extract results of differential expression

sigMask <- dataMatrix * (results**2) # 1 if significant, 0 otherwise

ExpressMatrix <- subset(dataMatrix, rowSums(sigMask) != 0) # filter for significant genes



# Filter sigMask to use for selecting DE genes from ExpressMatrix

sigMask <- subset(sigMask, rowSums(sigMask) != 0)



# Write data to file (ExpressMatrix and data mask for identifying DE genes of individual KOs)

write.table(ExpressMatrix, file = out.expressMatrix, sep = "\t", quote = FALSE, col.names = NA)

write.table(sigMask, file = out.sigMask, sep = "\t", quote = FALSE, col.names = NA)