Macrophages were derived from freshly isolated bone marrow from mice of various genotypes. These cells were then infected with WNV-Tx i.c. p3 at an MOI of 2.5 for 24 or 48 hours (plus a 48h mock). RNA was then isolated and sent to Expression Analysis, Inc. The targets file describes the replication strategy and layout of all samples. The next steps are differential expression analysis to determine gene signatures specifically induced (or lacking) in each knockout.
# Input files
in.target <- "raw_data/RNA_seq/targets_filtered_BMM.txt"
in.matrix <- "raw_data/RNA_seq/voom_normalized_matrix_batch_RRG.csv"
# Output files
out.counts <- "tidy_data/RNA_seq/tidied_counts.txt"
out.normalized <- "tidy_data/RNA_seq/norm_data.txt"
out.expressMatrix <- "results/ExpressMatrix.txt"
out.sigMask <- "results/sig_dataMask.txt"
# Install libraries if needed
#source("http://www.bioconductor.org/biocLite.R")
#biocLite(pkgs = c("limma" "doBy", "sva", "FactoMineR"))
# Load libraries
library(limma)
library(doBy)
library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-16. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
library(FactoMineR)
# Print out session data
sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] FactoMineR_1.34 sva_3.22.0 genefilter_1.56.0 mgcv_1.8-16
## [5] nlme_3.1-128 doBy_4.5-15 limma_3.30.6
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.8 bitops_1.0-6 tools_3.3.2
## [4] digest_0.6.10 annotate_1.52.0 evaluate_0.10
## [7] RSQLite_1.1 memoise_1.0.0 lattice_0.20-34
## [10] Matrix_1.2-7.1 DBI_0.5-1 yaml_2.1.14
## [13] parallel_3.3.2 cluster_2.0.5 stringr_1.1.0
## [16] knitr_1.15.1 S4Vectors_0.12.1 IRanges_2.8.1
## [19] flashClust_1.01-2 scatterplot3d_0.3-37 stats4_3.3.2
## [22] rprojroot_1.1 grid_3.3.2 Biobase_2.34.0
## [25] AnnotationDbi_1.36.0 XML_3.98-1.5 survival_2.40-1
## [28] rmarkdown_1.2 magrittr_1.5 leaps_2.9
## [31] backports_1.0.4 htmltools_0.3.5 MASS_7.3-45
## [34] BiocGenerics_0.20.0 splines_3.3.2 xtable_1.8-2
## [37] stringi_1.1.2 RCurl_1.95-4.8
Load Target File
# Read in targets file
targets <- read.table(in.target, header = TRUE, stringsAsFactors = FALSE, row.names = 1)
Load Normalized Sequenced Data
# Read counts matrix
counts <- read.csv(in.matrix, header = TRUE, row.names = 1, stringsAsFactors = FALSE)
# Filter targets and counts matrix for matching samples, align columns
targets <- targets[which(row.names(targets) %in% colnames(counts)), ]
counts <- as.matrix(counts[, match(row.names(targets), colnames(counts))])
colnames(counts) <- targets$Sample # rename columns by Sample ID from targets file
# Write tidied data set to tidy_data folder
write.table(counts, out.counts, sep = "\t", quote = FALSE, col.names = NA)
Correct batch effects
# Run ComBat to remove batch effects
f <- factor(targets$Group)
modcombat = model.matrix(~1, data = f)
data_adj <- ComBat(counts, targets$Batch, mod = modcombat)
## Found 2 batches
## Adjusting for 0 covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
# Write tidied data set to tidy_data folder
write.table(data_adj, out.normalized, sep = "\t", quote = FALSE, col.names = NA)
Design
# Build design matrix
f <- factor(targets$Group)
design <- model.matrix(~0 + f)
colnames(design) <- levels(f)
Contrasts
# Build contrast matrix
contrasts <- makeContrasts(BMM_WT_24h = BMM_WT_24h-BMM_WT_Mock,
BMM_LGP2_24h = BMM_LGP2_24h-BMM_LGP2_Mock,
BMM_MDA5_24h = BMM_MDA5_24h-BMM_MDA5_Mock,
BMM_DKO_24h = BMM_DKO_24h-BMM_DKO_Mock,
BMM_RIGI.WT_24h = BMM_RIGI.WT_24h-BMM_RIGI.WT_Mock,
BMM_RIGI_24h = BMM_RIGI_24h-BMM_RIGI_Mock,
BMM_WT_48h = BMM_WT_48h-BMM_WT_Mock,
BMM_LGP2_48h = BMM_LGP2_48h-BMM_LGP2_Mock,
BMM_MDA5_48h = BMM_MDA5_48h-BMM_MDA5_Mock,
BMM_RIGI.WT_48h = BMM_RIGI.WT_48h-BMM_RIGI.WT_Mock,
BMM_RIGI_48h = BMM_RIGI_48h-BMM_RIGI_Mock,
levels = design)
Fit linear model
# Fit linear model
fit <- lmFit(data_adj, design)
fit2 <- contrasts.fit(fit, contrasts) # make comparisons as defined above
fit2 <- eBayes(fit2) # empirical Bayes testing
# decideTests for fitted results (threshold: 2 fold change, p-value < 0.05)
results <- decideTests(fit2, method = "global", adjust.method = "BH", p.value = 0.05, lfc = log2(2))
summary(results)
## BMM_WT_24h BMM_LGP2_24h BMM_MDA5_24h BMM_DKO_24h BMM_RIGI.WT_24h
## -1 3511 3757 3160 238 3562
## 0 8792 8612 9203 12605 8680
## 1 861 795 801 321 922
## BMM_RIGI_24h BMM_WT_48h BMM_LGP2_48h BMM_MDA5_48h BMM_RIGI.WT_48h
## -1 3350 2783 2694 2886 2160
## 0 8974 9622 9708 9468 10273
## 1 840 759 762 810 731
## BMM_RIGI_48h
## -1 2369
## 0 10049
## 1 746
# Compile expression matrix
dataMatrix <- fit2$coefficients # Extract results of differential expression
sigMask <- dataMatrix * (results**2) # 1 if significant, 0 otherwise
ExpressMatrix <- subset(dataMatrix, rowSums(sigMask) != 0) # filter for significant genes
# Filter sigMask to use for selecting DE genes from ExpressMatrix
sigMask <- subset(sigMask, rowSums(sigMask) != 0)
# Write data to file (ExpressMatrix and data mask for identifying DE genes of individual KOs)
write.table(ExpressMatrix, file = out.expressMatrix, sep = "\t", quote = FALSE, col.names = NA)
write.table(sigMask, file = out.sigMask, sep = "\t", quote = FALSE, col.names = NA)