Radish_SynCom

Study performed at INRAE - IRHS : Script to reproduce bioinformatics and figures of the article

Title: Transmission of synthetic seed bacterial communities to radish seedlings: impact on microbiota assembly and plant phenotype

Bioinformatic analysis on raw reads - Denoising with DADA2 and filtering

Raw Fastq reads are available on ENA accession number: PRJEB58635

removing primers with cutadapt

for i in cat group; do cutadapt --discard-untrimmed -o $i.gyrB.R1.fq -p $i.gyrB.R2.fq -g MGNCCNGSNATGTAYATHGG -G ACNCCRTGNARDCCDCCNGA -e 0.1 -O 20 $iL001_R1_001.fastq.gz $i_L001_R2_001.fastq.gz; done

Denoising in R with DADA2

library(dada2); packageVersion("dada2")

list.files(path)

fnFs <- sort(list.files(path, pattern="gyrB.R1.fq", full.names = TRUE))

fnRs <- sort(list.files(path, pattern="gyrB.R2.fq", full.names = TRUE))

sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)

plotQualityProfile(fnFs[1:8]) #select 200 (run1)

plotQualityProfile(fnRs[1:8]) #select 160 (run1)

filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))

filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))

names(filtFs) <- sample.names

names(filtRs) <- sample.names

out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(200,160),

                     maxN=0, maxEE=c(1,1), truncQ=2, rm.phix=TRUE,

                     compress=TRUE, multithread=TRUE) #

head(out)

errF <- learnErrors(filtFs, multithread=TRUE)

errR <- learnErrors(filtRs, multithread=TRUE)

plotErrors(errF, nominalQ=TRUE)

dadaFs <- dada(filtFs, err=errF, multithread=TRUE) #without pooling or pseudo-pooling (no need to detect rare ASV)

dadaRs <- dada(filtRs, err=errR, multithread=TRUE)

dadaFs[[1]]

mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, verbose=TRUE)

seqtab <- makeSequenceTable(mergers)

dim(seqtab)

table(nchar(getSequences(seqtab)))

Since gyrB is a protein-coding genes only triplets should be conserved (244-247-250-253-256-259-262-265-268)

seqtab244 <- seqtab[,nchar(colnames(seqtab)) %in% 244]

seqtab247 <- seqtab[,nchar(colnames(seqtab)) %in% 247]

seqtab250 <- seqtab[,nchar(colnames(seqtab)) %in% 250]

seqtab253 <- seqtab[,nchar(colnames(seqtab)) %in% 253]

seqtab256 <- seqtab[,nchar(colnames(seqtab)) %in% 256]

seqtab259 <- seqtab[,nchar(colnames(seqtab)) %in% 259]

seqtab262 <- seqtab[,nchar(colnames(seqtab)) %in% 262]

seqtab265 <- seqtab[,nchar(colnames(seqtab)) %in% 265]

seqtab268 <- seqtab[,nchar(colnames(seqtab)) %in% 268]

Merge all files

seq.final <- cbind(seqtab244, seqtab247, seqtab250, seqtab253, seqtab256, seqtab259, seqtab262, seqtab265, seqtab268)

dim(seq.final)

sum(seq.final)/sum(seqtab)

#Detect/Remove chimera

seqtab.nochim <- removeBimeraDenovo(seq.final, method="consensus", multithread=TRUE, verbose=TRUE)

dim(seqtab.nochim)

sum(seqtab.nochim)/sum(seq.final)

## Summary

getN <- function(x) sum(getUniques(x))

track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim))

colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")

rownames(track) <- sample.names

head(track)

Figures of the paper

Figure 1 - Meta-analysis radish samples

SV_gyrB<-read.table("Subset3-gyrB-MiSeq_table-FINAL-rarefied-transposed.txt", header=TRUE, check.names = FALSE, sep = "\t")
head(SV_gyrB)
dim(SV_gyrB)
meta2 <- read.table("Metadata_gyrB_withDivSubset2_Jan2021.txt", header=TRUE, check.names = FALSE, sep = "\t")
head(meta2)
dim(meta2)
SV_gyrB_use1<-merge(meta2,SV_gyrB,by="SampleID")
dim(SV_gyrB_use1)

head(SV_gyrB_use1)

#Subset only radish samples
SV_gyrB_R=subset(SV_gyrB_use1, Plant=="Radish")
head(SV_gyrB_R)
dim(SV_gyrB_R) #295 samples

matrix<-SV_gyrB_R[c(40:ncol(SV_gyrB_R))]
dim(matrix)
head(matrix)

##Keeping ASVs with at least 100 reads in the meta-analysis dataset
dim(matrix)
matrix_use<-matrix[,colSums(matrix)>=100]
dim(matrix_use)

#transposing
matrix_uset=t(matrix_use)
head(matrix_uset)
dim(matrix_uset)

## Calculate prevalence and relative abundance for each ASV
#Code Shade lab: https://github.com/ShadeLab/PAPER_Shade_CurrOpinMicro/blob/master/script/Core_prioritizing_script.R
#presence-absence data
SV_gyrB_PA <- 1*((matrix_uset>0)==1)                                              
# occupancy calculation
SV_gyrB_Prevalence <- rowSums(SV_gyrB_PA)/ncol(SV_gyrB_PA) 
# relative abundance  
library(vegan)
library(dplyr)
SV_gyrB_relative_abundance <- apply(decostand(matrix_uset, method="total", MARGIN=2),1, mean)     

# combining occupancy and relative abundance of each SV_gyrB in a table
SV_gyrBprev_rel <- add_rownames(as.data.frame(cbind(SV_gyrB_Prevalence, SV_gyrB_relative_abundance)),'SV') 
head(SV_gyrBprev_rel)
dim(SV_gyrBprev_rel)

## Merge prevalence/rel abund data with taxonomic info for each SV
taxo_gyrB<-read.table("Subset1-2_All_studies_merged_gyrB-rep-seqs-FINAL-filtered-taxonomy-final.tsv", header=TRUE, check.names = FALSE, sep = "\t")
SV_gyrBprev_rel_taxo<-merge(SV_gyrBprev_rel,taxo_gyrB,by="SV")
dim(SV_gyrBprev_rel_taxo)

head(SV_gyrBprev_rel_taxo)

Plot Figure 1 Abundance-occupancy graph - gyrB - All phyla

library(ggplot2)
tax_colors <-  c('Xanthomonas campestris'='red','Plantibacter sp'='lightblue' ,'Stenotrophomonas rhizophila'='#ffa600', "Erwinia persicina"="#bc5090", "Enterobacter cancerogenus"="purple", "Paenibacillus sp"= "#ff7c43", "Pseudomonas fluorescens 1"="#488f31", "Pseudomonas fluorescens 2"="#b7d7a7", "Pseudomonas fluorescens 3"="#004900", "Pseudomonas fluorescens 4"="#aebd38", "Pantoea agglomerans"="#216fd2", "Pseudomonas viridiflava"= "#f9b0d3", "Other taxa"="lightgrey" )

figure1=ggplot(data=SV_gyrBprev_rel_taxo, aes(x=SV_gyrB_relative_abundance, y=SV_gyrB_Prevalence)) +
    geom_point(aes(shape=Core_radish,color=Taxa), size=2) + xlab("Mean Taxa (ASV) Relative Abundance") + ylab("Taxa (ASV) Prevalence")+scale_x_log10(labels = scales::percent_format(accuracy = 0.1))+ scale_y_log10(labels = scales::percent_format(accuracy = 1))+ theme_classic()+ theme(axis.title = element_text(color="black", size=10, face="bold"))+ theme(axis.text = element_text(color="black", size=9, face="bold"))+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold")) +ggtitle("gyrB gene - Bacteria")+theme(plot.title = element_text(hjust = 0.5, face="bold"))+scale_color_manual(values=tax_colors)
figure1

###Subset only strains to prepare the table in Figure 1
SV_gyrBprev_rel_taxo_strain=subset(SV_gyrBprev_rel_taxo, Taxa!="Other taxa")
SV_gyrBprev_rel_taxo_strain
dim(SV_gyrBprev_rel_taxo_strain)

Figure 2 Seedling bacterial biomass (CFU) for the different inoculation treatments

pop=read.table("phenotype-pop-single-syncom.txt", header = T, sep = "\t")
head(pop)
dim(pop)

pop$Condition<-ordered(pop$Condition, levels=c( "Control","Enterobacter cancerogenus","Erwinia persicina","Paenibacillus sp", "Pantoea agglomerans","Plantibacter sp","Pseudomonas fluorescens 1", "Pseudomonas fluorescens 2","Pseudomonas fluorescens 3","Pseudomonas fluorescens 4","Pseudomonas viridiflava","Stenotrophomonas rhizophila","Xanthomonas campestris", "6 strains", "8 strains", "12 strains"))
colors <-  c('Xanthomonas campestris'='red','Plantibacter sp'='lightblue' ,'Stenotrophomonas rhizophila'='#ffa600', "Erwinia persicina"="#bc5090", "Enterobacter cancerogenus"="purple", "Paenibacillus sp"= "#ff7c43", "Pseudomonas fluorescens 1"="#488f31", "Pseudomonas fluorescens 2"="#b7d7a7", "Pseudomonas fluorescens 3"="#004900", "Pseudomonas fluorescens 4"="#aebd38", "Pantoea agglomerans"="#216fd2", "Pseudomonas viridiflava"= "#f9b0d3", "6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")
Figure2=ggplot(pop, aes(x=log_CFU, y=Condition, fill=Condition)) + 
  geom_boxplot(outlier.shape = NA) + xlab("Log CFU by seedling")+ylab("Condition")+ theme_classic()+ theme(axis.title = element_text(color="black", size=12, face="bold"))+ theme(axis.text = element_text(color="black", size=10, face="bold"))	+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))+scale_fill_manual(values=c(colors))+facet_grid(Type~., scales  = "free", space = "free")+ theme(strip.text.y = element_text(size=8, face = "bold")) 
Figure2

Stats Figure 2

###Fit the  model. 
ggplot(pop, aes(log_CFU))+geom_histogram(binwidth=0.1)

### Run first the full model
#Gaussian or Gamma (continuous data)
library(lme4)
library(MASS)
model.nb<-glm(log_CFU ~ Condition,  data=pop, family = Gamma(link="log"))
summary(model.nb)
#check if model fits the data P>0.05 is good
1- pchisq(summary(model.nb)$deviance, summary(model.nb)$df.residual)
plot(model.nb)

###Estimate P values for main effects and interactions
library(car)
Anova(model.nb)

###Estimate P values for main effects and interactions
fit <- aov(log_CFU ~ Condition, pop)# analyse de variance
summary(fit)

###Posthoc tests
library(multcompView)
library(emmeans)
warp.emm <- emmeans(model.nb, ~Condition)
multcomp::cld(warp.emm)

Metabarcoding figures

Import gyrB dataset

meta2 <- read.table("metadata_syncom_Anne_gyrB.txt", header=TRUE, check.names = FALSE, sep = "\t")
SV_16S<-read.table("SynCom_Anne_gyrB_SV_tax_filtered.txt", header=TRUE, check.names = FALSE, sep = "\t")
head(SV_16S)
head(meta2)
dim(meta2)
dim(SV_16S)
SV_16S_use1<-merge(meta2,SV_16S,by="Sample_ID")
dim(SV_16S_use1)

head(SV_16S_use1)

##Subset - remove neg controls and low samples (<1000 reads/sample)
SV_16S_use1=subset(SV_16S_use1, Final=="Yes")
dim(SV_16S_use1)

### Make matrix of just SVs without metadata (needed to make distance matrix for NMDS and stats). Absolutely no metadata should be present, select just the columns with SV names.
matrix<-SV_16S_use1[c(11:ncol(SV_16S_use1))]

dim(matrix)
head(matrix)

##Check rarefaction curves
#determine what is the lowest sequence count in a sample (if not already rarefied). 
#First column could be sampleID to be displayed on the graph.
library(vegan)
raremax <- min(rowSums(matrix))
raremax
rar=rarecurve(matrix, step=50, sample = raremax, cex = 0.6, ylab = "ASV richness", label = FALSE)

##Rarefy dataset to the lowest number of sequences observed in a sample (raremax) or replace "raremax" by the level you want to use

#create rarefied matrix
matrix2=rrarefy(matrix, raremax)
#verify that now we have the same number of sequence per sample (Sample size) across the dataset
rar=rarecurve(matrix2, step=20, sample = raremax, cex = 0.6)

### Remove singletons and SVs with abundance of 0 in all samples - OPTIONAL if not already done
dim(matrix2)
matrix_use<-matrix2[,colSums(matrix2)>=1]
dim(matrix_use)

# 1. Ordination: NMDS with Bray-Curtis distances
##Prepare an ordination (NMDS) based on Bray-Curtis dissimilarity matrix to analyze the community structure
library(vegan)
NMDS <- metaMDS(matrix_use, distance = "bray", trymax = 100)

# Check if the NMDS is representing correctly the data in 2 dimensions: Stress must be inferior to 0.2 and R2 fit on the stressplot should be high >0.95
NMDS
stressplot(NMDS)

##Extract scores for sites (samples) and add these results as new columns in SV_use1 dataframe with the metadata
NMDSsites=scores(NMDS, display="sites")
SV_16S_use1=cbind(SV_16S_use1,NMDSsites)

Figure 3 panels A

library(ggplot2)
tax_colors_16S <-  c('Control'='#ffbb94',"6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")
SV_16S_use1$Condition<-ordered(SV_16S_use1$Condition, levels=c("Control", "6 strains", "8 strains", "12 strains"))

p1=ggplot(data=SV_16S_use1, aes(NMDS1, NMDS2,
color=Condition, shape=Sample_type))+geom_point(size=2.5)+theme_classic()+xlab("NMDS1")+ylab("NMDS2")+scale_shape_manual(values=c(8, 16, 0))+ theme(legend.text = element_text(color="black", size=10, face="bold"))+scale_color_manual(values=tax_colors_16S)+ labs(shape = "Sample Type", color = "Condition")+ theme(legend.title = element_text(color="black", size=12, face="bold"))	+ theme(axis.title = element_text(color="black", size=10, face="bold"))+ theme(axis.text = element_text(color="black", size=8, face="bold"))
p1

Figure 3 panels D

library(ggplot2)
#Plot without controls
SV_16S_use1_nocontrols=subset(SV_16S_use1, Condition!="Control")
dim(SV_16S_use1_nocontrols)
matrix_nocontrols<-SV_16S_use1_nocontrols[c(11:4943)]
dim(matrix_nocontrols)
library(vegan)
matrix3=rrarefy(matrix_nocontrols, 14116)
matrix_use2<-matrix3[,colSums(matrix3)>=1]
dim(matrix_use2)
NMDS2 <- metaMDS(matrix_use2, distance = "bray", trymax = 100)
NMDS2
stressplot(NMDS2)
NMDSsites=scores(NMDS2, display="sites")
SV_16S_use1_nocontrols=cbind(SV_16S_use1_nocontrols,NMDSsites)

Permanova Fig 3D

# Permanova - Distance based multivariate analysis of variance
##Effect of SynCom and sample type
Alltreatments=SV_16S_use1_nocontrols[c(2:10)] 
head(Alltreatments)
#Adonis function in Vegan
adonis2 <- adonis(matrix_use2 ~ Condition*Sample_type, method = "bray", permutations = 999, data=Alltreatments)
adonis2

#looking at difference between conditions
library(pairwiseAdonis)
pairwise.adonis(matrix_use2, Alltreatments$Condition)

plot Figure 3D

p1=ggplot(data=SV_16S_use1_nocontrols, aes(NMDS1, NMDS2,
color=Condition))+geom_point(size=3)+xlab("NMDS1")+ylab("NMDS2")+theme_classic()+facet_wrap(~Sample_type, scales ="free", ncol = 3)+scale_color_manual(values=tax_colors_16S)+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))	+ theme(axis.title = element_text(color="black", size=10, face="bold"))+ theme(axis.text = element_text(color="black", size=8, face="bold"))+ labs(shape = "Sample Type", color = "Condition")+ theme(strip.text.x = element_text(size=10, face = "bold")) 
p1

# Permanova - Distance based multivariate analysis of variance
##Effect of SynCom and sample type
Alltreatments=SV_16S_use1[c(2:10)] 
head(Alltreatments)
#Adonis function in Vegan
adonis <- adonis(matrix_use ~ Condition*Sample_type, method = "bray", permutations = 999, data=Alltreatments)
adonis

#looking at difference between conditions
library(pairwiseAdonis)
pairwise.adonis(matrix_use, Alltreatments$Condition)

Figure 3B Analysis beta dispersion on seeds and seedlings separately

#Seeds
SV_16S_use1_seeds=subset(SV_16S_use1, Sample_type=="Seed")
Alltreatments=SV_16S_use1_seeds[c(2:10)] 
head(Alltreatments)

matrix_seed<-SV_16S_use1_seeds[c(11:ncol(SV_16S_use1_seeds))]
matrix_seed<-matrix_seed[,colSums(matrix_seed)>=1]
## Calculate multivariate dispersions
dis <- vegdist(matrix_seed)
mod <- betadisper(dis, Alltreatments$Condition)
mod
## Perform test
anova(mod)

## Permutation test for F
permutest(mod, pairwise = TRUE, permutations = 99)

## Tukey's Honest Significant Differences
mod.HSD <- TukeyHSD(mod)
plot(mod.HSD)

## Plot the groups and distances to centroids 
plot(mod)

df <- data.frame(Distance_to_centroid=mod$distances,Group=mod$group)
groups <- mod$group

tax_colors_16S <-  c('Control'='#ffbb94',"6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")
p<- ggplot(data=df,aes(x=Group,y=Distance_to_centroid,fill=groups))
p <-p + geom_boxplot()
p <-p +theme_classic()+scale_fill_manual(values=tax_colors_16S)+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))	+ theme(axis.title = element_text(color="black", size=11, face="bold"))+ theme(axis.text = element_text(color="black", size=10, face="bold"))+ labs( color = "Condition")+ xlab("Condition")+ylab("Beta-dispersion: Distance to centroid")+theme(legend.position = "none") +ggtitle("Seeds")+theme(plot.title = element_text(hjust = 0.5, face="bold"))
p

Figure 3C

#Seedlings
SV_16S_use1_seedling=subset(SV_16S_use1, Sample_type=="Seedling")
Alltreatments=SV_16S_use1_seedling[c(2:10)] 
head(Alltreatments)

matrix_seedling<-SV_16S_use1_seedling[c(11:ncol(SV_16S_use1_seedling))]
matrix_seedling<-matrix_seedling[,colSums(matrix_seedling)>=1]
## Calculate multivariate dispersions
dis <- vegdist(matrix_seedling)
mod <- betadisper(dis, Alltreatments$Condition)
mod
## Perform test
anova(mod)

## Permutation test for F
permutest(mod, pairwise = TRUE, permutations = 99)

## Tukey's Honest Significant Differences
mod.HSD <- TukeyHSD(mod)
plot(mod.HSD)

## Plot the groups and distances to centroids 
plot(mod)

df <- data.frame(Distance_to_centroid=mod$distances,Group=mod$group)
groups <- mod$group

tax_colors_16S <-  c('Control'='#ffbb94',"6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")
p<- ggplot(data=df,aes(x=Group,y=Distance_to_centroid,fill=groups))
p <-p + geom_boxplot()
p <-p +theme_classic()+scale_fill_manual(values=tax_colors_16S)+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))	+ theme(axis.title = element_text(color="black", size=11, face="bold"))+ theme(axis.text = element_text(color="black", size=10, face="bold"))+ labs( color = "Condition")+ xlab("Condition")+ylab("Beta-dispersion: Distance to centroid")+theme(legend.position = "none") +ggtitle("Seedlings")+theme(plot.title = element_text(hjust = 0.5, face="bold"))
p

Figure 4

library(microbiome)
##Convert data as phyloseq object
OTU2 = otu_table(matrix_use, taxa_are_rows = FALSE)
taxo_gyrB2 <- read.table(file="SynCom_Anne_gyrB_SV_Taxo.txt", sep='\t', header=TRUE,check.names=FALSE,row.names=1) 
dim(taxo_gyrB2)
taxo_gyrB2=as.matrix(taxo_gyrB2)
TAXO_gyrB = tax_table(taxo_gyrB2)
meta=SV_16S_use1[c(1:10)]
META=sample_data(meta)
physeq_gyrB = phyloseq(OTU2,META,TAXO_gyrB)
physeq_gyrB

ASV of SynComs

library(tidyverse)
dataframe_taxo_gyrB2 <- as.data.frame(taxo_gyrB2, raw.names = FALSE)
dataframe_taxo_gyrB2 <- rownames_to_column(dataframe_taxo_gyrB2, var = "ASV")%>%as_tibble()
ASV_SC <- subset(dataframe_taxo_gyrB2, SynCom == "yes")

Colors used in graphs

tax_colors <-  c(
                 "Enterobacter cancerogenus"="purple",
                 "Erwinia persicina"="#bc5090", 
                 "Paenibacillus sp"= "#ff7c43", 
                 "Pantoea agglomerans"="#216fd2",
                 'Plantibacter sp'='lightblue' ,
                 "Pseudomonas fluorescens 1"="#488f31", 
                 "Pseudomonas fluorescens 2"="#b7d7a7", 
                 "Pseudomonas fluorescens 3"="#004900", 
                 "Pseudomonas fluorescens 4"="#aebd38", 
                 "Pseudomonas viridiflava"= "#f9b0d3",
                 'Stenotrophomonas rhizophila'='#ffa600',
                 'Xanthomonas campestris'='red',
                 "Other taxa"="lightgrey" 
                 )


treatment_colors <-  c('Control'='#ffbb94',"6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")

Figure 4B Taxonomic profile of the inoculum, seeds and seedlings

library(ggplot2)
#relative abundance
count_to_prop <- function(x) {return( x / sum(x))}
physeq_gyrB_abrel <- transform_sample_counts(physeq_gyrB, count_to_prop)
sample_sums(physeq_gyrB_abrel)

#phyloseq to data.frame 
otutable <- otu_table(physeq_gyrB_abrel)
gyrB_abrel <- as.data.frame(otutable)
gyrB_abrel$sample <- sample_data(physeq_gyrB_abrel)$Sample_ID
gyrB_abrel <- gyrB_abrel %>% pivot_longer(-sample) 
colnames(gyrB_abrel) <- c("Sample_ID", "ASV", "relative.abundance")
head(gyrB_abrel)

data_SC=merge(x=gyrB_abrel,y=meta,by.x="Sample_ID",by.y="Sample_ID",all.x = TRUE,all.y = TRUE)
data_SC=merge(x=data_SC,y=dataframe_taxo_gyrB2,by.x="ASV",by.y="ASV",all.x = TRUE,all.y = TRUE, na.rm=TRUE)

data_SC$Strains <- factor(data_SC$Strains, labels =  c("Other taxa", "Enterobacter cancerogenus" ,       "Erwinia persicina"   ,        "Paenibacillus sp"        ,    "Pantoea agglomerans"       ,  "Plantibacter sp"         ,    "Pseudomonas fluorescens 1",   "Pseudomonas fluorescens 2" , "Pseudomonas fluorescens 3" ,  "Pseudomonas fluorescens 4"  , "Pseudomonas viridiflava"  ,   "Stenotrophomonas rhizophila", "Xanthomonas campestris"))


#rename sample-type
data_SC$Sample_type <- factor(data_SC$Sample_type, labels = c("Inoculum", "Seeds", "Seedlings"))

#reorder Strains factors alphabetically 
old.lvl <- levels(data_SC$Strains)
data_SC$Strains <- factor(data_SC$Strains, levels=c("Other taxa", sort(old.lvl[old.lvl!="Other taxa"], decreasing=F)))

#subset for graph
data_SC6 <- subset(data_SC, Condition == "6 strains")
data_SC8 <- subset(data_SC, Condition == "8 strains")
data_SC12 <- subset(data_SC, Condition == "12 strains")
data_control <- subset(data_SC, Condition == "Control")

#graph
plot_SC6<- ggplot(data_SC6, aes(x=Sample_ID, y = round(relative.abundance, 4)))+
  geom_col(aes(fill=Strains))+ 
  scale_fill_manual(values = tax_colors)+ 
  #facet_grid(~Sample_type, scales = "free", space = "free")+
  facet_grid(~Sample_type, scales = "free_x")+
  theme_classic()+
  ggtitle("6 Strains") +  
  xlab("") + 
  ylab("Relative Abundance")+ 
  labs(fill="")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))+
  theme(axis.title.y = element_text(size=25, face = "bold"),
        plot.title = element_text(size=25, face="bold"),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 25),
       # legend.text = element_blank(),
        strip.text =  element_text(size = 25),
       strip.background = element_blank(),
       legend.text = element_text(size = 24, face = "italic"))


plot_SC8<- ggplot(data_SC8, aes(x=Sample_ID, y = round(relative.abundance, 4)))+
  geom_col(aes(fill=Strains))+ 
  scale_fill_manual(values = tax_colors)+ 
  #facet_grid(~Sample_type, scales = "free", space = "free")+
  facet_grid(~Sample_type, scales = "free_x")+
  theme_classic()+
  ggtitle("8 Strains") +  
  xlab("") + 
  ylab("Relative Abundance")+ 
  labs(fill="")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))+
  theme(axis.title.y = element_text(size=25, face = "bold"),
        plot.title = element_text(size=25, face="bold"),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 25),
       # legend.text = element_blank(),
        strip.text =  element_text(size = 25),
       strip.background = element_blank(),
       legend.text = element_text(size = 24, face = "italic"))

plot_SC12<- ggplot(data_SC12, aes(x=Sample_ID, y = round(relative.abundance, 4)))+
  geom_col(aes(fill=Strains))+ 
  scale_fill_manual(values = tax_colors)+ 
  #facet_grid(~Sample_type, scales = "free", space = "free")+
  facet_grid(~Sample_type, scales = "free_x")+
  theme_classic()+
  ggtitle("12 Strains") +  
  xlab("") + 
  ylab("Relative Abundance")+ 
  labs(fill="")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))+
  theme(axis.title.y = element_text(size=25, face = "bold"),
        plot.title = element_text(size=25, face="bold"),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 25),
       # legend.text = element_blank(),
        strip.text = element_text(size = 25),
       strip.background = element_blank(),
       legend.text = element_text(size = 24, face = "italic"))

data_control <- add_row(data_control)
data_control[18817,5] <- "Inoculum"
data_control[18817,6] <- "Control"
plot_control<- ggplot(data_control, aes(x=Sample_ID, y = round(relative.abundance, 4)))+
  geom_col(aes(fill=Strains))+ 
  scale_fill_manual(values = tax_colors)+ 
  #facet_grid(~Sample_type, scales = "free", space = "free")+
  facet_grid(~Sample_type, scales = "free_x")+
  theme_classic()+
  ggtitle("Control") +  
  xlab("") + 
  ylab("Relative Abundance")+ 
  labs(fill="")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))+
  theme(axis.title.y = element_text(size=25, face = "bold"),
        plot.title = element_text(size=25, face="bold"),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 25),
       # legend.text = element_blank(),
        strip.text = element_text(size = 25),
       strip.background = element_blank(),
       legend.text = element_text(size = 24, face = "italic"))


library(ggpubr)

plot <- ggarrange(plot_control, plot_SC6, plot_SC8, plot_SC12, ncol=1, nrow=4, common.legend = TRUE, legend = "right")
ggsave("plot.png" , width = 40, height = 40, units = "cm")

Figure S3 - taxonomic composition of control seedlings

#Keep only control seedlings
physeq_gyrB_seedling <- subset_samples(physeq_gyrB, Sample_type == "Seedling")
physeq_gyrB_seedling <- subset_samples(physeq_gyrB_seedling, Condition == "Control")
physeq_gyrB_seedling <- filter_taxa(physeq_gyrB_seedling,function(x) sum (x)>10, TRUE)
physeq_gyrB_seedling

#Turn all OTUs into species counts
glom16S <- tax_glom(physeq_gyrB_seedling, taxrank = 'Species')
glom16S # should list # taxa as # Species
glom16S2 = transform_sample_counts(glom16S, function(x) x / sum(x) )
glom16S2

data_glom16S<- psmelt(glom16S2) # create dataframe from phyloseq object
data_glom16S$Species <- as.character(data_glom16S$Species) #convert to character

#simple way to rename Species with < 1% abundance
data_glom_16S3=data_glom16S
data_glom_16S3$Species[data_glom_16S3$Abundance < 0.01] <- "Other"

library(stringr)
data_glom_16S3$Species <- str_replace(data_glom_16S3$Species, "_", " ")
data_glom_16S3$Species <- str_replace(data_glom_16S3$Species, "unclassified", "spp")
data_glom_16S3$Species <- str_replace(data_glom_16S3$Species, "spp unclassified", "Unclassified")

#Count # Species to set color palette
Count = length(unique(data_glom_16S3$Species))
Count

tax_colors_16S_core <-  c('Aeromicrobium spp'='#ffbb94','Agrobacterium tumefaciens_complex'='#b29e54' ,'Duganella spp'='darkblue', 'Erwinia persicina'='lightgreen','Shigella spp'='lightblue' ,'Klebsiella cf.'='#7E1717', 'Herbaspirillum lusitanum'='#547dae','Nocardioides spp'='#F6E6C2' ,'Pseudolabrys spp'='#c05805','Pseudarthrobacter spp'='#9CA777', 'Variovorax boronicumulans'= "#f4cc95", "Pseudomonas putida"="#CD6688", "Paenibacillus spp"="red", "Pantoea agglomerans"="orange", "Stenotrophomonas rhizophila"="yellow", "Pseudomonas viridiflava"="#90639f", "Sphingomonas spp"="#cf7773","Xanthomonas campestris"="pink","Other"="black", "Xanthomonas arboricola"="#FFAACF", "Unclassified" = "grey", "Herminiimonas spp"="darkgreen", "Massilia spp"="#F11A7B" )

Figure S3 <- ggplot(data=data_glom_16S3, aes(x=Abundance, y=Sample_ID, fill=Species))+ geom_bar(aes(), stat="identity", position="stack") +
theme(legend.position="bottom",legend.justification="left") + theme_classic()+ theme(axis.title = element_text(color="black", size=12, face="bold"))+ theme(axis.text = element_text(color="black", size=11, face="bold"))+ theme(legend.text = element_text(color="black", size=11, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold")) + theme(strip.background = element_rect(fill = "#f0eeec"),strip.text = element_text(colour = "black", face = "bold"))+ theme(panel.background = element_rect(fill = "white",colour = "black",size = 0.5, linetype="solid"))+ggtitle("Bacteria - gyrB gene")+theme(plot.title = element_text(hjust = 0.5, face="bold"))+theme(strip.text.y = element_text(size=8, angle=0, face = "bold")) +xlab("Relative Abundance")+ylab("Samples")+ scale_x_continuous(labels = scales::percent_format(accuracy = 1))+theme(legend.position = "bottom")+ guides(fill = guide_legend(ncol = 3))+scale_fill_manual(values=tax_colors_16S_core)+scale_fill_manual(values=tax_colors_16S_core)+ scale_y_discrete(labels=c('Seedling 1', 'Seedling 2', 'Seedling 3','Seedling 4', 'Seedling 5', 'Seedling 6','Seedling 7', 'Seedling 8','Seedling 9', 'Seedling 10', 'Seedling 11' ))
Figure S3

Figure 4A - Alpha-diversity (Observed richness)

library(phyloseq)
physeq_SC <- subset_taxa(physeq_gyrB, SynCom == "yes")

#Table of alpha-diversity estimators
table_rgyrB <- estimate_richness(physeq_SC, split = TRUE, measures=c("Observed", "Shannon"))
#Add evenness
HgyrB <- table_rgyrB$Shannon
S1gyrB <- table_rgyrB$Observed
SgyrB <- log(S1gyrB)
eve_gyrB <- HgyrB/SgyrB
table_rgyrB$Evenness <- eve_gyrB
#Bind sampledata + table of alpha_div (here R1 only)
datargyrB <- cbind(sample_data(physeq_SC), table_rgyrB)
#Reorder manually strains variables
datargyrB$Condition <- factor(datargyrB$Condition, levels=c("Control", "6 strains", "8 strains", "12 strains"))

#Plot

library(ggplot2); packageVersion("ggplot2")

treatment_colors <-  c('Control'='#ffbb94',"6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")

p_gyrB_ric <- ggplot(data=datargyrB, 
                             aes_string(x='Condition',y='Observed', fill='Condition')) + 
  geom_boxplot(outlier.shape = NA)+
  facet_wrap(~Sample_type, scales = "free_y") +
  geom_jitter(aes(colour = Condition), size=2, alpha=0.3) +
  scale_color_manual(values = treatment_colors)+
  scale_fill_manual(values = treatment_colors)+ 
  theme_classic()+
  #theme(axis.text.x = element_text(angle = 90)) +
  theme(strip.background = element_blank(), strip.text =  element_text(face = "bold", size = 12))+
  ggtitle("A - Richness")+
  xlab("") + 
  ylab("ASV Richness")+
  scale_y_continuous(limits = c(0,12.5),breaks = seq(0,12,1))+
  theme(axis.text.x = element_text(face = "bold"), axis.title.y = element_text(face = "bold", size = 11))	+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))
p_gyrB_ric


ggsave("richness.png" , width = 30, height = 10, units = "cm")

Figure 5 - Transmission Seed-seedling

library(microbiome)
##Convert data as phyloseq object
OTU2 = otu_table(matrix_use, taxa_are_rows = FALSE)
meta=SV_16S_use1[c(1:10)]
META=sample_data(meta)
physeq_OTU = phyloseq(OTU2,META)
physeq_OTU

#Transform into relative abundance
physeq_OTU_rel <- transform_sample_counts(physeq_OTU, function(x) x / sum(x) )
#Extract OTU table
physeq_OTU_rel_table <- data.frame(otu_table(physeq_OTU_rel))
physeq_OTU_rel_table <- cbind(Sample = rownames(physeq_OTU_rel_table), physeq_OTU_rel_table)
row.names(physeq_OTU_rel_table) <- NULL
physeq_OTU_rel_table$Sample <- as.factor(physeq_OTU_rel_table$Sample)
head(physeq_OTU_rel_table)
#Extract metadata
META2 <- data.frame(sample_data(physeq_OTU_rel))
META2 <- cbind(Sample = rownames(META2), META2)
row.names(META2) <- NULL
META2$Sample_ID <- as.factor(META2$Sample_ID)

### Transform table matrix into long table format
library(reshape2)
table_rel_abund_long=setNames(melt(physeq_OTU_rel_table), c('Sample', 'ASV', 'Relative_Abundance'))
head(table_rel_abund_long)
dim(table_rel_abund_long)
## merge with metadata
table_rel_abund_long_meta<-merge(META2,table_rel_abund_long,by="Sample")
dim(table_rel_abund_long_meta)
head(table_rel_abund_long_meta)
##merge with taxonomic info
taxo<-read.table("SynCom_Anne_gyrB_SV_Taxo.txt", header=TRUE, check.names = FALSE, sep = "\t")
table_rel_abund_long_meta<-merge(taxo,table_rel_abund_long_meta,by="ASV")
dim(table_rel_abund_long_meta)
head(table_rel_abund_long_meta)


###filter rows with Relative_Abundance= 0 
library(dplyr)
table_rel_abund_long_meta2 <- filter(table_rel_abund_long_meta, Relative_Abundance > 0)
head(table_rel_abund_long_meta2)
dim(table_rel_abund_long_meta2)

Figure 5 & 6: Transmission

##Keep only inoculated ASVs
table_rel_abund_long_meta2_strain=subset(table_rel_abund_long_meta2, SynCom=="yes")
dim(table_rel_abund_long_meta2_strain)

#Average rel abund by condition and compartment
library(Rmisc)
library(ggplot2)
grouped_stat_slope_rich=summarySE(table_rel_abund_long_meta2_strain, measurevar=c("Relative_Abundance"),groupvars=c("Condition","Sample_type", "Strains"), na.rm = TRUE)

Figure 6B

#slop plot by strains - seed vs seedling - without controls - with standard errors
seedVSseedlings_rich=grouped_stat_slope_rich[grouped_stat_slope_rich$Sample_type!="Inoculum",]
seedVSseedlings_rich=seedVSseedlings_rich[seedVSseedlings_rich$Condition!="Control",]

#remove rows of strains that have not been inoculated in SynComs
seedVSseedlings_rich2seedling <- subset(seedVSseedlings_rich, Sample_type=="Seedling" & N>16)
seedVSseedlings_rich2seed <- subset(seedVSseedlings_rich, Sample_type=="Seed")
seedVSseedlings_rich_final=rbind(seedVSseedlings_rich2seed,seedVSseedlings_rich2seedling)

#ordered by response type
seedVSseedlings_rich_final$Strains<-ordered(seedVSseedlings_rich_final$Strains, levels=c('Stenotrophomonas rhizophila',"Pseudomonas viridiflava","Paenibacillus sp", "Pseudomonas fluorescens 1","Pantoea agglomerans", "Pseudomonas fluorescens 4","Erwinia persicina","Enterobacter cancerogenus", "Pseudomonas fluorescens 3",'Xanthomonas campestris','Plantibacter sp' , "Pseudomonas fluorescens 2"))
tax_colors_16S <-  c('Control'='#ffbb94',"6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")
seedVSseedlings_rich_final$Condition<-ordered(seedVSseedlings_rich_final$Condition, levels=c("6 strains", "8 strains", "12 strains"))
plot_strains2=ggplot(seedVSseedlings_rich_final, aes(x = Sample_type, y = Relative_Abundance, color = Condition,group=Condition)) + geom_point() + geom_line() +  facet_wrap(~Strains)+theme_classic()+ylab("Relative abundance (%)")+xlab("Sample Type")+scale_color_manual(values=tax_colors_16S)+ scale_y_log10(labels = scales::percent_format(accuracy = 0.1))+ theme(strip.text.x = element_text(size=10,  face = "bold")) + theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))	+ theme(axis.title = element_text(color="black", size=11, face="bold"))+ theme(axis.text = element_text(color="black", size=10, face="bold")) + geom_errorbar(data=seedVSseedlings_rich_final, aes(x=Sample_type, ymin=Relative_Abundance-se, ymax=Relative_Abundance+se, color=Condition), width=.1, alpha=0.7) 
plot_strains2

Figure 5A - slope inoculum - seed

#slop plot by strains - seed vs seedling - without controls - with standard errors
inocVSseed_rich=grouped_stat_slope_rich[grouped_stat_slope_rich$Sample_type!="Seedling",]
inocVSseed_rich=inocVSseed_rich[inocVSseed_rich$Condition!="Control",]

#ordered by response type
inocVSseed_rich$Strains<-ordered(inocVSseed_rich$Strains, levels=c('Stenotrophomonas rhizophila',"Pseudomonas viridiflava","Paenibacillus sp", "Pseudomonas fluorescens 1","Pantoea agglomerans", "Pseudomonas fluorescens 4","Erwinia persicina","Enterobacter cancerogenus", "Pseudomonas fluorescens 3",'Xanthomonas campestris','Plantibacter sp' , "Pseudomonas fluorescens 2"))
tax_colors_16S <-  c('Control'='#ffbb94',"6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")
plot_strains2=ggplot(inocVSseed_rich, aes(x = Sample_type, y = Relative_Abundance, color = Condition,group=Condition)) + geom_point() + geom_line() +  facet_wrap(~Strains)+theme_classic()+ylab("Relative abundance (%)")+xlab("Sample Type")+scale_color_manual(values=tax_colors_16S)+ scale_y_log10(labels = scales::percent_format(accuracy = 0.1))+ theme(strip.text.x = element_text(size=10,  face = "bold")) + theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))	+ theme(axis.title = element_text(color="black", size=11, face="bold"))+ theme(axis.text = element_text(color="black", size=10, face="bold")) + geom_errorbar(data=inocVSseed_rich, aes(x=Sample_type, ymin=Relative_Abundance-se, ymax=Relative_Abundance+se, color=Condition), width=.1, alpha=0.7) 
plot_strains2

Figure 5B Ratio abundance Seed / Inoculum

PS_run2_rich_no0=transform_sample_counts(physeq_OTU, function(x) x+1 )
PS_run2_rich_no0relative=transform_sample_counts(PS_run2_rich_no0, function(x) x/sum(x)*100)
data_fitness=as.data.frame(otu_table(PS_run2_rich_no0relative))
data_fitness2 <- tibble::rownames_to_column(data_fitness, "Sample_ID")
data_fitness3=pivot_longer(data_fitness2,cols=colnames(data_fitness),names_to = "ASV")
colnames(data_fitness3)=c("Sample_ID","ASV","Relative_abundance")
data_fitness3=as.data.frame(data_fitness3)
data_fitness4=left_join(data_fitness3, taxo,by="ASV")
#data_fitness4$Strain_Figure_name = data_fitness4$Strain_Figure_name %>% replace_na('Native strain')
#keep only strains of SynComs
data_fitness4b <- subset(data_fitness4, SynCom == "yes")
sample_data_fitness=as.data.frame(sample_data(physeq_OTU))
sample_data_fitness$Sample_ID=row.names(sample_data_fitness)
data_fitness5=left_join(data_fitness4b,sample_data_fitness,by="Sample_ID")
#####
data_fitness5_inoc=data_fitness5[data_fitness5$Sample_type=="Inoculum",]
names(data_fitness5_inoc)[names(data_fitness5_inoc) == "Relative_abundance"] <- "Relative_abundance_inoc"
data_fitness5_inoc_stat=summarySE(data_fitness5_inoc,measurevar=c("Relative_abundance_inoc"), groupvars=c("ASV","Condition","Strains"), na.rm = TRUE)
data_fitness5_seed=data_fitness5[data_fitness5$Sample_type=="Seed",]
names(data_fitness5_seed)[names(data_fitness5_seed) == "Relative_abundance"] <- "Relative_abundance_seed"
data_fitness5_seed_stat=summarySE(data_fitness5_seed,measurevar=c("Relative_abundance_seed"), groupvars=c("ASV","Strains","Condition"), na.rm = TRUE)
ALL_data_seed_inocs_rich=full_join(data_fitness5_inoc_stat,data_fitness5_seed_stat,by=c("ASV","Condition"))

ALL_data_seed_inocs_rich$fitness=ALL_data_seed_inocs_rich$Relative_abundance_seed/ALL_data_seed_inocs_rich$Relative_abundance_inoc
names(ALL_data_seed_inocs_rich)[names(ALL_data_seed_inocs_rich) == "Strains.x"] <- "Strains"
ALL_data_seed_inocs_rich_nocontrol <- subset(ALL_data_seed_inocs_rich, Condition != "Control")
#remove rows of strains that have not been inoculated in SynComs
ALL_data_seed_inocs_rich_nocontrol <- subset(ALL_data_seed_inocs_rich_nocontrol, sd.y != 0)
#ALL_data_seed_inocs_rich_meta=merge(taxo,ALL_data_seed_inocs_rich,by="Strains",all.x = T)
treatment_colors <-  c('Control'='#ffbb94',"6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")
ALL_data_seed_inocs_rich_nocontrol$Condition<-ordered(ALL_data_seed_inocs_rich_nocontrol$Condition, levels=c("6 strains", "8 strains", "12 strains"))

plot_strains_fitness=ggplot(ALL_data_seed_inocs_rich_nocontrol,aes(x=log10(fitness),y=Strains,color=Condition))+geom_point(size=5)+scale_color_manual(values=treatment_colors)+theme_classic()+theme(text = element_text(size = 20))+guides(fill=guide_legend(ncol=1))+ylab("Strains")+xlab("log10(Relative abundance ratio Seed / Inoculum)")+geom_vline(xintercept = 0)
plot_strains_fitness
#write.csv(ALL_data_seed_inocs_rich_nocontrol, "SynCom_radis_strain_fitness_inoc.csv")

Figure 6B Ratio abundance Seedling / Seed

PS_run2_rich_no0=transform_sample_counts(physeq_OTU, function(x) x+1 )
PS_run2_rich_no0relative=transform_sample_counts(PS_run2_rich_no0, function(x) x/sum(x)*100)
data_fitness=as.data.frame(otu_table(PS_run2_rich_no0relative))
data_fitness2 <- tibble::rownames_to_column(data_fitness, "Sample_ID")
data_fitness3=pivot_longer(data_fitness2,cols=colnames(data_fitness),names_to = "ASV")
colnames(data_fitness3)=c("Sample_ID","ASV","Relative_abundance")
data_fitness3=as.data.frame(data_fitness3)
data_fitness4=left_join(data_fitness3, taxo,by="ASV")
#data_fitness4$Strain_Figure_name = data_fitness4$Strain_Figure_name %>% replace_na('Native strain')
#keep only strains of SynComs
data_fitness4b <- subset(data_fitness4, SynCom == "yes")
sample_data_fitness=as.data.frame(sample_data(physeq_OTU))
sample_data_fitness$Sample_ID=row.names(sample_data_fitness)
data_fitness5=left_join(data_fitness4b,sample_data_fitness,by="Sample_ID")
#####
data_fitness5_seedling=data_fitness5[data_fitness5$Sample_type=="Seedling",]
names(data_fitness5_seedling)[names(data_fitness5_seedling) == "Relative_abundance"] <- "Relative_abundance_seedling"
data_fitness5_seedling_stat=summarySE(data_fitness5_seedling,measurevar=c("Relative_abundance_seedling"), groupvars=c("ASV","Condition","Strains"), na.rm = TRUE)
data_fitness5_seed=data_fitness5[data_fitness5$Sample_type=="Seed",]
names(data_fitness5_seed)[names(data_fitness5_seed) == "Relative_abundance"] <- "Relative_abundance_seed"
data_fitness5_seed_stat=summarySE(data_fitness5_seed,measurevar=c("Relative_abundance_seed"), groupvars=c("ASV","Strains","Condition"), na.rm = TRUE)
ALL_data_seed_seedlings_rich=full_join(data_fitness5_seedling_stat,data_fitness5_seed_stat,by=c("ASV","Condition"))

ALL_data_seed_seedlings_rich$fitness=ALL_data_seed_seedlings_rich$Relative_abundance_seedling/ALL_data_seed_seedlings_rich$Relative_abundance_seed
names(ALL_data_seed_seedlings_rich)[names(ALL_data_seed_seedlings_rich) == "Strains.x"] <- "Strains"
ALL_data_seed_seedlings_rich_nocontrol <- subset(ALL_data_seed_seedlings_rich, Condition != "Control")

#remove rows of strains that have not been inoculated in SynComs
ALL_data_seed_seedlings_rich_nocontrol <- subset(ALL_data_seed_seedlings_rich_nocontrol, sd.y != 0)

#ALL_data_seed_seedlings_rich_meta=merge(taxo,ALL_data_seed_seedlings_rich,by="Strains",all.x = T)
treatment_colors <-  c('Control'='#ffbb94',"6 strains"="ivory3", "8 strains"="ivory4", "12 strains"="black", "Control"="white")
ALL_data_seed_seedlings_rich_nocontrol$Condition<-ordered(ALL_data_seed_seedlings_rich_nocontrol$Condition, levels=c("6 strains", "8 strains", "12 strains"))

plot_strains_fitness=ggplot(ALL_data_seed_seedlings_rich_nocontrol,aes(x=log10(fitness),y=Strains,color=Condition))+geom_point(size=5)+scale_color_manual(values=treatment_colors)+theme_classic()+theme(text = element_text(size = 20))+guides(fill=guide_legend(ncol=1))+ylab("Strains")+xlab("log10(Relative abundance ratio Seedling / Seed)")+geom_vline(xintercept = 0)
plot_strains_fitness

Figure 7 Effect on germination and seedling phenotypes

pheno=read.table("Phenotype_Syncom_strain.txt", header = T, sep = "\t")
head(pheno)
dim(pheno)

Graph Figure 7 phenotype

pheno$Phenotype<-ordered(pheno$Phenotype, levels=c("Non Germinated","Abnormal", "Normal"))

pheno$Condition<-ordered(pheno$Condition, levels=c("6 strains", "8 strains", "12 strains","Enterobacter cancerogenus","Erwinia persicina","Paenibacillus sp", "Pantoea agglomerans","Plantibacter sp","Pseudomonas fluorescens 1", "Pseudomonas fluorescens 2","Pseudomonas fluorescens 3","Pseudomonas fluorescens 4","Pseudomonas viridiflava","Stenotrophomonas rhizophila","Xanthomonas campestris", "Control"))
	
library(ggplot2)
Figure7=ggplot(pheno, aes(x=Condition, y=Frequency, fill=Phenotype)) + 
  geom_bar(position = "stack", stat="identity") + ylab("Frequency of phenotype")+xlab("Condition")+scale_fill_manual(values=c("darkred","#E69F00","#41AB5D"))+ theme(axis.title = element_text(color="black", size=12, face="bold"))+ theme(axis.text = element_text(color="black", size=10, face="bold"))+ scale_y_continuous(labels = scales::percent_format(accuracy = 1))+ theme_classic()	+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))+ coord_flip()+ theme(axis.title = element_text(color="black", size=14, face="bold"))+ theme(axis.text = element_text(color="black", size=10, face="bold"))+facet_grid(Type~., scales  = "free", space = "free")+ theme(strip.text.y = element_text(size=9, face = "bold")) + theme(legend.position = c(-0.26, 0.09)) + theme(legend.background = element_rect(fill="white", size=0.5,linetype="solid", colour ="black"))
Figure7

Stats on seedling phenotype using Chi square and Fisher test for small sampling size - normal vs abnormal

library(reshape2)
#transform to wide format
pheno_wide <- dcast(pheno, Phenotype ~ Condition, value.var="Count")
head(pheno_wide)
pheno_wide_abnormal=subset(pheno_wide, Phenotype!="Non Germinated")

library(dplyr)
pheno_wide_abnormal2=pheno_wide_abnormal %>%
  select(Control, `Xanthomonas campestris`)
chisq.test(pheno_wide_abnormal2)$expected
chisq.test(pheno_wide_abnormal2, correct=FALSE)
fisher.test(pheno_wide_abnormal2)

pheno_wide_abnormal3=pheno_wide_abnormal %>%
  select(Control, `Stenotrophomonas rhizophila`)
chisq.test(pheno_wide_abnormal3)$expected
chisq.test(pheno_wide_abnormal3, correct=FALSE)
fisher.test(pheno_wide_abnormal3)

pheno_wide_abnormal4=pheno_wide_abnormal %>%
  select(Control, `Pseudomonas viridiflava`)
chisq.test(pheno_wide_abnormal4)$expected
chisq.test(pheno_wide_abnormal4, correct=FALSE)
fisher.test(pheno_wide_abnormal4) #significant P<0.001

pheno_wide_abnormal5=pheno_wide_abnormal %>%
  select(Control, `Pseudomonas fluorescens 4`)
chisq.test(pheno_wide_abnormal5)$expected
chisq.test(pheno_wide_abnormal5, correct=FALSE)
fisher.test(pheno_wide_abnormal5)

pheno_wide_abnormal6=pheno_wide_abnormal %>%
  select(Control, `Pseudomonas fluorescens 3`)
chisq.test(pheno_wide_abnormal6)$expected
chisq.test(pheno_wide_abnormal6, correct=FALSE)
fisher.test(pheno_wide_abnormal6)

pheno_wide_abnormal7=pheno_wide_abnormal %>%
  select(Control, `Pseudomonas fluorescens 2`)
chisq.test(pheno_wide_abnormal7)$expected
chisq.test(pheno_wide_abnormal7, correct=FALSE)
fisher.test(pheno_wide_abnormal7)

pheno_wide_abnormal8=pheno_wide_abnormal %>%
  select(Control, `Pseudomonas fluorescens 1`)
chisq.test(pheno_wide_abnormal8)$expected
chisq.test(pheno_wide_abnormal8, correct=FALSE)
fisher.test(pheno_wide_abnormal8)

pheno_wide_abnormal9=pheno_wide_abnormal %>%
  select(Control, `Plantibacter sp`)
chisq.test(pheno_wide_abnormal9)$expected
chisq.test(pheno_wide_abnormal9, correct=FALSE)
fisher.test(pheno_wide_abnormal9)

pheno_wide_abnormal10=pheno_wide_abnormal %>%
  select(Control, `Pantoea agglomerans`)
chisq.test(pheno_wide_abnormal10)$expected
chisq.test(pheno_wide_abnormal10, correct=FALSE)
fisher.test(pheno_wide_abnormal10)

pheno_wide_abnormal11=pheno_wide_abnormal %>%
  select(Control, `Paenibacillus sp`)
chisq.test(pheno_wide_abnormal11)$expected
chisq.test(pheno_wide_abnormal11, correct=FALSE)
fisher.test(pheno_wide_abnormal11)#significant P<0.001

pheno_wide_abnormal12=pheno_wide_abnormal %>%
  select(Control, `Erwinia persicina`)
chisq.test(pheno_wide_abnormal12)$expected
chisq.test(pheno_wide_abnormal12, correct=FALSE)
fisher.test(pheno_wide_abnormal12)

pheno_wide_abnormal13=pheno_wide_abnormal %>%
  select(Control, `Enterobacter cancerogenus`)
chisq.test(pheno_wide_abnormal13)$expected
chisq.test(pheno_wide_abnormal13, correct=FALSE)
fisher.test(pheno_wide_abnormal13)

pheno_wide_abnormal14=pheno_wide_abnormal %>%
  select(Control, `Enterobacter cancerogenus`)
chisq.test(pheno_wide_abnormal14)$expected
chisq.test(pheno_wide_abnormal14, correct=FALSE)
fisher.test(pheno_wide_abnormal14)

pheno_wide_abnormal15=pheno_wide_abnormal %>%
  select(Control, `6 strains`)
chisq.test(pheno_wide_abnormal15)$expected
chisq.test(pheno_wide_abnormal15, correct=FALSE)
fisher.test(pheno_wide_abnormal15)#significant P<0.001

pheno_wide_abnormal16=pheno_wide_abnormal %>%
  select(Control, `8 strains`)
chisq.test(pheno_wide_abnormal16)$expected
chisq.test(pheno_wide_abnormal16, correct=FALSE)
fisher.test(pheno_wide_abnormal16)#significant P<0.001

pheno_wide_abnormal17=pheno_wide_abnormal %>%
  select(Control, `12 strains`)
chisq.test(pheno_wide_abnormal17)$expected
chisq.test(pheno_wide_abnormal17, correct=FALSE)
fisher.test(pheno_wide_abnormal17)#significant P=0.003

Figure 8- SynCom effect on phenotype

Figure 8 effect of phenotype on microbiota structure

##subset seedling to look at seedling type effect
SV_16S_use1_seedling=subset(SV_16S_use1, Sample_type=="Seedling")

#without controls
SV_16S_use1_seedlings_nocontrols=subset(SV_16S_use1_seedling, Condition!="Control")
SV_16S_use1_seedlings_nocontrols$Seedling_type<-ordered(SV_16S_use1_seedlings_nocontrols$Seedling_type, levels=c("PN", "PA"))

matrix3<-SV_16S_use1_seedlings_nocontrols[c(11:4943)]
dim(matrix3)
head(matrix3)

#create rarefied matrix
matrix4=rrarefy(matrix3, raremax)
#verify that now we have the same number of sequence per sample (Sample size) across the dataset
dim(matrix4)
matrix_use2<-matrix4[,colSums(matrix4)>=1]
dim(matrix_use2)

##Prepare an ordination (NMDS) based on Bray-Curtis dissimilarity matrix to analyze the community structure
NMDS2 <- metaMDS(matrix_use2, distance = "bray", trymax = 100)
NMDS2
stressplot(NMDS2)

NMDSsites2=scores(NMDS2, display="sites")
SV_16S_use1_seedlings_nocontrols=cbind(SV_16S_use1_seedlings_nocontrols,NMDSsites2)

Figure 8A

library(ggplot2)
SV_16S_use1_seedlings_nocontrols$Condition<-ordered(SV_16S_use1_seedlings_nocontrols$Condition, levels=c( "6 strains", "8 strains", "12 strains"))
Figure8A=ggplot(data=SV_16S_use1_seedlings_nocontrols, aes(NMDS1, NMDS2,
color=Seedling_type))+geom_point(size=3)+xlab("NMDS1")+ylab("NMDS2")+facet_wrap(~Condition, scales ="free", ncol = 2)+theme_classic()+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))	+ theme(axis.title = element_text(color="black", size=10, face="bold"))+ theme(axis.text = element_text(color="black", size=8, face="bold"))+ labs(color = "Seedling Phenotype")+ theme(strip.text.x = element_text(size=10, face = "bold")) +
  scale_color_manual(labels = c("Normal", "Abnormal"), values = c("#41AB5D", "#E69F00"))+ theme(legend.position = c(0.65, 0.3))
Figure8A

# Permanova - Distance based multivariate analysis of variance
##Effect of SynCom and sample type
Alltreatments=SV_16S_use1_seedlings_nocontrols[c(2:10)] 
head(Alltreatments)
#Adonis function in Vegan
adonis <- adonis(matrix_use2 ~ Condition*Seedling_type, method = "bray", permutations = 999, data=Alltreatments)
adonis

#looking at difference between conditions
library(pairwiseAdonis)
pairwise.adonis(matrix_use2, Alltreatments$Condition)

Figure 8B rel abund strains in function of seedling type

table_rel_abund_long_meta2_seedling=subset(table_rel_abund_long_meta2, Sample_type=="Seedling")
table_rel_abund_long_meta2_seedling=table_rel_abund_long_meta2_seedling[table_rel_abund_long_meta2_seedling$Condition!="Control",]
library(Rmisc)
library(ggplot2)
Diversity_stat <- summarySE(table_rel_abund_long_meta2_seedling, measurevar="Relative_Abundance", groupvars=c("Seedling_type", "Condition", "Strains"), na.rm = TRUE)

Figure 8B rel abund strains in function of seedling type - just 12 strains SynCom

table_rel_abund_long_meta2_seedling12strains=table_rel_abund_long_meta2_seedling[table_rel_abund_long_meta2_seedling$Condition=="12 strains",]
dim(table_rel_abund_long_meta2_seedling12strains)

##Remove empty rows qith ASVs other than the inoculated strains
table_rel_abund_long_meta2_seedling12strains <- table_rel_abund_long_meta2_seedling12strains[-which(table_rel_abund_long_meta2_seedling12strains$Strains == ""), ]
dim(table_rel_abund_long_meta2_seedling12strains)

table_rel_abund_long_meta2_seedling12strains$Seedling_type<-ordered(table_rel_abund_long_meta2_seedling12strains$Seedling_type, levels=c("PN","PA"))

Figure8B=ggplot(data=table_rel_abund_long_meta2_seedling12strains, aes(x=Strains, y=Relative_Abundance, fill=Seedling_type)) +geom_boxplot(outlier.shape = NA) + theme_classic()+xlab("Strains")+ylab("ASV Relative Abundance")+ theme(axis.title = element_text(color="black", size=12, face="bold"))+ theme(axis.text = element_text(color="black", size=10, face="bold"))+ scale_y_log10(labels = scales::percent_format(accuracy = 0.1))+ theme(axis.text.x = element_text(angle = 45, hjust = 1))	+ theme(legend.text = element_text(color="black", size=8, face="bold"))+ theme(legend.title = element_text(color="black", size=10, face="bold"))+ scale_fill_manual(name = "Seedling Phenotype", labels = c("Normal", "Abnormal"), values = c("#41AB5D", "#E69F00"))
Figure8B

stats rel abund in different phenotypes

#choose the strain
strain=subset(table_rel_abund_long_meta2_seedling12strains, Strains=="Xanthomonas campestris")
dim(strain)
ggplot(strain, aes(Relative_Abundance))+geom_histogram(binwidth=0.001)

#Gaussian or Gamma (continuous data)
library(lme4)
library(MASS)
model.nb<-glm(Relative_Abundance ~ Seedling_type,  data=strain, family = Gamma(link="log"))
summary(model.nb)
#check if model fits the data P>0.05 is good
1- pchisq(summary(model.nb)$deviance, summary(model.nb)$df.residual)
plot(model.nb)

###Estimate P values for main effects and interactions
library(car)
Anova(model.nb)

fit <- aov(Relative_Abundance ~ Seedling_type, strain)# analyse de variance
summary(fit)
#posthoc
library(multcompView)
library(emmeans)
warp.emm <- emmeans(model.nb, ~Seedling_type)
multcomp::cld(warp.emm)

stats on all strains

ggplot(table_rel_abund_long_meta2_seedling12strains, aes(Relative_Abundance))+geom_histogram(binwidth=0.005)

#Gaussian or Gamma (continuous data)
library(lme4)
library(MASS)
model.nb<-glm(Relative_Abundance ~ Strains*Seedling_type,  data=table_rel_abund_long_meta2_seedling12strains, family = Gamma(link="log"))
summary(model.nb)
#check if model fits the data P>0.05 is good
1- pchisq(summary(model.nb)$deviance, summary(model.nb)$df.residual)
plot(model.nb)

###Estimate P values for main effects and interactions
library(car)
Anova(model.nb)

fit <- aov(Relative_Abundance ~ Strains*Seedling_type, table_rel_abund_long_meta2_seedling12strains)# analyse de variance
summary(fit)
#posthoc
library(multcompView)
library(emmeans)
warp.emm <- emmeans(model.nb, ~Seedling_type|Strains)
multcomp::cld(warp.emm)

Supplementary Figure S2 native vs surface disinfected seeds

##Import gyrB dataset with native seeds

meta2 <- read.table("metadata_syncom_Anne_gyrB.txt", header=TRUE, check.names = FALSE, sep = "\t")
SV_16S<-read.table("SynCom_Anne_gyrB_SV_tax_filtered_md5_native.txt", header=TRUE, check.names = FALSE, sep = "\t")
head(SV_16S)
head(meta2)
dim(meta2)
dim(SV_16S)
SV_16S_use1<-merge(meta2,SV_16S,by="Sample_ID")
dim(SV_16S_use1)
head(SV_16S_use1)

##Subset to keep only native and surface disinfected seeds
SV_16S_use1=subset(SV_16S_use1, Native=="Yes")
dim(SV_16S_use1)

### Make matrix of just SVs without metadata (needed to make distance matrix for NMDS and stats). Absolutely no metadata should be present, select just the columns with SV names.
matrix<-SV_16S_use1[c(13:ncol(SV_16S_use1))]

dim(matrix)
head(matrix)

##Check rarefaction curves
#determine what is the lowest sequence count in a sample (if not already rarefied). 
#First column could be sampleID to be displayed on the graph.
library(vegan)
raremax <- min(rowSums(matrix))
raremax
rar=rarecurve(matrix, step=50, sample = raremax, cex = 0.6, ylab = "ASV richness", label = FALSE)

#create rarefied matrix
matrix2=rrarefy(matrix, raremax)
#verify that now we have the same number of sequence per sample (Sample size) across the dataset
rar=rarecurve(matrix2, step=20, sample = raremax, cex = 0.6)

### Remove rare SVs - less than 20 reads 
dim(matrix2)
matrix_use<-matrix2[,colSums(matrix2)>=20]
dim(matrix_use)

# 1. Ordination: NMDS with Bray-Curtis distances
##Prepare an ordination (NMDS) based on Bray-Curtis dissimilarity matrix to analyze the community structure
library(vegan)
NMDS <- metaMDS(matrix_use, distance = "bray", trymax = 100, k=3)

# Check if the NMDS is representing correctly the data in 2 dimensions: Stress must be inferior to 0.2 and R2 fit on the stressplot should be high >0.95
NMDS
stressplot(NMDS)
##Extract scores for sites (samples) and add these results as new columns in SV_use1 dataframe with the metadata
NMDSsites=scores(NMDS, display="sites")
SV_16S_use1=cbind(SV_16S_use1,NMDSsites)

Figure S2 panel A

library(ggplot2)
p1=ggplot(data=SV_16S_use1, aes(NMDS1, NMDS2,
color=Surface_Sterilization))+geom_point(size=2.5)+theme_classic()+xlab("NMDS1")+ylab("NMDS2")+ theme(legend.text = element_text(color="black", size=10, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold"))	+ theme(axis.title = element_text(color="black", size=10, face="bold"))+ theme(axis.text = element_text(color="black", size=8, face="bold"))+ labs(color = "Surface Sterilization")
p1

Permanova

# Permanova - Distance based multivariate analysis of variance
##Effect of Surface Sterilization
Alltreatments=SV_16S_use1[c(2:12)] 
head(Alltreatments)
#Adonis function in Vegan
adonis2 <- adonis(matrix_use ~ Surface_Sterilization, method = "bray", permutations = 999, data=Alltreatments)
adonis2

Figure S2B - Taxonomic Bargraph at Species level

library(microbiome)
##Convert data as phyloseq object
matrix_16St=t(matrix_use)
dim(matrix_16St)
taxo_16S2 <- read.table(file="Subset1-2_All_studies_merged_gyrB-rep-seqs-FINAL-filtered-taxonomy-final.tsv", sep='\t', header=TRUE,check.names=FALSE,row.names=1) 
dim(taxo_16S2)
taxo_16S2=as.matrix(taxo_16S2)
TAXO_16S = tax_table(taxo_16S2)
OTU_16S = otu_table(matrix_16St, taxa_are_rows = TRUE)
meta_16S=SV_16S_use1[c(1:8)]
META_16S=sample_data(meta_16S)
physeq_16S = phyloseq(OTU_16S, TAXO_16S,META_16S)
physeq_16S

#Turn all OTUs into class (or phylum or order level) counts
glom16S <- tax_glom(physeq_16S, taxrank = 'Species')
glom16S # should list # taxa as # species
glom16S2 = transform_sample_counts(glom16S, function(x) x / sum(x) )
glom16S2

data_glom16S<- psmelt(glom16S2) # create dataframe from phyloseq object
data_glom16S$Species <- as.character(data_glom16S$Species) #convert to character

#simple way to rename Species with < 0.1% abundance
data_glom_16S3=data_glom16S
data_glom_16S3$Genus[data_glom_16S3$Abundance < 0.001] <- "Other"

library(stringr)
data_glom_16S3$Species <- str_replace(data_glom_16S3$Species, "_", " ")
data_glom_16S3$Species <- str_replace(data_glom_16S3$Species, "unclassified", "spp")

#Count # species to set color palette
Count = length(unique(data_glom_16S3$Species))
Count

tax_colors_16S_core <-  c('Achromobacter spp'='#ffbb94','Agrobacterium tumefaciens'='#b29e54' ,'Asticcacaulis benevestitus'='#B9F3E4', 'Bordetella spp'='#547dae','Cutibacterium acnes'='lightblue' ,'Pseudomonas fluorescens'='#6554AF', 'Erwinia persicina'='lightgreen','Plantibacter spp'='#F6E6C2' ,'Pseudolabrys spp'='#c05805','Serratia plymuthica'='red', 'Saccharibacillus spp'= "#f4cc95", "Pseudomonas koreensis"="#461959", "Massilia"="#d4e79c", "Pantoea agglomerans"="orange", "Stenotrophomonas spp"="yellow", "Pseudomonas viridiflava"="#90639f", "Rhizobium spp"="#4f6457", "Sphingomonas spp"="#cf7773","Xanthomonas campestris"="pink","Other"="black", "Xanthomonas arboricola"="#FFAACF" )

FigureS2B <- ggplot(data=data_glom_16S3, aes(x=Abundance, y=Sample_ID, fill=Species))+ geom_bar(aes(), stat="identity", position="stack") +
theme(legend.position="bottom",legend.justification="left") + theme_classic()+ theme(axis.title = element_text(color="black", size=12, face="bold"))+ theme(axis.text = element_text(color="black", size=11, face="bold"))+ theme(legend.text = element_text(color="black", size=11, face="bold"))+ theme(legend.title = element_text(color="black", size=12, face="bold")) + theme(strip.background = element_rect(fill = "#f0eeec"),strip.text = element_text(colour = "black", face = "bold"))+ theme(panel.background = element_rect(fill = "white",colour = "black",size = 0.5, linetype="solid"))+ggtitle("Bacteria - gyrB gene")+theme(plot.title = element_text(hjust = 0.5, face="bold"))+theme(strip.text.y = element_text(size=8, angle=0, face = "bold")) +xlab("Relative Abundance")+ylab("Samples")+ scale_x_continuous(labels = scales::percent_format(accuracy = 1))+theme(legend.position = "bottom")+ scale_y_discrete(labels=c('Surface-sterilized 1', 'Surface-sterilized 2', 'Surface-sterilized 3','Native 1', 'Native 2', 'Native 3' ))+scale_fill_manual(values=tax_colors_16S_core)+ guides(fill = guide_legend(ncol = 3))
FigureS2B

marie-simonin/Radish_SynCom