diff --git a/CpG_annotations.R b/CpG_annotations.R
index 0b0ffc579952c45fe8f6c192523e57b8dc390ec7..fad40006fda0aee7223b78110f87a7ecb3275628 100644
--- a/CpG_annotations.R
+++ b/CpG_annotations.R
@@ -1,12 +1,16 @@
 library(data.table)
 library(ChIPpeakAnno)
 library(BSgenome.Ggallus.UCSC.galGal6)
-library(TxDb.Ggallus.UCSC.galGal6.refGene)
+library(GenomicFeatures)
 library(ggplot2)
+library(Repitools)
+
+# genome annotation ---------
+ens_galgal6 <- makeTxDbFromGFF("http://hgdownload.soe.ucsc.edu/goldenPath/galGal6/bigZips/genes/galGal6.ensGene.gtf.gz")
 
 # all CpG sites ---------
 # https://support.bioconductor.org/p/95239/
-chrs <- names(Ggallus) 
+chrs <- names(Ggallus)
 cgs <- lapply(chrs, function(x) start(matchPattern("CG", Ggallus[[x]])))
 cgs <- do.call(
     c,
@@ -21,7 +25,6 @@ cgs <- do.call(
 )
 
 # CpG islands ----------
-
 cgi <- data.table::fread("http://hgdownload.soe.ucsc.edu/goldenPath/galGal6/database/cpgIslandExtUnmasked.txt.gz")
 cgi <- with(cgi, GRanges(V2, IRanges(V3, V4)))
 
@@ -41,40 +44,51 @@ rrbs <- with(rrbs, GRanges(chr, IRanges(pos, pos +1)))
 length(rrbs) # 920670
 length(intersect(rrbs, cgs)) # 809995, why are we loosing so much yet so few? -> probably non standards chr
 
-                                        # DMC ? ---------
+# DMC ? ---------
+dmc_c <- fread("/work2/genphyse/genepi/RRBS/ChickStress/AnalyseDiff/methdiff/run20200515/results/DMC.bed", skip = 1)
+dmc_c <- with(dmc_c, GRanges(paste0("chr",V1), IRanges(V2, V3)))
 
-                                        # annotattion -----------
+dmc_s <- fread("/work2/genphyse/genepi/RRBS/ChickStress/AnalyseDiff/methdiff/run20200513/results/DMC.bed", skip = 1)
+dmc_s <- with(dmc_s, GRanges(paste0("chr",V1), IRanges(V2, V3), direction = V4, difference = V5))
 
-regions <- list(cgs = cgs, cgis = cgis, rrbs = rrbs)
+# annotattion -----------
 
+regions <- list(cgs = cgs, cgis = cgis, rrbs = rrbs, dmc_c = dmc_c, dmc_s = dmc_s)
+nCpG <- vapply(regions, function(x) length(x), 0L)
+nCpG
+
+# data aquisition and formating
 aCRs <- lapply(regions, function(x) assignChromosomeRegion(
     x,
-    nucleotideLevel = FALSE, 
-    precedence = c("Promoters", "immediateDownstream", "fiveUTRs", "threeUTRs", "Exons", "Introns"), 
-    TxDb = TxDb.Ggallus.UCSC.galGal6.refGene
-    ))
+    nucleotideLevel = FALSE,
+    precedence = c("Promoters", "immediateDownstream", "fiveUTRs", "threeUTRs", "Exons", "Introns"),
+    TxDb = ens_galgal6
+))
 aCRs <- lapply(seq_along(aCRs), function(i) {
-    res <- as.data.table(aCRs[[i]])
-    res[, "regions"] <-  names(aCRs)[i]
+    res <- as.data.table(aCRs[[i]]$percentage, keep.rownames = TRUE)
+    colnames(res) <- c("regions", "percentage")
+    res[, "data"] <-  names(aCRs)[i]
     res
 })
 aCRs <- rbindlist(aCRs)
-colnames(aCRs) <- c("regions", "percentage", "jaccard", "data")
 
+# renaming and reordering a few things for plot
 aCRs$regions <- with(aCRs, dplyr::case_when(
-                                      regions == "Intergenic.Region" ~ "Intergenic",
-                                      regions == "fiveUTRs" ~ "5'UTR",
-                                      regions == "threeUTRs" ~ "3'UTR",
-                                      regions == "immediateDownstream" ~ "Downstream\n(≤1kb)",
-                                      TRUE ~ regions
-                           ))
-
-col_order <- c("Intergenic", "Promoters", "5'UTR", "Exons", "Introns", "3'UTR", "Downstream\n(≤1kb)")
-aCRs$data <- factor(aCRs$data, levels = c("cgs", "cgis", "rrbs"))
-
-
+    regions == "Intergenic.Region" ~ "Intergenic",
+    regions == "fiveUTRs" ~ "5'UTR",
+    regions == "threeUTRs" ~ "3'UTR",
+    regions == "immediateDownstream" ~ "Downstream\n(≤1kb)",
+    regions == "Promoters" ~ "Promoters\n(≤1kb)",
+    TRUE ~ regions
+))
+
+col_order <- c("Intergenic", "Promoters\n(≤1kb)", "5'UTR", "Exons", "Introns", "3'UTR", "Downstream\n(≤1kb)")
+aCRs$data <- factor(aCRs$data, levels = c("cgs", "cgis", "rrbs", "dmc_c", "dmc_s"))
+aCRs$isDMC <- ifelse(aCRs$data %in% c("dmc_c", "dmc_s"), TRUE, FALSE)
+
+# plot
 p1 <- ggplot(aCRs, aes(x = factor(regions, levels = col_order), y = percentage, fill = data)) +
-    geom_col(position = "dodge", color = "black") +
+    geom_col(aes(linetype = isDMC), position = "dodge", color = "black") +
     labs(
         title = "CpG localisations",
         x = "",
@@ -83,10 +97,95 @@ p1 <- ggplot(aCRs, aes(x = factor(regions, levels = col_order), y = percentage,
     #coord_flip() +
     scale_fill_manual(
         name = "",
-        labels = c("All CpG", "CpG in CGI", "CpG in RRBS"),
-        values = viridisLite::viridis(3)
+        labels = c(
+            "All CpG\n(n=12M)",
+            "CpG in CGI\n(n=2.5M)",
+            "CpG in RRBS\n(n=0.9M)",
+            paste0("DMC per condition\n(n=", length(dmc_c), ")"),
+            paste0("DMC per sex\n(n=", length(dmc_s), ")")
+        ),
+        values = viridisLite::viridis(length(regions))
     ) +
     theme_bw(base_size = 14) +
-    theme(legend.position = "top")
+    theme(legend.position = "top") +
+    guides(linetype = FALSE)
+
+ggsave("CpG_annotations.png", p1, width = 8)
+
+
+# distance to closest TSS ------------
+transcripts <- as.data.table(toGRanges(ens_galgal6, feature="transcript"))
+
+# selection of one middle TSS amongst transcripts of the same gene
+getMiddleLineFor <- function(gene) {
+    tempt <- dplyr::filter(transcripts, gene_id == gene)
+    if(tempt$strand[1] == "+") {
+        # si le gène est '+', le TSS est au 'start'
+        tempt <- dplyr::arrange(tempt, start)
+    } else if(tempt$strand[1] == "-") {
+        # si le gène est '-', le TSS est au 'end'
+        tempt <- dplyr::arrange(tempt, end)
+    }
+    # retourne la ligne du mileur
+    dplyr::slice(tempt, ceiling(nrow(tempt)/2))
+}
+
+# quite a long step :-/
+midTranscript <- lapply(
+    unique(transcripts$gene_id),
+    getMiddleLineFor
+)
+midTranscript <- rbindlist(midTranscript)
+midTranscript <- with(midTranscript, GRanges(seqnames, IRanges(start, end), strand = strand, gene_id = gene_id))
+
+cpg_arround_tss <- lapply(
+    seq_along(regions[1:3]),
+    function(i) {
+        fs <- featureScores( # utility function from Repitools
+            regions[[i]],
+            midTranscript,
+            up = 9875,
+            down = 9875,
+            freq = 250,
+            use.strand = FALSE, # reads must not be on same strand as feature
+        )
+        fs <- tables(fs)[[1]]
+        fs <- fs * length(regions[[i]]) * 1000/250  # transform values in nCpG per kb
+        data.table(
+            data = names(regions)[i],
+            pos = as.numeric(colnames(fs)),
+            mean = apply(fs, 2, mean),
+            sem = apply(fs, 2, sd)/sqrt(nrow(fs)-1)
+        )
+    }
+)
+cpg_arround_tss <- rbindlist(cpg_arround_tss)
+cpg_arround_tss$data <- factor(cpg_arround_tss$data, levels = c("cgs", "cgis", "rrbs"))
+
+p2 <- ggplot(cpg_arround_tss, aes(x = pos, y = mean)) +
+    geom_ribbon(aes(ymin = mean - sem, ymax = mean + sem, fill = data), alpha = 0.5) +
+    geom_line(aes(colour = data)) +
+    scale_fill_manual(
+        name = "",
+        labels = c(
+            "All CpG\n(n=12M)",
+            "CpG in CGI\n(n=2.5M)",
+            "CpG in RRBS\n(n=0.9M)"
+        ),
+        values = viridisLite::viridis(3, begin = 0, end = 0.5)
+    ) +
+    scale_color_manual(
+        name = "",
+        labels = c(
+            "All CpG\n(n=12M)",
+            "CpG in CGI\n(n=2.5M)",
+            "CpG in RRBS\n(n=0.9M)"
+        ),
+        values = viridisLite::viridis(3, begin = 0, end = 0.5)
+    ) +
+    labs(title = "CpG distribution arround gene starts", x = "Distance to TSS", y = "Average number of CpG in a 1kb window") +
+    theme_bw(base_size = 14)
+
+ggsave("CpG_annotations_arround_TSS.png", p2, width = 8)
+
 
-ggsave("CpG_annotations.png", p1)
diff --git a/CpG_annotations.png b/CpG_annotations.png
index 1b8a55740b17cc81b09d6093dcc796fe382b0e41..6d20e2cbaf4b277d1cabef0973c9f3bae63a6163 100644
Binary files a/CpG_annotations.png and b/CpG_annotations.png differ
diff --git a/CpG_annotations_arround_TSS.png b/CpG_annotations_arround_TSS.png
new file mode 100644
index 0000000000000000000000000000000000000000..728d5d603ba9ad8f51d27fb02ab36294c9bfbcfc
Binary files /dev/null and b/CpG_annotations_arround_TSS.png differ