From 325b8e5dc5378549c81fb9f41e025ab1611380f2 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 08:52:04 +0200
Subject: [PATCH 01/86] Improving odgi figures

---
 config.yaml               | 4 ++--
 scripts/contig_position.R | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/config.yaml b/config.yaml
index f604523..bb630d2 100644
--- a/config.yaml
+++ b/config.yaml
@@ -16,8 +16,8 @@ wfmash.secondary: '-k 19 -H 0.001 -X'
 seqwish.params: '-B 10000000 -k 19 -f 0'
 
 # Odgi 1D and path coverage viz parameters 
-odgi.1Dviz.params: '-x 2000 -a 25 -b'
-odgi.pcov.params: '-x 2000 -a 25 -O'
+odgi.1Dviz.params: '-x 2000 -a 50 -b -H'
+odgi.pcov.params: '-x 2000 -a 50 -O'
 
 ## Optional parts of the workflow
 # Running Quast to get statistics on input haplotypes
diff --git a/scripts/contig_position.R b/scripts/contig_position.R
index 7ccd486..dc3a2ce 100644
--- a/scripts/contig_position.R
+++ b/scripts/contig_position.R
@@ -33,8 +33,11 @@ my.cytobands <- toGRanges(x)
 # Creating figure
 # Customizing figure parameters : see https://bernatgel.github.io/karyoploter_tutorial//Tutorial/PlotParams/PlotParams.html
 pp <- getDefaultPlotParams(plot.type=1)
-pp$ideogramheight <- 200
+pp$ideogramheight <- 50
 pp$data1outmargin <- 50
+pp$data2outmargin <- 0
+pp$data1inmargin <- 0
+pp$data2inmargin <- 0
 pp$leftmargin <- 0.28
 pp$rightmargin <- 0
 pp$data1height <- 0
-- 
GitLab


From 9a10808d2453923cb8962ea8a585f77dd01485cf Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 09:32:35 +0200
Subject: [PATCH 02/86] Continuing to improve figs

---
 config.yaml               | 4 ++--
 scripts/contig_position.R | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/config.yaml b/config.yaml
index bb630d2..05de181 100644
--- a/config.yaml
+++ b/config.yaml
@@ -16,8 +16,8 @@ wfmash.secondary: '-k 19 -H 0.001 -X'
 seqwish.params: '-B 10000000 -k 19 -f 0'
 
 # Odgi 1D and path coverage viz parameters 
-odgi.1Dviz.params: '-x 2000 -a 50 -b -H'
-odgi.pcov.params: '-x 2000 -a 50 -O'
+odgi.1Dviz.params: '-x 2000 -a 150 -b -H'
+odgi.pcov.params: '-x 2000 -a 150 -O'
 
 ## Optional parts of the workflow
 # Running Quast to get statistics on input haplotypes
diff --git a/scripts/contig_position.R b/scripts/contig_position.R
index dc3a2ce..9ac8182 100644
--- a/scripts/contig_position.R
+++ b/scripts/contig_position.R
@@ -33,9 +33,9 @@ my.cytobands <- toGRanges(x)
 # Creating figure
 # Customizing figure parameters : see https://bernatgel.github.io/karyoploter_tutorial//Tutorial/PlotParams/PlotParams.html
 pp <- getDefaultPlotParams(plot.type=1)
-pp$ideogramheight <- 50
-pp$data1outmargin <- 50
-pp$data2outmargin <- 0
+pp$ideogramheight <- 100
+pp$data1outmargin <- 25
+pp$data2outmargin <- 25
 pp$data1inmargin <- 0
 pp$data2inmargin <- 0
 pp$leftmargin <- 0.28
-- 
GitLab


From a29eff609cbf481963b432fdd62f42a957da81c4 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 09:38:28 +0200
Subject: [PATCH 03/86] Update contig_position.R

---
 scripts/contig_position.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/contig_position.R b/scripts/contig_position.R
index 9ac8182..1d6fed9 100644
--- a/scripts/contig_position.R
+++ b/scripts/contig_position.R
@@ -23,6 +23,7 @@ x = read.table(opt$tsv, sep = "\t", comment.char="^")
 colnames(x) = x[1,]
 x = x[-1,]
 my.genome <- toGRanges(x)
+nChr = ncol(x)
 
 #print("Reading bands file ...")
 x = read.table(opt$bands, sep="\t", comment.char="^")
@@ -43,7 +44,7 @@ pp$rightmargin <- 0
 pp$data1height <- 0
 pp$data2height <- 0
 
-png(opt$out, width=2780, height=500, pointsize=6, res=300, antialias = "none")
+png(opt$out, width=2780, height=(nChr*150), pointsize=6, res=300, antialias = "none")
 kp <- plotKaryotype(genome=my.genome, cytobands=my.cytobands, plot.params=pp, chromosomes="all")
 kp <- kpAddBaseNumbers(kp, cex=0.6, tick.dist=5000000)
 dev.off()
\ No newline at end of file
-- 
GitLab


From 47c4dce4ff50a4eeb41de9f3551aaaacc9f387b9 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 09:46:58 +0200
Subject: [PATCH 04/86] Update contig_position.R

---
 scripts/contig_position.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/contig_position.R b/scripts/contig_position.R
index 1d6fed9..0639c1e 100644
--- a/scripts/contig_position.R
+++ b/scripts/contig_position.R
@@ -23,7 +23,7 @@ x = read.table(opt$tsv, sep = "\t", comment.char="^")
 colnames(x) = x[1,]
 x = x[-1,]
 my.genome <- toGRanges(x)
-nChr = ncol(x)
+nChr = nrow(x)
 
 #print("Reading bands file ...")
 x = read.table(opt$bands, sep="\t", comment.char="^")
-- 
GitLab


From 7fd84f0322c67dfaa1849f2d162f23d90ab90121 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 09:55:18 +0200
Subject: [PATCH 05/86] Update contig_position.R

---
 scripts/contig_position.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/contig_position.R b/scripts/contig_position.R
index 0639c1e..d367beb 100644
--- a/scripts/contig_position.R
+++ b/scripts/contig_position.R
@@ -41,6 +41,8 @@ pp$data1inmargin <- 0
 pp$data2inmargin <- 0
 pp$leftmargin <- 0.28
 pp$rightmargin <- 0
+pp$topmargin <- 0
+pp$bottommargin <- 0
 pp$data1height <- 0
 pp$data2height <- 0
 
-- 
GitLab


From 6515a5349ca140ab1156dcd4eabd30098d78f0c1 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 14:49:01 +0200
Subject: [PATCH 06/86] Started creating reports

---
 Snakefile   | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 config.yaml |  4 ++-
 2 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 5c775d6..1a7dae4 100644
--- a/Snakefile
+++ b/Snakefile
@@ -59,6 +59,10 @@ def which_analysis():
         analysis_inputs.append(
             expand("output/chr.contig/{haplotype}.contig.png", haplotype=CHRLIST) 
         )
+    if config["create_report"] == "True": # Create report
+        analysis_inputs.append(
+            "output/pan1c."+config['name']+".report.md"
+        )
     return analysis_inputs
 
 """
@@ -675,3 +679,74 @@ rule panacus_stats:
             -a {params.apppath} \
             -t {threads}
         """
+
+rule create_pan1c_report_fig:
+    # Produces a markdown report figure of chromosomes graphs
+    input:
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa',
+        contigfig="output/chr.contig/{chromosome}.contig.png",
+    output:
+        odgifig=temp("tmp/{chromosome}.odgi.png"),
+        namefig=temp("tmp/{chromosome}.name.png"),
+        reportfig="output/report/"+config['name']+".{chromosome}.report.fig.png"
+    threads: 4
+    params:
+        apppath=config['app.path']
+    shell:
+        """
+        ## Odgi 1D viz
+        apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+            viz -i {input.graph} -o {output.odgifig} -x 2000 -a 150 -b -H -t {threads} -P
+
+        ## Getting legend from contig figure
+        convert {input.contigfig} -crop 770x+0+0 +repage {output.namefig}
+
+        odgheight=$(identify -ping -format '%h' {output.odgifig})
+        ctgheight=$(identify -ping -format '%h' {input.contigfig})
+
+        combinedheight=$(($odgheight+$ctgheight))
+
+        echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
+
+        ## Creating empty canvas ad adding other figures to it
+        convert -size "2770x$combinedheight" xc:white {output.reportfig}
+        composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
+        composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
+        composite -geometry "+770+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
+        """
+
+rule create_pan1c_report:
+    # Produces a markdown report of chromosomes graphs
+    input:
+        figs=expand("output/"+config['name']+".{chromosome}.report.fig.png", chromsome=CHRLIST),
+        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
+        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
+    output:
+        reportfig="output/pan1c."+config['name']+".report.md"
+    threads: 4
+    params:
+        apppath=config['app.path']
+    run:
+        shell("touch {output.reportfig}")
+
+        # Adding General stats
+        shell("echo '# General stats' >> {output.reportfig}")
+        shell("cat {input.genstats} | tr '\\t' ',' | csv2md >> {output.reportfig}")
+        shell("echo '' >> {output.reportfig}")
+
+        # Adding Path stats
+        shell("echo '# Path stats' >> {output.reportfig}")
+        shell("cat {input.pathstats} | tr '\\t' ',' | csv2md >> {output.reportfig}")
+        shell("echo '' >> {output.reportfig}")
+
+        # Adding chromosomes figures
+        input.figs.sort()
+        
+        shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
+        for fig in input.figs:
+            basename=os.path.basename(fig)
+            chr_name=basename.split('.')[2]
+            
+            shell("echo '## {chr_name}' >> {output.reportfig}")
+            shell("echo '[{basename}]({basename}) >> {output.reportfig}")
+        shell("echo '' >> {output.reportfig}")
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
index 05de181..3c09fb6 100644
--- a/config.yaml
+++ b/config.yaml
@@ -16,7 +16,7 @@ wfmash.secondary: '-k 19 -H 0.001 -X'
 seqwish.params: '-B 10000000 -k 19 -f 0'
 
 # Odgi 1D and path coverage viz parameters 
-odgi.1Dviz.params: '-x 2000 -a 150 -b -H'
+odgi.1Dviz.params: '-x 2000 -a 150 -b'
 odgi.pcov.params: '-x 2000 -a 150 -O'
 
 ## Optional parts of the workflow
@@ -29,3 +29,5 @@ get_PAV: 'False'
 # Computes SyRI figures for haplotypes 
 #get_allASM_SyRI: 'False' # All vs all
 get_ASMs_SyRI: 'False' # Haplotype vs Reference
+# Creating final report
+create_report: 'True'
-- 
GitLab


From 875288343c1b253f03f557d8621538cc915870e0 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 14:56:27 +0200
Subject: [PATCH 07/86] Fixed CICD

---
 .gitlab-ci.yml           | 2 +-
 example/config_CICD.yaml | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 085078b..828b4de 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,7 +14,7 @@ shallow_run_workflow:
     ## Downloading apps
     - mkdir appimgs
     #- apptainer build apps/pggb.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangratools/pggb:latest
-    - apptainer build appimgs/snakebox.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangratools/snakebox:latest
+    - apptainer pull appimgs/snakebox.sif oras://registry.forgemia.inra.fr/alexis.mergez/pan1capps/pan1cbox:latest
     #- apptainer build apps/pytools.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangetools/pytools:latest
     #- apptainer build apps/PanGeTools.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangetools/pangetools:latest
 
diff --git a/example/config_CICD.yaml b/example/config_CICD.yaml
index 4a415bf..648e686 100644
--- a/example/config_CICD.yaml
+++ b/example/config_CICD.yaml
@@ -29,3 +29,5 @@ get_PAV: 'False'
 # Computes SyRI figures for haplotypes 
 #get_allASM_SyRI: 'False' # All vs all
 get_ASMs_SyRI: 'False' # Haplotype vs Reference
+# Creating final report
+create_report: 'True'
-- 
GitLab


From 2e0935dd2bb68ffbfc68d34666398f8a450caadc Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 15:00:41 +0200
Subject: [PATCH 08/86] Typo

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 1a7dae4..b7ef24c 100644
--- a/Snakefile
+++ b/Snakefile
@@ -718,7 +718,7 @@ rule create_pan1c_report_fig:
 rule create_pan1c_report:
     # Produces a markdown report of chromosomes graphs
     input:
-        figs=expand("output/"+config['name']+".{chromosome}.report.fig.png", chromsome=CHRLIST),
+        figs=expand("output/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
         genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
         pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
     output:
-- 
GitLab


From 78c38921c6359962b3d4d6fa00c38c5ac2eb1b6e Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 15:02:22 +0200
Subject: [PATCH 09/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index b7ef24c..dd513de 100644
--- a/Snakefile
+++ b/Snakefile
@@ -718,7 +718,7 @@ rule create_pan1c_report_fig:
 rule create_pan1c_report:
     # Produces a markdown report of chromosomes graphs
     input:
-        figs=expand("output/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
+        figs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
         genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
         pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
     output:
-- 
GitLab


From bc138d4fd351c090782bb7211062993326dea0a9 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 15:15:37 +0200
Subject: [PATCH 10/86] Update Snakefile

---
 Snakefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index dd513de..62344de 100644
--- a/Snakefile
+++ b/Snakefile
@@ -740,10 +740,11 @@ rule create_pan1c_report:
         shell("echo '' >> {output.reportfig}")
 
         # Adding chromosomes figures
-        input.figs.sort()
+        fig_list = input.figs
+        fig_list.sort()
         
         shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
-        for fig in input.figs:
+        for fig in fig_list:
             basename=os.path.basename(fig)
             chr_name=basename.split('.')[2]
             
-- 
GitLab


From 3e28e9b491f01d9b6fe023204ff120c7208057a7 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 15:18:39 +0200
Subject: [PATCH 11/86] Update Snakefile

---
 Snakefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 62344de..289186a 100644
--- a/Snakefile
+++ b/Snakefile
@@ -740,7 +740,8 @@ rule create_pan1c_report:
         shell("echo '' >> {output.reportfig}")
 
         # Adding chromosomes figures
-        fig_list = input.figs
+        fig_list = [fig for fig in input.figs]
+        print(fig_list)
         fig_list.sort()
         
         shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
-- 
GitLab


From 35f8cb8bf9d1f07fea917c505ec5cf1cae157e28 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 15:22:57 +0200
Subject: [PATCH 12/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 289186a..782e219 100644
--- a/Snakefile
+++ b/Snakefile
@@ -750,5 +750,5 @@ rule create_pan1c_report:
             chr_name=basename.split('.')[2]
             
             shell("echo '## {chr_name}' >> {output.reportfig}")
-            shell("echo '[{basename}]({basename}) >> {output.reportfig}")
+            shell("echo '[{basename}]({basename})' >> {output.reportfig}")
         shell("echo '' >> {output.reportfig}")
\ No newline at end of file
-- 
GitLab


From 4ba2d57f4518d3d88fed8302c5e1dffd3974358b Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 15:27:51 +0200
Subject: [PATCH 13/86] Update Snakefile

---
 Snakefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Snakefile b/Snakefile
index 782e219..6d348d0 100644
--- a/Snakefile
+++ b/Snakefile
@@ -731,12 +731,12 @@ rule create_pan1c_report:
 
         # Adding General stats
         shell("echo '# General stats' >> {output.reportfig}")
-        shell("cat {input.genstats} | tr '\\t' ',' | csv2md >> {output.reportfig}")
+        shell("cat {input.genstats} | csv2md -d'\\t' >> {output.reportfig}")
         shell("echo '' >> {output.reportfig}")
 
         # Adding Path stats
         shell("echo '# Path stats' >> {output.reportfig}")
-        shell("cat {input.pathstats} | tr '\\t' ',' | csv2md >> {output.reportfig}")
+        shell("cat {input.pathstats} | csv2md -d'\\t' >> {output.reportfig}")
         shell("echo '' >> {output.reportfig}")
 
         # Adding chromosomes figures
@@ -750,5 +750,5 @@ rule create_pan1c_report:
             chr_name=basename.split('.')[2]
             
             shell("echo '## {chr_name}' >> {output.reportfig}")
-            shell("echo '[{basename}]({basename})' >> {output.reportfig}")
+            shell("echo '![{basename}]({basename})' >> {output.reportfig}")
         shell("echo '' >> {output.reportfig}")
\ No newline at end of file
-- 
GitLab


From 7ff03996368e37a17a428c3d49e03a3bc675d2dc Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 15:30:10 +0200
Subject: [PATCH 14/86] Update Snakefile

---
 Snakefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 6d348d0..b84a8fd 100644
--- a/Snakefile
+++ b/Snakefile
@@ -731,12 +731,12 @@ rule create_pan1c_report:
 
         # Adding General stats
         shell("echo '# General stats' >> {output.reportfig}")
-        shell("cat {input.genstats} | csv2md -d'\\t' >> {output.reportfig}")
+        shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.reportfig}")
         shell("echo '' >> {output.reportfig}")
 
         # Adding Path stats
         shell("echo '# Path stats' >> {output.reportfig}")
-        shell("cat {input.pathstats} | csv2md -d'\\t' >> {output.reportfig}")
+        shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.reportfig}")
         shell("echo '' >> {output.reportfig}")
 
         # Adding chromosomes figures
-- 
GitLab


From aff2e1cbba4192bb58ddea8349e8f38ab3df1128 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 15:38:18 +0200
Subject: [PATCH 15/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index b84a8fd..479ddc7 100644
--- a/Snakefile
+++ b/Snakefile
@@ -747,7 +747,7 @@ rule create_pan1c_report:
         shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
         for fig in fig_list:
             basename=os.path.basename(fig)
-            chr_name=basename.split('.')[2]
+            chr_name=basename.split('.')[1]
             
             shell("echo '## {chr_name}' >> {output.reportfig}")
             shell("echo '![{basename}]({basename})' >> {output.reportfig}")
-- 
GitLab


From 0d907efba1faeeb1635e07d2ac92be5f13d3d3ce Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 15:42:06 +0200
Subject: [PATCH 16/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 479ddc7..e8acceb 100644
--- a/Snakefile
+++ b/Snakefile
@@ -750,5 +750,5 @@ rule create_pan1c_report:
             chr_name=basename.split('.')[1]
             
             shell("echo '## {chr_name}' >> {output.reportfig}")
-            shell("echo '![{basename}]({basename})' >> {output.reportfig}")
+            shell("echo '![{basename}](./report/{basename})' >> {output.reportfig}")
         shell("echo '' >> {output.reportfig}")
\ No newline at end of file
-- 
GitLab


From 9a42d8fb2f2c634758cdf1720619e02cb2584e3a Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 16:39:01 +0200
Subject: [PATCH 17/86] Better figures

---
 Snakefile                 | 11 +++++++----
 scripts/contig_position.R | 10 +++++-----
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/Snakefile b/Snakefile
index e8acceb..4fc3014 100644
--- a/Snakefile
+++ b/Snakefile
@@ -189,7 +189,7 @@ rule contig_position:
 
         #cat $base/{wildcards.chromosome}.gff
 
-        awk '{{a++; if ((a/2)==int(a/2)){{b="gneg"}}else{{b="gpos75"}}; print $1"\\t"$4"\\t"$5"\\tcontig_"a"\\t"b}}' $base/{wildcards.chromosome}.gff | \
+        awk '{{a++; if ((a/2)==int(a/2)){{b="gpos25"}}else{{b="gpos75"}}; print $1"\\t"$4"\\t"$5"\\tcontig_"a"\\t"b}}' $base/{wildcards.chromosome}.gff | \
             sed '1ichr\\tstart\\tend\\tname\\tgieStain' \
             > $base/{wildcards.chromosome}.bands
 
@@ -696,10 +696,10 @@ rule create_pan1c_report_fig:
         """
         ## Odgi 1D viz
         apptainer run --app odgi {params.apppath}/PanGeTools.sif \
-            viz -i {input.graph} -o {output.odgifig} -x 2000 -a 150 -b -H -t {threads} -P
+            viz -i {input.graph} -o {output.odgifig} -x 2500 -a 100 -b -H -t {threads} -P
 
         ## Getting legend from contig figure
-        convert {input.contigfig} -crop 770x+0+0 +repage {output.namefig}
+        convert {input.contigfig} -crop 790x+0+0 +repage {output.namefig}
 
         odgheight=$(identify -ping -format '%h' {output.odgifig})
         ctgheight=$(identify -ping -format '%h' {input.contigfig})
@@ -709,7 +709,7 @@ rule create_pan1c_report_fig:
         echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
 
         ## Creating empty canvas ad adding other figures to it
-        convert -size "2770x$combinedheight" xc:white {output.reportfig}
+        convert -size "2800x$combinedheight" xc:white {output.reportfig}
         composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
         composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
         composite -geometry "+770+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
@@ -729,6 +729,9 @@ rule create_pan1c_report:
     run:
         shell("touch {output.reportfig}")
 
+        # Adding Summary
+        ## To Do
+
         # Adding General stats
         shell("echo '# General stats' >> {output.reportfig}")
         shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.reportfig}")
diff --git a/scripts/contig_position.R b/scripts/contig_position.R
index d367beb..8d4f5e4 100644
--- a/scripts/contig_position.R
+++ b/scripts/contig_position.R
@@ -34,19 +34,19 @@ my.cytobands <- toGRanges(x)
 # Creating figure
 # Customizing figure parameters : see https://bernatgel.github.io/karyoploter_tutorial//Tutorial/PlotParams/PlotParams.html
 pp <- getDefaultPlotParams(plot.type=1)
-pp$ideogramheight <- 100
-pp$data1outmargin <- 25
-pp$data2outmargin <- 25
+pp$ideogramheight <- 80
+pp$data1outmargin <- 10
+pp$data2outmargin <- 10
 pp$data1inmargin <- 0
 pp$data2inmargin <- 0
-pp$leftmargin <- 0.28
+pp$leftmargin <- 0.24
 pp$rightmargin <- 0
 pp$topmargin <- 0
 pp$bottommargin <- 0
 pp$data1height <- 0
 pp$data2height <- 0
 
-png(opt$out, width=2780, height=(nChr*150), pointsize=6, res=300, antialias = "none")
+png(opt$out, width=3300, height=(nChr*100), pointsize=6, res=300, antialias = "none")
 kp <- plotKaryotype(genome=my.genome, cytobands=my.cytobands, plot.params=pp, chromosomes="all")
 kp <- kpAddBaseNumbers(kp, cex=0.6, tick.dist=5000000)
 dev.off()
\ No newline at end of file
-- 
GitLab


From 1593a417d6709739197f7f9088aa2c03483749e9 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 17:01:08 +0200
Subject: [PATCH 18/86] Lexographical sorting of chromosomes

---
 scripts/chrInputStats.py       | 3 +++
 scripts/chrStatsAggregation.py | 2 ++
 scripts/coreStats.py           | 5 +++++
 3 files changed, 10 insertions(+)

diff --git a/scripts/chrInputStats.py b/scripts/chrInputStats.py
index 8b3015b..85d3d26 100644
--- a/scripts/chrInputStats.py
+++ b/scripts/chrInputStats.py
@@ -55,6 +55,9 @@ Sequence dictionnary :
 """
 seqDict = {}
 
+# Sorting
+args.fastafiles.sort()
+
 # Parsing fasta files
 for filename in args.fastafiles:
     
diff --git a/scripts/chrStatsAggregation.py b/scripts/chrStatsAggregation.py
index 3ac2b10..404549e 100644
--- a/scripts/chrStatsAggregation.py
+++ b/scripts/chrStatsAggregation.py
@@ -43,6 +43,8 @@ args = arg_parser.parse_args()
 ## Main script
 # Getting the list of stats files
 fileList = [k for k in os.listdir(args.inputDir) if k[-3:] == "tsv"]
+# Sorting
+fileList.sort()
 
 stats = {
     "general": [],
diff --git a/scripts/coreStats.py b/scripts/coreStats.py
index 91e693f..db0991c 100644
--- a/scripts/coreStats.py
+++ b/scripts/coreStats.py
@@ -72,6 +72,10 @@ arg_parser.add_argument(
     )
 args = arg_parser.parse_args()
 
+# Sorting
+args.chrGraphStatsFiles.sort()
+args.chrInputStatsFiles.sort()
+
 ## Toolbox
 def getRegEquation(x, y):
     (slope, intercept), ssr, _1, _2, _3 = np.polyfit(x, y, 1, full = True)
@@ -89,6 +93,7 @@ PGGBsteps = [] # Storing PGGB steps name
 
 # Iterating through all pggb time stats
 TimeStatsList = [os.path.join(args.pggbStats, file) for file in os.listdir(args.pggbStats) if file.split('.')[-2] == "time"]
+TimeStatsList.sort()
 print(TimeStatsList)
 for pggbStats in TimeStatsList: 
 
-- 
GitLab


From 4330f3e27a76cd44f4a135ec9b6a184154c20c2a Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 17:14:13 +0200
Subject: [PATCH 19/86] Update Snakefile

---
 Snakefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 4fc3014..8b778d7 100644
--- a/Snakefile
+++ b/Snakefile
@@ -709,10 +709,10 @@ rule create_pan1c_report_fig:
         echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
 
         ## Creating empty canvas ad adding other figures to it
-        convert -size "2800x$combinedheight" xc:white {output.reportfig}
+        convert -size "3300x$combinedheight" xc:white {output.reportfig}
         composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
         composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
-        composite -geometry "+770+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
+        composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
         """
 
 rule create_pan1c_report:
-- 
GitLab


From 1dbe14c244827c8725481f16fb4f8dc623cd4e8e Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Fri, 21 Jun 2024 18:54:02 +0200
Subject: [PATCH 20/86] Updated report figures

---
 Snakefile                 | 2 +-
 scripts/contig_position.R | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index 8b778d7..5b3598a 100644
--- a/Snakefile
+++ b/Snakefile
@@ -696,7 +696,7 @@ rule create_pan1c_report_fig:
         """
         ## Odgi 1D viz
         apptainer run --app odgi {params.apppath}/PanGeTools.sif \
-            viz -i {input.graph} -o {output.odgifig} -x 2500 -a 100 -b -H -t {threads} -P
+            viz -i {input.graph} -o {output.odgifig} -x 2500 -a 80 -b -H -t {threads} -P
 
         ## Getting legend from contig figure
         convert {input.contigfig} -crop 790x+0+0 +repage {output.namefig}
diff --git a/scripts/contig_position.R b/scripts/contig_position.R
index 8d4f5e4..01f9f93 100644
--- a/scripts/contig_position.R
+++ b/scripts/contig_position.R
@@ -34,9 +34,9 @@ my.cytobands <- toGRanges(x)
 # Creating figure
 # Customizing figure parameters : see https://bernatgel.github.io/karyoploter_tutorial//Tutorial/PlotParams/PlotParams.html
 pp <- getDefaultPlotParams(plot.type=1)
-pp$ideogramheight <- 80
-pp$data1outmargin <- 10
-pp$data2outmargin <- 10
+pp$ideogramheight <- 50
+pp$data1outmargin <- 15
+pp$data2outmargin <- 15
 pp$data1inmargin <- 0
 pp$data2inmargin <- 0
 pp$leftmargin <- 0.24
@@ -46,7 +46,7 @@ pp$bottommargin <- 0
 pp$data1height <- 0
 pp$data2height <- 0
 
-png(opt$out, width=3300, height=(nChr*100), pointsize=6, res=300, antialias = "none")
+png(opt$out, width=3300, height=(nChr*80), pointsize=6, res=300, antialias = "none")
 kp <- plotKaryotype(genome=my.genome, cytobands=my.cytobands, plot.params=pp, chromosomes="all")
 kp <- kpAddBaseNumbers(kp, cex=0.6, tick.dist=5000000)
 dev.off()
\ No newline at end of file
-- 
GitLab


From f9f272511a61bb99a489e2b6a38b869acf78dcc8 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 08:14:08 +0200
Subject: [PATCH 21/86] Update getSyriFigs.sh

Renaming chromosomes based on reference to prevent SyRI from matching chromosomes and crashing
---
 scripts/getSyriFigs.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 3f3c7ff..ab2f0d1 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -45,6 +45,8 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
     # Setting filepaths for later
     bamFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).bam"
     syriFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).syri.out"
+    hapID=$(basename ${asmList[i + 1]} .fa.gz | sed 's/.hap/#/')
+    refID=$(basename ${asmList[i]} .fa.gz | sed 's/.hap/#/')
     
     # Adding the output syri file to the array
     syriFileList+=($syriFile)
@@ -53,6 +55,9 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
     apptainer run $appdir/pan1c-env.sif python scripts/syri_pruning.py \
         -r ${asmList[i]} -f ${asmList[i + 1]} -o $wrkdir
 
+    # Renaming chromosomes with same ids as the reference
+    sed -i "s/${hapID}#chr/${refID}#chr/g" $wrkdir/$(basename ${asmList[i + 1]} .gz)
+
     # Minimap2 genome vs genome alignment
     apptainer run --app minimap2 $appdir/PanGeTools.sif \
         -a -x asm5 -c --eqx \
-- 
GitLab


From 856620b5e6de2eb84fb7b7bcfeab53c661ec64c7 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 08:14:52 +0200
Subject: [PATCH 22/86] Update getPanacusHG.sh

Not grouping samples anymore
---
 scripts/getPanacusHG.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/getPanacusHG.sh b/scripts/getPanacusHG.sh
index 7f3ddb9..d26dda3 100755
--- a/scripts/getPanacusHG.sh
+++ b/scripts/getPanacusHG.sh
@@ -35,4 +35,4 @@ grep '^P' $gfa | cut -f2 | grep -ve "$ref" > ${chrdir}/$chrname.paths.noref.txt
 # Running Panacus
 echo "[getPanacusHG::panacus] Running on ${gfa}"
 apptainer run --app panacus $appdir/PanGeTools.sif histgrowth \
-    -t $threads -l 1,2,1,1,1 -q 0,0,1,0.5,0.1 -S -a -s ${chrdir}/$chrname.paths.noref.txt -c all -o html $gfa > ${output}
+    -t $threads -l 1,2,1,1,1 -q 0,0,1,0.5,0.1 -a -s ${chrdir}/$chrname.paths.noref.txt -c all -o html $gfa > ${output}
-- 
GitLab


From cdd042f90b8e40012452b2d60e68b677f2d56c70 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 09:08:03 +0200
Subject: [PATCH 23/86] Update Snakefile

Better RagTag fasta emptyness check
---
 Snakefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 5b3598a..c7683a2 100644
--- a/Snakefile
+++ b/Snakefile
@@ -125,8 +125,8 @@ rule ragtag_scaffolding:
             -q {input.fa} \
             -o {output.fa}
 
-        if [ ! -s {output.fa} ]; then
-            echo "Empty fasta"
+        if [[ -z $(grep '[^[:space:]]' {output.fa}) ]] ; then
+            echo "Error : Empty final fasta"
             exit 1
         fi
         """
-- 
GitLab


From 6d6ecbef184438bd350a6eccf790577458a5df48 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 09:09:06 +0200
Subject: [PATCH 24/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index c7683a2..6752afc 100644
--- a/Snakefile
+++ b/Snakefile
@@ -125,7 +125,7 @@ rule ragtag_scaffolding:
             -q {input.fa} \
             -o {output.fa}
 
-        if [[ -z $(grep '[^[:space:]]' {output.fa}) ]] ; then
+        if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
             echo "Error : Empty final fasta"
             exit 1
         fi
-- 
GitLab


From 2d0eb5bdccf2bf161269dca6bddfd1f72510a219 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 09:20:29 +0200
Subject: [PATCH 25/86] Update contig_position.R

---
 scripts/contig_position.R | 51 +++++++++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/scripts/contig_position.R b/scripts/contig_position.R
index 01f9f93..b0eeb0a 100644
--- a/scripts/contig_position.R
+++ b/scripts/contig_position.R
@@ -33,20 +33,45 @@ my.cytobands <- toGRanges(x)
 
 # Creating figure
 # Customizing figure parameters : see https://bernatgel.github.io/karyoploter_tutorial//Tutorial/PlotParams/PlotParams.html
+
 pp <- getDefaultPlotParams(plot.type=1)
-pp$ideogramheight <- 50
-pp$data1outmargin <- 15
-pp$data2outmargin <- 15
-pp$data1inmargin <- 0
-pp$data2inmargin <- 0
-pp$leftmargin <- 0.24
-pp$rightmargin <- 0
-pp$topmargin <- 0
-pp$bottommargin <- 0
-pp$data1height <- 0
-pp$data2height <- 0
-
-png(opt$out, width=3300, height=(nChr*80), pointsize=6, res=300, antialias = "none")
+
+if (nChr >= 4){
+    
+    pp$ideogramheight <- 50
+    pp$data1outmargin <- 15
+    pp$data2outmargin <- 15
+    pp$data1inmargin <- 0
+    pp$data2inmargin <- 0
+    pp$leftmargin <- 0.24
+    pp$rightmargin <- 0
+    pp$topmargin <- 0
+    pp$bottommargin <- 0
+    pp$data1height <- 0
+    pp$data2height <- 0
+
+    png(opt$out, width=3300, height=(nChr*80), pointsize=6, res=300, antialias = "none")
+
+} else {
+    # Too low chromosomes cause crash in par handler.
+    pp$ideogramheight <- 50
+    pp$data1outmargin <- 15
+    pp$data2outmargin <- 15
+    pp$data1inmargin <- 0
+    pp$data2inmargin <- 0
+    pp$leftmargin <- 0.24
+    pp$rightmargin <- 0
+    pp$topmargin <- 0.5
+    pp$bottommargin <- 0
+    pp$data1height <- 0
+    pp$data2height <- 0
+
+    png(opt$out, width=3300, height=(nChr*80*2), pointsize=6, res=300, antialias = "none")
+
+}
+
+
+
 kp <- plotKaryotype(genome=my.genome, cytobands=my.cytobands, plot.params=pp, chromosomes="all")
 kp <- kpAddBaseNumbers(kp, cex=0.6, tick.dist=5000000)
 dev.off()
\ No newline at end of file
-- 
GitLab


From b8a30f4d9460051f4b68b4cc0987995fb3b6fb84 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 10:21:01 +0200
Subject: [PATCH 26/86] Update Snakefile

Allowing retries for SyRI
---
 Snakefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Snakefile b/Snakefile
index 6752afc..e0c3e0d 100644
--- a/Snakefile
+++ b/Snakefile
@@ -266,6 +266,7 @@ rule create_sglAsm_syri_fig:
         fig="output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
         wrkdir=directory('data/asm.syri/{haplotype}')
     threads: 4
+    retries: 1
     resources:
         mem_gb=48
     params:
-- 
GitLab


From 0e16a0448879356cb7405f49db2390fb4686b469 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 13:30:42 +0200
Subject: [PATCH 27/86] Update Snakefile

---
 Snakefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Snakefile b/Snakefile
index e0c3e0d..839292a 100644
--- a/Snakefile
+++ b/Snakefile
@@ -125,10 +125,10 @@ rule ragtag_scaffolding:
             -q {input.fa} \
             -o {output.fa}
 
-        if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
-            echo "Error : Empty final fasta"
-            exit 1
-        fi
+        #if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
+        #    echo "Error : Empty final fasta"
+        #    exit 1
+        #fi
         """
 
 rule quast_stats:
-- 
GitLab


From a9ed84f4e4f8f1350c226e4744821f0c1214384c Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 15:01:25 +0200
Subject: [PATCH 28/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index ab2f0d1..0b02e0d 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -47,6 +47,9 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
     syriFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).syri.out"
     hapID=$(basename ${asmList[i + 1]} .fa.gz | sed 's/.hap/#/')
     refID=$(basename ${asmList[i]} .fa.gz | sed 's/.hap/#/')
+
+    # Debug
+    echo -e "[Debug::SyRI_script::$hapID] hapID: $hapID\trefID: $refID"
     
     # Adding the output syri file to the array
     syriFileList+=($syriFile)
@@ -65,7 +68,8 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
         -t $threads > $wrkdir/$bamFile.sam
 
     apptainer run --app samtools $appdir/PanGeTools.sif sort -O BAM -@ $threads $wrkdir/$bamFile.sam > $wrkdir/$bamFile
-    rm $wrkdir/$bamFile.sam $wrkdir/*.fa
+    rm $wrkdir/$bamFile.sam 
+    #rm $wrkdir/*.fa
 
     # Syri on previous alignment
     apptainer run $appdir/pan1c-env.sif \
-- 
GitLab


From e01c06ad43a3fc80568bf718568fafdb1bf18b0c Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 15:41:09 +0200
Subject: [PATCH 29/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 0b02e0d..8f845b9 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -45,8 +45,8 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
     # Setting filepaths for later
     bamFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).bam"
     syriFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).syri.out"
-    hapID=$(basename ${asmList[i + 1]} .fa.gz | sed 's/.hap/#/')
-    refID=$(basename ${asmList[i]} .fa.gz | sed 's/.hap/#/')
+    hapID=$(basename ${asmList[i + 1]} .ragtagged.fa.gz | sed 's/.hap/#/')
+    refID=$(basename ${asmList[i]} .ragtagged.fa.gz | sed 's/.hap/#/')
 
     # Debug
     echo -e "[Debug::SyRI_script::$hapID] hapID: $hapID\trefID: $refID"
-- 
GitLab


From bffc5e642d18f395cbd04c7325cffb58c1041897 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 24 Jun 2024 15:48:31 +0200
Subject: [PATCH 30/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 8f845b9..3baf716 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -45,8 +45,8 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
     # Setting filepaths for later
     bamFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).bam"
     syriFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).syri.out"
-    hapID=$(basename ${asmList[i + 1]} .ragtagged.fa.gz | sed 's/.hap/#/')
-    refID=$(basename ${asmList[i]} .ragtagged.fa.gz | sed 's/.hap/#/')
+    #hapID=$(basename ${asmList[i + 1]} .ragtagged.fa.gz | sed 's/.hap/#/')
+    #refID=$(basename ${asmList[i]} .ragtagged.fa.gz | sed 's/.hap/#/')
 
     # Debug
     echo -e "[Debug::SyRI_script::$hapID] hapID: $hapID\trefID: $refID"
@@ -58,8 +58,8 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
     apptainer run $appdir/pan1c-env.sif python scripts/syri_pruning.py \
         -r ${asmList[i]} -f ${asmList[i + 1]} -o $wrkdir
 
-    # Renaming chromosomes with same ids as the reference
-    sed -i "s/${hapID}#chr/${refID}#chr/g" $wrkdir/$(basename ${asmList[i + 1]} .gz)
+    # Renaming chromosomes with same ids as the reference (Not working)
+    #sed -i "s/${hapID}#chr/${refID}#chr/g" $wrkdir/$(basename ${asmList[i + 1]} .gz)
 
     # Minimap2 genome vs genome alignment
     apptainer run --app minimap2 $appdir/PanGeTools.sif \
-- 
GitLab


From 511e110b6289e05fa0629691b2c3fa557779ae0c Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 16:36:56 +0200
Subject: [PATCH 31/86] Testing moving rules to dedicated files

---
 Snakefile       | 235 ++++++++++++++++++++++++------------------------
 rules/tools.smk |  16 ++++
 2 files changed, 133 insertions(+), 118 deletions(-)
 create mode 100644 rules/tools.smk

diff --git a/Snakefile b/Snakefile
index 839292a..052810c 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,5 +1,8 @@
 configfile: "config.yaml"
 
+include: "rules/tools.smk"
+
+
 ## Modules
 import os
 import numpy as np
@@ -7,15 +10,15 @@ import gzip
 import re
 
 """
-
-Main variables used in the workflow
-
+Main variables used in the workflow ---------------------------------------------------------------
 """
-# Retrieving the list of haplotypes (SAMPLES) in data/haplotypes
+
+# Retrieving the list of haplotypes (SAMPLES) in data/haplotypes.
+# File with .fa extension only will be used. (.fna will not be used)
 SAMPLES = np.unique([
-    os.path.basename(f).split('.fa')[0] 
-    for f in os.listdir("data/haplotypes/")
-    if re.search(r"\.fa", f)
+    os.path.basename(file).split('.fa')[0] 
+    for file in os.listdir("data/haplotypes/")
+    if re.search(r"\.fa", file)
     ])
 
 # Retrieving the list of haplotypes excluding the reference
@@ -32,9 +35,10 @@ nHAP = len(SAMPLES)
 with gzip.open("data/haplotypes/"+config['reference'], "r") as handle:
     CHRLIST = [line.decode().split("#")[-1].split('\n')[0] for line in handle.readlines() if line.decode()[0] == ">"]
 
-# Adjusting the requested outputs according to the config file
+# Adding optionnal output based on config.yaml, using the following function
 def which_analysis():
-    # Creating a list with default analysis steps (to prevent the function from returning an empty list)
+    
+    ## Default analysis
     analysis_inputs = [     
         "output/stats/pan1c."+config['name']+".core.stats.tsv", # core stats
         expand("output/panacus.reports/"+config['name']+".{chromosome}.histgrowth.html", chromosome=CHRLIST), # panacus histgrowth 
@@ -42,33 +46,32 @@ def which_analysis():
         "output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv" # chromosomes graph statistics
     ]
     
-    # Optionals analysis steps
+    ## Optionals analysis steps
     if config["get_PAV"] == "True": # Adding PAV matrix creation
         analysis_inputs.append("output/pav.matrices")
-    #if config["get_allASM_SyRI"] == "True": # Creating the figure for all assemblies
-    #    analysis_inputs.append("output/asm.syri.figs/pan1c."+config['name']+".allAsm.syri.png")
-    if config["get_ASMs_SyRI"] == "True": # Creating SyRI for each figures 
+
+    if config["get_ASMs_SyRI"] == "True": # Creating SyRI  for each figures 
         analysis_inputs.append(
-            expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES_NOREF), # syri for haplotypes
+            expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES_NOREF),
         )
-    if config["run_Quast"] == "True": # Running quast on haplotypes
+    if config["run_Quast"] == "True": # Running Quast on input haplotypes
         analysis_inputs.append(
             "output/quast/Quast.report.html"
         )
-    if config["get_contig_pos"] == "True": # Contig position in chromosomes figure
+    if config["get_contig_pos"] == "True": # Chromosome decomposition into its contig figure
         analysis_inputs.append(
             expand("output/chr.contig/{haplotype}.contig.png", haplotype=CHRLIST) 
         )
-    if config["create_report"] == "True": # Create report
-        analysis_inputs.append(
-            "output/pan1c."+config['name']+".report.md"
-        )
-    return analysis_inputs
 
-"""
+        if config["create_report"] == "True": # Creating report (need contig)
+            analysis_inputs.append(
+                "output/pan1c."+config['name']+".report.md"
+            )
 
-Rules
+    return analysis_inputs
 
+"""
+Rules   -------------------------------------------------------------------------------------------
 """
 # Main target rule
 rule all:
@@ -77,29 +80,25 @@ rule all:
         "output/pan1c."+config['name']+".gfa.metadata", # Metadata for the final (also in top of gfa files as # line)
         which_analysis()
 
-"""
-
-Tool rules
-
-"""
-rule samtools_index:
-    # Samtools faidx to index compressed fasta
-    input:
-        fa="{sample}.fa.gz"
-    output:
-        "{sample}.fa.gz.fai",
-        "{sample}.fa.gz.gzi"
-    threads: 1
-    params: 
-        apppath=config["app.path"]
-    shell:
-        "apptainer run --app samtools {params.apppath}/PanGeTools.sif "
-        "faidx {input.fa}"
+# """
+# Tool rules  ---------------------------------------------------------------------------------------
+# """
+# rule samtools_index:
+#     # Samtools faidx to index compressed fasta
+#     input:
+#         fa="{sample}.fa.gz"
+#     output:
+#         "{sample}.fa.gz.fai",
+#         "{sample}.fa.gz.gzi"
+#     threads: 1
+#     params: 
+#         app_path=config["app.path"]
+#     shell:
+#         "apptainer run --app samtools {params.app_path}/PanGeTools.sif "
+#         "faidx {input.fa}"
 
 """
-
-Pre-processing section : preparing pggb inputs
-
+Pre-processing section : preparing pggb inputs  ---------------------------------------------------
 """
 rule ragtag_scaffolding:
     # Scaffold input haplotype against the reference to infer chromosome scale sequences
@@ -114,12 +113,12 @@ rule ragtag_scaffolding:
     threads: 8
     retries: 1
     params:
-        apppath=config["app.path"]
+        app_path=config["app.path"]
     shell:
         """
         bash scripts/ragtagChromInfer.sh \
             -d $(dirname {output.fa})/{wildcards.haplotype} \
-            -a {params.apppath} \
+            -a {params.app_path} \
             -t {threads} \
             -r {input.ref} \
             -q {input.fa} \
@@ -137,14 +136,14 @@ rule quast_stats:
         fas=expand("data/haplotypes/{haplotype}.fa.gz", haplotype=SAMPLES_NOREF),
         ref="data/haplotypes/"+config['reference']
     output:
-        report="output/quast/Quast.report.html"
-    threads: workflow.cores//2
+        report="output/quast/"+config['name']+".quast.report.html"
+    threads: 8
     params:
-        apppath=config["app.path"],
-        panname=config["name"]
+        app_path=config["app.path"],
+        pan_name=config["name"]
     shell:
         """
-        apptainer run {params.apppath}/pan1c-env.sif quast.py \
+        apptainer run {params.app_path}/pan1c-env.sif quast.py \
             -t {threads} \
             -o "$(dirname {output.report})" \
             -r {input.ref} \
@@ -156,9 +155,9 @@ rule quast_stats:
             {input.fas}
 
         # Compressing temporary files
-        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} $(dirname {output.report})/contigs_reports/*.fa
-        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} $(dirname {output.report})/contigs_reports/minimap_output/*
 
         mv $(dirname {output.report})/report.html {output.report}
@@ -174,7 +173,7 @@ rule contig_position:
         outdir=directory("output/chr.contig/{chromosome}")
     threads: 1
     params:
-        apppath=config["app.path"]
+        app_path=config["app.path"]
     shell:
         """
         mkdir {output.outdir}
@@ -182,7 +181,7 @@ rule contig_position:
         hapID=$(echo {wildcards.chromosome} | sed 's/.hap/#/')
         
         # Creating .bands file
-        apptainer run {params.apppath}/pan1c-env.sif python scripts/fasta_not_NNN_gff3.py \
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/fasta_not_NNN_gff3.py \
             -s 5 \
             -f {input.fa} \
             > $base/{wildcards.chromosome}.gff
@@ -206,7 +205,7 @@ rule contig_position:
         #cat $base/{wildcards.chromosome}.tsv
 
         # Creating figure
-        apptainer run --app Renv {params.apppath}/pan1c-env.sif Rscript scripts/contig_position.R \
+        apptainer run --app Renv {params.app_path}/pan1c-env.sif Rscript scripts/contig_position.R \
             -b $base/{wildcards.chromosome}.bands \
             -t $base/{wildcards.chromosome}.tsv \
             -o {output.fig}
@@ -220,15 +219,15 @@ rule clustering:
         expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
     threads: workflow.cores
     params:
-        apppath=config["app.path"],
-        panname=config["name"]
+        app_path=config["app.path"],
+        pan_name=config["name"]
     shell:
         """
         mkdir -p $(dirname {output[0]})
-        apptainer run {params.apppath}/pan1c-env.sif python scripts/inputClustering.py \
-            --fasta {input} --output $(dirname {output[0]}) --panname {params.panname}
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
+            --fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
         for file in $(dirname {output[0]})/*.fa; do
-            apptainer run --app bgzip {params.apppath}/PanGeTools.sif -@ {threads} $file
+            apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
         done
         """
 
@@ -242,13 +241,13 @@ rule create_allAsm_syri_fig:
         wrkdir=directory('data/asm.syri/all')
     threads: 8
     params:
-        apppath=config["app.path"]
+        app_path=config["app.path"]
     shell:
         """
         mkdir {output.wrkdir}
 
         bash scripts/getSyriFigs.sh \
-            -a {params.apppath} \
+            -a {params.app_path} \
             -t {threads} \
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
@@ -270,12 +269,12 @@ rule create_sglAsm_syri_fig:
     resources:
         mem_gb=48
     params:
-        apppath=config["app.path"]
+        app_path=config["app.path"]
     shell:
         """
         mkdir -p {output.wrkdir}
         bash scripts/getSyriFigs.sh \
-            -a {params.apppath} \
+            -a {params.app_path} \
             -t {threads} \
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
@@ -298,7 +297,7 @@ rule create_pggb_input_syri_fig:
         wrkdir=directory('data/chrInput/syri/{chromosome}')
     threads: 8
     params:
-        apppath=config["app.path"],
+        app_path=config["app.path"],
         ref=config['reference']
     shell:
         """
@@ -318,7 +317,7 @@ rule create_pggb_input_syri_fig:
         done
 
         bash scripts/getSyriFigs.sh \
-            -a {params.apppath} \
+            -a {params.app_path} \
             -t {threads} \
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
@@ -338,7 +337,7 @@ rule wfmash_on_chr:
         aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
     threads: 16
     params:
-        apppath=config['app.path'],
+        app_path=config['app.path'],
         segment_length=config['wfmash.segment_length'],
         mapping_id=config['wfmash.mapping_id'],
         wfmash_sec=config['wfmash.secondary']
@@ -351,7 +350,7 @@ rule wfmash_on_chr:
         """
         ## Mapping
         /usr/bin/time -v -o {log.time_map} \
-            apptainer exec {params.apppath}/pggb.sif wfmash \
+            apptainer exec {params.app_path}/pggb.sif wfmash \
             -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
             -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
             --tmp-base $(dirname {output.aln}) \
@@ -361,7 +360,7 @@ rule wfmash_on_chr:
 
         ## Aligning
         /usr/bin/time -v -o {log.time_aln} \
-            apptainer exec {params.apppath}/pggb.sif wfmash \
+            apptainer exec {params.app_path}/pggb.sif wfmash \
             -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
             -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
             --tmp-base $(dirname {output.aln}) \
@@ -371,10 +370,10 @@ rule wfmash_on_chr:
             2> >(tee {log.cmd_aln} >&2)
 
         # Compressing
-        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} -k {output.mapping}
 
-        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} -k {output.aln}
         """
 
@@ -387,7 +386,7 @@ rule seqwish:
         temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
     threads: 8
     params:
-        apppath=config['app.path'],
+        app_path=config['app.path'],
         seqwish=config['seqwish.params']
     log: 
         cmd="logs/pggb/{chromosome}.seqwish.cmd.log",
@@ -395,14 +394,14 @@ rule seqwish:
     shell:
         """
         /usr/bin/time -v -o {log.time} \
-            apptainer exec {params.apppath}/pggb.sif seqwish \
+            apptainer exec {params.app_path}/pggb.sif seqwish \
             -s {input.fa} -p {input.aln} -g {output} \
             {params.seqwish} -t {threads} \
             --temp-dir $(dirname {output}) -P 2>&1 | \
             tee {log.cmd}
 
         # Compressing
-        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} -k {output}
         """
 
@@ -415,18 +414,18 @@ rule gfaffix_on_chr:
         transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
     threads: 1
     params:
-        apppath=config['app.path']
+        app_path=config['app.path']
     log: 
         time="logs/pggb/{chromosome}.gfaffix.time.log"
     shell:
         """
         /usr/bin/time -v -o {log.time} \
-            apptainer exec {params.apppath}/pggb.sif gfaffix \
+            apptainer exec {params.app_path}/pggb.sif gfaffix \
             {input} -o {output.gfa} -t {output.transform} \
             > /dev/null
 
         # Compressing
-        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} -k {output.gfa}
         """
 
@@ -438,7 +437,7 @@ rule odgi_postprocessing:
         gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
     threads: 8
     params:
-        apppath=config['app.path']
+        app_path=config['app.path']
     log: 
         cmd_build="logs/pggb/{chromosome}.odgi_build.cmd.log",
         time_build="logs/pggb/{chromosome}.odgi_build.time.log",
@@ -454,26 +453,26 @@ rule odgi_postprocessing:
 
         ## Converting to odgi format and optimizing namespace
         /usr/bin/time -v -o {log.time_build} \
-            apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             build -t {threads} -P -g {input} -o $OGfile.og -O 2>&1 | \
             tee {log.cmd_build}
         
         ## Unchoping the nodes (merges unitigs into single nodes)
         /usr/bin/time -v -o {log.time_unchop} \
-            apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             unchop -P -t {threads} -i $OGfile.og -o $OGfile.unchoped.og 2>&1 | \
             tee {log.cmd_unchop}
 
         ## Sorting the nodes
         /usr/bin/time -v -o {log.time_sort} \
-            apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             sort -P -p Ygs --temp-dir $(dirname $OGfile) -t {threads} \
             -i $OGfile.unchoped.og -o $OGfile.unchoped.sorted.og 2>&1 | \
             tee {log.cmd_sort}
 
         ## Exporting to gfa
         /usr/bin/time -v -o {log.time_view} \
-            apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             view -i $OGfile.unchoped.sorted.og -g \
             1> {output.gfa} \
             2> >(tee {log.cmd_view} >&2) 
@@ -483,12 +482,12 @@ rule odgi_postprocessing:
 
         ## Adding metadata
         python scripts/getTags.py \
-            --appdir {params.apppath} --config-file config.yaml \
+            --appdir {params.app_path} --config-file config.yaml \
             > "$(dirname {input})/metadata.txt"
         sed -i "/^H/r $(dirname {input})/metadata.txt" {output.gfa}
 
         # Compressing
-        # apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+        # apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
         #     -@ {threads} -k {output.gfa}
         """
 
@@ -513,12 +512,12 @@ rule graph_squeeze:
         "output/pan1c."+config['name']+".gfa"
     threads: 16
     params:
-        apppath=config['app.path']
+        app_path=config['app.path']
     shell:
         """
-        apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             squeeze -t {threads} -O -P -f {input.glist} -o {output}.og
-        apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             view -t {threads} -P -i {output}.og -g > {output}
         rm {output}.og
         """
@@ -532,13 +531,13 @@ rule graph_stats:
         pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
     threads: 4
     params:
-        apppath=config['app.path'],
-        panname=config['name']
+        app_path=config['app.path'],
+        pan_name=config['name']
     shell:
         """
-        apptainer run --app gfastats {params.apppath}/PanGeTools.sif \
+        apptainer run --app gfastats {params.app_path}/PanGeTools.sif \
             -g {input.graph} -P \
-            -o $(dirname {output.genstats})/{params.panname}.{wildcards.chromosome} \
+            -o $(dirname {output.genstats})/{params.pan_name}.{wildcards.chromosome} \
             -t {threads}
         """
 
@@ -551,17 +550,17 @@ rule graph_figs:
         pcov="output/chrGraphs.figs/"+config['name']+".{chromosome}.pcov.png"
     threads: 4
     params:
-        apppath=config['app.path'],
+        app_path=config['app.path'],
         oneDviz=config['odgi.1Dviz.params'],
         pcov=config['odgi.pcov.params']
     shell:
         """
         ## 1D viz
-        apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             viz -i {input.graph} -o {output.oneDviz} {params.oneDviz} -t {threads} -P
 
         ## Pcov viz
-        apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             viz -i {input.graph} -o {output.pcov} {params.pcov} -t {threads} -P
         """
 
@@ -573,13 +572,13 @@ rule aggregate_graphs_stats:
         genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
         pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
     params:
-        apppath=config['app.path'],
-        panname=config['name']
+        app_path=config['app.path'],
+        pan_name=config['name']
     shell:
         """
-        apptainer run {params.apppath}/pan1c-env.sif python scripts/chrStatsAggregation.py \
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrStatsAggregation.py \
             --input $(dirname {input[0]}) --outputGeneral {output.genstats} \
-            --outputPaths {output.pathstats} --panname {params.panname} 
+            --outputPaths {output.pathstats} --panname {params.pan_name} 
         """
 
 rule final_graph_tagging:
@@ -590,12 +589,12 @@ rule final_graph_tagging:
         "output/pan1c."+config['name']+".gfa.metadata"
     threads: 1
     params:
-        apppath=config['app.path'],
-        panname=config['name']
+        app_path=config['app.path'],
+        pan_name=config['name']
     shell:
         """
         python scripts/getTags.py \
-            --appdir {params.apppath} --config-file config.yaml > {output}
+            --appdir {params.app_path} --config-file config.yaml > {output}
         sed -i '/^H/r {output}' {input.graph}
         """
 
@@ -606,12 +605,12 @@ rule pggb_input_stats:
     output:
         "output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
     params:
-        apppath=config['app.path'],
-        panname=config['name']
+        app_path=config['app.path'],
+        pan_name=config['name']
     shell:
         """
-        apptainer run {params.apppath}/pan1c-env.sif python scripts/chrInputStats.py \
-            -f data/chrInputs/*.fa.gz -o {output} -p {params.panname}
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrInputStats.py \
+            -f data/chrInputs/*.fa.gz -o {output} -p {params.pan_name}
         """
 
 rule core_statistics:
@@ -623,14 +622,14 @@ rule core_statistics:
         tsv="output/stats/pan1c."+config['name']+".core.stats.tsv",
         dir=directory("output/pggb.usage.figs")
     params:
-        apppath=config['app.path'],
-        panname=config['name']
+        app_path=config['app.path'],
+        pan_name=config['name']
     shell:
         """
         mkdir -p {output.dir}
-        apptainer run {params.apppath}/pan1c-env.sif python scripts/coreStats.py \
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/coreStats.py \
             --pggbStats logs/pggb --chrInputStats {input.chrInputStats} \
-            --chrGraphStats {input.chrGraphStats} -o {output.tsv} -f {output.dir} -p {params.panname}
+            --chrGraphStats {input.chrGraphStats} -o {output.tsv} -f {output.dir} -p {params.pan_name}
         """
 
 """
@@ -649,7 +648,7 @@ rule get_pav:
         directory("output/pav.matrices")
     threads: 16
     params:
-        apppath=config['app.path']
+        app_path=config['app.path']
     run:
         shell("mkdir {output}")
         # Getting the list of graphs
@@ -657,7 +656,7 @@ rule get_pav:
             graphList = [graph.rstrip("\n") for graph in handle.readlines()]
         # Iterating over graphs
         for graph in graphList:
-            shell("bash scripts/getPanachePAV.sh -g {graph} -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).pav.matrix.tsv -a {params.apppath} -t {threads}")
+            shell("bash scripts/getPanachePAV.sh -g {graph} -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).pav.matrix.tsv -a {params.app_path} -t {threads}")
 
 rule panacus_stats:
     # Produces panacus reports for a chromosome graph
@@ -666,8 +665,8 @@ rule panacus_stats:
     output:
         html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
     params:
-        apppath=config['app.path'],
-        panname=config['name'],
+        app_path=config['app.path'],
+        pan_name=config['name'],
         refname=config['reference']
     threads: 8
     shell:
@@ -677,7 +676,7 @@ rule panacus_stats:
             -r $(basename {params.refname} .fa.gz) \
             -d data/chrGraphs/{wildcards.chromosome} \
             -o {output.html} \
-            -a {params.apppath} \
+            -a {params.app_path} \
             -t {threads}
         """
 
@@ -692,11 +691,11 @@ rule create_pan1c_report_fig:
         reportfig="output/report/"+config['name']+".{chromosome}.report.fig.png"
     threads: 4
     params:
-        apppath=config['app.path']
+        app_path=config['app.path']
     shell:
         """
         ## Odgi 1D viz
-        apptainer run --app odgi {params.apppath}/PanGeTools.sif \
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             viz -i {input.graph} -o {output.odgifig} -x 2500 -a 80 -b -H -t {threads} -P
 
         ## Getting legend from contig figure
@@ -726,7 +725,7 @@ rule create_pan1c_report:
         reportfig="output/pan1c."+config['name']+".report.md"
     threads: 4
     params:
-        apppath=config['app.path']
+        app_path=config['app.path']
     run:
         shell("touch {output.reportfig}")
 
diff --git a/rules/tools.smk b/rules/tools.smk
new file mode 100644
index 0000000..7528154
--- /dev/null
+++ b/rules/tools.smk
@@ -0,0 +1,16 @@
+"""
+Tool rules  ---------------------------------------------------------------------------------------
+"""
+rule samtools_index:
+    # Samtools faidx to index compressed fasta
+    input:
+        fa="{sample}.fa.gz"
+    output:
+        "{sample}.fa.gz.fai",
+        "{sample}.fa.gz.gzi"
+    threads: 1
+    params: 
+        app_path=config["app.path"]
+    shell:
+        "apptainer run --app samtools {params.app_path}/PanGeTools.sif "
+        "faidx {input.fa}"
\ No newline at end of file
-- 
GitLab


From 715361a591f020f8b5bf3c14378ea8def17685a5 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 16:40:18 +0200
Subject: [PATCH 32/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 052810c..a57f18d 100644
--- a/Snakefile
+++ b/Snakefile
@@ -56,7 +56,7 @@ def which_analysis():
         )
     if config["run_Quast"] == "True": # Running Quast on input haplotypes
         analysis_inputs.append(
-            "output/quast/Quast.report.html"
+            "output/quast/"+config['name']+"Quast.report.html"
         )
     if config["get_contig_pos"] == "True": # Chromosome decomposition into its contig figure
         analysis_inputs.append(
-- 
GitLab


From 8b78a0262b616a724a1b13074dcab1ac45515c0d Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 16:41:04 +0200
Subject: [PATCH 33/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index a57f18d..40bba47 100644
--- a/Snakefile
+++ b/Snakefile
@@ -56,7 +56,7 @@ def which_analysis():
         )
     if config["run_Quast"] == "True": # Running Quast on input haplotypes
         analysis_inputs.append(
-            "output/quast/"+config['name']+"Quast.report.html"
+            "output/quast/"+config['name']+".quast.report.html"
         )
     if config["get_contig_pos"] == "True": # Chromosome decomposition into its contig figure
         analysis_inputs.append(
-- 
GitLab


From 32673952cdc1dc79cf5c9e751c2768826906b767 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 16:47:15 +0200
Subject: [PATCH 34/86] Moved rules to dedicated SMK files

---
 Snakefile                | 1294 +++++++++++++++++++-------------------
 rules/core.smk           |  346 ++++++++++
 rules/post-analysis.smk  |  118 ++++
 rules/pre-processing.smk |  185 ++++++
 4 files changed, 1295 insertions(+), 648 deletions(-)
 create mode 100644 rules/core.smk
 create mode 100644 rules/post-analysis.smk
 create mode 100644 rules/pre-processing.smk

diff --git a/Snakefile b/Snakefile
index 40bba47..0577e00 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,7 +1,9 @@
 configfile: "config.yaml"
 
 include: "rules/tools.smk"
-
+include: "rules/pre-processing.smk"
+include: "rules/core.smk"
+include: "rules/post-analysis.smk"
 
 ## Modules
 import os
@@ -97,661 +99,657 @@ rule all:
 #         "apptainer run --app samtools {params.app_path}/PanGeTools.sif "
 #         "faidx {input.fa}"
 
-"""
-Pre-processing section : preparing pggb inputs  ---------------------------------------------------
-"""
-rule ragtag_scaffolding:
-    # Scaffold input haplotype against the reference to infer chromosome scale sequences
-    input:
-        ref="data/haplotypes/"+config['reference'],
-        reffai="data/haplotypes/"+config['reference']+".fai",
-        refgzi="data/haplotypes/"+config['reference']+".gzi",
-        fa="data/haplotypes/{haplotype}.fa.gz"
-    output:
-        fa="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz",
-        tar="data/hap.ragtagged/{haplotype}.tar.gz"
-    threads: 8
-    retries: 1
-    params:
-        app_path=config["app.path"]
-    shell:
-        """
-        bash scripts/ragtagChromInfer.sh \
-            -d $(dirname {output.fa})/{wildcards.haplotype} \
-            -a {params.app_path} \
-            -t {threads} \
-            -r {input.ref} \
-            -q {input.fa} \
-            -o {output.fa}
-
-        #if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
-        #    echo "Error : Empty final fasta"
-        #    exit 1
-        #fi
-        """
-
-rule quast_stats:
-    # Run Quast on ragtagged genomes
-    input:
-        fas=expand("data/haplotypes/{haplotype}.fa.gz", haplotype=SAMPLES_NOREF),
-        ref="data/haplotypes/"+config['reference']
-    output:
-        report="output/quast/"+config['name']+".quast.report.html"
-    threads: 8
-    params:
-        app_path=config["app.path"],
-        pan_name=config["name"]
-    shell:
-        """
-        apptainer run {params.app_path}/pan1c-env.sif quast.py \
-            -t {threads} \
-            -o "$(dirname {output.report})" \
-            -r {input.ref} \
-            --plots-format png \
-            --no-read-stats \
-            --no-icarus \
-            -s \
-            --large \
-            {input.fas}
-
-        # Compressing temporary files
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} $(dirname {output.report})/contigs_reports/*.fa
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} $(dirname {output.report})/contigs_reports/minimap_output/*
-
-        mv $(dirname {output.report})/report.html {output.report}
-        """
-
-rule contig_position:
-    # Produce figures with contig positions
-    input:
-        fa="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz",
-        fai="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz.fai"
-    output:
-        fig="output/chr.contig/{chromosome}.contig.png",
-        outdir=directory("output/chr.contig/{chromosome}")
-    threads: 1
-    params:
-        app_path=config["app.path"]
-    shell:
-        """
-        mkdir {output.outdir}
-        base="{output.outdir}"
-        hapID=$(echo {wildcards.chromosome} | sed 's/.hap/#/')
+# """
+# Pre-processing section : preparing pggb inputs  ---------------------------------------------------
+# """
+# rule ragtag_scaffolding:
+#     # Scaffold input haplotype against the reference to infer chromosome scale sequences
+#     input:
+#         ref="data/haplotypes/"+config['reference'],
+#         reffai="data/haplotypes/"+config['reference']+".fai",
+#         refgzi="data/haplotypes/"+config['reference']+".gzi",
+#         fa="data/haplotypes/{haplotype}.fa.gz"
+#     output:
+#         fa="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz",
+#         tar="data/hap.ragtagged/{haplotype}.tar.gz"
+#     threads: 8
+#     retries: 1
+#     params:
+#         app_path=config["app.path"]
+#     shell:
+#         """
+#         bash scripts/ragtagChromInfer.sh \
+#             -d $(dirname {output.fa})/{wildcards.haplotype} \
+#             -a {params.app_path} \
+#             -t {threads} \
+#             -r {input.ref} \
+#             -q {input.fa} \
+#             -o {output.fa}
+
+#         #if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
+#         #    echo "Error : Empty final fasta"
+#         #    exit 1
+#         #fi
+#         """
+
+# rule quast_stats:
+#     # Run Quast on ragtagged genomes
+#     input:
+#         fas=expand("data/haplotypes/{haplotype}.fa.gz", haplotype=SAMPLES_NOREF),
+#         ref="data/haplotypes/"+config['reference']
+#     output:
+#         report="output/quast/"+config['name']+".quast.report.html"
+#     threads: 8
+#     params:
+#         app_path=config["app.path"],
+#         pan_name=config["name"]
+#     shell:
+#         """
+#         apptainer run {params.app_path}/pan1c-env.sif quast.py \
+#             -t {threads} \
+#             -o "$(dirname {output.report})" \
+#             -r {input.ref} \
+#             --plots-format png \
+#             --no-read-stats \
+#             --no-icarus \
+#             -s \
+#             --large \
+#             {input.fas}
+
+#         # Compressing temporary files
+#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+#             -@ {threads} $(dirname {output.report})/contigs_reports/*.fa
+#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+#             -@ {threads} $(dirname {output.report})/contigs_reports/minimap_output/*
+
+#         mv $(dirname {output.report})/report.html {output.report}
+#         """
+
+# rule contig_position:
+#     # Produce figures with contig positions
+#     input:
+#         fa="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz",
+#         fai="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz.fai"
+#     output:
+#         fig="output/chr.contig/{chromosome}.contig.png",
+#         outdir=directory("output/chr.contig/{chromosome}")
+#     threads: 1
+#     params:
+#         app_path=config["app.path"]
+#     shell:
+#         """
+#         mkdir {output.outdir}
+#         base="{output.outdir}"
+#         hapID=$(echo {wildcards.chromosome} | sed 's/.hap/#/')
         
-        # Creating .bands file
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/fasta_not_NNN_gff3.py \
-            -s 5 \
-            -f {input.fa} \
-            > $base/{wildcards.chromosome}.gff
-
-        #cat $base/{wildcards.chromosome}.gff
-
-        awk '{{a++; if ((a/2)==int(a/2)){{b="gpos25"}}else{{b="gpos75"}}; print $1"\\t"$4"\\t"$5"\\tcontig_"a"\\t"b}}' $base/{wildcards.chromosome}.gff | \
-            sed '1ichr\\tstart\\tend\\tname\\tgieStain' \
-            > $base/{wildcards.chromosome}.bands
-
-        #cat $base/{wildcards.chromosome}.bands
-
-        # Creating bed like file
-        cat {input.fai} | \
-            cut -f1,2 | tr '\\t' ',' | \
-            sed "s/,/,1,/" | \
-            sed "1i chr\\tstart\\tend" | \
-            tr ',' '\\t' \
-            > $base/{wildcards.chromosome}.tsv
-
-        #cat $base/{wildcards.chromosome}.tsv
-
-        # Creating figure
-        apptainer run --app Renv {params.app_path}/pan1c-env.sif Rscript scripts/contig_position.R \
-            -b $base/{wildcards.chromosome}.bands \
-            -t $base/{wildcards.chromosome}.tsv \
-            -o {output.fig}
-        """
-
-rule clustering:
-    # Read ragtagged fastas and split chromosome sequences into according bins
-    input:
-        expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
-    output:
-        expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
-    threads: workflow.cores
-    params:
-        app_path=config["app.path"],
-        pan_name=config["name"]
-    shell:
-        """
-        mkdir -p $(dirname {output[0]})
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
-            --fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
-        for file in $(dirname {output[0]})/*.fa; do
-            apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
-        done
-        """
-
-rule create_allAsm_syri_fig:
-    # Create a SyRI figure containing all input assemblies
-    input:
-        ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
-        qry=expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
-    output:
-        fig="output/asm.syri.figs/pan1c."+config['name']+".allAsm.syri.png",
-        wrkdir=directory('data/asm.syri/all')
-    threads: 8
-    params:
-        app_path=config["app.path"]
-    shell:
-        """
-        mkdir {output.wrkdir}
-
-        bash scripts/getSyriFigs.sh \
-            -a {params.app_path} \
-            -t {threads} \
-            -d {output.wrkdir} \
-            -o $(basename {output.fig}) \
-            -r {input.ref} \
-            -q "{input.qry}"
-        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-        """
-
-rule create_sglAsm_syri_fig:
-    # Create a SyRI figure for a single input assembly
-    input:
-        ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
-        qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
-    output:
-        fig="output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
-        wrkdir=directory('data/asm.syri/{haplotype}')
-    threads: 4
-    retries: 1
-    resources:
-        mem_gb=48
-    params:
-        app_path=config["app.path"]
-    shell:
-        """
-        mkdir -p {output.wrkdir}
-        bash scripts/getSyriFigs.sh \
-            -a {params.app_path} \
-            -t {threads} \
-            -d {output.wrkdir} \
-            -o $(basename {output.fig}) \
-            -r {input.ref} \
-            -q "{input.qry}"
-        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-        """
-
-"""
+#         # Creating .bands file
+#         apptainer run {params.app_path}/pan1c-env.sif python scripts/fasta_not_NNN_gff3.py \
+#             -s 5 \
+#             -f {input.fa} \
+#             > $base/{wildcards.chromosome}.gff
+
+#         #cat $base/{wildcards.chromosome}.gff
+
+#         awk '{{a++; if ((a/2)==int(a/2)){{b="gpos25"}}else{{b="gpos75"}}; print $1"\\t"$4"\\t"$5"\\tcontig_"a"\\t"b}}' $base/{wildcards.chromosome}.gff | \
+#             sed '1ichr\\tstart\\tend\\tname\\tgieStain' \
+#             > $base/{wildcards.chromosome}.bands
+
+#         #cat $base/{wildcards.chromosome}.bands
+
+#         # Creating bed like file
+#         cat {input.fai} | \
+#             cut -f1,2 | tr '\\t' ',' | \
+#             sed "s/,/,1,/" | \
+#             sed "1i chr\\tstart\\tend" | \
+#             tr ',' '\\t' \
+#             > $base/{wildcards.chromosome}.tsv
+
+#         #cat $base/{wildcards.chromosome}.tsv
+
+#         # Creating figure
+#         apptainer run --app Renv {params.app_path}/pan1c-env.sif Rscript scripts/contig_position.R \
+#             -b $base/{wildcards.chromosome}.bands \
+#             -t $base/{wildcards.chromosome}.tsv \
+#             -o {output.fig}
+#         """
+
+# rule clustering:
+#     # Read ragtagged fastas and split chromosome sequences into according bins
+#     input:
+#         expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
+#     output:
+#         expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
+#     threads: workflow.cores
+#     params:
+#         app_path=config["app.path"],
+#         pan_name=config["name"]
+#     shell:
+#         """
+#         mkdir -p $(dirname {output[0]})
+#         apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
+#             --fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
+#         for file in $(dirname {output[0]})/*.fa; do
+#             apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
+#         done
+#         """
+
+# rule create_allAsm_syri_fig:
+#     # Create a SyRI figure containing all input assemblies
+#     input:
+#         ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
+#         qry=expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
+#     output:
+#         fig="output/asm.syri.figs/pan1c."+config['name']+".allAsm.syri.png",
+#         wrkdir=directory('data/asm.syri/all')
+#     threads: 8
+#     params:
+#         app_path=config["app.path"]
+#     shell:
+#         """
+#         mkdir {output.wrkdir}
+
+#         bash scripts/getSyriFigs.sh \
+#             -a {params.app_path} \
+#             -t {threads} \
+#             -d {output.wrkdir} \
+#             -o $(basename {output.fig}) \
+#             -r {input.ref} \
+#             -q "{input.qry}"
+#         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
+#         """
+
+# rule create_sglAsm_syri_fig:
+#     # Create a SyRI figure for a single input assembly
+#     input:
+#         ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
+#         qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
+#     output:
+#         fig="output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
+#         wrkdir=directory('data/asm.syri/{haplotype}')
+#     threads: 4
+#     retries: 1
+#     resources:
+#         mem_gb=48
+#     params:
+#         app_path=config["app.path"]
+#     shell:
+#         """
+#         mkdir -p {output.wrkdir}
+#         bash scripts/getSyriFigs.sh \
+#             -a {params.app_path} \
+#             -t {threads} \
+#             -d {output.wrkdir} \
+#             -o $(basename {output.fig}) \
+#             -r {input.ref} \
+#             -q "{input.qry}"
+#         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
+#         """
 
-Core section : Running PGGB
+# """
+# Core section : Running PGGB
+# """
 
-"""
+# rule create_pggb_input_syri_fig:
+#     input:
+#         fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
+#     output:
+#         fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
+#         wrkdir=directory('data/chrInput/syri/{chromosome}')
+#     threads: 8
+#     params:
+#         app_path=config["app.path"],
+#         ref=config['reference']
+#     shell:
+#         """
+#         mkdir {output.wrkdir}
+#         refname=$(basename {params.ref} .fa.gz | cut -d'.' -f1)
 
-rule create_pggb_input_syri_fig:
-    input:
-        fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
-    output:
-        fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
-        wrkdir=directory('data/chrInput/syri/{chromosome}')
-    threads: 8
-    params:
-        app_path=config["app.path"],
-        ref=config['reference']
-    shell:
-        """
-        mkdir {output.wrkdir}
-        refname=$(basename {params.ref} .fa.gz | cut -d'.' -f1)
-
-        ## Creating single fasta from multifasta
-        zcat {input.fasta} | awk -F"#" \
-            '/^>/ {OUT="{output.wrkdir}" substr($0,2) ".fa"}; {print >> OUT; close(OUT)}'
+#         ## Creating single fasta from multifasta
+#         zcat {input.fasta} | awk -F"#" \
+#             '/^>/ {OUT="{output.wrkdir}" substr($0,2) ".fa"}; {print >> OUT; close(OUT)}'
         
-        ## Getting the list of sequences
-        asmList=()
-        for file in {output.wrkdir}/*.fa; do
-            asm="$(basename $file .fa | cut -f1 -d"#").fa"
-            mv $file "$(dirname $file)/$asm"
-            asmList+=("$asm")
-        done
-
-        bash scripts/getSyriFigs.sh \
-            -a {params.app_path} \
-            -t {threads} \
-            -d {output.wrkdir} \
-            -o $(basename {output.fig}) \
-            -r {output.wrkdir}/"${refname}.fa" \
-            -q "${asmList[@]}"
-        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-        rm {output.wrkdir}/*.fa
-        """
-
-rule wfmash_on_chr:
-    # Run wfmash on a specific chromosome input
-    input:
-        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
-        fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
-    output:
-        mapping=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf"),
-        aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
-    threads: 16
-    params:
-        app_path=config['app.path'],
-        segment_length=config['wfmash.segment_length'],
-        mapping_id=config['wfmash.mapping_id'],
-        wfmash_sec=config['wfmash.secondary']
-    log: 
-        cmd_map="logs/pggb/{chromosome}.wfmash_mapping.cmd.log",
-        time_map="logs/pggb/{chromosome}.wfmash_mapping.time.log",
-        cmd_aln="logs/pggb/{chromosome}.wfmash_aln.cmd.log",
-        time_aln="logs/pggb/{chromosome}.wfmash_aln.time.log",
-    shell:
-        """
-        ## Mapping
-        /usr/bin/time -v -o {log.time_map} \
-            apptainer exec {params.app_path}/pggb.sif wfmash \
-            -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
-            -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
-            --tmp-base $(dirname {output.aln}) \
-            {input.fa} --approx-map \
-            1> {output.mapping} \
-            2> >(tee {log.cmd_map} >&2)
-
-        ## Aligning
-        /usr/bin/time -v -o {log.time_aln} \
-            apptainer exec {params.app_path}/pggb.sif wfmash \
-            -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
-            -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
-            --tmp-base $(dirname {output.aln}) \
-            {input.fa} -i {output.mapping} \
-            --invert-filtering \
-            1> {output.aln} \
-            2> >(tee {log.cmd_aln} >&2)
-
-        # Compressing
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} -k {output.mapping}
-
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} -k {output.aln}
-        """
-
-rule seqwish:
-    # Run seqwish on alignement produced by wfmash
-    input:
-        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
-        aln=rules.wfmash_on_chr.output.aln
-    output:
-        temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
-    threads: 8
-    params:
-        app_path=config['app.path'],
-        seqwish=config['seqwish.params']
-    log: 
-        cmd="logs/pggb/{chromosome}.seqwish.cmd.log",
-        time="logs/pggb/{chromosome}.seqwish.time.log"
-    shell:
-        """
-        /usr/bin/time -v -o {log.time} \
-            apptainer exec {params.app_path}/pggb.sif seqwish \
-            -s {input.fa} -p {input.aln} -g {output} \
-            {params.seqwish} -t {threads} \
-            --temp-dir $(dirname {output}) -P 2>&1 | \
-            tee {log.cmd}
-
-        # Compressing
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} -k {output}
-        """
-
-rule gfaffix_on_chr:
-    # Run gfaffix on seqwish graph
-    input:
-        rules.seqwish.output
-    output:
-        gfa=temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa"),
-        transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
-    threads: 1
-    params:
-        app_path=config['app.path']
-    log: 
-        time="logs/pggb/{chromosome}.gfaffix.time.log"
-    shell:
-        """
-        /usr/bin/time -v -o {log.time} \
-            apptainer exec {params.app_path}/pggb.sif gfaffix \
-            {input} -o {output.gfa} -t {output.transform} \
-            > /dev/null
-
-        # Compressing
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} -k {output.gfa}
-        """
-
-rule odgi_postprocessing:
-    # Running pggb's postprocessing (mainly odgi) steps with gfaffix graph
-    input:
-        rules.gfaffix_on_chr.output.gfa
-    output:
-        gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-    threads: 8
-    params:
-        app_path=config['app.path']
-    log: 
-        cmd_build="logs/pggb/{chromosome}.odgi_build.cmd.log",
-        time_build="logs/pggb/{chromosome}.odgi_build.time.log",
-        cmd_unchop="logs/pggb/{chromosome}.odgi_unchop.cmd.log",
-        time_unchop="logs/pggb/{chromosome}.odgi_unchop.time.log",
-        cmd_sort="logs/pggb/{chromosome}.odgi_sort.cmd.log",
-        time_sort="logs/pggb/{chromosome}.odgi_sort.time.log",
-        cmd_view="logs/pggb/{chromosome}.odgi_view.cmd.log",
-        time_view="logs/pggb/{chromosome}.odgi_view.time.log"
-    shell:
-        """
-        OGfile="$(dirname {input})/$(basename {input} .gfa)"
-
-        ## Converting to odgi format and optimizing namespace
-        /usr/bin/time -v -o {log.time_build} \
-            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            build -t {threads} -P -g {input} -o $OGfile.og -O 2>&1 | \
-            tee {log.cmd_build}
+#         ## Getting the list of sequences
+#         asmList=()
+#         for file in {output.wrkdir}/*.fa; do
+#             asm="$(basename $file .fa | cut -f1 -d"#").fa"
+#             mv $file "$(dirname $file)/$asm"
+#             asmList+=("$asm")
+#         done
+
+#         bash scripts/getSyriFigs.sh \
+#             -a {params.app_path} \
+#             -t {threads} \
+#             -d {output.wrkdir} \
+#             -o $(basename {output.fig}) \
+#             -r {output.wrkdir}/"${refname}.fa" \
+#             -q "${asmList[@]}"
+#         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
+#         rm {output.wrkdir}/*.fa
+#         """
+
+# rule wfmash_on_chr:
+#     # Run wfmash on a specific chromosome input
+#     input:
+#         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
+#         fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
+#     output:
+#         mapping=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf"),
+#         aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
+#     threads: 16
+#     params:
+#         app_path=config['app.path'],
+#         segment_length=config['wfmash.segment_length'],
+#         mapping_id=config['wfmash.mapping_id'],
+#         wfmash_sec=config['wfmash.secondary']
+#     log: 
+#         cmd_map="logs/pggb/{chromosome}.wfmash_mapping.cmd.log",
+#         time_map="logs/pggb/{chromosome}.wfmash_mapping.time.log",
+#         cmd_aln="logs/pggb/{chromosome}.wfmash_aln.cmd.log",
+#         time_aln="logs/pggb/{chromosome}.wfmash_aln.time.log",
+#     shell:
+#         """
+#         ## Mapping
+#         /usr/bin/time -v -o {log.time_map} \
+#             apptainer exec {params.app_path}/pggb.sif wfmash \
+#             -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
+#             -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
+#             --tmp-base $(dirname {output.aln}) \
+#             {input.fa} --approx-map \
+#             1> {output.mapping} \
+#             2> >(tee {log.cmd_map} >&2)
+
+#         ## Aligning
+#         /usr/bin/time -v -o {log.time_aln} \
+#             apptainer exec {params.app_path}/pggb.sif wfmash \
+#             -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
+#             -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
+#             --tmp-base $(dirname {output.aln}) \
+#             {input.fa} -i {output.mapping} \
+#             --invert-filtering \
+#             1> {output.aln} \
+#             2> >(tee {log.cmd_aln} >&2)
+
+#         # Compressing
+#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+#             -@ {threads} -k {output.mapping}
+
+#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+#             -@ {threads} -k {output.aln}
+#         """
+
+# rule seqwish:
+#     # Run seqwish on alignement produced by wfmash
+#     input:
+#         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
+#         aln=rules.wfmash_on_chr.output.aln
+#     output:
+#         temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
+#     threads: 8
+#     params:
+#         app_path=config['app.path'],
+#         seqwish=config['seqwish.params']
+#     log: 
+#         cmd="logs/pggb/{chromosome}.seqwish.cmd.log",
+#         time="logs/pggb/{chromosome}.seqwish.time.log"
+#     shell:
+#         """
+#         /usr/bin/time -v -o {log.time} \
+#             apptainer exec {params.app_path}/pggb.sif seqwish \
+#             -s {input.fa} -p {input.aln} -g {output} \
+#             {params.seqwish} -t {threads} \
+#             --temp-dir $(dirname {output}) -P 2>&1 | \
+#             tee {log.cmd}
+
+#         # Compressing
+#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+#             -@ {threads} -k {output}
+#         """
+
+# rule gfaffix_on_chr:
+#     # Run gfaffix on seqwish graph
+#     input:
+#         rules.seqwish.output
+#     output:
+#         gfa=temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa"),
+#         transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
+#     threads: 1
+#     params:
+#         app_path=config['app.path']
+#     log: 
+#         time="logs/pggb/{chromosome}.gfaffix.time.log"
+#     shell:
+#         """
+#         /usr/bin/time -v -o {log.time} \
+#             apptainer exec {params.app_path}/pggb.sif gfaffix \
+#             {input} -o {output.gfa} -t {output.transform} \
+#             > /dev/null
+
+#         # Compressing
+#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+#             -@ {threads} -k {output.gfa}
+#         """
+
+# rule odgi_postprocessing:
+#     # Running pggb's postprocessing (mainly odgi) steps with gfaffix graph
+#     input:
+#         rules.gfaffix_on_chr.output.gfa
+#     output:
+#         gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+#     threads: 8
+#     params:
+#         app_path=config['app.path']
+#     log: 
+#         cmd_build="logs/pggb/{chromosome}.odgi_build.cmd.log",
+#         time_build="logs/pggb/{chromosome}.odgi_build.time.log",
+#         cmd_unchop="logs/pggb/{chromosome}.odgi_unchop.cmd.log",
+#         time_unchop="logs/pggb/{chromosome}.odgi_unchop.time.log",
+#         cmd_sort="logs/pggb/{chromosome}.odgi_sort.cmd.log",
+#         time_sort="logs/pggb/{chromosome}.odgi_sort.time.log",
+#         cmd_view="logs/pggb/{chromosome}.odgi_view.cmd.log",
+#         time_view="logs/pggb/{chromosome}.odgi_view.time.log"
+#     shell:
+#         """
+#         OGfile="$(dirname {input})/$(basename {input} .gfa)"
+
+#         ## Converting to odgi format and optimizing namespace
+#         /usr/bin/time -v -o {log.time_build} \
+#             apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+#             build -t {threads} -P -g {input} -o $OGfile.og -O 2>&1 | \
+#             tee {log.cmd_build}
         
-        ## Unchoping the nodes (merges unitigs into single nodes)
-        /usr/bin/time -v -o {log.time_unchop} \
-            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            unchop -P -t {threads} -i $OGfile.og -o $OGfile.unchoped.og 2>&1 | \
-            tee {log.cmd_unchop}
-
-        ## Sorting the nodes
-        /usr/bin/time -v -o {log.time_sort} \
-            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            sort -P -p Ygs --temp-dir $(dirname $OGfile) -t {threads} \
-            -i $OGfile.unchoped.og -o $OGfile.unchoped.sorted.og 2>&1 | \
-            tee {log.cmd_sort}
-
-        ## Exporting to gfa
-        /usr/bin/time -v -o {log.time_view} \
-            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            view -i $OGfile.unchoped.sorted.og -g \
-            1> {output.gfa} \
-            2> >(tee {log.cmd_view} >&2) 
-
-        ## Removing .og files for space savings
-        rm $(dirname {input})/*.og
-
-        ## Adding metadata
-        python scripts/getTags.py \
-            --appdir {params.app_path} --config-file config.yaml \
-            > "$(dirname {input})/metadata.txt"
-        sed -i "/^H/r $(dirname {input})/metadata.txt" {output.gfa}
-
-        # Compressing
-        # apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-        #     -@ {threads} -k {output.gfa}
-        """
-
-rule generate_graph_list:
-    # Generate a text file containing all created graphs
-    input:
-        expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
-    output:
-        "data/chrGraphs/graphsList.txt"
-    threads: 1
-    run:
-        with open(output[0], "w") as handle:
-            for file in input:
-                handle.write(file+"\n")
-
-rule graph_squeeze:
-    # Using odgi to merge every subgraphs into a final one
-    input:
-        glist="data/chrGraphs/graphsList.txt",
-        graphs=expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
-    output:
-        "output/pan1c."+config['name']+".gfa"
-    threads: 16
-    params:
-        app_path=config['app.path']
-    shell:
-        """
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            squeeze -t {threads} -O -P -f {input.glist} -o {output}.og
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            view -t {threads} -P -i {output}.og -g > {output}
-        rm {output}.og
-        """
-
-rule graph_stats:
-    # Using GFAstats to produce stats on every chromosome graphs
-    input:
-        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-    output:
-        genstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv",
-        pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
-    threads: 4
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        apptainer run --app gfastats {params.app_path}/PanGeTools.sif \
-            -g {input.graph} -P \
-            -o $(dirname {output.genstats})/{params.pan_name}.{wildcards.chromosome} \
-            -t {threads}
-        """
-
-rule graph_figs:
-    # Creating figures using odgi viz 
-    input:
-        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-    output:
-        oneDviz="output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png",
-        pcov="output/chrGraphs.figs/"+config['name']+".{chromosome}.pcov.png"
-    threads: 4
-    params:
-        app_path=config['app.path'],
-        oneDviz=config['odgi.1Dviz.params'],
-        pcov=config['odgi.pcov.params']
-    shell:
-        """
-        ## 1D viz
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            viz -i {input.graph} -o {output.oneDviz} {params.oneDviz} -t {threads} -P
-
-        ## Pcov viz
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            viz -i {input.graph} -o {output.pcov} {params.pcov} -t {threads} -P
-        """
-
-rule aggregate_graphs_stats:
-    # Reading and merging all stats files from chromosome graphs into a .tsv.
-    input:
-        genstats=expand("output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
-    output:
-        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
-        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrStatsAggregation.py \
-            --input $(dirname {input[0]}) --outputGeneral {output.genstats} \
-            --outputPaths {output.pathstats} --panname {params.pan_name} 
-        """
-
-rule final_graph_tagging:
-    # Add metadata to the final GFA
-    input:
-        graph="output/pan1c."+config['name']+".gfa",
-    output:
-        "output/pan1c."+config['name']+".gfa.metadata"
-    threads: 1
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        python scripts/getTags.py \
-            --appdir {params.app_path} --config-file config.yaml > {output}
-        sed -i '/^H/r {output}' {input.graph}
-        """
-
-rule pggb_input_stats:
-    # Produces statistics on pggb input sequences
-    input:
-        flag="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
-    output:
-        "output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrInputStats.py \
-            -f data/chrInputs/*.fa.gz -o {output} -p {params.pan_name}
-        """
-
-rule core_statistics:
-    # Aggregate chrInput, chrGraph and pggb statistics into a single tsv 
-    input:
-        chrInputStats="output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
-        chrGraphStats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
-    output:
-        tsv="output/stats/pan1c."+config['name']+".core.stats.tsv",
-        dir=directory("output/pggb.usage.figs")
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        mkdir -p {output.dir}
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/coreStats.py \
-            --pggbStats logs/pggb --chrInputStats {input.chrInputStats} \
-            --chrGraphStats {input.chrGraphStats} -o {output.tsv} -f {output.dir} -p {params.pan_name}
-        """
+#         ## Unchoping the nodes (merges unitigs into single nodes)
+#         /usr/bin/time -v -o {log.time_unchop} \
+#             apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+#             unchop -P -t {threads} -i $OGfile.og -o $OGfile.unchoped.og 2>&1 | \
+#             tee {log.cmd_unchop}
+
+#         ## Sorting the nodes
+#         /usr/bin/time -v -o {log.time_sort} \
+#             apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+#             sort -P -p Ygs --temp-dir $(dirname $OGfile) -t {threads} \
+#             -i $OGfile.unchoped.og -o $OGfile.unchoped.sorted.og 2>&1 | \
+#             tee {log.cmd_sort}
+
+#         ## Exporting to gfa
+#         /usr/bin/time -v -o {log.time_view} \
+#             apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+#             view -i $OGfile.unchoped.sorted.og -g \
+#             1> {output.gfa} \
+#             2> >(tee {log.cmd_view} >&2) 
+
+#         ## Removing .og files for space savings
+#         rm $(dirname {input})/*.og
+
+#         ## Adding metadata
+#         python scripts/getTags.py \
+#             --appdir {params.app_path} --config-file config.yaml \
+#             > "$(dirname {input})/metadata.txt"
+#         sed -i "/^H/r $(dirname {input})/metadata.txt" {output.gfa}
+
+#         # Compressing
+#         # apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+#         #     -@ {threads} -k {output.gfa}
+#         """
+
+# rule generate_graph_list:
+#     # Generate a text file containing all created graphs
+#     input:
+#         expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
+#     output:
+#         "data/chrGraphs/graphsList.txt"
+#     threads: 1
+#     run:
+#         with open(output[0], "w") as handle:
+#             for file in input:
+#                 handle.write(file+"\n")
 
-"""
+# rule graph_squeeze:
+#     # Using odgi to merge every subgraphs into a final one
+#     input:
+#         glist="data/chrGraphs/graphsList.txt",
+#         graphs=expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
+#     output:
+#         "output/pan1c."+config['name']+".gfa"
+#     threads: 16
+#     params:
+#         app_path=config['app.path']
+#     shell:
+#         """
+#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+#             squeeze -t {threads} -O -P -f {input.glist} -o {output}.og
+#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+#             view -t {threads} -P -i {output}.og -g > {output}
+#         rm {output}.og
+#         """
+
+# rule graph_stats:
+#     # Using GFAstats to produce stats on every chromosome graphs
+#     input:
+#         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+#     output:
+#         genstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv",
+#         pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
+#     threads: 4
+#     params:
+#         app_path=config['app.path'],
+#         pan_name=config['name']
+#     shell:
+#         """
+#         apptainer run --app gfastats {params.app_path}/PanGeTools.sif \
+#             -g {input.graph} -P \
+#             -o $(dirname {output.genstats})/{params.pan_name}.{wildcards.chromosome} \
+#             -t {threads}
+#         """
+
+# rule graph_figs:
+#     # Creating figures using odgi viz 
+#     input:
+#         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+#     output:
+#         oneDviz="output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png",
+#         pcov="output/chrGraphs.figs/"+config['name']+".{chromosome}.pcov.png"
+#     threads: 4
+#     params:
+#         app_path=config['app.path'],
+#         oneDviz=config['odgi.1Dviz.params'],
+#         pcov=config['odgi.pcov.params']
+#     shell:
+#         """
+#         ## 1D viz
+#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+#             viz -i {input.graph} -o {output.oneDviz} {params.oneDviz} -t {threads} -P
+
+#         ## Pcov viz
+#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+#             viz -i {input.graph} -o {output.pcov} {params.pcov} -t {threads} -P
+#         """
+
+# rule aggregate_graphs_stats:
+#     # Reading and merging all stats files from chromosome graphs into a .tsv.
+#     input:
+#         genstats=expand("output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
+#     output:
+#         genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
+#         pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
+#     params:
+#         app_path=config['app.path'],
+#         pan_name=config['name']
+#     shell:
+#         """
+#         apptainer run {params.app_path}/pan1c-env.sif python scripts/chrStatsAggregation.py \
+#             --input $(dirname {input[0]}) --outputGeneral {output.genstats} \
+#             --outputPaths {output.pathstats} --panname {params.pan_name} 
+#         """
+
+# rule final_graph_tagging:
+#     # Add metadata to the final GFA
+#     input:
+#         graph="output/pan1c."+config['name']+".gfa",
+#     output:
+#         "output/pan1c."+config['name']+".gfa.metadata"
+#     threads: 1
+#     params:
+#         app_path=config['app.path'],
+#         pan_name=config['name']
+#     shell:
+#         """
+#         python scripts/getTags.py \
+#             --appdir {params.app_path} --config-file config.yaml > {output}
+#         sed -i '/^H/r {output}' {input.graph}
+#         """
+
+# rule pggb_input_stats:
+#     # Produces statistics on pggb input sequences
+#     input:
+#         flag="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
+#     output:
+#         "output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
+#     params:
+#         app_path=config['app.path'],
+#         pan_name=config['name']
+#     shell:
+#         """
+#         apptainer run {params.app_path}/pan1c-env.sif python scripts/chrInputStats.py \
+#             -f data/chrInputs/*.fa.gz -o {output} -p {params.pan_name}
+#         """
 
-Post-processing section :
-    The graph for each chromosome are made as well as some basic statistics.
-    In this section, more stats are produced but more specifics ones requiring dedicated tools (Panacus, PAVs for Panache ...).
-    It also contains rules to use the graph itself. 
+# rule core_statistics:
+#     # Aggregate chrInput, chrGraph and pggb statistics into a single tsv 
+#     input:
+#         chrInputStats="output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
+#         chrGraphStats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
+#     output:
+#         tsv="output/stats/pan1c."+config['name']+".core.stats.tsv",
+#         dir=directory("output/pggb.usage.figs")
+#     params:
+#         app_path=config['app.path'],
+#         pan_name=config['name']
+#     shell:
+#         """
+#         mkdir -p {output.dir}
+#         apptainer run {params.app_path}/pan1c-env.sif python scripts/coreStats.py \
+#             --pggbStats logs/pggb --chrInputStats {input.chrInputStats} \
+#             --chrGraphStats {input.chrGraphStats} -o {output.tsv} -f {output.dir} -p {params.pan_name}
+#         """
 
-"""
-rule get_pav:
-    # Create PAV matrix readable by panache for a given chromosome scale graph
-    input:
-        "data/chrGraphs/graphsList.txt"
-    output:
-        directory("output/pav.matrices")
-    threads: 16
-    params:
-        app_path=config['app.path']
-    run:
-        shell("mkdir {output}")
-        # Getting the list of graphs
-        with open(input[0]) as handle:
-            graphList = [graph.rstrip("\n") for graph in handle.readlines()]
-        # Iterating over graphs
-        for graph in graphList:
-            shell("bash scripts/getPanachePAV.sh -g {graph} -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).pav.matrix.tsv -a {params.app_path} -t {threads}")
-
-rule panacus_stats:
-    # Produces panacus reports for a chromosome graph
-    input:
-        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-    output:
-        html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name'],
-        refname=config['reference']
-    threads: 8
-    shell:
-        """
-        bash scripts/getPanacusHG.sh \
-            -g {input.graph} \
-            -r $(basename {params.refname} .fa.gz) \
-            -d data/chrGraphs/{wildcards.chromosome} \
-            -o {output.html} \
-            -a {params.app_path} \
-            -t {threads}
-        """
-
-rule create_pan1c_report_fig:
-    # Produces a markdown report figure of chromosomes graphs
-    input:
-        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa',
-        contigfig="output/chr.contig/{chromosome}.contig.png",
-    output:
-        odgifig=temp("tmp/{chromosome}.odgi.png"),
-        namefig=temp("tmp/{chromosome}.name.png"),
-        reportfig="output/report/"+config['name']+".{chromosome}.report.fig.png"
-    threads: 4
-    params:
-        app_path=config['app.path']
-    shell:
-        """
-        ## Odgi 1D viz
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            viz -i {input.graph} -o {output.odgifig} -x 2500 -a 80 -b -H -t {threads} -P
-
-        ## Getting legend from contig figure
-        convert {input.contigfig} -crop 790x+0+0 +repage {output.namefig}
-
-        odgheight=$(identify -ping -format '%h' {output.odgifig})
-        ctgheight=$(identify -ping -format '%h' {input.contigfig})
-
-        combinedheight=$(($odgheight+$ctgheight))
-
-        echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
-
-        ## Creating empty canvas ad adding other figures to it
-        convert -size "3300x$combinedheight" xc:white {output.reportfig}
-        composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
-        composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
-        composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
-        """
-
-rule create_pan1c_report:
-    # Produces a markdown report of chromosomes graphs
-    input:
-        figs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
-        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
-        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
-    output:
-        reportfig="output/pan1c."+config['name']+".report.md"
-    threads: 4
-    params:
-        app_path=config['app.path']
-    run:
-        shell("touch {output.reportfig}")
-
-        # Adding Summary
-        ## To Do
-
-        # Adding General stats
-        shell("echo '# General stats' >> {output.reportfig}")
-        shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.reportfig}")
-        shell("echo '' >> {output.reportfig}")
-
-        # Adding Path stats
-        shell("echo '# Path stats' >> {output.reportfig}")
-        shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.reportfig}")
-        shell("echo '' >> {output.reportfig}")
-
-        # Adding chromosomes figures
-        fig_list = [fig for fig in input.figs]
-        print(fig_list)
-        fig_list.sort()
+# """
+# Post-processing section :
+#     The graph for each chromosome are made as well as some basic statistics.
+#     In this section, more stats are produced but more specifics ones requiring dedicated tools (Panacus, PAVs for Panache ...).
+#     It also contains rules to use the graph itself. 
+# """
+# rule get_pav:
+#     # Create PAV matrix readable by panache for a given chromosome scale graph
+#     input:
+#         "data/chrGraphs/graphsList.txt"
+#     output:
+#         directory("output/pav.matrices")
+#     threads: 16
+#     params:
+#         app_path=config['app.path']
+#     run:
+#         shell("mkdir {output}")
+#         # Getting the list of graphs
+#         with open(input[0]) as handle:
+#             graphList = [graph.rstrip("\n") for graph in handle.readlines()]
+#         # Iterating over graphs
+#         for graph in graphList:
+#             shell("bash scripts/getPanachePAV.sh -g {graph} -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).pav.matrix.tsv -a {params.app_path} -t {threads}")
+
+# rule panacus_stats:
+#     # Produces panacus reports for a chromosome graph
+#     input:
+#         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+#     output:
+#         html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
+#     params:
+#         app_path=config['app.path'],
+#         pan_name=config['name'],
+#         refname=config['reference']
+#     threads: 8
+#     shell:
+#         """
+#         bash scripts/getPanacusHG.sh \
+#             -g {input.graph} \
+#             -r $(basename {params.refname} .fa.gz) \
+#             -d data/chrGraphs/{wildcards.chromosome} \
+#             -o {output.html} \
+#             -a {params.app_path} \
+#             -t {threads}
+#         """
+
+# rule create_pan1c_report_fig:
+#     # Produces a markdown report figure of chromosomes graphs
+#     input:
+#         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa',
+#         contigfig="output/chr.contig/{chromosome}.contig.png",
+#     output:
+#         odgifig=temp("tmp/{chromosome}.odgi.png"),
+#         namefig=temp("tmp/{chromosome}.name.png"),
+#         reportfig="output/report/"+config['name']+".{chromosome}.report.fig.png"
+#     threads: 4
+#     params:
+#         app_path=config['app.path']
+#     shell:
+#         """
+#         ## Odgi 1D viz
+#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+#             viz -i {input.graph} -o {output.odgifig} -x 2500 -a 80 -b -H -t {threads} -P
+
+#         ## Getting legend from contig figure
+#         convert {input.contigfig} -crop 790x+0+0 +repage {output.namefig}
+
+#         odgheight=$(identify -ping -format '%h' {output.odgifig})
+#         ctgheight=$(identify -ping -format '%h' {input.contigfig})
+
+#         combinedheight=$(($odgheight+$ctgheight))
+
+#         echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
+
+#         ## Creating empty canvas ad adding other figures to it
+#         convert -size "3300x$combinedheight" xc:white {output.reportfig}
+#         composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
+#         composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
+#         composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
+#         """
+
+# rule create_pan1c_report:
+#     # Produces a markdown report of chromosomes graphs
+#     input:
+#         figs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
+#         genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
+#         pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
+#     output:
+#         reportfig="output/pan1c."+config['name']+".report.md"
+#     threads: 4
+#     params:
+#         app_path=config['app.path']
+#     run:
+#         shell("touch {output.reportfig}")
+
+#         # Adding Summary
+#         ## To Do
+
+#         # Adding General stats
+#         shell("echo '# General stats' >> {output.reportfig}")
+#         shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.reportfig}")
+#         shell("echo '' >> {output.reportfig}")
+
+#         # Adding Path stats
+#         shell("echo '# Path stats' >> {output.reportfig}")
+#         shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.reportfig}")
+#         shell("echo '' >> {output.reportfig}")
+
+#         # Adding chromosomes figures
+#         fig_list = [fig for fig in input.figs]
+#         print(fig_list)
+#         fig_list.sort()
         
-        shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
-        for fig in fig_list:
-            basename=os.path.basename(fig)
-            chr_name=basename.split('.')[1]
+#         shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
+#         for fig in fig_list:
+#             basename=os.path.basename(fig)
+#             chr_name=basename.split('.')[1]
             
-            shell("echo '## {chr_name}' >> {output.reportfig}")
-            shell("echo '![{basename}](./report/{basename})' >> {output.reportfig}")
-        shell("echo '' >> {output.reportfig}")
\ No newline at end of file
+#             shell("echo '## {chr_name}' >> {output.reportfig}")
+#             shell("echo '![{basename}](./report/{basename})' >> {output.reportfig}")
+#         shell("echo '' >> {output.reportfig}")
\ No newline at end of file
diff --git a/rules/core.smk b/rules/core.smk
new file mode 100644
index 0000000..5473943
--- /dev/null
+++ b/rules/core.smk
@@ -0,0 +1,346 @@
+"""
+Core section : Running PGGB on each chromosome and generating whole pangenome   -------------------
+"""
+
+rule create_pggb_input_syri_fig:
+    input:
+        fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
+    output:
+        fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
+        wrkdir=directory('data/chrInput/syri/{chromosome}')
+    threads: 8
+    params:
+        app_path=config["app.path"],
+        ref=config['reference']
+    shell:
+        """
+        mkdir {output.wrkdir}
+        refname=$(basename {params.ref} .fa.gz | cut -d'.' -f1)
+
+        ## Creating single fasta from multifasta
+        zcat {input.fasta} | awk -F"#" \
+            '/^>/ {OUT="{output.wrkdir}" substr($0,2) ".fa"}; {print >> OUT; close(OUT)}'
+        
+        ## Getting the list of sequences
+        asmList=()
+        for file in {output.wrkdir}/*.fa; do
+            asm="$(basename $file .fa | cut -f1 -d"#").fa"
+            mv $file "$(dirname $file)/$asm"
+            asmList+=("$asm")
+        done
+
+        bash scripts/getSyriFigs.sh \
+            -a {params.app_path} \
+            -t {threads} \
+            -d {output.wrkdir} \
+            -o $(basename {output.fig}) \
+            -r {output.wrkdir}/"${refname}.fa" \
+            -q "${asmList[@]}"
+        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
+        rm {output.wrkdir}/*.fa
+        """
+
+rule wfmash_on_chr:
+    # Run wfmash on a specific chromosome input
+    input:
+        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
+        fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
+    output:
+        mapping=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf"),
+        aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
+    threads: 16
+    params:
+        app_path=config['app.path'],
+        segment_length=config['wfmash.segment_length'],
+        mapping_id=config['wfmash.mapping_id'],
+        wfmash_sec=config['wfmash.secondary']
+    log: 
+        cmd_map="logs/pggb/{chromosome}.wfmash_mapping.cmd.log",
+        time_map="logs/pggb/{chromosome}.wfmash_mapping.time.log",
+        cmd_aln="logs/pggb/{chromosome}.wfmash_aln.cmd.log",
+        time_aln="logs/pggb/{chromosome}.wfmash_aln.time.log",
+    shell:
+        """
+        ## Mapping
+        /usr/bin/time -v -o {log.time_map} \
+            apptainer exec {params.app_path}/pggb.sif wfmash \
+            -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
+            -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
+            --tmp-base $(dirname {output.aln}) \
+            {input.fa} --approx-map \
+            1> {output.mapping} \
+            2> >(tee {log.cmd_map} >&2)
+
+        ## Aligning
+        /usr/bin/time -v -o {log.time_aln} \
+            apptainer exec {params.app_path}/pggb.sif wfmash \
+            -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
+            -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
+            --tmp-base $(dirname {output.aln}) \
+            {input.fa} -i {output.mapping} \
+            --invert-filtering \
+            1> {output.aln} \
+            2> >(tee {log.cmd_aln} >&2)
+
+        # Compressing
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} -k {output.mapping}
+
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} -k {output.aln}
+        """
+
+rule seqwish:
+    # Run seqwish on alignement produced by wfmash
+    input:
+        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
+        aln=rules.wfmash_on_chr.output.aln
+    output:
+        temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
+    threads: 8
+    params:
+        app_path=config['app.path'],
+        seqwish=config['seqwish.params']
+    log: 
+        cmd="logs/pggb/{chromosome}.seqwish.cmd.log",
+        time="logs/pggb/{chromosome}.seqwish.time.log"
+    shell:
+        """
+        /usr/bin/time -v -o {log.time} \
+            apptainer exec {params.app_path}/pggb.sif seqwish \
+            -s {input.fa} -p {input.aln} -g {output} \
+            {params.seqwish} -t {threads} \
+            --temp-dir $(dirname {output}) -P 2>&1 | \
+            tee {log.cmd}
+
+        # Compressing
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} -k {output}
+        """
+
+rule gfaffix_on_chr:
+    # Run gfaffix on seqwish graph
+    input:
+        rules.seqwish.output
+    output:
+        gfa=temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa"),
+        transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
+    threads: 1
+    params:
+        app_path=config['app.path']
+    log: 
+        time="logs/pggb/{chromosome}.gfaffix.time.log"
+    shell:
+        """
+        /usr/bin/time -v -o {log.time} \
+            apptainer exec {params.app_path}/pggb.sif gfaffix \
+            {input} -o {output.gfa} -t {output.transform} \
+            > /dev/null
+
+        # Compressing
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} -k {output.gfa}
+        """
+
+rule odgi_postprocessing:
+    # Running pggb's postprocessing (mainly odgi) steps with gfaffix graph
+    input:
+        rules.gfaffix_on_chr.output.gfa
+    output:
+        gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+    threads: 8
+    params:
+        app_path=config['app.path']
+    log: 
+        cmd_build="logs/pggb/{chromosome}.odgi_build.cmd.log",
+        time_build="logs/pggb/{chromosome}.odgi_build.time.log",
+        cmd_unchop="logs/pggb/{chromosome}.odgi_unchop.cmd.log",
+        time_unchop="logs/pggb/{chromosome}.odgi_unchop.time.log",
+        cmd_sort="logs/pggb/{chromosome}.odgi_sort.cmd.log",
+        time_sort="logs/pggb/{chromosome}.odgi_sort.time.log",
+        cmd_view="logs/pggb/{chromosome}.odgi_view.cmd.log",
+        time_view="logs/pggb/{chromosome}.odgi_view.time.log"
+    shell:
+        """
+        OGfile="$(dirname {input})/$(basename {input} .gfa)"
+
+        ## Converting to odgi format and optimizing namespace
+        /usr/bin/time -v -o {log.time_build} \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            build -t {threads} -P -g {input} -o $OGfile.og -O 2>&1 | \
+            tee {log.cmd_build}
+        
+        ## Unchoping the nodes (merges unitigs into single nodes)
+        /usr/bin/time -v -o {log.time_unchop} \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            unchop -P -t {threads} -i $OGfile.og -o $OGfile.unchoped.og 2>&1 | \
+            tee {log.cmd_unchop}
+
+        ## Sorting the nodes
+        /usr/bin/time -v -o {log.time_sort} \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            sort -P -p Ygs --temp-dir $(dirname $OGfile) -t {threads} \
+            -i $OGfile.unchoped.og -o $OGfile.unchoped.sorted.og 2>&1 | \
+            tee {log.cmd_sort}
+
+        ## Exporting to gfa
+        /usr/bin/time -v -o {log.time_view} \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            view -i $OGfile.unchoped.sorted.og -g \
+            1> {output.gfa} \
+            2> >(tee {log.cmd_view} >&2) 
+
+        ## Removing .og files for space savings
+        rm $(dirname {input})/*.og
+
+        ## Adding metadata
+        python scripts/getTags.py \
+            --appdir {params.app_path} --config-file config.yaml \
+            > "$(dirname {input})/metadata.txt"
+        sed -i "/^H/r $(dirname {input})/metadata.txt" {output.gfa}
+
+        # Compressing
+        # apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+        #     -@ {threads} -k {output.gfa}
+        """
+
+rule generate_graph_list:
+    # Generate a text file containing all created graphs
+    input:
+        expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
+    output:
+        "data/chrGraphs/graphsList.txt"
+    threads: 1
+    run:
+        with open(output[0], "w") as handle:
+            for file in input:
+                handle.write(file+"\n")
+
+rule graph_squeeze:
+    # Using odgi to merge every subgraphs into a final one
+    input:
+        glist="data/chrGraphs/graphsList.txt",
+        graphs=expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
+    output:
+        "output/pan1c."+config['name']+".gfa"
+    threads: 16
+    params:
+        app_path=config['app.path']
+    shell:
+        """
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            squeeze -t {threads} -O -P -f {input.glist} -o {output}.og
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            view -t {threads} -P -i {output}.og -g > {output}
+        rm {output}.og
+        """
+
+rule graph_stats:
+    # Using GFAstats to produce stats on every chromosome graphs
+    input:
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+    output:
+        genstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv",
+        pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
+    threads: 4
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        apptainer run --app gfastats {params.app_path}/PanGeTools.sif \
+            -g {input.graph} -P \
+            -o $(dirname {output.genstats})/{params.pan_name}.{wildcards.chromosome} \
+            -t {threads}
+        """
+
+rule graph_figs:
+    # Creating figures using odgi viz 
+    input:
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+    output:
+        oneDviz="output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png",
+        pcov="output/chrGraphs.figs/"+config['name']+".{chromosome}.pcov.png"
+    threads: 4
+    params:
+        app_path=config['app.path'],
+        oneDviz=config['odgi.1Dviz.params'],
+        pcov=config['odgi.pcov.params']
+    shell:
+        """
+        ## 1D viz
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            viz -i {input.graph} -o {output.oneDviz} {params.oneDviz} -t {threads} -P
+
+        ## Pcov viz
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            viz -i {input.graph} -o {output.pcov} {params.pcov} -t {threads} -P
+        """
+
+rule aggregate_graphs_stats:
+    # Reading and merging all stats files from chromosome graphs into a .tsv.
+    input:
+        genstats=expand("output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
+    output:
+        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
+        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrStatsAggregation.py \
+            --input $(dirname {input[0]}) --outputGeneral {output.genstats} \
+            --outputPaths {output.pathstats} --panname {params.pan_name} 
+        """
+
+rule final_graph_tagging:
+    # Add metadata to the final GFA
+    input:
+        graph="output/pan1c."+config['name']+".gfa",
+    output:
+        "output/pan1c."+config['name']+".gfa.metadata"
+    threads: 1
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        python scripts/getTags.py \
+            --appdir {params.app_path} --config-file config.yaml > {output}
+        sed -i '/^H/r {output}' {input.graph}
+        """
+
+rule pggb_input_stats:
+    # Produces statistics on pggb input sequences
+    input:
+        flag="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
+    output:
+        "output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrInputStats.py \
+            -f data/chrInputs/*.fa.gz -o {output} -p {params.pan_name}
+        """
+
+rule core_statistics:
+    # Aggregate chrInput, chrGraph and pggb statistics into a single tsv 
+    input:
+        chrInputStats="output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
+        chrGraphStats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
+    output:
+        tsv="output/stats/pan1c."+config['name']+".core.stats.tsv",
+        dir=directory("output/pggb.usage.figs")
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        mkdir -p {output.dir}
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/coreStats.py \
+            --pggbStats logs/pggb --chrInputStats {input.chrInputStats} \
+            --chrGraphStats {input.chrGraphStats} -o {output.tsv} -f {output.dir} -p {params.pan_name}
+        """
\ No newline at end of file
diff --git a/rules/post-analysis.smk b/rules/post-analysis.smk
new file mode 100644
index 0000000..3041501
--- /dev/null
+++ b/rules/post-analysis.smk
@@ -0,0 +1,118 @@
+"""
+Post-analysis section   ---------------------------------------------------------------------------
+"""
+rule get_pav:
+    # Create PAV matrix readable by panache for a given chromosome scale graph
+    input:
+        "data/chrGraphs/graphsList.txt"
+    output:
+        directory("output/pav.matrices")
+    threads: 16
+    params:
+        app_path=config['app.path']
+    run:
+        shell("mkdir {output}")
+        # Getting the list of graphs
+        with open(input[0]) as handle:
+            graphList = [graph.rstrip("\n") for graph in handle.readlines()]
+        # Iterating over graphs
+        for graph in graphList:
+            shell("bash scripts/getPanachePAV.sh -g {graph} -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).pav.matrix.tsv -a {params.app_path} -t {threads}")
+
+rule panacus_stats:
+    # Produces panacus reports for a chromosome graph
+    input:
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+    output:
+        html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name'],
+        refname=config['reference']
+    threads: 8
+    shell:
+        """
+        bash scripts/getPanacusHG.sh \
+            -g {input.graph} \
+            -r $(basename {params.refname} .fa.gz) \
+            -d data/chrGraphs/{wildcards.chromosome} \
+            -o {output.html} \
+            -a {params.app_path} \
+            -t {threads}
+        """
+
+rule create_pan1c_report_fig:
+    # Produces a markdown report figure of chromosomes graphs
+    input:
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa',
+        contigfig="output/chr.contig/{chromosome}.contig.png",
+    output:
+        odgifig=temp("tmp/{chromosome}.odgi.png"),
+        namefig=temp("tmp/{chromosome}.name.png"),
+        reportfig="output/report/"+config['name']+".{chromosome}.report.fig.png"
+    threads: 4
+    params:
+        app_path=config['app.path']
+    shell:
+        """
+        ## Odgi 1D viz
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            viz -i {input.graph} -o {output.odgifig} -x 2500 -a 80 -b -H -t {threads} -P
+
+        ## Getting legend from contig figure
+        convert {input.contigfig} -crop 790x+0+0 +repage {output.namefig}
+
+        odgheight=$(identify -ping -format '%h' {output.odgifig})
+        ctgheight=$(identify -ping -format '%h' {input.contigfig})
+
+        combinedheight=$(($odgheight+$ctgheight))
+
+        echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
+
+        ## Creating empty canvas ad adding other figures to it
+        convert -size "3300x$combinedheight" xc:white {output.reportfig}
+        composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
+        composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
+        composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
+        """
+
+rule create_pan1c_report:
+    # Produces a markdown report of chromosomes graphs
+    input:
+        figs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
+        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
+        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
+    output:
+        reportfig="output/pan1c."+config['name']+".report.md"
+    threads: 4
+    params:
+        app_path=config['app.path']
+    run:
+        shell("touch {output.reportfig}")
+
+        # Adding Summary
+        ## To Do
+
+        # Adding General stats
+        shell("echo '# General stats' >> {output.reportfig}")
+        shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.reportfig}")
+        shell("echo '' >> {output.reportfig}")
+
+        # Adding Path stats
+        shell("echo '# Path stats' >> {output.reportfig}")
+        shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.reportfig}")
+        shell("echo '' >> {output.reportfig}")
+
+        # Adding chromosomes figures
+        fig_list = [fig for fig in input.figs]
+        print(fig_list)
+        fig_list.sort()
+        
+        shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
+        for fig in fig_list:
+            basename=os.path.basename(fig)
+            chr_name=basename.split('.')[1]
+            
+            shell("echo '## {chr_name}' >> {output.reportfig}")
+            shell("echo '![{basename}](./report/{basename})' >> {output.reportfig}")
+        shell("echo '' >> {output.reportfig}")
\ No newline at end of file
diff --git a/rules/pre-processing.smk b/rules/pre-processing.smk
new file mode 100644
index 0000000..3580108
--- /dev/null
+++ b/rules/pre-processing.smk
@@ -0,0 +1,185 @@
+"""
+Pre-processing section : preparing haplotypes  ----------------------------------------------------
+"""
+rule ragtag_scaffolding:
+    # Scaffold input haplotype against the reference to infer chromosome scale sequences
+    input:
+        ref="data/haplotypes/"+config['reference'],
+        reffai="data/haplotypes/"+config['reference']+".fai",
+        refgzi="data/haplotypes/"+config['reference']+".gzi",
+        fa="data/haplotypes/{haplotype}.fa.gz"
+    output:
+        fa="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz",
+        tar="data/hap.ragtagged/{haplotype}.tar.gz"
+    threads: 8
+    retries: 1
+    params:
+        app_path=config["app.path"]
+    shell:
+        """
+        bash scripts/ragtagChromInfer.sh \
+            -d $(dirname {output.fa})/{wildcards.haplotype} \
+            -a {params.app_path} \
+            -t {threads} \
+            -r {input.ref} \
+            -q {input.fa} \
+            -o {output.fa}
+
+        #if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
+        #    echo "Error : Empty final fasta"
+        #    exit 1
+        #fi
+        """
+
+rule quast_stats:
+    # Run Quast on ragtagged genomes
+    input:
+        fas=expand("data/haplotypes/{haplotype}.fa.gz", haplotype=SAMPLES_NOREF),
+        ref="data/haplotypes/"+config['reference']
+    output:
+        report="output/quast/"+config['name']+".quast.report.html"
+    threads: 8
+    params:
+        app_path=config["app.path"],
+        pan_name=config["name"]
+    shell:
+        """
+        apptainer run {params.app_path}/pan1c-env.sif quast.py \
+            -t {threads} \
+            -o "$(dirname {output.report})" \
+            -r {input.ref} \
+            --plots-format png \
+            --no-read-stats \
+            --no-icarus \
+            -s \
+            --large \
+            {input.fas}
+
+        # Compressing temporary files
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} $(dirname {output.report})/contigs_reports/*.fa
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} $(dirname {output.report})/contigs_reports/minimap_output/*
+
+        mv $(dirname {output.report})/report.html {output.report}
+        """
+
+rule contig_position:
+    # Produce figures with contig positions
+    input:
+        fa="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz",
+        fai="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz.fai"
+    output:
+        fig="output/chr.contig/{chromosome}.contig.png",
+        outdir=directory("output/chr.contig/{chromosome}")
+    threads: 1
+    params:
+        app_path=config["app.path"]
+    shell:
+        """
+        mkdir {output.outdir}
+        base="{output.outdir}"
+        hapID=$(echo {wildcards.chromosome} | sed 's/.hap/#/')
+        
+        # Creating .bands file
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/fasta_not_NNN_gff3.py \
+            -s 5 \
+            -f {input.fa} \
+            > $base/{wildcards.chromosome}.gff
+
+        #cat $base/{wildcards.chromosome}.gff
+
+        awk '{{a++; if ((a/2)==int(a/2)){{b="gpos25"}}else{{b="gpos75"}}; print $1"\\t"$4"\\t"$5"\\tcontig_"a"\\t"b}}' $base/{wildcards.chromosome}.gff | \
+            sed '1ichr\\tstart\\tend\\tname\\tgieStain' \
+            > $base/{wildcards.chromosome}.bands
+
+        #cat $base/{wildcards.chromosome}.bands
+
+        # Creating bed like file
+        cat {input.fai} | \
+            cut -f1,2 | tr '\\t' ',' | \
+            sed "s/,/,1,/" | \
+            sed "1i chr\\tstart\\tend" | \
+            tr ',' '\\t' \
+            > $base/{wildcards.chromosome}.tsv
+
+        #cat $base/{wildcards.chromosome}.tsv
+
+        # Creating figure
+        apptainer run --app Renv {params.app_path}/pan1c-env.sif Rscript scripts/contig_position.R \
+            -b $base/{wildcards.chromosome}.bands \
+            -t $base/{wildcards.chromosome}.tsv \
+            -o {output.fig}
+        """
+
+rule clustering:
+    # Read ragtagged fastas and split chromosome sequences into according bins
+    input:
+        expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
+    output:
+        expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
+    threads: workflow.cores
+    params:
+        app_path=config["app.path"],
+        pan_name=config["name"]
+    shell:
+        """
+        mkdir -p $(dirname {output[0]})
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
+            --fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
+        for file in $(dirname {output[0]})/*.fa; do
+            apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
+        done
+        """
+
+rule create_allAsm_syri_fig:
+    # Create a SyRI figure containing all input assemblies
+    input:
+        ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
+        qry=expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
+    output:
+        fig="output/asm.syri.figs/pan1c."+config['name']+".allAsm.syri.png",
+        wrkdir=directory('data/asm.syri/all')
+    threads: 8
+    params:
+        app_path=config["app.path"]
+    shell:
+        """
+        mkdir {output.wrkdir}
+
+        bash scripts/getSyriFigs.sh \
+            -a {params.app_path} \
+            -t {threads} \
+            -d {output.wrkdir} \
+            -o $(basename {output.fig}) \
+            -r {input.ref} \
+            -q "{input.qry}"
+        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
+        """
+
+rule create_sglAsm_syri_fig:
+    # Create a SyRI figure for a single input assembly
+    input:
+        ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
+        qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
+    output:
+        fig="output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
+        wrkdir=directory('data/asm.syri/{haplotype}')
+    threads: 4
+    retries: 1
+    resources:
+        mem_gb=48
+    params:
+        app_path=config["app.path"]
+    shell:
+        """
+        mkdir -p {output.wrkdir}
+        bash scripts/getSyriFigs.sh \
+            -a {params.app_path} \
+            -t {threads} \
+            -d {output.wrkdir} \
+            -o $(basename {output.fig}) \
+            -r {input.ref} \
+            -q "{input.qry}"
+        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
+        """
\ No newline at end of file
-- 
GitLab


From b6252858bb0f5c45e42b29ed5021f27283520905 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 16:50:59 +0200
Subject: [PATCH 35/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 0577e00..26409c2 100644
--- a/Snakefile
+++ b/Snakefile
@@ -54,7 +54,7 @@ def which_analysis():
 
     if config["get_ASMs_SyRI"] == "True": # Creating SyRI  for each figures 
         analysis_inputs.append(
-            expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES_NOREF),
+            expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES),
         )
     if config["run_Quast"] == "True": # Running Quast on input haplotypes
         analysis_inputs.append(
-- 
GitLab


From 49764da51cf0554ed70f701e0b3419f76c6742c8 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 16:53:14 +0200
Subject: [PATCH 36/86] Update Snakefile

---
 Snakefile | 372 +++++++++++++++++++++++++++---------------------------
 1 file changed, 186 insertions(+), 186 deletions(-)

diff --git a/Snakefile b/Snakefile
index 26409c2..2f20878 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,7 +1,7 @@
 configfile: "config.yaml"
 
 include: "rules/tools.smk"
-include: "rules/pre-processing.smk"
+#include: "rules/pre-processing.smk"
 include: "rules/core.smk"
 include: "rules/post-analysis.smk"
 
@@ -54,7 +54,7 @@ def which_analysis():
 
     if config["get_ASMs_SyRI"] == "True": # Creating SyRI  for each figures 
         analysis_inputs.append(
-            expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES),
+            expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES_NOREF),
         )
     if config["run_Quast"] == "True": # Running Quast on input haplotypes
         analysis_inputs.append(
@@ -99,191 +99,191 @@ rule all:
 #         "apptainer run --app samtools {params.app_path}/PanGeTools.sif "
 #         "faidx {input.fa}"
 
-# """
-# Pre-processing section : preparing pggb inputs  ---------------------------------------------------
-# """
-# rule ragtag_scaffolding:
-#     # Scaffold input haplotype against the reference to infer chromosome scale sequences
-#     input:
-#         ref="data/haplotypes/"+config['reference'],
-#         reffai="data/haplotypes/"+config['reference']+".fai",
-#         refgzi="data/haplotypes/"+config['reference']+".gzi",
-#         fa="data/haplotypes/{haplotype}.fa.gz"
-#     output:
-#         fa="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz",
-#         tar="data/hap.ragtagged/{haplotype}.tar.gz"
-#     threads: 8
-#     retries: 1
-#     params:
-#         app_path=config["app.path"]
-#     shell:
-#         """
-#         bash scripts/ragtagChromInfer.sh \
-#             -d $(dirname {output.fa})/{wildcards.haplotype} \
-#             -a {params.app_path} \
-#             -t {threads} \
-#             -r {input.ref} \
-#             -q {input.fa} \
-#             -o {output.fa}
-
-#         #if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
-#         #    echo "Error : Empty final fasta"
-#         #    exit 1
-#         #fi
-#         """
-
-# rule quast_stats:
-#     # Run Quast on ragtagged genomes
-#     input:
-#         fas=expand("data/haplotypes/{haplotype}.fa.gz", haplotype=SAMPLES_NOREF),
-#         ref="data/haplotypes/"+config['reference']
-#     output:
-#         report="output/quast/"+config['name']+".quast.report.html"
-#     threads: 8
-#     params:
-#         app_path=config["app.path"],
-#         pan_name=config["name"]
-#     shell:
-#         """
-#         apptainer run {params.app_path}/pan1c-env.sif quast.py \
-#             -t {threads} \
-#             -o "$(dirname {output.report})" \
-#             -r {input.ref} \
-#             --plots-format png \
-#             --no-read-stats \
-#             --no-icarus \
-#             -s \
-#             --large \
-#             {input.fas}
-
-#         # Compressing temporary files
-#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-#             -@ {threads} $(dirname {output.report})/contigs_reports/*.fa
-#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-#             -@ {threads} $(dirname {output.report})/contigs_reports/minimap_output/*
-
-#         mv $(dirname {output.report})/report.html {output.report}
-#         """
-
-# rule contig_position:
-#     # Produce figures with contig positions
-#     input:
-#         fa="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz",
-#         fai="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz.fai"
-#     output:
-#         fig="output/chr.contig/{chromosome}.contig.png",
-#         outdir=directory("output/chr.contig/{chromosome}")
-#     threads: 1
-#     params:
-#         app_path=config["app.path"]
-#     shell:
-#         """
-#         mkdir {output.outdir}
-#         base="{output.outdir}"
-#         hapID=$(echo {wildcards.chromosome} | sed 's/.hap/#/')
+"""
+Pre-processing section : preparing pggb inputs  ---------------------------------------------------
+"""
+rule ragtag_scaffolding:
+    # Scaffold input haplotype against the reference to infer chromosome scale sequences
+    input:
+        ref="data/haplotypes/"+config['reference'],
+        reffai="data/haplotypes/"+config['reference']+".fai",
+        refgzi="data/haplotypes/"+config['reference']+".gzi",
+        fa="data/haplotypes/{haplotype}.fa.gz"
+    output:
+        fa="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz",
+        tar="data/hap.ragtagged/{haplotype}.tar.gz"
+    threads: 8
+    retries: 1
+    params:
+        app_path=config["app.path"]
+    shell:
+        """
+        bash scripts/ragtagChromInfer.sh \
+            -d $(dirname {output.fa})/{wildcards.haplotype} \
+            -a {params.app_path} \
+            -t {threads} \
+            -r {input.ref} \
+            -q {input.fa} \
+            -o {output.fa}
+
+        #if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
+        #    echo "Error : Empty final fasta"
+        #    exit 1
+        #fi
+        """
+
+rule quast_stats:
+    # Run Quast on ragtagged genomes
+    input:
+        fas=expand("data/haplotypes/{haplotype}.fa.gz", haplotype=SAMPLES_NOREF),
+        ref="data/haplotypes/"+config['reference']
+    output:
+        report="output/quast/"+config['name']+".quast.report.html"
+    threads: 8
+    params:
+        app_path=config["app.path"],
+        pan_name=config["name"]
+    shell:
+        """
+        apptainer run {params.app_path}/pan1c-env.sif quast.py \
+            -t {threads} \
+            -o "$(dirname {output.report})" \
+            -r {input.ref} \
+            --plots-format png \
+            --no-read-stats \
+            --no-icarus \
+            -s \
+            --large \
+            {input.fas}
+
+        # Compressing temporary files
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} $(dirname {output.report})/contigs_reports/*.fa
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} $(dirname {output.report})/contigs_reports/minimap_output/*
+
+        mv $(dirname {output.report})/report.html {output.report}
+        """
+
+rule contig_position:
+    # Produce figures with contig positions
+    input:
+        fa="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz",
+        fai="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz.fai"
+    output:
+        fig="output/chr.contig/{chromosome}.contig.png",
+        outdir=directory("output/chr.contig/{chromosome}")
+    threads: 1
+    params:
+        app_path=config["app.path"]
+    shell:
+        """
+        mkdir {output.outdir}
+        base="{output.outdir}"
+        hapID=$(echo {wildcards.chromosome} | sed 's/.hap/#/')
         
-#         # Creating .bands file
-#         apptainer run {params.app_path}/pan1c-env.sif python scripts/fasta_not_NNN_gff3.py \
-#             -s 5 \
-#             -f {input.fa} \
-#             > $base/{wildcards.chromosome}.gff
-
-#         #cat $base/{wildcards.chromosome}.gff
-
-#         awk '{{a++; if ((a/2)==int(a/2)){{b="gpos25"}}else{{b="gpos75"}}; print $1"\\t"$4"\\t"$5"\\tcontig_"a"\\t"b}}' $base/{wildcards.chromosome}.gff | \
-#             sed '1ichr\\tstart\\tend\\tname\\tgieStain' \
-#             > $base/{wildcards.chromosome}.bands
-
-#         #cat $base/{wildcards.chromosome}.bands
-
-#         # Creating bed like file
-#         cat {input.fai} | \
-#             cut -f1,2 | tr '\\t' ',' | \
-#             sed "s/,/,1,/" | \
-#             sed "1i chr\\tstart\\tend" | \
-#             tr ',' '\\t' \
-#             > $base/{wildcards.chromosome}.tsv
-
-#         #cat $base/{wildcards.chromosome}.tsv
-
-#         # Creating figure
-#         apptainer run --app Renv {params.app_path}/pan1c-env.sif Rscript scripts/contig_position.R \
-#             -b $base/{wildcards.chromosome}.bands \
-#             -t $base/{wildcards.chromosome}.tsv \
-#             -o {output.fig}
-#         """
-
-# rule clustering:
-#     # Read ragtagged fastas and split chromosome sequences into according bins
-#     input:
-#         expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
-#     output:
-#         expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
-#     threads: workflow.cores
-#     params:
-#         app_path=config["app.path"],
-#         pan_name=config["name"]
-#     shell:
-#         """
-#         mkdir -p $(dirname {output[0]})
-#         apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
-#             --fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
-#         for file in $(dirname {output[0]})/*.fa; do
-#             apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
-#         done
-#         """
-
-# rule create_allAsm_syri_fig:
-#     # Create a SyRI figure containing all input assemblies
-#     input:
-#         ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
-#         qry=expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
-#     output:
-#         fig="output/asm.syri.figs/pan1c."+config['name']+".allAsm.syri.png",
-#         wrkdir=directory('data/asm.syri/all')
-#     threads: 8
-#     params:
-#         app_path=config["app.path"]
-#     shell:
-#         """
-#         mkdir {output.wrkdir}
-
-#         bash scripts/getSyriFigs.sh \
-#             -a {params.app_path} \
-#             -t {threads} \
-#             -d {output.wrkdir} \
-#             -o $(basename {output.fig}) \
-#             -r {input.ref} \
-#             -q "{input.qry}"
-#         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-#         """
-
-# rule create_sglAsm_syri_fig:
-#     # Create a SyRI figure for a single input assembly
-#     input:
-#         ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
-#         qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
-#     output:
-#         fig="output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
-#         wrkdir=directory('data/asm.syri/{haplotype}')
-#     threads: 4
-#     retries: 1
-#     resources:
-#         mem_gb=48
-#     params:
-#         app_path=config["app.path"]
-#     shell:
-#         """
-#         mkdir -p {output.wrkdir}
-#         bash scripts/getSyriFigs.sh \
-#             -a {params.app_path} \
-#             -t {threads} \
-#             -d {output.wrkdir} \
-#             -o $(basename {output.fig}) \
-#             -r {input.ref} \
-#             -q "{input.qry}"
-#         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-#         """
+        # Creating .bands file
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/fasta_not_NNN_gff3.py \
+            -s 5 \
+            -f {input.fa} \
+            > $base/{wildcards.chromosome}.gff
+
+        #cat $base/{wildcards.chromosome}.gff
+
+        awk '{{a++; if ((a/2)==int(a/2)){{b="gpos25"}}else{{b="gpos75"}}; print $1"\\t"$4"\\t"$5"\\tcontig_"a"\\t"b}}' $base/{wildcards.chromosome}.gff | \
+            sed '1ichr\\tstart\\tend\\tname\\tgieStain' \
+            > $base/{wildcards.chromosome}.bands
+
+        #cat $base/{wildcards.chromosome}.bands
+
+        # Creating bed like file
+        cat {input.fai} | \
+            cut -f1,2 | tr '\\t' ',' | \
+            sed "s/,/,1,/" | \
+            sed "1i chr\\tstart\\tend" | \
+            tr ',' '\\t' \
+            > $base/{wildcards.chromosome}.tsv
+
+        #cat $base/{wildcards.chromosome}.tsv
+
+        # Creating figure
+        apptainer run --app Renv {params.app_path}/pan1c-env.sif Rscript scripts/contig_position.R \
+            -b $base/{wildcards.chromosome}.bands \
+            -t $base/{wildcards.chromosome}.tsv \
+            -o {output.fig}
+        """
+
+rule clustering:
+    # Read ragtagged fastas and split chromosome sequences into according bins
+    input:
+        expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
+    output:
+        expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
+    threads: workflow.cores
+    params:
+        app_path=config["app.path"],
+        pan_name=config["name"]
+    shell:
+        """
+        mkdir -p $(dirname {output[0]})
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
+            --fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
+        for file in $(dirname {output[0]})/*.fa; do
+            apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
+        done
+        """
+
+rule create_allAsm_syri_fig:
+    # Create a SyRI figure containing all input assemblies
+    input:
+        ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
+        qry=expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
+    output:
+        fig="output/asm.syri.figs/pan1c."+config['name']+".allAsm.syri.png",
+        wrkdir=directory('data/asm.syri/all')
+    threads: 8
+    params:
+        app_path=config["app.path"]
+    shell:
+        """
+        mkdir {output.wrkdir}
+
+        bash scripts/getSyriFigs.sh \
+            -a {params.app_path} \
+            -t {threads} \
+            -d {output.wrkdir} \
+            -o $(basename {output.fig}) \
+            -r {input.ref} \
+            -q "{input.qry}"
+        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
+        """
+
+rule create_sglAsm_syri_fig:
+    # Create a SyRI figure for a single input assembly
+    input:
+        ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
+        qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
+    output:
+        fig="output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
+        wrkdir=directory('data/asm.syri/{haplotype}')
+    threads: 4
+    retries: 1
+    resources:
+        mem_gb=48
+    params:
+        app_path=config["app.path"]
+    shell:
+        """
+        mkdir -p {output.wrkdir}
+        bash scripts/getSyriFigs.sh \
+            -a {params.app_path} \
+            -t {threads} \
+            -d {output.wrkdir} \
+            -o $(basename {output.fig}) \
+            -r {input.ref} \
+            -q "{input.qry}"
+        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
+        """
 
 # """
 # Core section : Running PGGB
-- 
GitLab


From 97b12904ca91a4400f58768558b83fc92a6f50e8 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 16:59:15 +0200
Subject: [PATCH 37/86] Revert "Update Snakefile"

This reverts commit 8b78a0262b616a724a1b13074dcab1ac45515c0d.
---
 Snakefile                | 953 +++++++++++++++++++--------------------
 rules/core.smk           | 346 --------------
 rules/post-analysis.smk  | 118 -----
 rules/pre-processing.smk | 185 --------
 4 files changed, 469 insertions(+), 1133 deletions(-)
 delete mode 100644 rules/core.smk
 delete mode 100644 rules/post-analysis.smk
 delete mode 100644 rules/pre-processing.smk

diff --git a/Snakefile b/Snakefile
index 2f20878..e3818f2 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,9 +1,7 @@
 configfile: "config.yaml"
 
 include: "rules/tools.smk"
-#include: "rules/pre-processing.smk"
-include: "rules/core.smk"
-include: "rules/post-analysis.smk"
+
 
 ## Modules
 import os
@@ -82,23 +80,6 @@ rule all:
         "output/pan1c."+config['name']+".gfa.metadata", # Metadata for the final (also in top of gfa files as # line)
         which_analysis()
 
-# """
-# Tool rules  ---------------------------------------------------------------------------------------
-# """
-# rule samtools_index:
-#     # Samtools faidx to index compressed fasta
-#     input:
-#         fa="{sample}.fa.gz"
-#     output:
-#         "{sample}.fa.gz.fai",
-#         "{sample}.fa.gz.gzi"
-#     threads: 1
-#     params: 
-#         app_path=config["app.path"]
-#     shell:
-#         "apptainer run --app samtools {params.app_path}/PanGeTools.sif "
-#         "faidx {input.fa}"
-
 """
 Pre-processing section : preparing pggb inputs  ---------------------------------------------------
 """
@@ -285,471 +266,475 @@ rule create_sglAsm_syri_fig:
         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
         """
 
-# """
-# Core section : Running PGGB
-# """
-
-# rule create_pggb_input_syri_fig:
-#     input:
-#         fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
-#     output:
-#         fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
-#         wrkdir=directory('data/chrInput/syri/{chromosome}')
-#     threads: 8
-#     params:
-#         app_path=config["app.path"],
-#         ref=config['reference']
-#     shell:
-#         """
-#         mkdir {output.wrkdir}
-#         refname=$(basename {params.ref} .fa.gz | cut -d'.' -f1)
-
-#         ## Creating single fasta from multifasta
-#         zcat {input.fasta} | awk -F"#" \
-#             '/^>/ {OUT="{output.wrkdir}" substr($0,2) ".fa"}; {print >> OUT; close(OUT)}'
+"""
+
+Core section : Running PGGB
+
+"""
+
+rule create_pggb_input_syri_fig:
+    input:
+        fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
+    output:
+        fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
+        wrkdir=directory('data/chrInput/syri/{chromosome}')
+    threads: 8
+    params:
+        app_path=config["app.path"],
+        ref=config['reference']
+    shell:
+        """
+        mkdir {output.wrkdir}
+        refname=$(basename {params.ref} .fa.gz | cut -d'.' -f1)
+
+        ## Creating single fasta from multifasta
+        zcat {input.fasta} | awk -F"#" \
+            '/^>/ {OUT="{output.wrkdir}" substr($0,2) ".fa"}; {print >> OUT; close(OUT)}'
         
-#         ## Getting the list of sequences
-#         asmList=()
-#         for file in {output.wrkdir}/*.fa; do
-#             asm="$(basename $file .fa | cut -f1 -d"#").fa"
-#             mv $file "$(dirname $file)/$asm"
-#             asmList+=("$asm")
-#         done
-
-#         bash scripts/getSyriFigs.sh \
-#             -a {params.app_path} \
-#             -t {threads} \
-#             -d {output.wrkdir} \
-#             -o $(basename {output.fig}) \
-#             -r {output.wrkdir}/"${refname}.fa" \
-#             -q "${asmList[@]}"
-#         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-#         rm {output.wrkdir}/*.fa
-#         """
-
-# rule wfmash_on_chr:
-#     # Run wfmash on a specific chromosome input
-#     input:
-#         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
-#         fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
-#     output:
-#         mapping=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf"),
-#         aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
-#     threads: 16
-#     params:
-#         app_path=config['app.path'],
-#         segment_length=config['wfmash.segment_length'],
-#         mapping_id=config['wfmash.mapping_id'],
-#         wfmash_sec=config['wfmash.secondary']
-#     log: 
-#         cmd_map="logs/pggb/{chromosome}.wfmash_mapping.cmd.log",
-#         time_map="logs/pggb/{chromosome}.wfmash_mapping.time.log",
-#         cmd_aln="logs/pggb/{chromosome}.wfmash_aln.cmd.log",
-#         time_aln="logs/pggb/{chromosome}.wfmash_aln.time.log",
-#     shell:
-#         """
-#         ## Mapping
-#         /usr/bin/time -v -o {log.time_map} \
-#             apptainer exec {params.app_path}/pggb.sif wfmash \
-#             -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
-#             -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
-#             --tmp-base $(dirname {output.aln}) \
-#             {input.fa} --approx-map \
-#             1> {output.mapping} \
-#             2> >(tee {log.cmd_map} >&2)
-
-#         ## Aligning
-#         /usr/bin/time -v -o {log.time_aln} \
-#             apptainer exec {params.app_path}/pggb.sif wfmash \
-#             -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
-#             -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
-#             --tmp-base $(dirname {output.aln}) \
-#             {input.fa} -i {output.mapping} \
-#             --invert-filtering \
-#             1> {output.aln} \
-#             2> >(tee {log.cmd_aln} >&2)
-
-#         # Compressing
-#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-#             -@ {threads} -k {output.mapping}
-
-#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-#             -@ {threads} -k {output.aln}
-#         """
-
-# rule seqwish:
-#     # Run seqwish on alignement produced by wfmash
-#     input:
-#         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
-#         aln=rules.wfmash_on_chr.output.aln
-#     output:
-#         temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
-#     threads: 8
-#     params:
-#         app_path=config['app.path'],
-#         seqwish=config['seqwish.params']
-#     log: 
-#         cmd="logs/pggb/{chromosome}.seqwish.cmd.log",
-#         time="logs/pggb/{chromosome}.seqwish.time.log"
-#     shell:
-#         """
-#         /usr/bin/time -v -o {log.time} \
-#             apptainer exec {params.app_path}/pggb.sif seqwish \
-#             -s {input.fa} -p {input.aln} -g {output} \
-#             {params.seqwish} -t {threads} \
-#             --temp-dir $(dirname {output}) -P 2>&1 | \
-#             tee {log.cmd}
-
-#         # Compressing
-#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-#             -@ {threads} -k {output}
-#         """
-
-# rule gfaffix_on_chr:
-#     # Run gfaffix on seqwish graph
-#     input:
-#         rules.seqwish.output
-#     output:
-#         gfa=temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa"),
-#         transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
-#     threads: 1
-#     params:
-#         app_path=config['app.path']
-#     log: 
-#         time="logs/pggb/{chromosome}.gfaffix.time.log"
-#     shell:
-#         """
-#         /usr/bin/time -v -o {log.time} \
-#             apptainer exec {params.app_path}/pggb.sif gfaffix \
-#             {input} -o {output.gfa} -t {output.transform} \
-#             > /dev/null
-
-#         # Compressing
-#         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-#             -@ {threads} -k {output.gfa}
-#         """
-
-# rule odgi_postprocessing:
-#     # Running pggb's postprocessing (mainly odgi) steps with gfaffix graph
-#     input:
-#         rules.gfaffix_on_chr.output.gfa
-#     output:
-#         gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-#     threads: 8
-#     params:
-#         app_path=config['app.path']
-#     log: 
-#         cmd_build="logs/pggb/{chromosome}.odgi_build.cmd.log",
-#         time_build="logs/pggb/{chromosome}.odgi_build.time.log",
-#         cmd_unchop="logs/pggb/{chromosome}.odgi_unchop.cmd.log",
-#         time_unchop="logs/pggb/{chromosome}.odgi_unchop.time.log",
-#         cmd_sort="logs/pggb/{chromosome}.odgi_sort.cmd.log",
-#         time_sort="logs/pggb/{chromosome}.odgi_sort.time.log",
-#         cmd_view="logs/pggb/{chromosome}.odgi_view.cmd.log",
-#         time_view="logs/pggb/{chromosome}.odgi_view.time.log"
-#     shell:
-#         """
-#         OGfile="$(dirname {input})/$(basename {input} .gfa)"
-
-#         ## Converting to odgi format and optimizing namespace
-#         /usr/bin/time -v -o {log.time_build} \
-#             apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-#             build -t {threads} -P -g {input} -o $OGfile.og -O 2>&1 | \
-#             tee {log.cmd_build}
+        ## Getting the list of sequences
+        asmList=()
+        for file in {output.wrkdir}/*.fa; do
+            asm="$(basename $file .fa | cut -f1 -d"#").fa"
+            mv $file "$(dirname $file)/$asm"
+            asmList+=("$asm")
+        done
+
+        bash scripts/getSyriFigs.sh \
+            -a {params.app_path} \
+            -t {threads} \
+            -d {output.wrkdir} \
+            -o $(basename {output.fig}) \
+            -r {output.wrkdir}/"${refname}.fa" \
+            -q "${asmList[@]}"
+        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
+        rm {output.wrkdir}/*.fa
+        """
+
+rule wfmash_on_chr:
+    # Run wfmash on a specific chromosome input
+    input:
+        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
+        fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
+    output:
+        mapping=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf"),
+        aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
+    threads: 16
+    params:
+        app_path=config['app.path'],
+        segment_length=config['wfmash.segment_length'],
+        mapping_id=config['wfmash.mapping_id'],
+        wfmash_sec=config['wfmash.secondary']
+    log: 
+        cmd_map="logs/pggb/{chromosome}.wfmash_mapping.cmd.log",
+        time_map="logs/pggb/{chromosome}.wfmash_mapping.time.log",
+        cmd_aln="logs/pggb/{chromosome}.wfmash_aln.cmd.log",
+        time_aln="logs/pggb/{chromosome}.wfmash_aln.time.log",
+    shell:
+        """
+        ## Mapping
+        /usr/bin/time -v -o {log.time_map} \
+            apptainer exec {params.app_path}/pggb.sif wfmash \
+            -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
+            -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
+            --tmp-base $(dirname {output.aln}) \
+            {input.fa} --approx-map \
+            1> {output.mapping} \
+            2> >(tee {log.cmd_map} >&2)
+
+        ## Aligning
+        /usr/bin/time -v -o {log.time_aln} \
+            apptainer exec {params.app_path}/pggb.sif wfmash \
+            -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
+            -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
+            --tmp-base $(dirname {output.aln}) \
+            {input.fa} -i {output.mapping} \
+            --invert-filtering \
+            1> {output.aln} \
+            2> >(tee {log.cmd_aln} >&2)
+
+        # Compressing
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} -k {output.mapping}
+
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} -k {output.aln}
+        """
+
+rule seqwish:
+    # Run seqwish on alignement produced by wfmash
+    input:
+        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
+        aln=rules.wfmash_on_chr.output.aln
+    output:
+        temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
+    threads: 8
+    params:
+        app_path=config['app.path'],
+        seqwish=config['seqwish.params']
+    log: 
+        cmd="logs/pggb/{chromosome}.seqwish.cmd.log",
+        time="logs/pggb/{chromosome}.seqwish.time.log"
+    shell:
+        """
+        /usr/bin/time -v -o {log.time} \
+            apptainer exec {params.app_path}/pggb.sif seqwish \
+            -s {input.fa} -p {input.aln} -g {output} \
+            {params.seqwish} -t {threads} \
+            --temp-dir $(dirname {output}) -P 2>&1 | \
+            tee {log.cmd}
+
+        # Compressing
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} -k {output}
+        """
+
+rule gfaffix_on_chr:
+    # Run gfaffix on seqwish graph
+    input:
+        rules.seqwish.output
+    output:
+        gfa=temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa"),
+        transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
+    threads: 1
+    params:
+        app_path=config['app.path']
+    log: 
+        time="logs/pggb/{chromosome}.gfaffix.time.log"
+    shell:
+        """
+        /usr/bin/time -v -o {log.time} \
+            apptainer exec {params.app_path}/pggb.sif gfaffix \
+            {input} -o {output.gfa} -t {output.transform} \
+            > /dev/null
+
+        # Compressing
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} -k {output.gfa}
+        """
+
+rule odgi_postprocessing:
+    # Running pggb's postprocessing (mainly odgi) steps with gfaffix graph
+    input:
+        rules.gfaffix_on_chr.output.gfa
+    output:
+        gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+    threads: 8
+    params:
+        app_path=config['app.path']
+    log: 
+        cmd_build="logs/pggb/{chromosome}.odgi_build.cmd.log",
+        time_build="logs/pggb/{chromosome}.odgi_build.time.log",
+        cmd_unchop="logs/pggb/{chromosome}.odgi_unchop.cmd.log",
+        time_unchop="logs/pggb/{chromosome}.odgi_unchop.time.log",
+        cmd_sort="logs/pggb/{chromosome}.odgi_sort.cmd.log",
+        time_sort="logs/pggb/{chromosome}.odgi_sort.time.log",
+        cmd_view="logs/pggb/{chromosome}.odgi_view.cmd.log",
+        time_view="logs/pggb/{chromosome}.odgi_view.time.log"
+    shell:
+        """
+        OGfile="$(dirname {input})/$(basename {input} .gfa)"
+
+        ## Converting to odgi format and optimizing namespace
+        /usr/bin/time -v -o {log.time_build} \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            build -t {threads} -P -g {input} -o $OGfile.og -O 2>&1 | \
+            tee {log.cmd_build}
         
-#         ## Unchoping the nodes (merges unitigs into single nodes)
-#         /usr/bin/time -v -o {log.time_unchop} \
-#             apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-#             unchop -P -t {threads} -i $OGfile.og -o $OGfile.unchoped.og 2>&1 | \
-#             tee {log.cmd_unchop}
-
-#         ## Sorting the nodes
-#         /usr/bin/time -v -o {log.time_sort} \
-#             apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-#             sort -P -p Ygs --temp-dir $(dirname $OGfile) -t {threads} \
-#             -i $OGfile.unchoped.og -o $OGfile.unchoped.sorted.og 2>&1 | \
-#             tee {log.cmd_sort}
-
-#         ## Exporting to gfa
-#         /usr/bin/time -v -o {log.time_view} \
-#             apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-#             view -i $OGfile.unchoped.sorted.og -g \
-#             1> {output.gfa} \
-#             2> >(tee {log.cmd_view} >&2) 
-
-#         ## Removing .og files for space savings
-#         rm $(dirname {input})/*.og
-
-#         ## Adding metadata
-#         python scripts/getTags.py \
-#             --appdir {params.app_path} --config-file config.yaml \
-#             > "$(dirname {input})/metadata.txt"
-#         sed -i "/^H/r $(dirname {input})/metadata.txt" {output.gfa}
-
-#         # Compressing
-#         # apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-#         #     -@ {threads} -k {output.gfa}
-#         """
-
-# rule generate_graph_list:
-#     # Generate a text file containing all created graphs
-#     input:
-#         expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
-#     output:
-#         "data/chrGraphs/graphsList.txt"
-#     threads: 1
-#     run:
-#         with open(output[0], "w") as handle:
-#             for file in input:
-#                 handle.write(file+"\n")
-
-# rule graph_squeeze:
-#     # Using odgi to merge every subgraphs into a final one
-#     input:
-#         glist="data/chrGraphs/graphsList.txt",
-#         graphs=expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
-#     output:
-#         "output/pan1c."+config['name']+".gfa"
-#     threads: 16
-#     params:
-#         app_path=config['app.path']
-#     shell:
-#         """
-#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-#             squeeze -t {threads} -O -P -f {input.glist} -o {output}.og
-#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-#             view -t {threads} -P -i {output}.og -g > {output}
-#         rm {output}.og
-#         """
-
-# rule graph_stats:
-#     # Using GFAstats to produce stats on every chromosome graphs
-#     input:
-#         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-#     output:
-#         genstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv",
-#         pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
-#     threads: 4
-#     params:
-#         app_path=config['app.path'],
-#         pan_name=config['name']
-#     shell:
-#         """
-#         apptainer run --app gfastats {params.app_path}/PanGeTools.sif \
-#             -g {input.graph} -P \
-#             -o $(dirname {output.genstats})/{params.pan_name}.{wildcards.chromosome} \
-#             -t {threads}
-#         """
-
-# rule graph_figs:
-#     # Creating figures using odgi viz 
-#     input:
-#         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-#     output:
-#         oneDviz="output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png",
-#         pcov="output/chrGraphs.figs/"+config['name']+".{chromosome}.pcov.png"
-#     threads: 4
-#     params:
-#         app_path=config['app.path'],
-#         oneDviz=config['odgi.1Dviz.params'],
-#         pcov=config['odgi.pcov.params']
-#     shell:
-#         """
-#         ## 1D viz
-#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-#             viz -i {input.graph} -o {output.oneDviz} {params.oneDviz} -t {threads} -P
-
-#         ## Pcov viz
-#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-#             viz -i {input.graph} -o {output.pcov} {params.pcov} -t {threads} -P
-#         """
-
-# rule aggregate_graphs_stats:
-#     # Reading and merging all stats files from chromosome graphs into a .tsv.
-#     input:
-#         genstats=expand("output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
-#     output:
-#         genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
-#         pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
-#     params:
-#         app_path=config['app.path'],
-#         pan_name=config['name']
-#     shell:
-#         """
-#         apptainer run {params.app_path}/pan1c-env.sif python scripts/chrStatsAggregation.py \
-#             --input $(dirname {input[0]}) --outputGeneral {output.genstats} \
-#             --outputPaths {output.pathstats} --panname {params.pan_name} 
-#         """
-
-# rule final_graph_tagging:
-#     # Add metadata to the final GFA
-#     input:
-#         graph="output/pan1c."+config['name']+".gfa",
-#     output:
-#         "output/pan1c."+config['name']+".gfa.metadata"
-#     threads: 1
-#     params:
-#         app_path=config['app.path'],
-#         pan_name=config['name']
-#     shell:
-#         """
-#         python scripts/getTags.py \
-#             --appdir {params.app_path} --config-file config.yaml > {output}
-#         sed -i '/^H/r {output}' {input.graph}
-#         """
-
-# rule pggb_input_stats:
-#     # Produces statistics on pggb input sequences
-#     input:
-#         flag="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
-#     output:
-#         "output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
-#     params:
-#         app_path=config['app.path'],
-#         pan_name=config['name']
-#     shell:
-#         """
-#         apptainer run {params.app_path}/pan1c-env.sif python scripts/chrInputStats.py \
-#             -f data/chrInputs/*.fa.gz -o {output} -p {params.pan_name}
-#         """
-
-# rule core_statistics:
-#     # Aggregate chrInput, chrGraph and pggb statistics into a single tsv 
-#     input:
-#         chrInputStats="output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
-#         chrGraphStats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
-#     output:
-#         tsv="output/stats/pan1c."+config['name']+".core.stats.tsv",
-#         dir=directory("output/pggb.usage.figs")
-#     params:
-#         app_path=config['app.path'],
-#         pan_name=config['name']
-#     shell:
-#         """
-#         mkdir -p {output.dir}
-#         apptainer run {params.app_path}/pan1c-env.sif python scripts/coreStats.py \
-#             --pggbStats logs/pggb --chrInputStats {input.chrInputStats} \
-#             --chrGraphStats {input.chrGraphStats} -o {output.tsv} -f {output.dir} -p {params.pan_name}
-#         """
-
-# """
-# Post-processing section :
-#     The graph for each chromosome are made as well as some basic statistics.
-#     In this section, more stats are produced but more specifics ones requiring dedicated tools (Panacus, PAVs for Panache ...).
-#     It also contains rules to use the graph itself. 
-# """
-# rule get_pav:
-#     # Create PAV matrix readable by panache for a given chromosome scale graph
-#     input:
-#         "data/chrGraphs/graphsList.txt"
-#     output:
-#         directory("output/pav.matrices")
-#     threads: 16
-#     params:
-#         app_path=config['app.path']
-#     run:
-#         shell("mkdir {output}")
-#         # Getting the list of graphs
-#         with open(input[0]) as handle:
-#             graphList = [graph.rstrip("\n") for graph in handle.readlines()]
-#         # Iterating over graphs
-#         for graph in graphList:
-#             shell("bash scripts/getPanachePAV.sh -g {graph} -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).pav.matrix.tsv -a {params.app_path} -t {threads}")
-
-# rule panacus_stats:
-#     # Produces panacus reports for a chromosome graph
-#     input:
-#         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-#     output:
-#         html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
-#     params:
-#         app_path=config['app.path'],
-#         pan_name=config['name'],
-#         refname=config['reference']
-#     threads: 8
-#     shell:
-#         """
-#         bash scripts/getPanacusHG.sh \
-#             -g {input.graph} \
-#             -r $(basename {params.refname} .fa.gz) \
-#             -d data/chrGraphs/{wildcards.chromosome} \
-#             -o {output.html} \
-#             -a {params.app_path} \
-#             -t {threads}
-#         """
-
-# rule create_pan1c_report_fig:
-#     # Produces a markdown report figure of chromosomes graphs
-#     input:
-#         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa',
-#         contigfig="output/chr.contig/{chromosome}.contig.png",
-#     output:
-#         odgifig=temp("tmp/{chromosome}.odgi.png"),
-#         namefig=temp("tmp/{chromosome}.name.png"),
-#         reportfig="output/report/"+config['name']+".{chromosome}.report.fig.png"
-#     threads: 4
-#     params:
-#         app_path=config['app.path']
-#     shell:
-#         """
-#         ## Odgi 1D viz
-#         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-#             viz -i {input.graph} -o {output.odgifig} -x 2500 -a 80 -b -H -t {threads} -P
-
-#         ## Getting legend from contig figure
-#         convert {input.contigfig} -crop 790x+0+0 +repage {output.namefig}
-
-#         odgheight=$(identify -ping -format '%h' {output.odgifig})
-#         ctgheight=$(identify -ping -format '%h' {input.contigfig})
-
-#         combinedheight=$(($odgheight+$ctgheight))
-
-#         echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
-
-#         ## Creating empty canvas ad adding other figures to it
-#         convert -size "3300x$combinedheight" xc:white {output.reportfig}
-#         composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
-#         composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
-#         composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
-#         """
-
-# rule create_pan1c_report:
-#     # Produces a markdown report of chromosomes graphs
-#     input:
-#         figs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
-#         genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
-#         pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
-#     output:
-#         reportfig="output/pan1c."+config['name']+".report.md"
-#     threads: 4
-#     params:
-#         app_path=config['app.path']
-#     run:
-#         shell("touch {output.reportfig}")
-
-#         # Adding Summary
-#         ## To Do
-
-#         # Adding General stats
-#         shell("echo '# General stats' >> {output.reportfig}")
-#         shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.reportfig}")
-#         shell("echo '' >> {output.reportfig}")
-
-#         # Adding Path stats
-#         shell("echo '# Path stats' >> {output.reportfig}")
-#         shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.reportfig}")
-#         shell("echo '' >> {output.reportfig}")
-
-#         # Adding chromosomes figures
-#         fig_list = [fig for fig in input.figs]
-#         print(fig_list)
-#         fig_list.sort()
+        ## Unchoping the nodes (merges unitigs into single nodes)
+        /usr/bin/time -v -o {log.time_unchop} \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            unchop -P -t {threads} -i $OGfile.og -o $OGfile.unchoped.og 2>&1 | \
+            tee {log.cmd_unchop}
+
+        ## Sorting the nodes
+        /usr/bin/time -v -o {log.time_sort} \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            sort -P -p Ygs --temp-dir $(dirname $OGfile) -t {threads} \
+            -i $OGfile.unchoped.og -o $OGfile.unchoped.sorted.og 2>&1 | \
+            tee {log.cmd_sort}
+
+        ## Exporting to gfa
+        /usr/bin/time -v -o {log.time_view} \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            view -i $OGfile.unchoped.sorted.og -g \
+            1> {output.gfa} \
+            2> >(tee {log.cmd_view} >&2) 
+
+        ## Removing .og files for space savings
+        rm $(dirname {input})/*.og
+
+        ## Adding metadata
+        python scripts/getTags.py \
+            --appdir {params.app_path} --config-file config.yaml \
+            > "$(dirname {input})/metadata.txt"
+        sed -i "/^H/r $(dirname {input})/metadata.txt" {output.gfa}
+
+        # Compressing
+        # apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+        #     -@ {threads} -k {output.gfa}
+        """
+
+rule generate_graph_list:
+    # Generate a text file containing all created graphs
+    input:
+        expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
+    output:
+        "data/chrGraphs/graphsList.txt"
+    threads: 1
+    run:
+        with open(output[0], "w") as handle:
+            for file in input:
+                handle.write(file+"\n")
+
+rule graph_squeeze:
+    # Using odgi to merge every subgraphs into a final one
+    input:
+        glist="data/chrGraphs/graphsList.txt",
+        graphs=expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
+    output:
+        "output/pan1c."+config['name']+".gfa"
+    threads: 16
+    params:
+        app_path=config['app.path']
+    shell:
+        """
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            squeeze -t {threads} -O -P -f {input.glist} -o {output}.og
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            view -t {threads} -P -i {output}.og -g > {output}
+        rm {output}.og
+        """
+
+rule graph_stats:
+    # Using GFAstats to produce stats on every chromosome graphs
+    input:
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+    output:
+        genstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv",
+        pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
+    threads: 4
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        apptainer run --app gfastats {params.app_path}/PanGeTools.sif \
+            -g {input.graph} -P \
+            -o $(dirname {output.genstats})/{params.pan_name}.{wildcards.chromosome} \
+            -t {threads}
+        """
+
+rule graph_figs:
+    # Creating figures using odgi viz 
+    input:
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+    output:
+        oneDviz="output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png",
+        pcov="output/chrGraphs.figs/"+config['name']+".{chromosome}.pcov.png"
+    threads: 4
+    params:
+        app_path=config['app.path'],
+        oneDviz=config['odgi.1Dviz.params'],
+        pcov=config['odgi.pcov.params']
+    shell:
+        """
+        ## 1D viz
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            viz -i {input.graph} -o {output.oneDviz} {params.oneDviz} -t {threads} -P
+
+        ## Pcov viz
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            viz -i {input.graph} -o {output.pcov} {params.pcov} -t {threads} -P
+        """
+
+rule aggregate_graphs_stats:
+    # Reading and merging all stats files from chromosome graphs into a .tsv.
+    input:
+        genstats=expand("output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
+    output:
+        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
+        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrStatsAggregation.py \
+            --input $(dirname {input[0]}) --outputGeneral {output.genstats} \
+            --outputPaths {output.pathstats} --panname {params.pan_name} 
+        """
+
+rule final_graph_tagging:
+    # Add metadata to the final GFA
+    input:
+        graph="output/pan1c."+config['name']+".gfa",
+    output:
+        "output/pan1c."+config['name']+".gfa.metadata"
+    threads: 1
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        python scripts/getTags.py \
+            --appdir {params.app_path} --config-file config.yaml > {output}
+        sed -i '/^H/r {output}' {input.graph}
+        """
+
+rule pggb_input_stats:
+    # Produces statistics on pggb input sequences
+    input:
+        flag="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
+    output:
+        "output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrInputStats.py \
+            -f data/chrInputs/*.fa.gz -o {output} -p {params.pan_name}
+        """
+
+rule core_statistics:
+    # Aggregate chrInput, chrGraph and pggb statistics into a single tsv 
+    input:
+        chrInputStats="output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
+        chrGraphStats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
+    output:
+        tsv="output/stats/pan1c."+config['name']+".core.stats.tsv",
+        dir=directory("output/pggb.usage.figs")
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name']
+    shell:
+        """
+        mkdir -p {output.dir}
+        apptainer run {params.app_path}/pan1c-env.sif python scripts/coreStats.py \
+            --pggbStats logs/pggb --chrInputStats {input.chrInputStats} \
+            --chrGraphStats {input.chrGraphStats} -o {output.tsv} -f {output.dir} -p {params.pan_name}
+        """
+
+"""
+
+Post-processing section :
+    The graph for each chromosome are made as well as some basic statistics.
+    In this section, more stats are produced but more specifics ones requiring dedicated tools (Panacus, PAVs for Panache ...).
+    It also contains rules to use the graph itself. 
+
+"""
+rule get_pav:
+    # Create PAV matrix readable by panache for a given chromosome scale graph
+    input:
+        "data/chrGraphs/graphsList.txt"
+    output:
+        directory("output/pav.matrices")
+    threads: 16
+    params:
+        app_path=config['app.path']
+    run:
+        shell("mkdir {output}")
+        # Getting the list of graphs
+        with open(input[0]) as handle:
+            graphList = [graph.rstrip("\n") for graph in handle.readlines()]
+        # Iterating over graphs
+        for graph in graphList:
+            shell("bash scripts/getPanachePAV.sh -g {graph} -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).pav.matrix.tsv -a {params.app_path} -t {threads}")
+
+rule panacus_stats:
+    # Produces panacus reports for a chromosome graph
+    input:
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+    output:
+        html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
+    params:
+        app_path=config['app.path'],
+        pan_name=config['name'],
+        refname=config['reference']
+    threads: 8
+    shell:
+        """
+        bash scripts/getPanacusHG.sh \
+            -g {input.graph} \
+            -r $(basename {params.refname} .fa.gz) \
+            -d data/chrGraphs/{wildcards.chromosome} \
+            -o {output.html} \
+            -a {params.app_path} \
+            -t {threads}
+        """
+
+rule create_pan1c_report_fig:
+    # Produces a markdown report figure of chromosomes graphs
+    input:
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa',
+        contigfig="output/chr.contig/{chromosome}.contig.png",
+    output:
+        odgifig=temp("tmp/{chromosome}.odgi.png"),
+        namefig=temp("tmp/{chromosome}.name.png"),
+        reportfig="output/report/"+config['name']+".{chromosome}.report.fig.png"
+    threads: 4
+    params:
+        app_path=config['app.path']
+    shell:
+        """
+        ## Odgi 1D viz
+        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            viz -i {input.graph} -o {output.odgifig} -x 2500 -a 80 -b -H -t {threads} -P
+
+        ## Getting legend from contig figure
+        convert {input.contigfig} -crop 790x+0+0 +repage {output.namefig}
+
+        odgheight=$(identify -ping -format '%h' {output.odgifig})
+        ctgheight=$(identify -ping -format '%h' {input.contigfig})
+
+        combinedheight=$(($odgheight+$ctgheight))
+
+        echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
+
+        ## Creating empty canvas ad adding other figures to it
+        convert -size "3300x$combinedheight" xc:white {output.reportfig}
+        composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
+        composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
+        composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
+        """
+
+rule create_pan1c_report:
+    # Produces a markdown report of chromosomes graphs
+    input:
+        figs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
+        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
+        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
+    output:
+        reportfig="output/pan1c."+config['name']+".report.md"
+    threads: 4
+    params:
+        app_path=config['app.path']
+    run:
+        shell("touch {output.reportfig}")
+
+        # Adding Summary
+        ## To Do
+
+        # Adding General stats
+        shell("echo '# General stats' >> {output.reportfig}")
+        shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.reportfig}")
+        shell("echo '' >> {output.reportfig}")
+
+        # Adding Path stats
+        shell("echo '# Path stats' >> {output.reportfig}")
+        shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.reportfig}")
+        shell("echo '' >> {output.reportfig}")
+
+        # Adding chromosomes figures
+        fig_list = [fig for fig in input.figs]
+        print(fig_list)
+        fig_list.sort()
         
-#         shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
-#         for fig in fig_list:
-#             basename=os.path.basename(fig)
-#             chr_name=basename.split('.')[1]
+        shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
+        for fig in fig_list:
+            basename=os.path.basename(fig)
+            chr_name=basename.split('.')[1]
             
-#             shell("echo '## {chr_name}' >> {output.reportfig}")
-#             shell("echo '![{basename}](./report/{basename})' >> {output.reportfig}")
-#         shell("echo '' >> {output.reportfig}")
\ No newline at end of file
+            shell("echo '## {chr_name}' >> {output.reportfig}")
+            shell("echo '![{basename}](./report/{basename})' >> {output.reportfig}")
+        shell("echo '' >> {output.reportfig}")
\ No newline at end of file
diff --git a/rules/core.smk b/rules/core.smk
deleted file mode 100644
index 5473943..0000000
--- a/rules/core.smk
+++ /dev/null
@@ -1,346 +0,0 @@
-"""
-Core section : Running PGGB on each chromosome and generating whole pangenome   -------------------
-"""
-
-rule create_pggb_input_syri_fig:
-    input:
-        fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
-    output:
-        fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
-        wrkdir=directory('data/chrInput/syri/{chromosome}')
-    threads: 8
-    params:
-        app_path=config["app.path"],
-        ref=config['reference']
-    shell:
-        """
-        mkdir {output.wrkdir}
-        refname=$(basename {params.ref} .fa.gz | cut -d'.' -f1)
-
-        ## Creating single fasta from multifasta
-        zcat {input.fasta} | awk -F"#" \
-            '/^>/ {OUT="{output.wrkdir}" substr($0,2) ".fa"}; {print >> OUT; close(OUT)}'
-        
-        ## Getting the list of sequences
-        asmList=()
-        for file in {output.wrkdir}/*.fa; do
-            asm="$(basename $file .fa | cut -f1 -d"#").fa"
-            mv $file "$(dirname $file)/$asm"
-            asmList+=("$asm")
-        done
-
-        bash scripts/getSyriFigs.sh \
-            -a {params.app_path} \
-            -t {threads} \
-            -d {output.wrkdir} \
-            -o $(basename {output.fig}) \
-            -r {output.wrkdir}/"${refname}.fa" \
-            -q "${asmList[@]}"
-        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-        rm {output.wrkdir}/*.fa
-        """
-
-rule wfmash_on_chr:
-    # Run wfmash on a specific chromosome input
-    input:
-        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
-        fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
-    output:
-        mapping=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf"),
-        aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
-    threads: 16
-    params:
-        app_path=config['app.path'],
-        segment_length=config['wfmash.segment_length'],
-        mapping_id=config['wfmash.mapping_id'],
-        wfmash_sec=config['wfmash.secondary']
-    log: 
-        cmd_map="logs/pggb/{chromosome}.wfmash_mapping.cmd.log",
-        time_map="logs/pggb/{chromosome}.wfmash_mapping.time.log",
-        cmd_aln="logs/pggb/{chromosome}.wfmash_aln.cmd.log",
-        time_aln="logs/pggb/{chromosome}.wfmash_aln.time.log",
-    shell:
-        """
-        ## Mapping
-        /usr/bin/time -v -o {log.time_map} \
-            apptainer exec {params.app_path}/pggb.sif wfmash \
-            -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
-            -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
-            --tmp-base $(dirname {output.aln}) \
-            {input.fa} --approx-map \
-            1> {output.mapping} \
-            2> >(tee {log.cmd_map} >&2)
-
-        ## Aligning
-        /usr/bin/time -v -o {log.time_aln} \
-            apptainer exec {params.app_path}/pggb.sif wfmash \
-            -s {params.segment_length} -l $(( {params.segment_length} * 5 )) -p {params.mapping_id} \
-            -n $(cat {input.fai} | wc -l) {params.wfmash_sec} -t {threads} \
-            --tmp-base $(dirname {output.aln}) \
-            {input.fa} -i {output.mapping} \
-            --invert-filtering \
-            1> {output.aln} \
-            2> >(tee {log.cmd_aln} >&2)
-
-        # Compressing
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} -k {output.mapping}
-
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} -k {output.aln}
-        """
-
-rule seqwish:
-    # Run seqwish on alignement produced by wfmash
-    input:
-        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
-        aln=rules.wfmash_on_chr.output.aln
-    output:
-        temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
-    threads: 8
-    params:
-        app_path=config['app.path'],
-        seqwish=config['seqwish.params']
-    log: 
-        cmd="logs/pggb/{chromosome}.seqwish.cmd.log",
-        time="logs/pggb/{chromosome}.seqwish.time.log"
-    shell:
-        """
-        /usr/bin/time -v -o {log.time} \
-            apptainer exec {params.app_path}/pggb.sif seqwish \
-            -s {input.fa} -p {input.aln} -g {output} \
-            {params.seqwish} -t {threads} \
-            --temp-dir $(dirname {output}) -P 2>&1 | \
-            tee {log.cmd}
-
-        # Compressing
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} -k {output}
-        """
-
-rule gfaffix_on_chr:
-    # Run gfaffix on seqwish graph
-    input:
-        rules.seqwish.output
-    output:
-        gfa=temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa"),
-        transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
-    threads: 1
-    params:
-        app_path=config['app.path']
-    log: 
-        time="logs/pggb/{chromosome}.gfaffix.time.log"
-    shell:
-        """
-        /usr/bin/time -v -o {log.time} \
-            apptainer exec {params.app_path}/pggb.sif gfaffix \
-            {input} -o {output.gfa} -t {output.transform} \
-            > /dev/null
-
-        # Compressing
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} -k {output.gfa}
-        """
-
-rule odgi_postprocessing:
-    # Running pggb's postprocessing (mainly odgi) steps with gfaffix graph
-    input:
-        rules.gfaffix_on_chr.output.gfa
-    output:
-        gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-    threads: 8
-    params:
-        app_path=config['app.path']
-    log: 
-        cmd_build="logs/pggb/{chromosome}.odgi_build.cmd.log",
-        time_build="logs/pggb/{chromosome}.odgi_build.time.log",
-        cmd_unchop="logs/pggb/{chromosome}.odgi_unchop.cmd.log",
-        time_unchop="logs/pggb/{chromosome}.odgi_unchop.time.log",
-        cmd_sort="logs/pggb/{chromosome}.odgi_sort.cmd.log",
-        time_sort="logs/pggb/{chromosome}.odgi_sort.time.log",
-        cmd_view="logs/pggb/{chromosome}.odgi_view.cmd.log",
-        time_view="logs/pggb/{chromosome}.odgi_view.time.log"
-    shell:
-        """
-        OGfile="$(dirname {input})/$(basename {input} .gfa)"
-
-        ## Converting to odgi format and optimizing namespace
-        /usr/bin/time -v -o {log.time_build} \
-            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            build -t {threads} -P -g {input} -o $OGfile.og -O 2>&1 | \
-            tee {log.cmd_build}
-        
-        ## Unchoping the nodes (merges unitigs into single nodes)
-        /usr/bin/time -v -o {log.time_unchop} \
-            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            unchop -P -t {threads} -i $OGfile.og -o $OGfile.unchoped.og 2>&1 | \
-            tee {log.cmd_unchop}
-
-        ## Sorting the nodes
-        /usr/bin/time -v -o {log.time_sort} \
-            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            sort -P -p Ygs --temp-dir $(dirname $OGfile) -t {threads} \
-            -i $OGfile.unchoped.og -o $OGfile.unchoped.sorted.og 2>&1 | \
-            tee {log.cmd_sort}
-
-        ## Exporting to gfa
-        /usr/bin/time -v -o {log.time_view} \
-            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            view -i $OGfile.unchoped.sorted.og -g \
-            1> {output.gfa} \
-            2> >(tee {log.cmd_view} >&2) 
-
-        ## Removing .og files for space savings
-        rm $(dirname {input})/*.og
-
-        ## Adding metadata
-        python scripts/getTags.py \
-            --appdir {params.app_path} --config-file config.yaml \
-            > "$(dirname {input})/metadata.txt"
-        sed -i "/^H/r $(dirname {input})/metadata.txt" {output.gfa}
-
-        # Compressing
-        # apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-        #     -@ {threads} -k {output.gfa}
-        """
-
-rule generate_graph_list:
-    # Generate a text file containing all created graphs
-    input:
-        expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
-    output:
-        "data/chrGraphs/graphsList.txt"
-    threads: 1
-    run:
-        with open(output[0], "w") as handle:
-            for file in input:
-                handle.write(file+"\n")
-
-rule graph_squeeze:
-    # Using odgi to merge every subgraphs into a final one
-    input:
-        glist="data/chrGraphs/graphsList.txt",
-        graphs=expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
-    output:
-        "output/pan1c."+config['name']+".gfa"
-    threads: 16
-    params:
-        app_path=config['app.path']
-    shell:
-        """
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            squeeze -t {threads} -O -P -f {input.glist} -o {output}.og
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            view -t {threads} -P -i {output}.og -g > {output}
-        rm {output}.og
-        """
-
-rule graph_stats:
-    # Using GFAstats to produce stats on every chromosome graphs
-    input:
-        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-    output:
-        genstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv",
-        pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
-    threads: 4
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        apptainer run --app gfastats {params.app_path}/PanGeTools.sif \
-            -g {input.graph} -P \
-            -o $(dirname {output.genstats})/{params.pan_name}.{wildcards.chromosome} \
-            -t {threads}
-        """
-
-rule graph_figs:
-    # Creating figures using odgi viz 
-    input:
-        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-    output:
-        oneDviz="output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png",
-        pcov="output/chrGraphs.figs/"+config['name']+".{chromosome}.pcov.png"
-    threads: 4
-    params:
-        app_path=config['app.path'],
-        oneDviz=config['odgi.1Dviz.params'],
-        pcov=config['odgi.pcov.params']
-    shell:
-        """
-        ## 1D viz
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            viz -i {input.graph} -o {output.oneDviz} {params.oneDviz} -t {threads} -P
-
-        ## Pcov viz
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            viz -i {input.graph} -o {output.pcov} {params.pcov} -t {threads} -P
-        """
-
-rule aggregate_graphs_stats:
-    # Reading and merging all stats files from chromosome graphs into a .tsv.
-    input:
-        genstats=expand("output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
-    output:
-        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
-        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrStatsAggregation.py \
-            --input $(dirname {input[0]}) --outputGeneral {output.genstats} \
-            --outputPaths {output.pathstats} --panname {params.pan_name} 
-        """
-
-rule final_graph_tagging:
-    # Add metadata to the final GFA
-    input:
-        graph="output/pan1c."+config['name']+".gfa",
-    output:
-        "output/pan1c."+config['name']+".gfa.metadata"
-    threads: 1
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        python scripts/getTags.py \
-            --appdir {params.app_path} --config-file config.yaml > {output}
-        sed -i '/^H/r {output}' {input.graph}
-        """
-
-rule pggb_input_stats:
-    # Produces statistics on pggb input sequences
-    input:
-        flag="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
-    output:
-        "output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/chrInputStats.py \
-            -f data/chrInputs/*.fa.gz -o {output} -p {params.pan_name}
-        """
-
-rule core_statistics:
-    # Aggregate chrInput, chrGraph and pggb statistics into a single tsv 
-    input:
-        chrInputStats="output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
-        chrGraphStats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
-    output:
-        tsv="output/stats/pan1c."+config['name']+".core.stats.tsv",
-        dir=directory("output/pggb.usage.figs")
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name']
-    shell:
-        """
-        mkdir -p {output.dir}
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/coreStats.py \
-            --pggbStats logs/pggb --chrInputStats {input.chrInputStats} \
-            --chrGraphStats {input.chrGraphStats} -o {output.tsv} -f {output.dir} -p {params.pan_name}
-        """
\ No newline at end of file
diff --git a/rules/post-analysis.smk b/rules/post-analysis.smk
deleted file mode 100644
index 3041501..0000000
--- a/rules/post-analysis.smk
+++ /dev/null
@@ -1,118 +0,0 @@
-"""
-Post-analysis section   ---------------------------------------------------------------------------
-"""
-rule get_pav:
-    # Create PAV matrix readable by panache for a given chromosome scale graph
-    input:
-        "data/chrGraphs/graphsList.txt"
-    output:
-        directory("output/pav.matrices")
-    threads: 16
-    params:
-        app_path=config['app.path']
-    run:
-        shell("mkdir {output}")
-        # Getting the list of graphs
-        with open(input[0]) as handle:
-            graphList = [graph.rstrip("\n") for graph in handle.readlines()]
-        # Iterating over graphs
-        for graph in graphList:
-            shell("bash scripts/getPanachePAV.sh -g {graph} -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).pav.matrix.tsv -a {params.app_path} -t {threads}")
-
-rule panacus_stats:
-    # Produces panacus reports for a chromosome graph
-    input:
-        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
-    output:
-        html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
-    params:
-        app_path=config['app.path'],
-        pan_name=config['name'],
-        refname=config['reference']
-    threads: 8
-    shell:
-        """
-        bash scripts/getPanacusHG.sh \
-            -g {input.graph} \
-            -r $(basename {params.refname} .fa.gz) \
-            -d data/chrGraphs/{wildcards.chromosome} \
-            -o {output.html} \
-            -a {params.app_path} \
-            -t {threads}
-        """
-
-rule create_pan1c_report_fig:
-    # Produces a markdown report figure of chromosomes graphs
-    input:
-        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa',
-        contigfig="output/chr.contig/{chromosome}.contig.png",
-    output:
-        odgifig=temp("tmp/{chromosome}.odgi.png"),
-        namefig=temp("tmp/{chromosome}.name.png"),
-        reportfig="output/report/"+config['name']+".{chromosome}.report.fig.png"
-    threads: 4
-    params:
-        app_path=config['app.path']
-    shell:
-        """
-        ## Odgi 1D viz
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            viz -i {input.graph} -o {output.odgifig} -x 2500 -a 80 -b -H -t {threads} -P
-
-        ## Getting legend from contig figure
-        convert {input.contigfig} -crop 790x+0+0 +repage {output.namefig}
-
-        odgheight=$(identify -ping -format '%h' {output.odgifig})
-        ctgheight=$(identify -ping -format '%h' {input.contigfig})
-
-        combinedheight=$(($odgheight+$ctgheight))
-
-        echo -e "[Debug::Report fig]\t$odgheight\t$ctgheight\t$combinedheight"
-
-        ## Creating empty canvas ad adding other figures to it
-        convert -size "3300x$combinedheight" xc:white {output.reportfig}
-        composite -geometry +0+0 {input.contigfig} {output.reportfig} {output.reportfig}
-        composite -geometry "+0+$ctgheight" {output.namefig} {output.reportfig} {output.reportfig}
-        composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
-        """
-
-rule create_pan1c_report:
-    # Produces a markdown report of chromosomes graphs
-    input:
-        figs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
-        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
-        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
-    output:
-        reportfig="output/pan1c."+config['name']+".report.md"
-    threads: 4
-    params:
-        app_path=config['app.path']
-    run:
-        shell("touch {output.reportfig}")
-
-        # Adding Summary
-        ## To Do
-
-        # Adding General stats
-        shell("echo '# General stats' >> {output.reportfig}")
-        shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.reportfig}")
-        shell("echo '' >> {output.reportfig}")
-
-        # Adding Path stats
-        shell("echo '# Path stats' >> {output.reportfig}")
-        shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.reportfig}")
-        shell("echo '' >> {output.reportfig}")
-
-        # Adding chromosomes figures
-        fig_list = [fig for fig in input.figs]
-        print(fig_list)
-        fig_list.sort()
-        
-        shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
-        for fig in fig_list:
-            basename=os.path.basename(fig)
-            chr_name=basename.split('.')[1]
-            
-            shell("echo '## {chr_name}' >> {output.reportfig}")
-            shell("echo '![{basename}](./report/{basename})' >> {output.reportfig}")
-        shell("echo '' >> {output.reportfig}")
\ No newline at end of file
diff --git a/rules/pre-processing.smk b/rules/pre-processing.smk
deleted file mode 100644
index 3580108..0000000
--- a/rules/pre-processing.smk
+++ /dev/null
@@ -1,185 +0,0 @@
-"""
-Pre-processing section : preparing haplotypes  ----------------------------------------------------
-"""
-rule ragtag_scaffolding:
-    # Scaffold input haplotype against the reference to infer chromosome scale sequences
-    input:
-        ref="data/haplotypes/"+config['reference'],
-        reffai="data/haplotypes/"+config['reference']+".fai",
-        refgzi="data/haplotypes/"+config['reference']+".gzi",
-        fa="data/haplotypes/{haplotype}.fa.gz"
-    output:
-        fa="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz",
-        tar="data/hap.ragtagged/{haplotype}.tar.gz"
-    threads: 8
-    retries: 1
-    params:
-        app_path=config["app.path"]
-    shell:
-        """
-        bash scripts/ragtagChromInfer.sh \
-            -d $(dirname {output.fa})/{wildcards.haplotype} \
-            -a {params.app_path} \
-            -t {threads} \
-            -r {input.ref} \
-            -q {input.fa} \
-            -o {output.fa}
-
-        #if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
-        #    echo "Error : Empty final fasta"
-        #    exit 1
-        #fi
-        """
-
-rule quast_stats:
-    # Run Quast on ragtagged genomes
-    input:
-        fas=expand("data/haplotypes/{haplotype}.fa.gz", haplotype=SAMPLES_NOREF),
-        ref="data/haplotypes/"+config['reference']
-    output:
-        report="output/quast/"+config['name']+".quast.report.html"
-    threads: 8
-    params:
-        app_path=config["app.path"],
-        pan_name=config["name"]
-    shell:
-        """
-        apptainer run {params.app_path}/pan1c-env.sif quast.py \
-            -t {threads} \
-            -o "$(dirname {output.report})" \
-            -r {input.ref} \
-            --plots-format png \
-            --no-read-stats \
-            --no-icarus \
-            -s \
-            --large \
-            {input.fas}
-
-        # Compressing temporary files
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} $(dirname {output.report})/contigs_reports/*.fa
-        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads} $(dirname {output.report})/contigs_reports/minimap_output/*
-
-        mv $(dirname {output.report})/report.html {output.report}
-        """
-
-rule contig_position:
-    # Produce figures with contig positions
-    input:
-        fa="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz",
-        fai="data/chrInputs/"+config["name"]+".{chromosome}.fa.gz.fai"
-    output:
-        fig="output/chr.contig/{chromosome}.contig.png",
-        outdir=directory("output/chr.contig/{chromosome}")
-    threads: 1
-    params:
-        app_path=config["app.path"]
-    shell:
-        """
-        mkdir {output.outdir}
-        base="{output.outdir}"
-        hapID=$(echo {wildcards.chromosome} | sed 's/.hap/#/')
-        
-        # Creating .bands file
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/fasta_not_NNN_gff3.py \
-            -s 5 \
-            -f {input.fa} \
-            > $base/{wildcards.chromosome}.gff
-
-        #cat $base/{wildcards.chromosome}.gff
-
-        awk '{{a++; if ((a/2)==int(a/2)){{b="gpos25"}}else{{b="gpos75"}}; print $1"\\t"$4"\\t"$5"\\tcontig_"a"\\t"b}}' $base/{wildcards.chromosome}.gff | \
-            sed '1ichr\\tstart\\tend\\tname\\tgieStain' \
-            > $base/{wildcards.chromosome}.bands
-
-        #cat $base/{wildcards.chromosome}.bands
-
-        # Creating bed like file
-        cat {input.fai} | \
-            cut -f1,2 | tr '\\t' ',' | \
-            sed "s/,/,1,/" | \
-            sed "1i chr\\tstart\\tend" | \
-            tr ',' '\\t' \
-            > $base/{wildcards.chromosome}.tsv
-
-        #cat $base/{wildcards.chromosome}.tsv
-
-        # Creating figure
-        apptainer run --app Renv {params.app_path}/pan1c-env.sif Rscript scripts/contig_position.R \
-            -b $base/{wildcards.chromosome}.bands \
-            -t $base/{wildcards.chromosome}.tsv \
-            -o {output.fig}
-        """
-
-rule clustering:
-    # Read ragtagged fastas and split chromosome sequences into according bins
-    input:
-        expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
-    output:
-        expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
-    threads: workflow.cores
-    params:
-        app_path=config["app.path"],
-        pan_name=config["name"]
-    shell:
-        """
-        mkdir -p $(dirname {output[0]})
-        apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
-            --fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
-        for file in $(dirname {output[0]})/*.fa; do
-            apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
-        done
-        """
-
-rule create_allAsm_syri_fig:
-    # Create a SyRI figure containing all input assemblies
-    input:
-        ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
-        qry=expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
-    output:
-        fig="output/asm.syri.figs/pan1c."+config['name']+".allAsm.syri.png",
-        wrkdir=directory('data/asm.syri/all')
-    threads: 8
-    params:
-        app_path=config["app.path"]
-    shell:
-        """
-        mkdir {output.wrkdir}
-
-        bash scripts/getSyriFigs.sh \
-            -a {params.app_path} \
-            -t {threads} \
-            -d {output.wrkdir} \
-            -o $(basename {output.fig}) \
-            -r {input.ref} \
-            -q "{input.qry}"
-        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-        """
-
-rule create_sglAsm_syri_fig:
-    # Create a SyRI figure for a single input assembly
-    input:
-        ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
-        qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
-    output:
-        fig="output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
-        wrkdir=directory('data/asm.syri/{haplotype}')
-    threads: 4
-    retries: 1
-    resources:
-        mem_gb=48
-    params:
-        app_path=config["app.path"]
-    shell:
-        """
-        mkdir -p {output.wrkdir}
-        bash scripts/getSyriFigs.sh \
-            -a {params.app_path} \
-            -t {threads} \
-            -d {output.wrkdir} \
-            -o $(basename {output.fig}) \
-            -r {input.ref} \
-            -q "{input.qry}"
-        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-        """
\ No newline at end of file
-- 
GitLab


From bedb03d62a3269ca46ff09bddbddfda40bd40bf3 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 17:05:11 +0200
Subject: [PATCH 38/86] Contact & default wfmash parameters

---
 README.md   | 3 ++-
 config.yaml | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index e2f9df2..04529fb 100644
--- a/README.md
+++ b/README.md
@@ -114,5 +114,6 @@ Pan1c-06AT-v3
 This DAG shows the worflow for a pangenome of `Saccharomyces cerevisiae` using the `R64` reference.
 ![Workflow DAG](example/workflow.svg)
 
-  
+# Contact
+[alexis.mergez@inrae.fr](mailto:alexis.mergez@inrae.fr) 
 
diff --git a/config.yaml b/config.yaml
index 3c09fb6..37caa44 100644
--- a/config.yaml
+++ b/config.yaml
@@ -8,8 +8,8 @@ app.path: '<path>'
 
 # Core parameters
 # Wfmash alignement parameters :
-wfmash.segment_length: 5000
-wfmash.mapping_id: 90
+wfmash.segment_length: 10000
+wfmash.mapping_id: 95
 wfmash.secondary: '-k 19 -H 0.001 -X'
 
 # Seqwish parameters
-- 
GitLab


From 74593b33bbd6cea30642986d906b2983bbb82b5c Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 17:10:17 +0200
Subject: [PATCH 39/86] Changed Odgi viz default parameters

---
 config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.yaml b/config.yaml
index 37caa44..5fe91c1 100644
--- a/config.yaml
+++ b/config.yaml
@@ -16,8 +16,8 @@ wfmash.secondary: '-k 19 -H 0.001 -X'
 seqwish.params: '-B 10000000 -k 19 -f 0'
 
 # Odgi 1D and path coverage viz parameters 
-odgi.1Dviz.params: '-x 2000 -a 150 -b'
-odgi.pcov.params: '-x 2000 -a 150 -O'
+odgi.1Dviz.params: '-x 2000 -b'
+odgi.pcov.params: '-x 2000 -O'
 
 ## Optional parts of the workflow
 # Running Quast to get statistics on input haplotypes
-- 
GitLab


From cf4ca5e948545e498ac3a4f27f10b114489dffcf Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 17:10:32 +0200
Subject: [PATCH 40/86] Added hiddenfile to keep data folder

---
 data/.gitkeep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 data/.gitkeep

diff --git a/data/.gitkeep b/data/.gitkeep
new file mode 100644
index 0000000..e69de29
-- 
GitLab


From f1ca7b8a10f8b7551db9d7244f375802c0c726e4 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 26 Jun 2024 17:25:13 +0200
Subject: [PATCH 41/86] Moving bgzipping to dedicated rule

---
 Snakefile                   |  4 +++-
 rules/tools.smk             | 17 ++++++++++++++++-
 scripts/ragtagChromInfer.sh |  6 +++---
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index e3818f2..fc342e4 100644
--- a/Snakefile
+++ b/Snakefile
@@ -73,6 +73,7 @@ def which_analysis():
 """
 Rules   -------------------------------------------------------------------------------------------
 """
+
 # Main target rule
 rule all:
     input:
@@ -83,6 +84,7 @@ rule all:
 """
 Pre-processing section : preparing pggb inputs  ---------------------------------------------------
 """
+
 rule ragtag_scaffolding:
     # Scaffold input haplotype against the reference to infer chromosome scale sequences
     input:
@@ -91,7 +93,7 @@ rule ragtag_scaffolding:
         refgzi="data/haplotypes/"+config['reference']+".gzi",
         fa="data/haplotypes/{haplotype}.fa.gz"
     output:
-        fa="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz",
+        fa=temp("data/hap.ragtagged/{haplotype}.ragtagged.fa"),
         tar="data/hap.ragtagged/{haplotype}.tar.gz"
     threads: 8
     retries: 1
diff --git a/rules/tools.smk b/rules/tools.smk
index 7528154..5aab858 100644
--- a/rules/tools.smk
+++ b/rules/tools.smk
@@ -13,4 +13,19 @@ rule samtools_index:
         app_path=config["app.path"]
     shell:
         "apptainer run --app samtools {params.app_path}/PanGeTools.sif "
-        "faidx {input.fa}"
\ No newline at end of file
+        "faidx {input.fa}"
+
+rule run_bgzip:
+    # Run BGZIP on the file
+    input: 
+        "{file}"
+    output:
+        "{file}.gz"
+    threads: 4
+    params:
+        app_path=config["app.path"]
+    shell:
+        """
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads}
+        """
\ No newline at end of file
diff --git a/scripts/ragtagChromInfer.sh b/scripts/ragtagChromInfer.sh
index 9afa538..b8dc8ea 100755
--- a/scripts/ragtagChromInfer.sh
+++ b/scripts/ragtagChromInfer.sh
@@ -41,11 +41,11 @@ grep ">${ref}#chr*" -A1 $tmpdir/ragtag.scaffold.fasta | \
     sed "s/${ref}#chr\([^_]*\)_RagTag/${hapID}#chr\1/g" > $tmpdir/${sample}.ragtagged.fa
 
 # Compressing fasta
-apptainer run --app bgzip $appdir/PanGeTools.sif \
-    -@ $threads $tmpdir/${sample}.ragtagged.fa
+# apptainer run --app bgzip $appdir/PanGeTools.sif \
+#     -@ $threads $tmpdir/${sample}.ragtagged.fa
 
 # Moving fa.gz to output dir
-mv $tmpdir/${sample}.ragtagged.fa.gz $output
+mv $tmpdir/${sample}.ragtagged.fa $output
 
 # Compressing temporary files
 tar --remove-files -cf $tmpdir.tar $tmpdir
-- 
GitLab


From 194a1d59c03af6f5211561118da7c91eb985b1ac Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 27 Jun 2024 10:36:18 +0200
Subject: [PATCH 42/86] Update Snakefile

Offloading chromosome FASTA compression from clustering rule
---
 Snakefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index fc342e4..028e807 100644
--- a/Snakefile
+++ b/Snakefile
@@ -201,8 +201,8 @@ rule clustering:
     input:
         expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
     output:
-        expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
-    threads: workflow.cores
+        temp(expand('data/chrInputs/'+config['name']+'.{chromosome}.fa', chromosome=CHRLIST))
+    threads: 2
     params:
         app_path=config["app.path"],
         pan_name=config["name"]
@@ -211,9 +211,9 @@ rule clustering:
         mkdir -p $(dirname {output[0]})
         apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
             --fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
-        for file in $(dirname {output[0]})/*.fa; do
-            apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
-        done
+        #for file in $(dirname {output[0]})/*.fa; do
+        #    apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
+        #done
         """
 
 rule create_allAsm_syri_fig:
-- 
GitLab


From 50f491896dfd9066cefdb1230891d1d2c9e9f7a2 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 27 Jun 2024 11:13:21 +0200
Subject: [PATCH 43/86] Update Snakefile

- Removed SyrI rule to get an All assembly figure ( will be replaced with a run of SyRI on chromosomes only instead of all assemblies)
- Renamed some rules for clarity
- Adding more elements to the final report
---
 Snakefile | 105 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 56 insertions(+), 49 deletions(-)

diff --git a/Snakefile b/Snakefile
index 028e807..b8f2cdb 100644
--- a/Snakefile
+++ b/Snakefile
@@ -196,8 +196,8 @@ rule contig_position:
             -o {output.fig}
         """
 
-rule clustering:
-    # Read ragtagged fastas and split chromosome sequences into according bins
+rule chromosome_clustering:
+    # Read ragtagged fastas and split chromosome sequences into according FASTA files
     input:
         expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
     output:
@@ -216,33 +216,9 @@ rule clustering:
         #done
         """
 
-rule create_allAsm_syri_fig:
-    # Create a SyRI figure containing all input assemblies
-    input:
-        ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
-        qry=expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
-    output:
-        fig="output/asm.syri.figs/pan1c."+config['name']+".allAsm.syri.png",
-        wrkdir=directory('data/asm.syri/all')
-    threads: 8
-    params:
-        app_path=config["app.path"]
-    shell:
-        """
-        mkdir {output.wrkdir}
-
-        bash scripts/getSyriFigs.sh \
-            -a {params.app_path} \
-            -t {threads} \
-            -d {output.wrkdir} \
-            -o $(basename {output.fig}) \
-            -r {input.ref} \
-            -q "{input.qry}"
-        mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
-        """
-
-rule create_sglAsm_syri_fig:
-    # Create a SyRI figure for a single input assembly
+rule SyRI_on_ASM:
+    # Run SyRI on a single assembly. 
+    # The assembly is mapped on the 'reference' and SyRI search for SV.
     input:
         ref="data/hap.ragtagged/"+config['reference'][:-5]+"ragtagged.fa.gz",
         qry="data/hap.ragtagged/{haplotype}.ragtagged.fa.gz"
@@ -269,12 +245,11 @@ rule create_sglAsm_syri_fig:
         """
 
 """
-
 Core section : Running PGGB
-
 """
 
-rule create_pggb_input_syri_fig:
+rule SyRI_on_chrInput:
+    # WIP
     input:
         fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
     output:
@@ -700,43 +675,75 @@ rule create_pan1c_report_fig:
         composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
         """
 
+def get_report_section():
+    """
+    Return 'create_pan1c_report' optional inputs to add them to the final report.
+    For example :
+        - SyRI figures on Assemblies
+    """
+    sections = dict()
+
+    sections["metadata"] = "output/pan1c."+config['name']+".gfa.metadata"
+
+    if config["get_ASMs_SyRI"] == "True":
+        sections["SyRI_on_ASMs_figs"] = expand(
+            "output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", 
+            haplotype=SAMPLES_NOREF
+            )
+
+    return sections
+        
+
 rule create_pan1c_report:
     # Produces a markdown report of chromosomes graphs
     input:
-        figs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
+        odgifigs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
         genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
-        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
+        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv",
+        unpack(get_report_section)
     output:
-        reportfig="output/pan1c."+config['name']+".report.md"
+        report="output/pan1c."+config['name']+".report.md"
     threads: 4
     params:
         app_path=config['app.path']
     run:
-        shell("touch {output.reportfig}")
+        shell("touch {output.report}")
+
+        # Adding Summary (made for importing in Joplin)
+        shell("echo '# Summary' >> {output.report}")
+        shell("echo '[TOC]' >> {output.report}")
+        shell("echo '' >> {output.report}")
+        
+        # Adding graph construction info
+        ## WIP
 
-        # Adding Summary
-        ## To Do
+        # Adding metadata
+        shell("echo '# Graph metadata' >> {output.report}")
+        shell("cat {input.metadata} >> {output.report}")
+        shell("echo '' >> {output.report}")
 
         # Adding General stats
-        shell("echo '# General stats' >> {output.reportfig}")
-        shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.reportfig}")
-        shell("echo '' >> {output.reportfig}")
+        shell("echo '# General stats' >> {output.report}")
+        shell("cat {input.genstats} | csv2md -d $'\\t' >> {output.report}")
+        shell("echo '' >> {output.report}")
 
         # Adding Path stats
-        shell("echo '# Path stats' >> {output.reportfig}")
-        shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.reportfig}")
-        shell("echo '' >> {output.reportfig}")
+        shell("echo '# Path stats' >> {output.report}")
+        shell("cat {input.pathstats} | csv2md -d $'\\t' >> {output.report}")
+        shell("echo '' >> {output.report}")
 
         # Adding chromosomes figures
-        fig_list = [fig for fig in input.figs]
+        fig_list = [fig for fig in input.odgifigs]
         print(fig_list)
         fig_list.sort()
         
-        shell("echo '# Chromosome-scale odgi graphs' >> {output.reportfig}")
+        shell("echo '# Chromosome-scale odgi graphs' >> {output.report}")
         for fig in fig_list:
             basename=os.path.basename(fig)
             chr_name=basename.split('.')[1]
             
-            shell("echo '## {chr_name}' >> {output.reportfig}")
-            shell("echo '![{basename}](./report/{basename})' >> {output.reportfig}")
-        shell("echo '' >> {output.reportfig}")
\ No newline at end of file
+            shell("echo '## {chr_name}' >> {output.report}")
+            shell("echo '![{basename}](./report/{basename})' >> {output.report}")
+        shell("echo '' >> {output.report}")
+
+        # Adding SyRI figs if produced
\ No newline at end of file
-- 
GitLab


From ef1345e67bd8da52e07b0b48ee0e5f46f09b80de Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 27 Jun 2024 11:21:23 +0200
Subject: [PATCH 44/86] Update Snakefile

Added addition of SyRI figs in the final report if wanted
---
 Snakefile | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index b8f2cdb..fc795e0 100644
--- a/Snakefile
+++ b/Snakefile
@@ -705,7 +705,8 @@ rule create_pan1c_report:
         report="output/pan1c."+config['name']+".report.md"
     threads: 4
     params:
-        app_path=config['app.path']
+        app_path=config['app.path'],
+        add_SyRI=config['get_ASMs_SyRI']
     run:
         shell("touch {output.report}")
 
@@ -746,4 +747,18 @@ rule create_pan1c_report:
             shell("echo '![{basename}](./report/{basename})' >> {output.report}")
         shell("echo '' >> {output.report}")
 
-        # Adding SyRI figs if produced
\ No newline at end of file
+        # Adding SyRI figs if produced
+        if params.add_SyRI:
+
+            fig_list = [fig for fig in input.SyRI_on_ASMs_figs]
+            fig_list.sort()
+
+            shell("echo '# SyRI on input assemblies' >> {output.report}")
+            for fig in fig_list:
+                basename = os.path.basename(fig)
+                hap_name = basename.split('.')[2:4]
+
+                shell("echo '## {hap_name[0]}, {hap_name[1]}' >> {output.report}")
+                shell("echo '![{basename}](./asm.syri.figs/{basename})' >> {output.report}")
+
+            shell("echo '' >> {output.report}")
\ No newline at end of file
-- 
GitLab


From 040bfb9b579aaec0ff9093b4be44fa3d29e6ecc3 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 27 Jun 2024 11:23:45 +0200
Subject: [PATCH 45/86] Update Snakefile

---
 Snakefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index fc795e0..e14cd2d 100644
--- a/Snakefile
+++ b/Snakefile
@@ -675,7 +675,7 @@ rule create_pan1c_report_fig:
         composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
         """
 
-def get_report_section():
+def get_report_sections():
     """
     Return 'create_pan1c_report' optional inputs to add them to the final report.
     For example :
@@ -684,6 +684,9 @@ def get_report_section():
     sections = dict()
 
     sections["metadata"] = "output/pan1c."+config['name']+".gfa.metadata"
+    sections["odgifigs"] = expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST)
+    sections["genstats"] = "output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
+    sections["pathstats"] = "output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
 
     if config["get_ASMs_SyRI"] == "True":
         sections["SyRI_on_ASMs_figs"] = expand(
@@ -697,10 +700,7 @@ def get_report_section():
 rule create_pan1c_report:
     # Produces a markdown report of chromosomes graphs
     input:
-        odgifigs=expand("output/report/"+config['name']+".{chromosome}.report.fig.png", chromosome=CHRLIST),
-        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
-        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv",
-        unpack(get_report_section)
+        unpack(get_report_sections)
     output:
         report="output/pan1c."+config['name']+".report.md"
     threads: 4
-- 
GitLab


From b87480f4c9c5fa6bc8e63ae2fe05fdda5207b25b Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 27 Jun 2024 11:24:25 +0200
Subject: [PATCH 46/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index e14cd2d..5372649 100644
--- a/Snakefile
+++ b/Snakefile
@@ -675,7 +675,7 @@ rule create_pan1c_report_fig:
         composite -geometry "+790+$ctgheight" {output.odgifig} {output.reportfig} {output.reportfig}
         """
 
-def get_report_sections():
+def get_report_sections(wildcards):
     """
     Return 'create_pan1c_report' optional inputs to add them to the final report.
     For example :
-- 
GitLab


From 6266a20a58ead08340ffc2cf5e81af09ee12be68 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 27 Jun 2024 16:37:14 +0200
Subject: [PATCH 47/86] Update Snakefile

Added threads to stats jobs
---
 Snakefile | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/Snakefile b/Snakefile
index 5372649..1610dd6 100644
--- a/Snakefile
+++ b/Snakefile
@@ -202,7 +202,7 @@ rule chromosome_clustering:
         expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
     output:
         temp(expand('data/chrInputs/'+config['name']+'.{chromosome}.fa', chromosome=CHRLIST))
-    threads: 2
+    threads: 1
     params:
         app_path=config["app.path"],
         pan_name=config["name"]
@@ -531,6 +531,7 @@ rule aggregate_graphs_stats:
     output:
         genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
         pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
+    threads: 1
     params:
         app_path=config['app.path'],
         pan_name=config['name']
@@ -564,6 +565,7 @@ rule pggb_input_stats:
         flag="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
     output:
         "output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
+    threads: 1
     params:
         app_path=config['app.path'],
         pan_name=config['name']
@@ -576,11 +578,12 @@ rule pggb_input_stats:
 rule core_statistics:
     # Aggregate chrInput, chrGraph and pggb statistics into a single tsv 
     input:
-        chrInputStats="output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
-        chrGraphStats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
+        chrInputStats = "output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
+        chrGraphStats = "output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
     output:
-        tsv="output/stats/pan1c."+config['name']+".core.stats.tsv",
-        dir=directory("output/pggb.usage.figs")
+        tsv = "output/stats/pan1c."+config['name']+".core.stats.tsv",
+        dir = directory("output/pggb.usage.figs")
+    threads: 1
     params:
         app_path=config['app.path'],
         pan_name=config['name']
@@ -593,12 +596,7 @@ rule core_statistics:
         """
 
 """
-
-Post-processing section :
-    The graph for each chromosome are made as well as some basic statistics.
-    In this section, more stats are produced but more specifics ones requiring dedicated tools (Panacus, PAVs for Panache ...).
-    It also contains rules to use the graph itself. 
-
+Post-processing section
 """
 rule get_pav:
     # Create PAV matrix readable by panache for a given chromosome scale graph
@@ -694,8 +692,7 @@ def get_report_sections(wildcards):
             haplotype=SAMPLES_NOREF
             )
 
-    return sections
-        
+    return sections      
 
 rule create_pan1c_report:
     # Produces a markdown report of chromosomes graphs
-- 
GitLab


From fb1938c8f1b2a79b4f100f5bcba6d4ead6d3b46a Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 3 Jul 2024 09:26:26 +0200
Subject: [PATCH 48/86] Update Snakefile

Added logs to more steps :
- Panacus
- Squeeze
- SyRI
- Quast
- RagTag
---
 Snakefile | 51 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/Snakefile b/Snakefile
index 1610dd6..7e5773a 100644
--- a/Snakefile
+++ b/Snakefile
@@ -99,15 +99,20 @@ rule ragtag_scaffolding:
     retries: 1
     params:
         app_path=config["app.path"]
+    log: 
+        cmd="logs/ragtag/{haplotype}.ragtag.cmd.log",
+        time="logs/ragtag/{haplotype}.ragtag.time.log"
     shell:
         """
-        bash scripts/ragtagChromInfer.sh \
+        /usr/bin/time -v -o {log.time} \
+            bash scripts/ragtagChromInfer.sh \
             -d $(dirname {output.fa})/{wildcards.haplotype} \
             -a {params.app_path} \
             -t {threads} \
             -r {input.ref} \
             -q {input.fa} \
-            -o {output.fa}
+            -o {output.fa} 2>&1 | \
+            tee {log.cmd}
 
         #if [[ -z $(zgrep '[^[:space:]]' {output.fa}) ]] ; then
         #    echo "Error : Empty final fasta"
@@ -126,9 +131,13 @@ rule quast_stats:
     params:
         app_path=config["app.path"],
         pan_name=config["name"]
+    log: 
+        cmd="logs/quast/{haplotype}.quast.cmd.log",
+        time="logs/quast/{haplotype}.quast.time.log"
     shell:
         """
-        apptainer run {params.app_path}/pan1c-env.sif quast.py \
+        /usr/bin/time -v -o {log.time} \
+            apptainer run {params.app_path}/pan1c-env.sif quast.py \
             -t {threads} \
             -o "$(dirname {output.report})" \
             -r {input.ref} \
@@ -137,7 +146,8 @@ rule quast_stats:
             --no-icarus \
             -s \
             --large \
-            {input.fas}
+            {input.fas} 2>&1 | \
+            tee {log.cmd}
 
         # Compressing temporary files
         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
@@ -211,9 +221,6 @@ rule chromosome_clustering:
         mkdir -p $(dirname {output[0]})
         apptainer run {params.app_path}/pan1c-env.sif python scripts/inputClustering.py \
             --fasta {input} --output $(dirname {output[0]}) --panname {params.pan_name}
-        #for file in $(dirname {output[0]})/*.fa; do
-        #    apptainer run --app bgzip {params.app_path}/PanGeTools.sif -@ {threads} $file
-        #done
         """
 
 rule SyRI_on_ASM:
@@ -225,6 +232,9 @@ rule SyRI_on_ASM:
     output:
         fig="output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png",
         wrkdir=directory('data/asm.syri/{haplotype}')
+    log: 
+        cmd="logs/SyRI_ASM/{haplotype}.SyRI_ASM.cmd.log",
+        time="logs/SyRI_ASM/{haplotype}.SyRI_ASM.time.log"
     threads: 4
     retries: 1
     resources:
@@ -234,13 +244,16 @@ rule SyRI_on_ASM:
     shell:
         """
         mkdir -p {output.wrkdir}
-        bash scripts/getSyriFigs.sh \
+        /usr/bin/time -v -o {log.time} \
+            bash scripts/getSyriFigs.sh \
             -a {params.app_path} \
             -t {threads} \
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
             -r {input.ref} \
-            -q "{input.qry}"
+            -q "{input.qry}" 2>&1 | \
+            tee {log.cmd}
+        
         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
         """
 
@@ -470,15 +483,22 @@ rule graph_squeeze:
         graphs=expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
     output:
         "output/pan1c."+config['name']+".gfa"
+    log: 
+        cmd="logs/squeeze/"+config['name']+".squeeze.cmd.log",
+        time="logs/squeeze/"+config['name']+".squeeze.time.log",
     threads: 16
     params:
         app_path=config['app.path']
     shell:
         """
-        apptainer run --app odgi {params.app_path}/PanGeTools.sif \
-            squeeze -t {threads} -O -P -f {input.glist} -o {output}.og
+        /usr/bin/time -v -o {log.time} \
+            apptainer run --app odgi {params.app_path}/PanGeTools.sif \
+            squeeze -t {threads} -O -P -f {input.glist} -o {output}.og 2>&1 | \
+            tee {log.cmd}
+
         apptainer run --app odgi {params.app_path}/PanGeTools.sif \
             view -t {threads} -P -i {output}.og -g > {output}
+
         rm {output}.og
         """
 
@@ -622,6 +642,9 @@ rule panacus_stats:
         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
     output:
         html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
+    log: 
+        cmd="logs/panacus/{chromosome}.panacus.cmd.log",
+        time="logs/panacus/{chromosome}.panacus.time.log"
     params:
         app_path=config['app.path'],
         pan_name=config['name'],
@@ -629,13 +652,15 @@ rule panacus_stats:
     threads: 8
     shell:
         """
-        bash scripts/getPanacusHG.sh \
+        /usr/bin/time -v -o {log.time} \
+            bash scripts/getPanacusHG.sh \
             -g {input.graph} \
             -r $(basename {params.refname} .fa.gz) \
             -d data/chrGraphs/{wildcards.chromosome} \
             -o {output.html} \
             -a {params.app_path} \
-            -t {threads}
+            -t {threads} 2>&1 | \
+            tee {log.cmd}
         """
 
 rule create_pan1c_report_fig:
-- 
GitLab


From c6c377917d516431653b6d489b00826a51f04630 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 3 Jul 2024 09:28:29 +0200
Subject: [PATCH 49/86] Update Snakefile

---
 Snakefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 7e5773a..5839de7 100644
--- a/Snakefile
+++ b/Snakefile
@@ -132,8 +132,8 @@ rule quast_stats:
         app_path=config["app.path"],
         pan_name=config["name"]
     log: 
-        cmd="logs/quast/{haplotype}.quast.cmd.log",
-        time="logs/quast/{haplotype}.quast.time.log"
+        cmd="logs/quast/quast.cmd.log",
+        time="logs/quast/quast.time.log"
     shell:
         """
         /usr/bin/time -v -o {log.time} \
-- 
GitLab


From 61f7281cf117f239d2af017a223011a68e599285 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 4 Jul 2024 09:10:09 +0200
Subject: [PATCH 50/86] Update tools.smk

---
 rules/tools.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rules/tools.smk b/rules/tools.smk
index 5aab858..53fe1ff 100644
--- a/rules/tools.smk
+++ b/rules/tools.smk
@@ -27,5 +27,5 @@ rule run_bgzip:
     shell:
         """
         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
-            -@ {threads}
+            -@ {threads} {input}
         """
\ No newline at end of file
-- 
GitLab


From 2ca1fc4e14a6e75a4f671ffe5b1cd4fed4a3f518 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 4 Jul 2024 15:30:37 +0200
Subject: [PATCH 51/86] Update Snakefile

---
 Snakefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Snakefile b/Snakefile
index 5839de7..1abfc61 100644
--- a/Snakefile
+++ b/Snakefile
@@ -742,7 +742,9 @@ rule create_pan1c_report:
 
         # Adding metadata
         shell("echo '# Graph metadata' >> {output.report}")
+        shell("echo '```' >> {output.report}")
         shell("cat {input.metadata} >> {output.report}")
+        shell("echo '```' >> {output.report}")
         shell("echo '' >> {output.report}")
 
         # Adding General stats
-- 
GitLab


From a7593a90e16c94afdf7baf21526441fde10f100e Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:14:29 +0200
Subject: [PATCH 52/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 1abfc61..ac6421f 100644
--- a/Snakefile
+++ b/Snakefile
@@ -279,7 +279,7 @@ rule SyRI_on_chrInput:
 
         ## Creating single fasta from multifasta
         zcat {input.fasta} | awk -F"#" \
-            '/^>/ {OUT="{output.wrkdir}" substr($0,2) ".fa"}; {print >> OUT; close(OUT)}'
+            '/^>/ {{OUT="{output.wrkdir}" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
         
         ## Getting the list of sequences
         asmList=()
-- 
GitLab


From 33802127ef3fed52872ba2709c92dfdad977b891 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:16:07 +0200
Subject: [PATCH 53/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index ac6421f..793aef4 100644
--- a/Snakefile
+++ b/Snakefile
@@ -294,7 +294,7 @@ rule SyRI_on_chrInput:
             -t {threads} \
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
-            -r {output.wrkdir}/"${refname}.fa" \
+            -r {output.wrkdir}/"${{refname}}.fa" \
             -q "${asmList[@]}"
         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
         rm {output.wrkdir}/*.fa
-- 
GitLab


From 2e8e05af134a7d7e231257267a7f65aaaa929ba1 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:16:51 +0200
Subject: [PATCH 54/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 793aef4..499b8dd 100644
--- a/Snakefile
+++ b/Snakefile
@@ -295,7 +295,7 @@ rule SyRI_on_chrInput:
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
             -r {output.wrkdir}/"${{refname}}.fa" \
-            -q "${asmList[@]}"
+            -q "${{asmList[@]}}"
         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
         rm {output.wrkdir}/*.fa
         """
-- 
GitLab


From 02dcafaeed179468e840d1d6878c67c1decd7704 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:18:48 +0200
Subject: [PATCH 55/86] Update Snakefile

---
 Snakefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Snakefile b/Snakefile
index 499b8dd..2b49ce5 100644
--- a/Snakefile
+++ b/Snakefile
@@ -280,6 +280,8 @@ rule SyRI_on_chrInput:
         ## Creating single fasta from multifasta
         zcat {input.fasta} | awk -F"#" \
             '/^>/ {{OUT="{output.wrkdir}" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
+
+        grep '>' {output.wrkdir}/*.fa
         
         ## Getting the list of sequences
         asmList=()
-- 
GitLab


From 13721c88cdee86ad13bf4d83537897387bd7a059 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:21:06 +0200
Subject: [PATCH 56/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 2b49ce5..26a0210 100644
--- a/Snakefile
+++ b/Snakefile
@@ -267,7 +267,7 @@ rule SyRI_on_chrInput:
         fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
     output:
         fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
-        wrkdir=directory('data/chrInput/syri/{chromosome}')
+        wrkdir=directory('data/chrInputs/syri/{chromosome}/')
     threads: 8
     params:
         app_path=config["app.path"],
-- 
GitLab


From f3a90a07bb9f4676d882cf79efefb7fbfd2ef28b Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:23:50 +0200
Subject: [PATCH 57/86] Update Snakefile

---
 Snakefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 26a0210..9460fbf 100644
--- a/Snakefile
+++ b/Snakefile
@@ -267,7 +267,7 @@ rule SyRI_on_chrInput:
         fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
     output:
         fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
-        wrkdir=directory('data/chrInputs/syri/{chromosome}/')
+        wrkdir=directory('data/chrInputs/syri/{chromosome}')
     threads: 8
     params:
         app_path=config["app.path"],
@@ -279,7 +279,7 @@ rule SyRI_on_chrInput:
 
         ## Creating single fasta from multifasta
         zcat {input.fasta} | awk -F"#" \
-            '/^>/ {{OUT="{output.wrkdir}" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
+            '/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
 
         grep '>' {output.wrkdir}/*.fa
         
-- 
GitLab


From be9cbe44ae911968e43e54fa8ab4755342c73977 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:28:34 +0200
Subject: [PATCH 58/86] Update Snakefile

---
 Snakefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 9460fbf..84f47f6 100644
--- a/Snakefile
+++ b/Snakefile
@@ -268,7 +268,7 @@ rule SyRI_on_chrInput:
     output:
         fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
         wrkdir=directory('data/chrInputs/syri/{chromosome}')
-    threads: 8
+    threads: workflow.cores
     params:
         app_path=config["app.path"],
         ref=config['reference']
@@ -282,6 +282,8 @@ rule SyRI_on_chrInput:
             '/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
 
         grep '>' {output.wrkdir}/*.fa
+        apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
+            -@ {threads} {output.wrkdir}/*.fa
         
         ## Getting the list of sequences
         asmList=()
-- 
GitLab


From 7ce85183a84af0db35d88c1d054d74ffd2353241 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:29:46 +0200
Subject: [PATCH 59/86] Update Snakefile

---
 Snakefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 84f47f6..0820f13 100644
--- a/Snakefile
+++ b/Snakefile
@@ -287,8 +287,8 @@ rule SyRI_on_chrInput:
         
         ## Getting the list of sequences
         asmList=()
-        for file in {output.wrkdir}/*.fa; do
-            asm="$(basename $file .fa | cut -f1 -d"#").fa"
+        for file in {output.wrkdir}/*.fa.gz; do
+            asm="$(basename $file .fa.gz | cut -f1 -d"#").fa.gz"
             mv $file "$(dirname $file)/$asm"
             asmList+=("$asm")
         done
-- 
GitLab


From a02ff604b899b089858b0d0db5241fb836405f45 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:30:59 +0200
Subject: [PATCH 60/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 0820f13..c6dea3b 100644
--- a/Snakefile
+++ b/Snakefile
@@ -298,7 +298,7 @@ rule SyRI_on_chrInput:
             -t {threads} \
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
-            -r {output.wrkdir}/"${{refname}}.fa" \
+            -r {output.wrkdir}/"${{refname}}.fa.gz" \
             -q "${{asmList[@]}}"
         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
         rm {output.wrkdir}/*.fa
-- 
GitLab


From 16a9997590055486ddf2fdae9cf4c93ec6c69748 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:32:25 +0200
Subject: [PATCH 61/86] Update Snakefile

---
 Snakefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index c6dea3b..eafe179 100644
--- a/Snakefile
+++ b/Snakefile
@@ -281,10 +281,10 @@ rule SyRI_on_chrInput:
         zcat {input.fasta} | awk -F"#" \
             '/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
 
-        grep '>' {output.wrkdir}/*.fa
         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} {output.wrkdir}/*.fa
-        
+        zgrep '>' {output.wrkdir}/*.fa
+
         ## Getting the list of sequences
         asmList=()
         for file in {output.wrkdir}/*.fa.gz; do
-- 
GitLab


From 701dd233cc9e506944941ae965bc755d8639aab3 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:33:48 +0200
Subject: [PATCH 62/86] Update Snakefile

---
 Snakefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index eafe179..e3977c5 100644
--- a/Snakefile
+++ b/Snakefile
@@ -281,9 +281,10 @@ rule SyRI_on_chrInput:
         zcat {input.fasta} | awk -F"#" \
             '/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
 
+        ls 
         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} {output.wrkdir}/*.fa
-        zgrep '>' {output.wrkdir}/*.fa
+        zgrep '>' {output.wrkdir}/*.fa.gz
 
         ## Getting the list of sequences
         asmList=()
-- 
GitLab


From 4e32c41d2e119d15819408954b17bdca064e4cdf Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:37:29 +0200
Subject: [PATCH 63/86] Update Snakefile

---
 Snakefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index e3977c5..f3d98c9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -281,7 +281,6 @@ rule SyRI_on_chrInput:
         zcat {input.fasta} | awk -F"#" \
             '/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
 
-        ls 
         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} {output.wrkdir}/*.fa
         zgrep '>' {output.wrkdir}/*.fa.gz
@@ -291,7 +290,7 @@ rule SyRI_on_chrInput:
         for file in {output.wrkdir}/*.fa.gz; do
             asm="$(basename $file .fa.gz | cut -f1 -d"#").fa.gz"
             mv $file "$(dirname $file)/$asm"
-            asmList+=("$asm")
+            asmList+=("$(dirname $file)/$asm")
         done
 
         bash scripts/getSyriFigs.sh \
-- 
GitLab


From 4892e9bc9d170ddd8ebfbb159903ec5d5add4eab Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 11:53:38 +0200
Subject: [PATCH 64/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 3baf716..dc4b0f8 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -36,6 +36,8 @@ for item in "${temp[@]}"; do
     fi
 done
 
+echo "${asmList[@]}"
+
 # Array to store the created syri files 
 syriFileList=()
 
@@ -45,8 +47,8 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
     # Setting filepaths for later
     bamFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).bam"
     syriFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).syri.out"
-    #hapID=$(basename ${asmList[i + 1]} .ragtagged.fa.gz | sed 's/.hap/#/')
-    #refID=$(basename ${asmList[i]} .ragtagged.fa.gz | sed 's/.hap/#/')
+    hapID=$(basename ${asmList[i + 1]} .ragtagged.fa.gz | sed 's/.hap/#/')
+    refID=$(basename ${asmList[i]} .ragtagged.fa.gz | sed 's/.hap/#/')
 
     # Debug
     echo -e "[Debug::SyRI_script::$hapID] hapID: $hapID\trefID: $refID"
-- 
GitLab


From 6a09682cf95f923893caef5a4fad3cc2cd034dc7 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 13:28:42 +0200
Subject: [PATCH 65/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index dc4b0f8..5e1ed5a 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -36,7 +36,7 @@ for item in "${temp[@]}"; do
     fi
 done
 
-echo "${asmList[@]}"
+echo -e "${asmList[@]}"
 
 # Array to store the created syri files 
 syriFileList=()
-- 
GitLab


From a263bd73bbada19908622df4c4cabed1c1adb87f Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 13:33:19 +0200
Subject: [PATCH 66/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 5e1ed5a..724c1fd 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -36,7 +36,7 @@ for item in "${temp[@]}"; do
     fi
 done
 
-echo -e "${asmList[@]}"
+echo "The array : ${asmList[@]}"
 
 # Array to store the created syri files 
 syriFileList=()
-- 
GitLab


From bbe34e4ed6eb8e7c7c4aaab324ce126b336425d4 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 13:36:58 +0200
Subject: [PATCH 67/86] Update Snakefile

---
 Snakefile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Snakefile b/Snakefile
index f3d98c9..188937d 100644
--- a/Snakefile
+++ b/Snakefile
@@ -286,12 +286,14 @@ rule SyRI_on_chrInput:
         zgrep '>' {output.wrkdir}/*.fa.gz
 
         ## Getting the list of sequences
-        asmList=()
+        AllAsmList=()
         for file in {output.wrkdir}/*.fa.gz; do
             asm="$(basename $file .fa.gz | cut -f1 -d"#").fa.gz"
             mv $file "$(dirname $file)/$asm"
-            asmList+=("$(dirname $file)/$asm")
+            AllAsmList+=("$(dirname $file)/$asm")
         done
+        
+        echo "The ASM Array : ${{AllAsmList[@]}}"
 
         bash scripts/getSyriFigs.sh \
             -a {params.app_path} \
@@ -299,7 +301,7 @@ rule SyRI_on_chrInput:
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
             -r {output.wrkdir}/"${{refname}}.fa.gz" \
-            -q "${{asmList[@]}}"
+            -q "${{AllAsmList[@]}}"
         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
         rm {output.wrkdir}/*.fa
         """
-- 
GitLab


From 02d0e4b733af1958ab221caab1effab9f4b6f2ae Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 13:44:59 +0200
Subject: [PATCH 68/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 724c1fd..dc70828 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -28,7 +28,10 @@ done
 # Reading query argument and creating an array containing the path to query fasta files
 IFS=' ' read -r -a temp <<< "$qry"
 
+echo "tempArr : ${temp[@]}"
+
 asmList=("$ref")
+
 # Sorting the array to put the reference in first
 for item in "${temp[@]}"; do
     if [[ $item != "$ref" ]]; then
-- 
GitLab


From 0b15696608296670320bf294490400581d54a41c Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 13:46:52 +0200
Subject: [PATCH 69/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index dc70828..fe81ce8 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -28,6 +28,7 @@ done
 # Reading query argument and creating an array containing the path to query fasta files
 IFS=' ' read -r -a temp <<< "$qry"
 
+echo "Query : $qry"
 echo "tempArr : ${temp[@]}"
 
 asmList=("$ref")
-- 
GitLab


From bb6529350be5e348fbdd3a8342c795582c6f6fd2 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 13:55:22 +0200
Subject: [PATCH 70/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 188937d..ee9cc90 100644
--- a/Snakefile
+++ b/Snakefile
@@ -301,7 +301,7 @@ rule SyRI_on_chrInput:
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
             -r {output.wrkdir}/"${{refname}}.fa.gz" \
-            -q "${{AllAsmList[@]}}"
+            -q "${{AllAsmList[*]}}"
         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
         rm {output.wrkdir}/*.fa
         """
-- 
GitLab


From 119d8e1d880e6fac876738e2cabe8aee8e304b09 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 14:09:09 +0200
Subject: [PATCH 71/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index ee9cc90..e3491b4 100644
--- a/Snakefile
+++ b/Snakefile
@@ -279,7 +279,7 @@ rule SyRI_on_chrInput:
 
         ## Creating single fasta from multifasta
         zcat {input.fasta} | awk -F"#" \
-            '/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
+            '/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".hap" substr($0,3) ".fa"}}; {{print >> OUT; close(OUT)}}'
 
         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} {output.wrkdir}/*.fa
-- 
GitLab


From 67e49a0c1e995e74646658f824d173d96be0b133 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 14:19:04 +0200
Subject: [PATCH 72/86] Update Snakefile

---
 Snakefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Snakefile b/Snakefile
index e3491b4..46ff6a2 100644
--- a/Snakefile
+++ b/Snakefile
@@ -279,16 +279,16 @@ rule SyRI_on_chrInput:
 
         ## Creating single fasta from multifasta
         zcat {input.fasta} | awk -F"#" \
-            '/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".hap" substr($0,3) ".fa"}}; {{print >> OUT; close(OUT)}}'
+            '/^>/ {{OUT="{output.wrkdir}/" substr($0,2) ".fa"}}; {{print >> OUT; close(OUT)}}'
 
         apptainer run --app bgzip {params.app_path}/PanGeTools.sif \
             -@ {threads} {output.wrkdir}/*.fa
-        zgrep '>' {output.wrkdir}/*.fa.gz
+        #zgrep '>' {output.wrkdir}/*.fa.gz
 
         ## Getting the list of sequences
         AllAsmList=()
         for file in {output.wrkdir}/*.fa.gz; do
-            asm="$(basename $file .fa.gz | cut -f1 -d"#").fa.gz"
+            asm="$(basename $file .fa.gz | cut -f1,2 -d"#" | sed 's/#/\.hap/').fa.gz"
             mv $file "$(dirname $file)/$asm"
             AllAsmList+=("$(dirname $file)/$asm")
         done
-- 
GitLab


From b3cac8129237d833412e9b3e1c88b0e85c9f19cf Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 14:19:53 +0200
Subject: [PATCH 73/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 46ff6a2..68c535d 100644
--- a/Snakefile
+++ b/Snakefile
@@ -288,7 +288,7 @@ rule SyRI_on_chrInput:
         ## Getting the list of sequences
         AllAsmList=()
         for file in {output.wrkdir}/*.fa.gz; do
-            asm="$(basename $file .fa.gz | cut -f1,2 -d"#" | sed 's/#/\.hap/').fa.gz"
+            asm="$(basename $file .fa.gz | cut -f1,2 -d"#" | sed 's/#/\\.hap/').fa.gz"
             mv $file "$(dirname $file)/$asm"
             AllAsmList+=("$(dirname $file)/$asm")
         done
-- 
GitLab


From 93238a63287d70f5338bbaa97d5c16142d3cb743 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 14:29:21 +0200
Subject: [PATCH 74/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 68c535d..c03ec33 100644
--- a/Snakefile
+++ b/Snakefile
@@ -275,7 +275,7 @@ rule SyRI_on_chrInput:
     shell:
         """
         mkdir {output.wrkdir}
-        refname=$(basename {params.ref} .fa.gz | cut -d'.' -f1)
+        refname=$(basename {params.ref} .fa.gz | cut -d'.' -f1,2)
 
         ## Creating single fasta from multifasta
         zcat {input.fasta} | awk -F"#" \
-- 
GitLab


From 1ac7f9875caf20319afc040f1509c15de53b4439 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 14:42:54 +0200
Subject: [PATCH 75/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index fe81ce8..51ccfa5 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -27,20 +27,21 @@ done
 ## Main script
 # Reading query argument and creating an array containing the path to query fasta files
 IFS=' ' read -r -a temp <<< "$qry"
+readarray -td '' temp_sorted < <(printf '%s\0' "${temp[@]}" | sort -z)
 
-echo "Query : $qry"
-echo "tempArr : ${temp[@]}"
+#echo "Query : $qry"
+#echo "tempArr : ${temp[@]}"
 
 asmList=("$ref")
 
 # Sorting the array to put the reference in first
-for item in "${temp[@]}"; do
+for item in "${temp_sorted[@]}"; do
     if [[ $item != "$ref" ]]; then
         asmList+=($item)
     fi
 done
 
-echo "The array : ${asmList[@]}"
+#echo "The array : ${asmList[@]}"
 
 # Array to store the created syri files 
 syriFileList=()
@@ -51,11 +52,11 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
     # Setting filepaths for later
     bamFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).bam"
     syriFile="$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz).syri.out"
-    hapID=$(basename ${asmList[i + 1]} .ragtagged.fa.gz | sed 's/.hap/#/')
-    refID=$(basename ${asmList[i]} .ragtagged.fa.gz | sed 's/.hap/#/')
+    hapID=$(basename ${asmList[i + 1]})
+    refID=$(basename ${asmList[i]})
 
     # Debug
-    echo -e "[Debug::SyRI_script::$hapID] hapID: $hapID\trefID: $refID"
+    echo -e "\n[Debug::SyRI_script::$hapID] hapID: $hapID\trefID: $refID\n"
     
     # Adding the output syri file to the array
     syriFileList+=($syriFile)
-- 
GitLab


From 13b49a01a058ac488f5e309a3f51deba49b9ae14 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 5 Jul 2024 18:05:13 +0200
Subject: [PATCH 76/86] Updated SyRI script

Added width and height to SyRI script
---
 Snakefile              |  3 ++-
 scripts/getSyriFigs.sh | 10 +++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Snakefile b/Snakefile
index c03ec33..e38f585 100644
--- a/Snakefile
+++ b/Snakefile
@@ -301,7 +301,8 @@ rule SyRI_on_chrInput:
             -d {output.wrkdir} \
             -o $(basename {output.fig}) \
             -r {output.wrkdir}/"${{refname}}.fa.gz" \
-            -q "${{AllAsmList[*]}}"
+            -q "${{AllAsmList[*]}}" \
+            -h 9 -w 16
         mv {output.wrkdir}/$(basename {output.fig}) {output.fig}
         rm {output.wrkdir}/*.fa
         """
diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 51ccfa5..848e2e4 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -9,9 +9,11 @@ appdir=""       # Directory containing apptainer images
 threads=""      
 wrkdir=""       # Working directory (directory used by pggb to store step files like .paf, etc...)
 output=""       # Output Syri figure(s)
+height=16       # Figure height
+width=9         # Figure width
 
 ## Getting arguments
-while getopts "r:q:a:t:d:o:" option; do
+while getopts "r:q:a:t:d:o:h:w:" option; do
     case "$option" in
         r) ref="$OPTARG";;
         q) qry="$OPTARG";;
@@ -19,7 +21,9 @@ while getopts "r:q:a:t:d:o:" option; do
         t) threads="$OPTARG";;
         d) wrkdir="$OPTARG";;
         o) output="$OPTARG";;
-        \?) echo "Usage: $0 [-r ref] [-q query] [-a appdir] [-t threads] [-d wrkdir] [-o output]" >&2
+        h) height="$OPTARG";;
+        w) width="$OPTARG";;
+        \?) echo "Usage: $0 [-r ref] [-q query] [-a appdir] [-t threads] [-d wrkdir] [-o output] [-h height] [-w width]" >&2
             exit 1;;
     esac
 done
@@ -93,7 +97,7 @@ for asm in "${asmList[@]}"; do
 done
 
 # Generating the plotsr command
-command="--genomes $wrkdir/genomes.txt -o $wrkdir/$output -f 12 -H 16 -W 9 -d 600 "
+command="--genomes $wrkdir/genomes.txt -o $wrkdir/$output -f 12 -H $height -W $width -d 600 "
 
 # Adding syri files to the command as each needs to be specified using "--sr" argument 
 for file in "${syriFileList[@]}"; do
-- 
GitLab


From b59f1f312c49d5fa0af2fc035ef1b564d9a10589 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 8 Jul 2024 11:01:14 +0200
Subject: [PATCH 77/86] Added Syri on chrInputs

---
 Snakefile                | 9 +++++++--
 config.yaml              | 2 +-
 example/config_CICD.yaml | 3 ++-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/Snakefile b/Snakefile
index e38f585..c493eea 100644
--- a/Snakefile
+++ b/Snakefile
@@ -50,9 +50,13 @@ def which_analysis():
     if config["get_PAV"] == "True": # Adding PAV matrix creation
         analysis_inputs.append("output/pav.matrices")
 
-    if config["get_ASMs_SyRI"] == "True": # Creating SyRI  for each figures 
+    if config["get_ASMs_SyRI"] == "True": # Creating SyRI for each input assembly 
         analysis_inputs.append(
-            expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES_NOREF),
+            expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES_NOREF)
+        )
+    if config["get_chrInputs_SyRI"] == "True": # Creating SyRI figures for each PGGB input
+        analysis_input.append(
+            expand("output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png", chromosome=CHRLIST)
         )
     if config["run_Quast"] == "True": # Running Quast on input haplotypes
         analysis_inputs.append(
@@ -576,6 +580,7 @@ rule final_graph_tagging:
     output:
         "output/pan1c."+config['name']+".gfa.metadata"
     threads: 1
+    priority: 100
     params:
         app_path=config['app.path'],
         pan_name=config['name']
diff --git a/config.yaml b/config.yaml
index 5fe91c1..0a8502a 100644
--- a/config.yaml
+++ b/config.yaml
@@ -27,7 +27,7 @@ get_contig_pos: 'True'
 # Computes Presence Absence Variant matrices for Panache (not recommended; very long)
 get_PAV: 'False'
 # Computes SyRI figures for haplotypes 
-#get_allASM_SyRI: 'False' # All vs all
 get_ASMs_SyRI: 'False' # Haplotype vs Reference
+get_chrInputs_SyRI: 'False' # SyRI on chrInputs
 # Creating final report
 create_report: 'True'
diff --git a/example/config_CICD.yaml b/example/config_CICD.yaml
index 648e686..cc0c279 100644
--- a/example/config_CICD.yaml
+++ b/example/config_CICD.yaml
@@ -28,6 +28,7 @@ get_contig_pos: 'True'
 get_PAV: 'False'
 # Computes SyRI figures for haplotypes 
 #get_allASM_SyRI: 'False' # All vs all
-get_ASMs_SyRI: 'False' # Haplotype vs Reference
+get_ASMs_SyRI: 'True' # Haplotype vs Reference
+get_chrInputs_SyRI: 'True' # SyRI on chrInputs
 # Creating final report
 create_report: 'True'
-- 
GitLab


From b4779c67f09da8ce25b0996a60aeadd37e33fe71 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 8 Jul 2024 11:06:41 +0200
Subject: [PATCH 78/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index c493eea..7420323 100644
--- a/Snakefile
+++ b/Snakefile
@@ -55,7 +55,7 @@ def which_analysis():
             expand("output/asm.syri.figs/pan1c."+config['name']+".{haplotype}.syri.png", haplotype=SAMPLES_NOREF)
         )
     if config["get_chrInputs_SyRI"] == "True": # Creating SyRI figures for each PGGB input
-        analysis_input.append(
+        analysis_inputs.append(
             expand("output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png", chromosome=CHRLIST)
         )
     if config["run_Quast"] == "True": # Running Quast on input haplotypes
-- 
GitLab


From 867217fd644f7c180e3187f4754d34b3a87c426c Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 9 Jul 2024 10:40:14 +0200
Subject: [PATCH 79/86] Update Snakefile

---
 Snakefile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Snakefile b/Snakefile
index e38f585..a5d3272 100644
--- a/Snakefile
+++ b/Snakefile
@@ -97,6 +97,7 @@ rule ragtag_scaffolding:
         tar="data/hap.ragtagged/{haplotype}.tar.gz"
     threads: 8
     retries: 1
+    priority: 100
     params:
         app_path=config["app.path"]
     log: 
@@ -213,6 +214,7 @@ rule chromosome_clustering:
     output:
         temp(expand('data/chrInputs/'+config['name']+'.{chromosome}.fa', chromosome=CHRLIST))
     threads: 1
+    priority: 100
     params:
         app_path=config["app.path"],
         pan_name=config["name"]
@@ -316,6 +318,7 @@ rule wfmash_on_chr:
         mapping=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf"),
         aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
     threads: 16
+    priority: 100
     params:
         app_path=config['app.path'],
         segment_length=config['wfmash.segment_length'],
@@ -365,6 +368,7 @@ rule seqwish:
     output:
         temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
     threads: 8
+    priority: 100
     params:
         app_path=config['app.path'],
         seqwish=config['seqwish.params']
@@ -393,6 +397,7 @@ rule gfaffix_on_chr:
         gfa=temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa"),
         transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
     threads: 1
+    priority: 100
     params:
         app_path=config['app.path']
     log: 
@@ -416,6 +421,7 @@ rule odgi_postprocessing:
     output:
         gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
     threads: 8
+    priority: 100
     params:
         app_path=config['app.path']
     log: 
@@ -478,6 +484,7 @@ rule generate_graph_list:
     output:
         "data/chrGraphs/graphsList.txt"
     threads: 1
+    priority: 100
     run:
         with open(output[0], "w") as handle:
             for file in input:
@@ -494,6 +501,7 @@ rule graph_squeeze:
         cmd="logs/squeeze/"+config['name']+".squeeze.cmd.log",
         time="logs/squeeze/"+config['name']+".squeeze.time.log",
     threads: 16
+    priority: 100
     params:
         app_path=config['app.path']
     shell:
-- 
GitLab


From 1ccf65891b89b9b983c4fc30d016e94d302a9242 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 9 Jul 2024 10:55:17 +0200
Subject: [PATCH 80/86] Update syri_pruning.py

Unifying chromosome sequence record's name to prevent SyRI from erroring out when input chromosomes can be matched with 2 query chromosomes
---
 scripts/syri_pruning.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/syri_pruning.py b/scripts/syri_pruning.py
index 811e2b9..2482cb2 100644
--- a/scripts/syri_pruning.py
+++ b/scripts/syri_pruning.py
@@ -62,6 +62,7 @@ with gzip.open(args.ref, 'rt') as handle:
             chrname = record.id.split("#")[-1]
             chrList.append(chrname)
             seqDict[name][chrname] = record
+            seqDict[name][chrname].id = chrname
 
 # Parsing fasta files
 for filename in args.fastafiles:
@@ -74,6 +75,7 @@ for filename in args.fastafiles:
         for record in sequences:
             chrname = record.id.split("#")[-1]
             seqDict[name][chrname] = record
+            seqDict[name][chrname].id = chrname
 
 # Getting the list of common chromosomes
 for hap, chromosomes in seqDict.items():
-- 
GitLab


From ea67debd266fb5851d34c29973443d7f04b71e43 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 9 Jul 2024 12:15:38 +0200
Subject: [PATCH 81/86] Update getSyriFigs.sh

Using prunned FASTAs instead of input FASTAs
---
 scripts/getSyriFigs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 848e2e4..07cdec7 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -84,7 +84,7 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
 
     # Syri on previous alignment
     apptainer run $appdir/pan1c-env.sif \
-        syri -c $wrkdir/$bamFile -r ${asmList[i]} -q ${asmList[i + 1]} -k -F B \
+        syri -c $wrkdir/$bamFile -r $wrkdir/$(basename ${asmList[i]} .gz) -q $wrkdir/$(basename ${asmList[i + 1]} .gz) -k -F B \
         --nc $threads \
         --dir $wrkdir --prefix "$(basename ${asmList[i]} .fa.gz)_$(basename ${asmList[i + 1]} .fa.gz)."    
 done
-- 
GitLab


From 204d3b62b6932029f4664554fbee0a7dfc59984a Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 9 Jul 2024 15:53:30 +0200
Subject: [PATCH 82/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 07cdec7..92b1cdb 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -69,6 +69,11 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
     apptainer run $appdir/pan1c-env.sif python scripts/syri_pruning.py \
         -r ${asmList[i]} -f ${asmList[i + 1]} -o $wrkdir
 
+    echo "\n[Debug::SyRI_script::$hapID] REF"
+    grep '>' ${asmList[i]}
+    echo "\n[Debug::SyRI_script::$hapID] QRY"
+    grep '>' ${asmList[i + 1]}
+    
     # Renaming chromosomes with same ids as the reference (Not working)
     #sed -i "s/${hapID}#chr/${refID}#chr/g" $wrkdir/$(basename ${asmList[i + 1]} .gz)
 
-- 
GitLab


From ba890499fe98c030df6542da90dbe262994796de Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 9 Jul 2024 15:55:57 +0200
Subject: [PATCH 83/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 92b1cdb..972d2dc 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -70,10 +70,10 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
         -r ${asmList[i]} -f ${asmList[i + 1]} -o $wrkdir
 
     echo "\n[Debug::SyRI_script::$hapID] REF"
-    grep '>' ${asmList[i]}
+    grep '>' $wrkdir/$(basename ${asmList[i]} .gz)
     echo "\n[Debug::SyRI_script::$hapID] QRY"
-    grep '>' ${asmList[i + 1]}
-    
+    grep '>' $wrkdir/$(basename ${asmList[i + 1]} .gz)
+
     # Renaming chromosomes with same ids as the reference (Not working)
     #sed -i "s/${hapID}#chr/${refID}#chr/g" $wrkdir/$(basename ${asmList[i + 1]} .gz)
 
-- 
GitLab


From 2cad655b30c1917c30bdda9000eaf6cabb9ea1b1 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 9 Jul 2024 15:57:56 +0200
Subject: [PATCH 84/86] Update getSyriFigs.sh

---
 scripts/getSyriFigs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index 972d2dc..e85ea25 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -98,7 +98,7 @@ done
 # Each line contains 2 columns : fasta filepath and its simpler name
 echo -e "#files\tname" > $wrkdir/genomes.txt
 for asm in "${asmList[@]}"; do
-    echo -e "${asm}\t$(basename $asm .fa.gz | cut -d'.' -f1)" >> $wrkdir/genomes.txt
+    echo -e "$wrkdir/$(basename ${asm} .gz)\t$(basename $asm .fa.gz | cut -d'.' -f1)" >> $wrkdir/genomes.txt
 done
 
 # Generating the plotsr command
-- 
GitLab


From 059b5f346bece0dcfc40bca951de04c7711d14bb Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 9 Jul 2024 16:06:07 +0200
Subject: [PATCH 85/86] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 94cb1f6..a86dcef 100644
--- a/Snakefile
+++ b/Snakefile
@@ -274,7 +274,7 @@ rule SyRI_on_chrInput:
     output:
         fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
         wrkdir=directory('data/chrInputs/syri/{chromosome}')
-    threads: workflow.cores
+    threads: 8
     params:
         app_path=config["app.path"],
         ref=config['reference']
-- 
GitLab


From 249b23d0fd2492d4d43581c8d9b7a00d986249fc Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Thu, 11 Jul 2024 12:15:39 +0200
Subject: [PATCH 86/86] Silented some debug lines

---
 Snakefile              |  2 +-
 scripts/getSyriFigs.sh | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Snakefile b/Snakefile
index a86dcef..71c2bb3 100644
--- a/Snakefile
+++ b/Snakefile
@@ -299,7 +299,7 @@ rule SyRI_on_chrInput:
             AllAsmList+=("$(dirname $file)/$asm")
         done
         
-        echo "The ASM Array : ${{AllAsmList[@]}}"
+        #echo "The ASM Array : ${{AllAsmList[@]}}"
 
         bash scripts/getSyriFigs.sh \
             -a {params.app_path} \
diff --git a/scripts/getSyriFigs.sh b/scripts/getSyriFigs.sh
index e85ea25..84f93c2 100755
--- a/scripts/getSyriFigs.sh
+++ b/scripts/getSyriFigs.sh
@@ -67,12 +67,12 @@ for ((i = 0; i < ${#asmList[@]} - 1; i++)); do
 
     # Prunning fasta files based of common chromosomes
     apptainer run $appdir/pan1c-env.sif python scripts/syri_pruning.py \
-        -r ${asmList[i]} -f ${asmList[i + 1]} -o $wrkdir
+        -r ${asmList[i]} -f ${asmList[i + 1]} -o $wrkdir -d
 
-    echo "\n[Debug::SyRI_script::$hapID] REF"
-    grep '>' $wrkdir/$(basename ${asmList[i]} .gz)
-    echo "\n[Debug::SyRI_script::$hapID] QRY"
-    grep '>' $wrkdir/$(basename ${asmList[i + 1]} .gz)
+    #echo "\n[Debug::SyRI_script::$hapID] REF"
+    #grep '>' $wrkdir/$(basename ${asmList[i]} .gz)
+    #echo "\n[Debug::SyRI_script::$hapID] QRY"
+    #grep '>' $wrkdir/$(basename ${asmList[i + 1]} .gz)
 
     # Renaming chromosomes with same ids as the reference (Not working)
     #sed -i "s/${hapID}#chr/${refID}#chr/g" $wrkdir/$(basename ${asmList[i + 1]} .gz)
-- 
GitLab