Skip to content

Commit 03a1f54

Browse files
committed
Update glimpse2 sbwf
1 parent d911d93 commit 03a1f54

File tree

12 files changed

+1195
-457
lines changed

12 files changed

+1195
-457
lines changed

modules/nf-core/glimpse2/splitreference/main.nf

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ process GLIMPSE2_SPLITREFERENCE {
1818
'biocontainers/glimpse-bio:2.0.1--h46b9e50_1' }"
1919

2020
input:
21-
tuple val(meta) , path(reference), path(reference_index), val(input_region), val(output_region)
22-
tuple val(meta2), path(map)
21+
tuple val(meta) , path(reference), path(reference_index), val(input_region), val(output_region), path(map)
2322

2423

2524
output:

modules/nf-core/glimpse2/splitreference/meta.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,6 @@ input:
4242
type: string
4343
description: Target imputed region, excluding left and right buffers (e.g. chr20:1000000-2000000).
4444
pattern: "chrXX:leftBufferPosition-rightBufferPosition"
45-
- - meta2:
46-
type: map
47-
description: |
48-
Groovy Map containing genomic map information
49-
e.g. `[ map:'GRCh38' ]`
5045
- map:
5146
type: file
5247
description: File containing the genetic map.

modules/nf-core/glimpse2/splitreference/tests/main.nf.test

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ nextflow_process {
2020
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
2121
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
2222
"chr22:16600000-16800000",
23-
"chr22:16600000-16800000"
23+
"chr22:16600000-16800000",
24+
[]
2425
]
25-
input[1]= [[ id:'map'],[]]
2626
"""
2727
}
2828
}
@@ -48,10 +48,7 @@ nextflow_process {
4848
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
4949
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
5050
"chr22:16600000-16800000",
51-
"chr22:16600000-16800000"
52-
]
53-
input[1]= [
54-
[ id:'map'],
51+
"chr22:16600000-16800000",
5552
file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.glimpse.map", checkIfExists:true)
5653
]
5754
"""
@@ -80,9 +77,9 @@ nextflow_process {
8077
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
8178
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
8279
"chr22:16600000-16800000",
83-
"chr22:16600000-16800000"
80+
"chr22:16600000-16800000",
81+
[]
8482
]
85-
input[1]= [[ id:'map'],[]]
8683
"""
8784
}
8885
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk/main'
2+
include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference/main'
3+
include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase/main'
4+
include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main'
5+
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index/main.nf'
6+
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index/main.nf'
7+
8+
workflow BAM_VCF_IMPUTE_GLIMPSE2 {
9+
10+
take:
11+
ch_input // channel (mandatory): [ meta, file (BAM/CRAM/VCF/BCF), index, bamlist, ploidy ]
12+
ch_ref // channel (mandatory): [ meta, vcf, csi, region ]
13+
ch_chunks // channel (optional) : [ meta, regionin, regionout ]
14+
ch_map // channel (optional) : [ meta, map ]
15+
ch_fasta // channel (optional) : [ meta, fasta, index ]
16+
chunk // val (optional) : boolean to activate/deactivate chunking step
17+
chunk_model // val (optional) : chunking model to use ("sequential" or "recursive")
18+
splitreference // val (optional) : boolean to activate/deactivate split reference step
19+
20+
main:
21+
22+
ch_versions = channel.empty()
23+
24+
if ( chunk == true ){
25+
// Error if pre-defined chunks are provided when chunking is activated
26+
ch_chunks
27+
.filter { _meta, regionin, regionout -> regionin.size() > 0 || regionout.size() > 0 }
28+
.subscribe {
29+
error "ERROR: Cannot provide pre-defined chunks (regionin) when chunk=true. Please either set chunk=false to use provided chunks, or remove input chunks to enable automatic chunking."
30+
}
31+
32+
// Chunk reference panel
33+
ch_ref_map = ch_ref
34+
.combine(ch_map, by: 0)
35+
GLIMPSE2_CHUNK ( ch_ref_map, chunk_model )
36+
ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() )
37+
38+
ch_chunks = GLIMPSE2_CHUNK.out.chunk_chr
39+
.splitCsv(header: [
40+
'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm',
41+
'WindowMb', 'NbTotVariants', 'NbComVariants'
42+
], sep: "\t", skip: 0)
43+
.map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]]}
44+
}
45+
46+
ch_chunks
47+
.filter { _meta, regionin, regionout -> regionin.size() == 0 || regionout.size() == 0 }
48+
.subscribe {
49+
error "ERROR: ch_chunks channel is empty. Please provide a valid channel or set chunk parameter to true."
50+
}
51+
52+
if ( splitreference == true ) {
53+
// Split the reference panel into binary files, one per chunk
54+
split_input = ch_ref
55+
.combine(ch_chunks, by: 0)
56+
.combine(ch_map, by: 0)
57+
.map{ meta, ref, index, _region, regionin, regionout, gmap -> [
58+
meta + ["regionin": regionin, "regionout": regionout],
59+
ref, index, regionin, regionout, gmap
60+
] }
61+
62+
GLIMPSE2_SPLITREFERENCE( split_input )
63+
ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() )
64+
65+
ch_chunks_panel_map = GLIMPSE2_SPLITREFERENCE.out.bin_ref
66+
.map{ meta, bin_ref -> [ meta, [], [], bin_ref, [], [] ] } // Everything is provided by the bin file
67+
} else {
68+
ch_chunks_panel_map = ch_chunks
69+
.combine(ch_ref, by:0)
70+
.combine(ch_map, by:0)
71+
.map{ meta, regionin, regionout, ref, ref_index, _region, gmap -> [
72+
meta + ["regionin": regionin, "regionout": regionout],
73+
regionin, regionout, ref, ref_index, gmap
74+
] }
75+
}
76+
77+
ch_chunks_panel_map.ifEmpty{
78+
error "ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input."
79+
}
80+
81+
ch_phase_input = ch_input
82+
.combine(ch_chunks_panel_map)
83+
.map{ metaI, input, index, list, infos, metaCPM, regionin, regionout, panel, panel_index, gmap -> [
84+
metaI + metaCPM, // combined metadata
85+
input, index, list, infos, // input files
86+
regionin, regionout, // chunk regions
87+
panel, panel_index, gmap // panel and map files
88+
] }
89+
90+
// Impute with Glimpse2
91+
GLIMPSE2_PHASE(ch_phase_input, ch_fasta)
92+
ch_versions = ch_versions.mix( GLIMPSE2_PHASE.out.versions.first() )
93+
94+
// Index phased file
95+
BCFTOOLS_INDEX_1(GLIMPSE2_PHASE.out.phased_variants)
96+
ch_versions = ch_versions.mix( BCFTOOLS_INDEX_1.out.versions.first() )
97+
98+
// Ligate all phased files into a single file and index it
99+
ligate_input = GLIMPSE2_PHASE.out.phased_variants
100+
.join( BCFTOOLS_INDEX_1.out.csi )
101+
.map{ meta, vcf, index ->
102+
def keysToKeep = meta.keySet() - ['regionin', 'regionout']
103+
[ meta.subMap(keysToKeep), vcf, index ]
104+
}
105+
.groupTuple()
106+
107+
GLIMPSE2_LIGATE( ligate_input )
108+
ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() )
109+
110+
BCFTOOLS_INDEX_2( GLIMPSE2_LIGATE.out.merged_variants )
111+
ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions.first() )
112+
113+
// Join imputed and index files
114+
ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants
115+
.join(
116+
BCFTOOLS_INDEX_2.out.tbi
117+
.mix(BCFTOOLS_INDEX_2.out.csi)
118+
)
119+
120+
emit:
121+
ch_chunks = ch_chunks // channel: [ val(meta), regionin, regionout ]
122+
ch_vcf_index = ch_vcf_index // channel: [ val(meta), vcf, index (tbi or csi) ]
123+
124+
versions = ch_versions // channel: [ versions.yml ]
125+
}

subworkflows/nf-core/multiple_impute_glimpse2/meta.yml renamed to subworkflows/nf-core/bam_vcf_impute_glimpse2/meta.yml

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: "multiple_impute_glimpse2"
1+
name: "bam_vcf_impute_glimpse2"
22
description: Impute VCF/BCF files, but also CRAM and BAM files with Glimpse2
33
keywords:
44
- glimpse
@@ -18,8 +18,9 @@ input:
1818
description: |
1919
Target dataset in CRAM, BAM or VCF/BCF format.
2020
Index file of the input file.
21+
File containing the list of files to be imputed and their sample names (for CRAM/BAM input).
2122
File with sample names and ploidy information.
22-
Structure: [ meta, file, index, txt ]
23+
Structure: [ meta, file, index, bamlist, ploidy ]
2324
- ch_ref:
2425
type: file
2526
description: |
@@ -28,32 +29,46 @@ input:
2829
Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20).
2930
The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended).
3031
Structure: [ meta, vcf, csi, region ]
32+
- ch_chunks:
33+
type: string
34+
description: |
35+
Channel containing the chunking regions for each chromosome.
36+
Structure: [ meta, region with buffer, region without buffer ]
3137
- ch_map:
3238
type: file
3339
description: |
34-
File containing the genetic map.
40+
Genetic map file for each chromosome.
3541
Structure: [ meta, gmap ]
3642
- ch_fasta:
3743
type: file
3844
description: |
3945
Reference genome in fasta format.
4046
Reference genome index in fai format.
4147
Structure: [ meta, fasta, fai ]
48+
- chunk:
49+
type: boolean
50+
description: Whether to perform chunking of the input data before imputation.
51+
- chunk_model:
52+
type: string
53+
description: |
54+
Chunking model to use.
55+
Options: "sequential", "recursive"
56+
- splitreference:
57+
type: boolean
58+
description: Whether to split the reference panel and convert it to binary files before imputation.
59+
4260
output:
43-
- chunk_chr:
44-
type: file
61+
- ch_chunks:
62+
type: string
4563
description: |
46-
Tab delimited output txt file containing buffer and imputation regions.
47-
Structure: [meta, txt]
48-
- merged_variants:
64+
Channel containing the chunking regions for each chromosome.
65+
Structure: [ meta, region with buffer, region without buffer ]
66+
- ch_vcf_index:
4967
type: file
5068
description: |
5169
Output VCF/BCF file for the merged regions.
52-
Phased information (HS field) is updated accordingly for the full region.
53-
Structure: [ val(meta), bcf ]
54-
- merged_variants_index:
55-
type: file
56-
description: Index file of the ligated phased variants files.
70+
Index file of the output VCF/BCF file.
71+
Structure: [ val(meta), variants, index ]
5772
- versions:
5873
type: file
5974
description: File containing software versions

0 commit comments

Comments
 (0)