|
| 1 | +include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk/main' |
| 2 | +include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference/main' |
| 3 | +include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase/main' |
| 4 | +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main' |
| 5 | +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index/main.nf' |
| 6 | +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index/main.nf' |
| 7 | + |
//
// Subworkflow: impute with GLIMPSE2.
//
// Stages, in order:
//   1. (optional, chunk == true)          GLIMPSE2_CHUNK on the reference panel + map
//   2. (optional, splitreference == true) GLIMPSE2_SPLITREFERENCE per chunk
//   3. GLIMPSE2_PHASE for every (input, chunk) combination
//   4. BCFTOOLS_INDEX_1 on each phased file
//   5. GLIMPSE2_LIGATE of all phased files sharing the same meta
//   6. BCFTOOLS_INDEX_2 on the ligated file
//
workflow BAM_VCF_IMPUTE_GLIMPSE2 {

    take:
    ch_input        // channel (mandatory): [ meta, vcf, csi, list, infos ]
    ch_ref          // channel (mandatory): [ meta, vcf, csi, region ]
    ch_chunks       // channel (optional) : [ meta, regionin, regionout ]
    ch_map          // channel (optional) : [ meta, map ]
    ch_fasta        // channel (optional) : [ meta, fasta, index ]
    chunk           // val (optional)     : boolean to activate/deactivate chunking step
    chunk_model     // val (optional)     : model file for chunking
    splitreference  // val (optional)     : boolean to activate/deactivate split reference step

    main:

    ch_versions = channel.empty()

    if ( chunk == true ) {
        // Pre-defined chunks and automatic chunking are mutually exclusive:
        // abort as soon as a supplied chunk carries a non-empty region.
        ch_chunks
            .filter { _meta, region_in, region_out ->
                region_in.size() > 0 || region_out.size() > 0
            }
            .subscribe {
                error "ERROR: Cannot provide pre-defined chunks (regionin) when chunk=true. Please either set chunk=false to use provided chunks, or remove input chunks to enable automatic chunking."
            }

        // Pair each reference panel with the map sharing its meta, then chunk it
        ch_panel_with_map = ch_ref.combine(ch_map, by: 0)

        GLIMPSE2_CHUNK( ch_panel_with_map, chunk_model )
        ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() )

        // Column names applied to the (header-less, tab-separated) chunk file
        def chunk_columns = [
            'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm',
            'WindowMb', 'NbTotVariants', 'NbComVariants'
        ]

        // One channel item per chunk row: [ meta, RegionBuf, RegionCnk ]
        ch_chunks = GLIMPSE2_CHUNK.out.chunk_chr
            .splitCsv(header: chunk_columns, sep: "\t", skip: 0)
            .map { meta, row ->
                [ meta, row["RegionBuf"], row["RegionCnk"] ]
            }
    }

    // Whatever the origin of the chunks, each one must define both regions
    ch_chunks
        .filter { _meta, region_in, region_out ->
            region_in.size() == 0 || region_out.size() == 0
        }
        .subscribe {
            error "ERROR: ch_chunks channel is empty. Please provide a valid channel or set chunk parameter to true."
        }

    if ( splitreference == true ) {
        // Split reference panel in bin files, one per chunk.
        // The chunk regions are stored in meta so they can be stripped again before ligation.
        split_input = ch_ref
            .combine(ch_chunks, by: 0)
            .combine(ch_map, by: 0)
            .map { meta, panel_vcf, panel_csi, _region, region_in, region_out, genetic_map ->
                def chunk_meta = meta + ["regionin": region_in, "regionout": region_out]
                [ chunk_meta, panel_vcf, panel_csi, region_in, region_out, genetic_map ]
            }

        GLIMPSE2_SPLITREFERENCE( split_input )
        ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() )

        // Everything is provided by the bin file, so regions, index and map are left empty
        ch_chunks_panel_map = GLIMPSE2_SPLITREFERENCE.out.bin_ref
            .map { meta, bin_file ->
                [ meta, [], [], bin_file, [], [] ]
            }
    }
    else {
        // No splitting: hand the panel, its index and the map to the phasing step directly
        ch_chunks_panel_map = ch_chunks
            .combine(ch_ref, by: 0)
            .combine(ch_map, by: 0)
            .map { meta, region_in, region_out, panel_vcf, panel_csi, _region, genetic_map ->
                def chunk_meta = meta + ["regionin": region_in, "regionout": region_out]
                [ chunk_meta, region_in, region_out, panel_vcf, panel_csi, genetic_map ]
            }
    }

    // Fail when matching chunks / panel / map by meta produced nothing
    ch_chunks_panel_map.ifEmpty {
        error "ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input."
    }

    // Cartesian product: every input is phased against every chunk
    ch_phase_input = ch_input
        .combine(ch_chunks_panel_map)
        .map { input_meta, target, target_index, sample_list, sample_infos,
               chunk_meta, region_in, region_out, panel, panel_index, genetic_map ->
            [
                input_meta + chunk_meta,                         // combined metadata
                target, target_index, sample_list, sample_infos, // input files
                region_in, region_out,                           // chunk regions
                panel, panel_index, genetic_map                  // panel and map files
            ]
        }

    // Impute with Glimpse2
    GLIMPSE2_PHASE( ch_phase_input, ch_fasta )
    ch_versions = ch_versions.mix( GLIMPSE2_PHASE.out.versions.first() )

    // Index phased file
    BCFTOOLS_INDEX_1( GLIMPSE2_PHASE.out.phased_variants )
    ch_versions = ch_versions.mix( BCFTOOLS_INDEX_1.out.versions.first() )

    // Ligate all phased files in one and index it.
    // The per-chunk keys are removed from meta so that all chunks of the same
    // input end up under one grouping key.
    ligate_input = GLIMPSE2_PHASE.out.phased_variants
        .join( BCFTOOLS_INDEX_1.out.csi )
        .map { meta, phased_vcf, phased_csi ->
            def retained_keys = meta.keySet() - ['regionin', 'regionout']
            [ meta.subMap(retained_keys), phased_vcf, phased_csi ]
        }
        .groupTuple()

    GLIMPSE2_LIGATE( ligate_input )
    ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() )

    BCFTOOLS_INDEX_2( GLIMPSE2_LIGATE.out.merged_variants )
    ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions.first() )

    // Join imputed and index files (the index may come from either the tbi or the csi output)
    ch_ligated_index = BCFTOOLS_INDEX_2.out.tbi.mix( BCFTOOLS_INDEX_2.out.csi )
    ch_vcf_index     = GLIMPSE2_LIGATE.out.merged_variants.join( ch_ligated_index )

    emit:
    ch_chunks    = ch_chunks    // channel: [ val(meta), regionin, regionout ]
    ch_vcf_index = ch_vcf_index // channel: [ val(meta), vcf, index (tbi or csi) ]

    versions     = ch_versions  // channel: [ versions.yml ]
}
0 commit comments