diff --git a/CWL/tools/bioconda-tool-bwa-aln.yml b/CWL/tools/bioconda-tool-bwa-aln.yml index a018439..7023f4a 100755 --- a/CWL/tools/bioconda-tool-bwa-aln.yml +++ b/CWL/tools/bioconda-tool-bwa-aln.yml @@ -33,7 +33,13 @@ outputs: inputs: prefix: - type: string + type: File + secondaryFiles: + - ".amb" + - ".ann" + - ".bwt" + - ".pac" + - ".sa" doc: | reference prefix inputBinding: @@ -84,7 +90,7 @@ inputs: prefix: "-e" minIndelEndDist: - type: int?] + type: int? doc: | do not put an indel within INT bp towards the ends [5] inputBinding: @@ -139,8 +145,6 @@ inputs: position: 1 prefix: "-q" - - doc: | Usage: bwa aln [options] diff --git a/CWL/tools/bioconda-tool-bwa-index.cwl b/CWL/tools/bioconda-tool-bwa-index.cwl index 9867fd2..a9dc355 100755 --- a/CWL/tools/bioconda-tool-bwa-index.cwl +++ b/CWL/tools/bioconda-tool-bwa-index.cwl @@ -1,15 +1,20 @@ #!/usr/bin/env cwl-runner -cwlVersion: "v1.0" - class: CommandLineTool +cwlVersion: "v1.0" + s:author: - class: s:Person s:identifier: https://orcid.org/0000-0001-6231-4417 s:email: mailto:karl.nordstroem@uni-saarland.de s:name: Karl Nordström +requirements: + - class: InlineJavascriptRequirement + - class: InitialWorkDirRequirement + listing: input + hints: - class: ResourceRequirement coresMin: 1 @@ -18,10 +23,8 @@ hints: - class: DockerRequirement dockerPull: "quay.io/biocontainers/bwa:0.6.2--1" -requirements: - - class: InlineJavascriptRequirement - inputs: + input: type: File inputBinding: @@ -29,7 +32,7 @@ inputs: algorithm: type: string? - description: | + doc: | BWT construction algorithm: bwtsw or is (Default: auto) inputBinding: position: 2 @@ -37,7 +40,7 @@ inputs: outPrefix: type: string? - description: | + doc: | Prefix of the index (Default: same as fasta name) inputBinding: position: 2 @@ -45,7 +48,7 @@ inputs: blockSize: type: int? 
- description: | + doc: | Block size for the bwtsw algorithm (effective with -a bwtsw) (Default: 10000000) inputBinding: position: 2 @@ -53,78 +56,88 @@ inputs: altSuffix: type: boolean? - description: | + doc: | Index files named as .64.* instead of .* inputBinding: position: 2 prefix: "-6" outputs: - - id: output - type: { type: array, items: File } - outputBinding: - glob: - - ${ - if (inputs.p) { - return inputs.p + ".amb" - } else { - if (inputs._6 == true) { - return inputs.input.path + ".64.amb" - } else { - return inputs.input.path + ".amb" - } - } - } - - ${ - if (inputs.p) { - return inputs.p + ".ann" - } else { - if (inputs._6 == true) { - return inputs.input.path + ".64.ann" - } else { - return inputs.input.path + ".ann" - } - } - } - - ${ - if (inputs.p) { - return inputs.p + ".bwt" - } else { - if (inputs._6 == true) { - return inputs.input.path + ".64.bwt" - } else { - return inputs.input.path + ".bwt" - } - } - } - - ${ - if (inputs.p) { - return inputs.p + ".pac" - } else { - if (inputs._6 == true) { - return inputs.input.path + ".64.pac" - } else { - return inputs.input.path + ".pac" - } - } - } - - ${ - if (inputs.p) { - return inputs.p + ".sa" - } else { - if (inputs._6 == true) { - return inputs.input.path + ".64.sa" - } else { - return inputs.input.path + ".sa" - } - } - } + # - id: output +# index: +# type: File +# outputBinding: +# glob: $( inputs.input ) +# secondaryFiles: +# - ".amb" +# - ".ann" +# - ".bwt" +# - ".pac" +# - ".sa" + # type: { type: array, items: File } + # outputBinding: + # glob: + # - ${ + # if (inputs.p) { + # return inputs.p + ".amb" + # } else { + # if (inputs._6 == true) { + # return inputs.input.path + ".64.amb" + # } else { + # return inputs.input.path + ".amb" + # } + # } + # } + # - ${ + # if (inputs.p) { + # return inputs.p + ".ann" + # } else { + # if (inputs._6 == true) { + # return inputs.input.path + ".64.ann" + # } else { + # return inputs.input.path + ".ann" + # } + # } + # } + # - ${ + # if (inputs.p) { + 
# return inputs.p + ".bwt" + # } else { + # if (inputs._6 == true) { + # return inputs.input.path + ".64.bwt" + # } else { + # return inputs.input.path + ".bwt" + # } + # } + # } + # - ${ + # if (inputs.p) { + # return inputs.p + ".pac" + # } else { + # if (inputs._6 == true) { + # return inputs.input.path + ".64.pac" + # } else { + # return inputs.input.path + ".pac" + # } + # } + # } + # - ${ + # if (inputs.p) { + # return inputs.p + ".sa" + # } else { + # if (inputs._6 == true) { + # return inputs.input.path + ".64.sa" + # } else { + # return inputs.input.path + ".sa" + # } + # } + # } baseCommand: - bwa - index -description: | +doc: | Usage: bwa index [options] Options: -a STR BWT construction algorithm: bwtsw or is [auto] @@ -135,8 +148,6 @@ description: | Warning: `-a bwtsw' does not work for short genomes, while `-a is' and `-a div' do not work not for long genomes. - - $namespaces: s: https://schema.org/ edam: http://edamontology.org/ diff --git a/CWL/tools/bioconda-tool-bwa-sampe.yml b/CWL/tools/bioconda-tool-bwa-sampe.yml index e58bef8..e30f832 100644 --- a/CWL/tools/bioconda-tool-bwa-sampe.yml +++ b/CWL/tools/bioconda-tool-bwa-sampe.yml @@ -35,7 +35,13 @@ outputs: inputs: prefix: - type: string + type: File + secondaryFiles: + - ".amb" + - ".ann" + - ".bwt" + - ".pac" + - ".sa" doc: | reference prefix inputBinding: @@ -71,6 +77,14 @@ inputs: inputBinding: position: 14 + maximumInsertSize: + type: int? + inputBinding: + position: 5 + prefix: "-a" + doc: | + maximum insert size [500] + noSW: type: boolean? doc: | @@ -103,7 +117,13 @@ inputs: position: 5 prefix: "-N" - + readGroupHeaderLine: + type: string?
+ inputBinding: + position: 5 + prefix: "-r" + doc: | + read group header line such as '@RG\tID:foo\tSM:bar' [null] doc: | Usage: bwa sampe [options] diff --git a/CWL/tools/bioconda-tool-picard-MarkDuplicates.cwl b/CWL/tools/bioconda-tool-picard-MarkDuplicates.cwl new file mode 100644 index 0000000..f0539d3 --- /dev/null +++ b/CWL/tools/bioconda-tool-picard-MarkDuplicates.cwl @@ -0,0 +1,427 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool + +id: "Picard_MarkDuplicates" +label: "Picard MarkDuplicates" + +cwlVersion: "v1.0" + +doc: | + ![build_status](https://quay.io/repository/karl616/dockstore-tool-picard/status) + A Docker container containing the Picard jar file. See the [Picard](http://broadinstitute.github.io/picard/) webpage for more information. + +s:author: + - class: s:Person + s:identifier: https://orcid.org/0000-0001-6231-4417 + s:email: mailto:karl.nordstroem@uni-saarland.de + s:name: Karl Nordström + + +requirements: + - class: InlineJavascriptRequirement + +hints: + - class: ResourceRequirement + coresMin: 1 + ramMin: 4092 + outdirMin: 512000 + - class: DockerRequirement + dockerPull: "quay.io/biocontainers/picard:2.17.2--py36_0" + +baseCommand: ["picard", "MarkDuplicates"] + +outputs: + + OUTPUT_output: + type: File + outputBinding: + glob: $( inputs.OUTPUT ) + + METRICS_FILE_output: + type: File + outputBinding: + glob: $( inputs.METRICS_FILE ) + +inputs: + + MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP: + inputBinding: + position: 5 + prefix: "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=" + separate: false + type: int? + doc: | + This option is obsolete. ReadEnds will always be spilled to disk. Default value: 50000. This option can be set to 'null' to clear the default value. + + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: + inputBinding: + position: 5 + prefix: "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=" + separate: false + type: int? + doc: | + Maximum number of file handles to keep open when spilling read ends to disk. 
Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system. Default value: 8000. This option can be set to 'null' to clear the default value. + + SORTING_COLLECTION_SIZE_RATIO: + inputBinding: + position: 5 + prefix: "SORTING_COLLECTION_SIZE_RATIO=" + separate: false + type: double? + doc: | + This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. If you are running out of memory, try reducing this number. Default value: 0.25. This option can be set to 'null' to clear the default value. + + BARCODE_TAG: + inputBinding: + position: 5 + prefix: "BARCODE_TAG=" + separate: false + type: string? + doc: | + Barcode SAM tag (ex. BC for 10X Genomics) Default value: null. + + READ_ONE_BARCODE_TAG: + inputBinding: + position: 5 + prefix: "READ_ONE_BARCODE_TAG=" + separate: false + type: string? + doc: | + Read one barcode SAM tag (ex. BX for 10X Genomics) Default value: null. + + READ_TWO_BARCODE_TAG: + inputBinding: + position: 5 + prefix: "READ_TWO_BARCODE_TAG=" + separate: false + type: string? + doc: | + Read two barcode SAM tag (ex. BX for 10X Genomics) Default value: null. + + TAG_DUPLICATE_SET_MEMBERS: + inputBinding: + position: 5 + prefix: "TAG_DUPLICATE_SET_MEMBERS=true" + separate: false + type: boolean? + doc: | + If a read appears in a duplicate set, add two tags. The first tag, DUPLICATE_SET_SIZE_TAG (DS), indicates the size of the duplicate set. The smallest possible DS value is 2 which occurs when two reads map to the same portion of the reference only one of which is marked as duplicate. The second tag, DUPLICATE_SET_INDEX_TAG (DI), represents a unique identifier for the duplicate set to which the record belongs. This identifier is the index-in-file of the representative read that was selected out of the duplicate set. Default value: false. 
This option can be set to 'null' to clear the default value. Possible values: {true, false} + + REMOVE_SEQUENCING_DUPLICATES: + inputBinding: + position: 5 + prefix: "REMOVE_SEQUENCING_DUPLICATES=true" + separate: false + type: boolean? + doc: | + If true remove 'optical' duplicates and other duplicates that appear to have arisen from the sequencing process instead of the library preparation process, even if REMOVE_DUPLICATES is false. If REMOVE_DUPLICATES is true, all duplicates are removed and this option is ignored. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + TAGGING_POLICY: + inputBinding: + position: 5 + prefix: "TAGGING_POLICY=" + separate: false + type: + - "null" + - type: enum + symbols: [DontTag, OpticalOnly, All] + doc: | + Determines how duplicate types are recorded in the DT optional attribute. Default value: DontTag. This option can be set to 'null' to clear the default value. Possible values: {DontTag, OpticalOnly, All} + + CLEAR_DT: + inputBinding: + position: 5 + prefix: "CLEAR_DT=false" + separate: false + type: boolean? + doc: | + Clear DT tag from input SAM records. Should be set to false if input SAM doesn't have this tag. Default true Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + ADD_PG_TAG_TO_READS: + inputBinding: + position: 5 + prefix: "ADD_PG_TAG_TO_READS=false" + separate: false + type: boolean? + doc: | + Add PG tag to each read in a SAM or BAM Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + INPUT: + inputBinding: + position: 5 + type: +# - type: File +# inputBinding: +# prefix: "INPUT=" +# separate: false + - type: array + items: File + inputBinding: + prefix: "INPUT=" + separate: false + doc: | + One or more input SAM or BAM files to analyze. Must be coordinate sorted. Default value: null. 
This option may be specified 0 or more times. + + OUTPUT: + inputBinding: + position: 5 + prefix: "OUTPUT=" + separate: false + type: string + doc: | + The output file to write marked records to Required. + + METRICS_FILE: + inputBinding: + position: 5 + prefix: "METRICS_FILE=" + separate: false + type: string + doc: | + File to write duplication metrics to Required. + + REMOVE_DUPLICATES: + inputBinding: + position: 5 + prefix: "REMOVE_DUPLICATES=true" + separate: false + type: boolean? + doc: | + If true do not write duplicates to the output file instead of writing them with appropriate flags set. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + ASSUME_SORTED: + inputBinding: + position: 5 + prefix: "ASSUME_SORTED=true" + separate: false + type: boolean? + doc: | + If true, assume that the input file is coordinate sorted even if the header says otherwise. Deprecated, used ASSUME_SORT_ORDER=coordinate instead. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} Cannot be used in conjuction with option(s) ASSUME_SORT_ORDER (ASO) + + ASSUME_SORT_ORDER: + inputBinding: + position: 5 + prefix: "ASSUME_SORT_ORDER=" + separate: false + type: + - 'null' + - type: enum + symbols: [unsorted, queryname, coordinate, duplicate, unknown] + doc: | + If not null, assume that the input file has this order even if the header says otherwise. Default value: null. Possible values: {unsorted, queryname, coordinate, duplicate, unknown} Cannot be used in conjuction with option(s) ASSUME_SORTED (AS) + + DUPLICATE_SCORING_STRATEGY: + inputBinding: + position: 5 + prefix: "DUPLICATE_SCORING_STRATEGY=" + separate: false + type: + - 'null' + - type: enum + symbols: [SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM] + doc: | + The scoring strategy for choosing the non-duplicate among candidates. Default value: SUM_OF_BASE_QUALITIES. 
This option can be set to 'null' to clear the default value. Possible values: {SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + + PROGRAM_RECORD_ID: + inputBinding: + position: 5 + prefix: "PROGRAM_RECORD_ID=" + separate: false + type: string? + doc: | + The program record ID for the @PG record(s) created by this program. Set to null to disable PG record creation. This string may have a suffix appended to avoid collision with other program record IDs. Default value: MarkDuplicates. This option can be set to 'null' to clear the default value. + + PROGRAM_GROUP_VERSION: + inputBinding: + position: 5 + prefix: "PROGRAM_GROUP_VERSION=" + separate: false + type: string? + doc: | + Value of VN tag of PG record to be created. If not specified, the version will be detected automatically. Default value: null. + + PROGRAM_GROUP_COMMAND_LINE: + inputBinding: + position: 5 + prefix: "PROGRAM_GROUP_COMMAND_LINE=" + separate: false + type: string? + doc: | + Value of CL tag of PG record to be created. If not supplied the command line will be detected automatically. Default value: null. + + PROGRAM_GROUP_NAME: + inputBinding: + position: 5 + prefix: "PROGRAM_GROUP_NAME=" + separate: false + type: string? + doc: | + Value of PN tag of PG record to be created. Default value: MarkDuplicates. This option can be set to 'null' to clear the default value. + + COMMENT: + inputBinding: + position: 5 + prefix: "COMMENT=" + separate: false + type: string? + doc: | + Comment(s) to include in the output file's header. Default value: null. This option may be specified 0 or more times. + + READ_NAME_REGEX: + inputBinding: + position: 5 + prefix: "READ_NAME_REGEX=" + separate: false + type: string? + doc: | + Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. 
These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. Set this option to null to disable optical duplicate detection, e.g. for RNA-seq or other data where duplicate sets are extremely large and estimating library complexity is not an aim. Note that without optical duplicate counts, library size estimation will be inaccurate. The regular expression should contain three capture groups for the three variables, in order. It must match the entire read name. Note that if the default regex is specified, a regex match is not actually done, but instead the read name is split on colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements are assumed to be tile, x and y values. Default value: . This option can be set to 'null' to clear the default value. + + OPTICAL_DUPLICATE_PIXEL_DISTANCE: + inputBinding: + position: 5 + prefix: "OPTICAL_DUPLICATE_PIXEL_DISTANCE=" + separate: false + type: int? + doc: | + The maximum offset between two duplicate clusters in order to consider them optical duplicates. The default is appropriate for unpatterned versions of the Illumina platform. For the patterned flowcell models, 2500 is more appropriate. For other platforms and models, users should experiment to find what works best. Default value: 100. This option can be set to 'null' to clear the default value. + + MAX_OPTICAL_DUPLICATE_SET_SIZE: + inputBinding: + position: 5 + prefix: "MAX_OPTICAL_DUPLICATE_SET_SIZE=" + separate: false + type: long? + doc: | + This number is the maximum size of a set of duplicate reads for which we will attempt to determine which are optical duplicates. Please be aware that if you raise this value too high and do encounter a very large set of duplicate reads, it will severely affect the runtime of this tool. To completely disable this check, set the value to -1.
Default value: 300000. This option can be set to 'null' to clear the default value. + + TMP_DIR: + inputBinding: + position: 5 + prefix: "TMP_DIR=" + separate: false + type: string? + doc: | + One or more directories with space available to be used by this program for temporary storage of working files Default value: null. This option may be specified 0 or more times. + + VERBOSITY: + inputBinding: + position: 5 + prefix: "VERBOSITY=" + separate: false + type: + - 'null' + - type: enum + symbols: [ERROR, WARNING, INFO, DEBUG] + doc: | + Control verbosity of logging. Default value: INFO. This option can be set to 'null' to clear the default value. Possible values: {ERROR, WARNING, INFO, DEBUG} + + QUIET: + inputBinding: + position: 5 + prefix: "QUIET=true" + separate: false + type: boolean? + doc: | + Whether to suppress job-summary info on System.err. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + VALIDATION_STRINGENCY: + inputBinding: + position: 5 + prefix: "VALIDATION_STRINGENCY=" + separate: false + type: + - 'null' + - type: enum + symbols: [STRICT, LENIENT, SILENT] + doc: | + Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT, LENIENT, SILENT} + + COMPRESSION_LEVEL: + inputBinding: + position: 5 + prefix: "COMPRESSION_LEVEL=" + separate: false + type: int? + doc: | + Compression level for all compressed files created (e.g. BAM and VCF). Default value: 5. This option can be set to 'null' to clear the default value. + + MAX_RECORDS_IN_RAM: + inputBinding: + position: 5 + prefix: "MAX_RECORDS_IN_RAM=" + separate: false + type: int? 
+ doc: | + When writing files that need to be sorted, this will specify the number of records stored in RAM before spilling to disk. Increasing this number reduces the number of file handles needed to sort the file, and increases the amount of RAM needed. Default value: 500000. This option can be set to 'null' to clear the default value. + + CREATE_INDEX: + inputBinding: + position: 5 + prefix: "CREATE_INDEX=true" + separate: false + type: boolean? + doc: | + Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + CREATE_MD5_FILE: + inputBinding: + position: 5 + prefix: "CREATE_MD5_FILE=true" + separate: false + type: boolean? + doc: | + Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + REFERENCE_SEQUENCE: + inputBinding: + position: 5 + prefix: "REFERENCE_SEQUENCE=" + separate: false + type: File? + doc: | + Reference sequence file. Default value: null. + + GA4GH_CLIENT_SECRETS: + inputBinding: + position: 5 + prefix: "GA4GH_CLIENT_SECRETS=" + separate: false + type: File? + doc: | + Google Genomics API client_secrets.json file path. Default value: client_secrets.json. This option can be set to 'null' to clear the default value. + + USE_JDK_DEFLATER: + inputBinding: + position: 5 + prefix: "USE_JDK_DEFLATER=true" + separate: false + type: boolean? + doc: | + Use the JDK Deflater instead of the Intel Deflater for writing compressed output Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + USE_JDK_INFLATER: + inputBinding: + position: 5 + prefix: "USE_JDK_INFLATER=true" + separate: false + type: boolean? + doc: | + Use the JDK Inflater instead of the Intel Inflater for reading compressed input Default value: false. 
This option can be set to 'null' to clear the default value. Possible values: {true, false} + + OPTIONS_FILE: + inputBinding: + position: 5 + prefix: "OPTIONS_FILE=" + separate: false + type: File? + doc: | + File of OPTION_NAME=value pairs. No positional parameters allowed. Unlike command-line options, unrecognized options are ignored. A single-valued option set in an options file may be overridden by a subsequent command-line option. A line starting with '#' is considered a comment. Required. + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/docs/schema_org_rdfa.html + - http://edamontology.org/EDAM_1.18.owl diff --git a/CWL/tools/bioconda-tool-picard-collectMultipleMetrics.cwl b/CWL/tools/bioconda-tool-picard-collectMultipleMetrics.cwl new file mode 100644 index 0000000..d0284f3 --- /dev/null +++ b/CWL/tools/bioconda-tool-picard-collectMultipleMetrics.cwl @@ -0,0 +1,314 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool + +id: "Picard_CollectMultipleMetrics" +label: "Picard CollectMultipleMetrics" + +cwlVersion: "v1.0" + +doc: | + A Docker container containing the Picard jar file. 
See the [Picard](http://broadinstitute.github.io/picard/) webpage for more information + +s:author: + - class: s:Person + s:identifier: https://orcid.org/0000-0002-7816-2363 + s:email: mailto:wiebkeschmitt@outlook.de + s:name: Wiebke Schmitt + +requirements: + - class: InlineJavascriptRequirement + +hints: + - class: ResourceRequirement + coresMin: 1 + ramMin: 4092 + outdirMin: 512000 + - class: DockerRequirement + dockerPull: "quay.io/biocontainers/picard:2.17.2--py36_0" + +baseCommand: ["picard", "CollectMultipleMetrics"] + +outputs: + + summaryFiles: + type: File[] + outputBinding: + glob: $(inputs.OUTPUT + "*") + +# AlignmentSummarymetrics: +# type: File +# outputBinding: +# glob: $(inputs.OUTPUT) + +# InsertSizemetrics: +# type: File +# outputBinding: +# glob: $(inputs.OUTPUT) + +# QualityByCyclemetrics: +# type: File +# outputBinding: +# glob: $(inputs.OUTPUT) + +# QualityDistributionmetrics: +# type: File +# outputBinding: +# glob: $(inputs.OUTPUT) + +# QualityByCyclemetricsTwo: +# type: File +# outputBinding: +# glob: $(inputs.OUTPUT) + +# QualityDistributionmetricsTwo: +# type: File +# outputBinding: +# glob: $(inputs.OUTPUT) + +inputs: + + ASSUME_SORTED: + inputBinding: + position: 5 + prefix: "ASSUME_SORTED=true" + separate: false + type: boolean? + doc: | + if true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: [true, false] + + STOP_AFTER: + inputBinding: + position: 5 + prefix: "STOP_AFTER=" + separate: false + type: int? + doc: | + Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. + + METRIC_ACCUMULATION_LEVEL: + inputBinding: + position: 5 + prefix: "METRIC_ACCUMULATION_LEVEL=" + separate: false + type: + - 'null' + - type: enum + symbols: [ALL_READS, SAMPLE, LIBRARY, READ_GROUP] + doc: | + The level(s) at which to accumulate metrics.
Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: [ALL_READS, SAMPLE, LIBRARY, READ_GROUP]. This option may be specified 0 or more times. This option can be set to 'null' to clear the default list. + + FILE_EXTENSION: + inputBinding: + position: 5 + prefix: "FILE_EXTENSION=" + separate: false + type: string? + doc: | + Append the given file extension to all metric file names (ex. OUTPUT.insert_size_metrics.EXT). None if null. Default value: null + + PROGRAM: + inputBinding: + position: 10 + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: "PROGRAM=" + separate: false +# items: +# type: enum +# symbols: +# - CollectAlignmentSummaryMetrics +# - CollectInsertSizeMetrics +# - QualityScoreDistribution +# - MeanQualityByCycle +# - CollectBaseDistributionByCycle +# - CollectGcBiasMetrics +# - RnaSeqMetrics +# - CollectSequencingArtifactMetrics +# - CollectQualityYieldMetrics + doc: | + Set of metrics programs to apply during the pass through the SAM file. Default value: [CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution]. This option can be set to 'null' to clear the default value. Possible values: [CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, QualityScoreDistribution, MeanQualityByCycle, CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, CollectSequencingArtifactMetrics, CollectQualityYieldMetrics]. This option may be specified 0 or more times. This option can be set to 'null' to clear the default list. + + INTERVALS: + inputBinding: + position: 5 + prefix: "INTERVALS=" + separate: false + type: File? + doc: | + An optional list of intervals to restrict analysis to. Only pertains to some of the PROGRAMs. Programs whose stand-alone CLP does not have an INTERVALS argument will silently ignore this argument.
Default value: null + + DB_SNP: + inputBinding: + position: 5 + prefix: "DB_SNP=" + separate: false + type: File? + doc: | + VCF format dbSNP file, used to exclude regions around known polymorphisms from analysis by some PROGRAMs; PROGRAMs whose CLP doesn't allow for this argument will quietly ignore it. Default value: null. + + INCLUDE_UNPAIRED: + inputBinding: + position: 5 + prefix: "INCLUDE_UNPAIRED=true" + separate: false + type: boolean? + doc: | + Include unpaired reads in CollectSequencingArtifactMetrics. If set to true then all paired reads will be included as well - MINIMUM_INSERT_SIZE and MAXIMUM_INSERT_SIZE will be ignored in CollectSequencingArtifactMetrics. Default value: false. This option can be set to 'null' to clear the default value. Possible values: [true, false] + + INPUT: + inputBinding: + position: 5 + prefix: "INPUT=" + separate: false + type: + - File + - type: array + items: File + inputBinding: + itemSeparator: "INPUT=" + doc: | + Input SAM or BAM file. Required. + + OUTPUT: + inputBinding: + position: 5 + prefix: "OUTPUT=" + separate: false + type: string + doc: | + Base name of output files. Required. + + TMP_DIR: + inputBinding: + position: 5 + prefix: "TMP_DIR=" + separate: false + type: string? + doc: | + One or more directories with space available to be used by this program for temporary storage of working files Default value: null. This option may be specified 0 or more times. + + VERBOSITY: + inputBinding: + position: 5 + prefix: "VERBOSITY=" + separate: false + type: + - 'null' + - type: enum + symbols: [ERROR, WARNING, INFO, DEBUG] + doc: | + Control verbosity of logging. Default value: INFO. This option can be set to 'null' to clear the default value. Possible values: {ERROR, WARNING, INFO, DEBUG} + + QUIET: + inputBinding: + position: 5 + prefix: "QUIET=true" + separate: false + type: boolean? + doc: | + Whether to suppress job-summary info on System.err. Default value: false.
This option can be set to 'null' to clear the default value. Possible values: {true, false} + + VALIDATION_STRINGENCY: + inputBinding: + position: 5 + prefix: "VALIDATION_STRINGENCY=" + separate: false + type: + - 'null' + - type: enum + symbols: [STRICT, LENIENT, SILENT] + doc: | + Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT, LENIENT, SILENT} + + COMPRESSION_LEVEL: + inputBinding: + position: 5 + prefix: "COMPRESSION_LEVEL=" + separate: false + type: int? + doc: | + Compression level for all compressed files created (e.g. BAM and VCF). Default value: 5. This option can be set to 'null' to clear the default value. + + MAX_RECORDS_IN_RAM: + inputBinding: + position: 5 + prefix: "MAX_RECORDS_IN_RAM=" + separate: false + type: int? + doc: | + When writing files that need to be sorted, this will specify the number of records stored in RAM before spilling to disk. Increasing this number reduces the number of file handles needed to sort the file, and increases the amount of RAM needed. Default value: 500000. This option can be set to 'null' to clear the default value. + + CREATE_INDEX: + inputBinding: + position: 5 + prefix: "CREATE_INDEX=true" + separate: false + type: boolean? + doc: | + Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + CREATE_MD5_FILE: + inputBinding: + position: 5 + prefix: "CREATE_MD5_FILE=true" + separate: false + type: boolean? + doc: | + Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. This option can be set to 'null' to clear the default value. 
Possible values: {true, false} + + REFERENCE_SEQUENCE: + inputBinding: + position: 5 + prefix: "REFERENCE_SEQUENCE=" + separate: false + type: File? + doc: | + Reference sequence file. Default value: null. + + GA4GH_CLIENT_SECRETS: + inputBinding: + position: 5 + prefix: "GA4GH_CLIENT_SECRETS=" + separate: false + type: File? + doc: | + Google Genomics API client_secrets.json file path. Default value: client_secrets.json. This option can be set to 'null' to clear the default value. + + USE_JDK_DEFLATER: + inputBinding: + position: 5 + prefix: "USE_JDK_DEFLATER=true" + separate: false + type: boolean? + doc: | + Use the JDK Deflater instead of the Intel Deflater for writing compressed output Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + USE_JDK_INFLATER: + inputBinding: + position: 5 + prefix: "USE_JDK_INFLATER=true" + separate: false + type: boolean? + doc: | + Use the JDK Inflater instead of the Intel Inflater for reading compressed input Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} + + OPTIONS_FILE: + inputBinding: + position: 5 + prefix: "OPTIONS_FILE=" + separate: false + type: File? + doc: | + File of OPTION_NAME=value pairs. No positional parameters allowed. Unlike command-line options, unrecognized options are ignored. A single-valued option set in an options file may be overridden by a subsequent command-line option. A line starting with '#' is considered a comment. Required.
+ +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/docs/schema_org_rdfa.html + - http://edamontology.org/EDAM_1.18.owl diff --git a/CWL/tools/bioconda-tool-samtools-flagstat.cwl b/CWL/tools/bioconda-tool-samtools-flagstat.cwl new file mode 100644 index 0000000..290d1a5 --- /dev/null +++ b/CWL/tools/bioconda-tool-samtools-flagstat.cwl @@ -0,0 +1,64 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool + +id: "samtools_flagstat" +label: "samtools flagstat" + +cwlVersion: "v1.0" + +doc: | + A Docker container containing samtools flagstat. See the [htslib](http://www.htslib.org/) webpage for more information. + +s:author: + - class: s:Person + s:identifier: https://orcid.org/0000-0001-6231-4417 + s:email: mailto:karl.nordstroem@uni-saarland.de + s:name: Karl Nordström + + +requirements: + - class: InlineJavascriptRequirement + +hints: + - class: ResourceRequirement + coresMin: 1 + ramMin: 4092 + outdirMin: 512000 + - class: DockerRequirement + dockerPull: "quay.io/biocontainers/samtools:1.3.1--5" + +baseCommand: ["samtools", "flagstat"] + +stdout: $( inputs.outputName ) + +outputs: + flagstat: + type: stdout + +inputs: + + input: + type: File + inputBinding: + position: 10 + + outputName: + type: string + + inputFmtOption: + type: string? 
+ inputBinding: + position: 5 + prefix: "--input-fmt-option" + doc: | + "--input-fmt-option OPT=VAL" + + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/docs/schema_org_rdfa.html + - http://edamontology.org/EDAM_1.18.owl diff --git a/CWL/tools/bioconda-tool-samtools-sort.cwl b/CWL/tools/bioconda-tool-samtools-sort.cwl new file mode 100644 index 0000000..fa29698 --- /dev/null +++ b/CWL/tools/bioconda-tool-samtools-sort.cwl @@ -0,0 +1,110 @@ +#!usr/bin/env cwl-runner + +class: CommandLineTool + +id: "samtools_sort" +label: "samtools sort" + +cwlVersion: "v1.0" + +doc: | + Sorts alignments by leftmost coordinates or by read name. Additionally, a sort order header tag will be added or an existing one updated if necessary. + +s:author: + - class: s:Person + s:identifier: https://orcid.org/0000-0002-7816-2363 + s:email: mailto:wiebkeschmitt@outlook.de + s:name: Wiebke Schmitt + +requirements: + - class: InlineJavascriptRequirement + +hints: + - class: ResourceRequirement + coresMin: 1 + ramMin: 4092 + outdirMin: 512000 + - class: DockerRequirement + dockerPull: "quay.io/biocontainers/samtools:1.3.1--5" + +baseCommand: ["samtools", "sort", "-o"] + +stdout: $( inputs.outputPrefix + ".bam" ) + +outputs: + + bamFile: + type: stdout + +inputs: + + input: + type: File + inputBinding: + position: 10 + + outputPrefix: + type: string + inputBinding: + position: 15 + doc: | + output file name + + sortByReadName: + type: boolean? + inputBinding: + position: 5 + prefix: -n + doc: | + sort by name rather than by chromosomal coordinates + + useOutPrefixInsteadOfPrefix: + type: boolean? + inputBinding: + position: 5 + prefix: -f + doc: | + use as full file name instead of prefix + + compressionLevel: + type: int? + inputBinding: + position: 5 + prefix: -l + doc: | + compression level, from 0 to 9 [-1] + + numberOfThreads: + type: int? 
+ inputBinding: + position: 5 + prefix: --threads + doc: | + number of BAM compression threads to use in addition to main thread [0]. + + maxMemoryPerThread: + type: string? + inputBinding: + position: 5 + prefix: -m + doc: | + Approximately the maximum required memory per thread, specified either in bytes or with a K, M or G suffix [768 MiB] + +doc: | + Usage: samtools sort [options] + + Options: + -n Sort by read name + -f use as full file name instead of prefix + -o final output to stdout + -l INT compression level, from 0 to 9 [-1] + -@ INT number of sorting and compression threads [1] + -m INT max memory per thread; suffix K/M/G recognized + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/docs/schema_org_rdfa.html + - https://edamontology.org/EDAM_1.18.owl diff --git a/CWL/tools/bioconda-tool-samtools-view.cwl b/CWL/tools/bioconda-tool-samtools-view.cwl new file mode 100644 index 0000000..6affb1e --- /dev/null +++ b/CWL/tools/bioconda-tool-samtools-view.cwl @@ -0,0 +1,313 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool + +id: "samtools_view" +label: "samtools view" + +cwlVersion: "v1.0" + +doc: | + A Docker container containing samtools index. See the [htslib](http://www.htslib.org/) webpage for more information. + +s:author: + - class: s:Person + s:identifier: https://orcid.org/0000-0001-6231-4417 + s:email: mailto:karl.nordstroem@uni-saarland.de + s:name: Karl Nordström + + +requirements: + - class: InlineJavascriptRequirement + +hints: + - class: ResourceRequirement + coresMin: 1 + ramMin: 4092 + outdirMin: 512000 + - class: DockerRequirement + dockerPull: "quay.io/biocontainers/samtools:1.3.1--5" + +baseCommand: ["samtools", "view"] + +stdout: $( inputs.outputFileName ) + +outputs: + + bamFile: + type: stdout + + complementBamFile: + type: File? 
+ outputBinding: + glob: $( inputs.complementBamFilename) + +inputs: + + input: + type: File + inputBinding: + position: 10 + + outputFileName: + type: string + doc: | + output file name + + region: + type: string? + inputBinding: + position: 15 + doc: | + limit the extraction to this region + + outBam: + type: boolean? + inputBinding: + position: 5 + prefix: -b + doc: | + output BAM + + outCram: + type: boolean? + inputBinding: + position: 5 + prefix: -C + doc: | + output CRAM (requires -T) + + useFastCompression: + type: boolean? + inputBinding: + position: 5 + prefix: "-1" + doc: | + use fast BAM compression (implies -b) + + useNoCompression: + type: boolean? + inputBinding: + position: 5 + prefix: -u + doc: | + uncompressed BAM output (implies -b) + + includeHeader: + type: boolean? + inputBinding: + position: 5 + prefix: -h + doc: | + include header in SAM output + + printOnlyHeader: + type: boolean? + inputBinding: + position: 5 + prefix: -H + doc: | + print SAM header only (no alignments) + + printCountMatchingLines: + type: boolean? + inputBinding: + position: 5 + prefix: -c + doc: | + print only the count of matching records + + complementBamFilename: + type: string? + inputBinding: + position: 5 + prefix: -U + doc: | + output reads not selected by filters to FILE [null] + + chromSizeFile: + type: File? + inputBinding: + position: 5 + prefix: -t + doc: | + FILE listing reference names and lengths (see long help) [null] + + overlapBedFile: + type: File? + inputBinding: + position: 5 + prefix: -L + doc: | + only include reads overlapping this BED FILE [null] + + selectReadGroup: + type: string? + inputBinding: + position: 5 + prefix: -r + doc: | + only include reads in read group STR [null] + + selectReadGroups: + type: File? + inputBinding: + position: 5 + prefix: -R + doc: | + only include reads with read group listed in FILE [null] + + qualityCutoff: + type: int? 
+ inputBinding: + position: 5 + prefix: -q + doc: | + only include reads with mapping quality >= INT [0] + + selectLibrary: + type: string? + inputBinding: + position: 5 + prefix: -l + doc : | + only include reads in library STR [null] + + minimumCIGARoperations: + type: int? + inputBinding: + position: 5 + prefix: -m + doc: | + only include reads with number of CIGAR operations consuming + query sequence >= INT [0] + + selectFLAGall: + type: int? + inputBinding: + position: 5 + prefix: -f + doc: | + only include reads with all bits set in INT set in FLAG [0] + + selectFLAGnone: + type: int? + inputBinding: + position: 5 + prefix: -F + doc: | + only include reads with none of the bits set in INT set in FLAG [0] + + stripTag: + type: + - "null" + - type: array + items: string + inputBinding: + prefix: -x + inputBinding: + position: 5 + doc: | + read tag to strip (repeatable) [null] + + collapseBackwardCIGAR: + type: boolean? + inputBinding: + position: 5 + prefix: -B + doc: | + collapse the backward CIGAR operation + + randomSeed: + type: float? + inputBinding: + position: 5 + prefix: -s + doc: | + integer part sets seed of random number generator [0]; + rest sets fraction of templates to subsample [no subsampling] + + numberOfThreads: + type: int? + inputBinding: + position: 5 + prefix: --threads + doc: | + number of BAM/CRAM compression threads [0] + + outputFormat: + type: + - "null" + - type: enum + symbols: [SAM, BAM, CRAM] + inputBinding: + position: 5 + prefix: --output-fmt + doc: | + Specify output format (SAM, BAM, CRAM) + + referenceFasta: + type: File? + inputBinding: + position: 5 + prefix: --reference + doc: | + Reference sequence FASTA FILE [null] + + inputFormat: + type: boolean? + inputBinding: + position: 5 + prefix: -S + doc: | + input file is a SAM-File, if this parameter is set to true + +doc: | + Usage: samtools view [options] || [region ...] 
+ + Options: + -b output BAM + -C output CRAM (requires -T) + -1 use fast BAM compression (implies -b) + -u uncompressed BAM output (implies -b) + -h include header in SAM output + -H print SAM header only (no alignments) + -c print only the count of matching records + -o FILE output file name [stdout] + -U FILE output reads not selected by filters to FILE [null] + -t FILE FILE listing reference names and lengths (see long help) [null] + -L FILE only include reads overlapping this BED FILE [null] + -r STR only include reads in read group STR [null] + -R FILE only include reads with read group listed in FILE [null] + -q INT only include reads with mapping quality >= INT [0] + -l STR only include reads in library STR [null] + -m INT only include reads with number of CIGAR operations consuming + query sequence >= INT [0] + -f INT only include reads with all bits set in INT set in FLAG [0] + -F INT only include reads with none of the bits set in INT set in FLAG [0] + -x STR read tag to strip (repeatable) [null] + -B collapse the backward CIGAR operation + -s FLOAT integer part sets seed of random number generator [0]; + rest sets fraction of templates to subsample [no subsampling] + -@, --threads INT + number of BAM/CRAM compression threads [0] + -? print long help, including note about region specification + -S ignored (input format is auto-detected) + --input-fmt-option OPT[=VAL] + Specify a single input file format option in the form + of OPTION or OPTION=VALUE + -O, --output-fmt FORMAT[,OPT[=VAL]]... 
+ Specify output format (SAM, BAM, CRAM) + --output-fmt-option OPT[=VAL] + Specify a single output file format option in the form + of OPTION or OPTION=VALUE + -T, --reference FILE + Reference sequence FASTA FILE [null] + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/docs/schema_org_rdfa.html + - http://edamontology.org/EDAM_1.18.owl diff --git a/CWL/workflows/GAL-parallel.cwl b/CWL/workflows/GAL-parallel.cwl new file mode 100644 index 0000000..d7776d0 --- /dev/null +++ b/CWL/workflows/GAL-parallel.cwl @@ -0,0 +1,103 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 + +class: Workflow + +requirements: + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement + - class: MultipleInputFeatureRequirement + - class: SubworkflowFeatureRequirement + - class: ScatterFeatureRequirement + +inputs: + fq1file: File[] + fq2file: File[] + reference_genome: File + outputName: string + prefix: File + #secondaryFiles: + # - ".amb" + # - ".ann" + # - ".bwt" + # - ".pac" + # - ".sa" + +outputs: + logFiles: + type: File[] + outputSource: picardCollectMultipleMetrics/summaryFiles + + markDuplicatesBamFile: + type: File + outputSource: picardMarkDuplicates/OUTPUT_output + +steps: + GAL-part1: + run: GAL-part1.cwl + scatter: + - fq1file + - fq2file + scatterMethod: dotproduct + in: + fq1file: fq1file + fq2file: fq2file + reference_genome: reference_genome + outputName: outputName + prefix: prefix + out: + - samtoolsSortBamFile + + picardMarkDuplicates: + run: ../tools/bioconda-tool-picard-MarkDuplicates.cwl + in: + INPUT: GAL-part1/samtoolsSortBamFile + name: outputName + OUTPUT: + valueFrom: $( inputs.name + ".bam" ) + METRICS_FILE: + valueFrom: $( inputs.name + ".PicardMarkDupmetrics.txt" ) + VALIDATION_STRINGENCY: + valueFrom: SILENT + REMOVE_DUPLICATES: + valueFrom: $( 1==0 ) + ASSUME_SORTED: + valueFrom: $( 1==1 ) + CREATE_INDEX: + valueFrom: $( 1==1 ) + MAX_RECORDS_IN_RAM: + valueFrom: $( 12500000 
) + src: + - GAL-part1/samtoolsSortBamFile + out: [METRICS_FILE_output, OUTPUT_output] + + samtoolsFlagstat: + run: ../tools/bioconda-tool-samtools-flagstat.cwl + in: + input: picardMarkDuplicates/OUTPUT_output + name: outputName + outputName: + valueFrom: $( inputs.name + ".flagstat.txt") + src: [picardMarkDuplicates/METRICS_FILE_output, picardMarkDuplicates/OUTPUT_output] + out: + - flagstat + + picardCollectMultipleMetrics: + run: ../tools/bioconda-tool-picard-collectMultipleMetrics.cwl + in: + INPUT: picardMarkDuplicates/OUTPUT_output + name: outputName + OUTPUT: + valueFrom: $( inputs.name + ".collectMultipleMetrics.txt") + REFERNCE_SEQUENCE: + valueFrom: reference_genome + ASSUME_SORTED: + valueFrom: $( 1==1 ) + VALIDATION_STRINGENCY: + valueFrom: SILENT + PROGRAMsToRun: + valueFrom: $( ["CollectAlignmentSummaryMetrics", "CollectInsertSizeMetrics", "QualityScoreDistribution", "MeanQualityByCycle"] ) + src: [picardMarkDuplicates/METRICS_FILE_output, picardMarkDuplicates/OUTPUT_output] + out: + - summaryFiles diff --git a/CWL/workflows/GAL-part1.cwl b/CWL/workflows/GAL-part1.cwl new file mode 100644 index 0000000..3d5e3b3 --- /dev/null +++ b/CWL/workflows/GAL-part1.cwl @@ -0,0 +1,108 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 + +class: Workflow + +requirements: + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement + - class: MultipleInputFeatureRequirement + - class: SubworkflowFeatureRequirement + +inputs: + fq1file: File + fq2file: File + reference_genome: File + outputName: string + prefix: File + # secondaryFiles: + # - ".amb" + # - ".ann" + # - ".bwt" + # - ".pac" + # - ".sa" + +outputs: + + samtoolsSortBamFile: + type: File + outputSource: samtoolsSort/bamFile + +steps: + bwaAln1: + run: ../tools/bioconda-tool-bwa-aln.yml + in: + input: fq1file + name: outputName + outputName: + valueFrom: $( inputs.name + "_R1_Aln" ) + prefix: prefix + threads: + valueFrom: $( 12 ) + minQual: + valueFrom: $( 20 ) + out: + [alnFile] + + 
bwaAln2: + run: ../tools/bioconda-tool-bwa-aln.yml + in: + input: fq2file + name: outputName + outputName: + valueFrom: $( inputs.name + "_R2_Aln" ) + prefix: prefix + threads: + valueFrom: $( 12 ) + minQual: + valueFrom: $( 20 ) + out: + [alnFile] + + bwaSampe: + run: ../tools/bioconda-tool-bwa-sampe.yml + in: + aln1: bwaAln1/alnFile + aln2: bwaAln2/alnFile + fq1: fq1file + fq2: fq2file + prefix: prefix + outputName: outputName + preloadIndex: + valueFrom: $( 1==1 ) + maximumInsertSize: + valueFrom: $( 1000 ) + readGroupHeaderLine: + valueFrom: $( '@RG\tID:foo\tSM:bar' ) + src: [bwaAln1/alnFile, bwaAln2/alnFile] + out: + [sampeFile] + + samtoolsView: + run: ../tools/bioconda-tool-samtools-view.cwl + in: + input: bwaSampe/sampeFile + outputFileName: + valueFrom: $( inputs.outputName + ".bam") + useNoCompression: + valueFrom: $( 1==1 ) + outBam: + valueFrom: $( 1==1 ) + includeHeader: + valueFrom: $( 1==1 ) + inputFormat: + valueFrom: $( 1==1 ) + src: bwaSampe/sampeFile + out: + [bamFile] + + samtoolsSort: + run: ../tools/bioconda-tool-samtools-sort.cwl + in: + input: samtoolsView/bamFile + name: outputName + outputPrefix: + valueFrom: $( inputs.name + "_sort" ) + src: samtoolsView/bamFile + out: [bamFile] diff --git a/CWL/workflows/GAL-part2.cwl b/CWL/workflows/GAL-part2.cwl new file mode 100644 index 0000000..84a389c --- /dev/null +++ b/CWL/workflows/GAL-part2.cwl @@ -0,0 +1,73 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 + +class: Workflow + +requirements: +- class: StepInputExpressionRequirement +- class: InlineJavascriptRequirement +- class: MultipleInputFeatureRequirement +- class: SubworkflowFeatureRequirement + +inputs: + bamFile: File + outputName: string + +outputs: + logFiles: + type: File[] + outputSource: picardCollectMultipleMetrics/summaryFiles + +steps: + picardMarkDuplicates: + run: ../tools/bioconda-tool-picard-MarkDuplicates.cwl + in: + INPUT: bamFile + name: outputName + OUTPUT: + valueFrom: $( inputs.name + ".bam" ) + METRICS_FILE: + 
valueFrom: $( inputs.name + ".PicardMarkDupmetrics.txt" ) + VALIDATION_STRINGENCY: + valueFrom: SILENT + REMOVE_DUPLICATES: + valueFrom: $( 1==0 ) + ASSUME_SORTED: + valueFrom: $( 1==1 ) + CREATE_INDEX: + valueFrom: $( 1==1 ) + MAX_RECORDS_IN_RAM: + valueFrom: $( 12500000 ) + src: bamFile + out: [METRICS_FILE_output, OUTPUT_output] + + samtoolsFlagstat: + run: ../tools/bioconda-tool-samtools-flagstat.cwl + in: + input: picardMarkDuplicates/OUTPUT_output + name: outputName + outputName: + valueFrom: $( inputs.name + ".flagstat.txt") + src: [picardMarkDuplicates/METRICS_FILE_output, picardMarkDuplicates/OUTPUT_output] + out: + - flagstat + + picardCollectMultipleMetrics: + run: ../tools/bioconda-tool-picard-collectMultipleMetrics.cwl + in: + INPUT: picardMarkDuplicates/OUTPUT_output + name: outputName + OUTPUT: + valueFrom: $( inputs.name + ".collectMultipleMetrics.txt") + REFERNCE_SEQUENCE: + valueFrom: reference_genome + ASSUME_SORTED: + valueFrom: $( 1==1 ) + VALIDATION_STRINGENCY: + valueFrom: SILENT + PROGRAMsToRun: + valueFrom: $( ["CollectAlignmentSummaryMetrics", "CollectInsertSizeMetrics", "QualityScoreDistribution", "MeanQualityByCycle"] ) + src: [picardMarkDuplicates/METRICS_FILE_output, picardMarkDuplicates/OUTPUT_output] + out: + - summaryFiles diff --git a/CWL/workflows/GAL-workflow-job.yml b/CWL/workflows/GAL-workflow-job.yml new file mode 100644 index 0000000..2989c36 --- /dev/null +++ b/CWL/workflows/GAL-workflow-job.yml @@ -0,0 +1,21 @@ +fq1file: + - class: File + path: file_R1.fastq.gz + - class: File + path: file2_R1.fastq.gz + +fq2file: + - class: File + path: file_R2.fastq.gz + - class: File + path: file2_R2.fastq.gz + +reference_genome: + class: File + path: /projects/student/Wiebke/reference/hs37d5_PhiX_Lambda.fa + +outputName: "bamFile" + +prefix: + class: File + path: /projects/student/Wiebke/reference/hs37d5_PhiX_Lambda.fa diff --git a/CWL/workflows/GAL.cwl b/CWL/workflows/GAL.cwl new file mode 100644 index 0000000..fe5c90c --- /dev/null 
+++ b/CWL/workflows/GAL.cwl @@ -0,0 +1,187 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 + +class: Workflow + +requirements: + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement + - class: MultipleInputFeatureRequirement + +inputs: + fq1file: File + fq2file: File + reference_genome: File + outputName: string + prefix: string + +outputs: + + logFiles: + type: File[] + outputSource: picardCollectMultipleMetrics/summaryFiles + + bamFileMarkDuplicates: + type: File + outputSource: picardMarkDuplicates/OUTPUT_output + +# AlignmentSummarymetrics: +# type: File +# outputSource: picardCollectMultipleMetrics/AlignmentSummarymetrics + +# InsertSizemetrics: +# type: File +# outputSource: picardCollectMultipleMetrics/InsertSizemetrics + +# QualityByCyclemetrics: +# type: File +# outputSource: picardCollectMultipleMetrics/QualityByCyclemetrics + +# QualityDistributionmetrics: +# type: File +# outputSource: picardCollectMultipleMetrics/QualityDistributionmetrics + +# QualityByCyclemetricsTwo: +# type: File +# outputSource: picardCollectMultipleMetrics/QualityByCyclemetricsTwo + +# QualityDistributionmetricsTwo: +# type: File +# outputSource: picardCollectMultipleMetrics/QualityDistributionmetricsTwo + +steps: + bwaAln1: + run: ../tools/bioconda-tool-bwa-aln.yml + in: + input: fq1file + name: outputName + outputName: + valueFrom: $( inputs.name + "_R1_Aln" ) + prefix: prefix + threads: + valueFrom: $( 12 ) + minQual: + valueFrom: $( 20 ) + out: + [alnFile] + + bwaAln2: + run: ../tools/bioconda-tool-bwa-aln.yml + in: + input: fq2file + name: outputName + outputName: + valueFrom: $( inputs.name + "_R2_Aln" ) + prefix: prefix + threads: + valueFrom: $( 12 ) + minQual: + valueFrom: $( 20 ) + out: + [alnFile] + + bwaSampe: + run: ../tools/bioconda-tool-bwa-sampe.yml + in: + aln1: bwaAln1/alnFile + aln2: bwaAln2/alnFile + fq1: fq1file + fq2: fq2file + prefix: prefix + outputName: outputName + preloadIndex: + valueFrom: $( 1==1 ) + maximumInsertSize: + 
valueFrom: $( 1000 ) + readGroupHeaderLine: + valueFrom: $( '@RG\tID:foo\tSM:bar' ) + src: [bwaAln1/alnFile, bwaAln2/alnFile] + out: + [sampeFile] + + samtoolsView: + run: ../tools/bioconda-tool-samtools-view.cwl + in: + input: bwaSampe/sampeFile + outputFileName: + valueFrom: $( inputs.outputName + ".bam") + useNoCompression: + valueFrom: $( 1==1 ) + outBam: + valueFrom: $( 1==1 ) + includeHeader: + valueFrom: $( 1==1 ) + inputFormat: + valueFrom: $( 1==1 ) + src: bwaSampe/sampeFile + out: + [bamFile] + + samtoolsSort: + run: ../tools/bioconda-tool-samtools-sort.cwl + in: + input: samtoolsView/bamFile + name: outputName + outputPrefix: + valueFrom: $( inputs.name + "_sort" ) + src: samtoolsView/bamFile + out: [bamFile] + + picardMarkDuplicates: + run: ../tools/bioconda-tool-picard-MarkDuplicates.cwl + in: + INPUT: samtoolsSort/bamFile + name: outputName + OUTPUT: + valueFrom: $( inputs.name + ".bam" ) + METRICS_FILE: + valueFrom: $( inputs.name + ".PicardMarkDupmetrics.txt" ) + VALIDATION_STRINGENCY: + valueFrom: SILENT + REMOVE_DUPLICATES: + valueFrom: $( 1==0 ) + ASSUME_SORTED: + valueFrom: $( 1==1 ) + CREATE_INDEX: + valueFrom: $( 1==1 ) + MAX_RECORDS_IN_RAM: + valueFrom: $( 12500000 ) + src: samtoolsSort/bamFile + out: [METRICS_FILE_output, OUTPUT_output] + + samtoolsFlagstat: + run: ../tools/bioconda-tool-samtools-flagstat.cwl + in: + input: picardMarkDuplicates/OUTPUT_output + name: outputName + outputName: + valueFrom: $( inputs.name + ".flagstat.txt") + src: [picardMarkDuplicates/METRICS_FILE_output, picardMarkDuplicates/OUTPUT_output] + out: + - flagstat + + picardCollectMultipleMetrics: + run: ../tools/bioconda-tool-picard-collectMultipleMetrics.cwl + in: + INPUT: picardMarkDuplicates/OUTPUT_output + name: outputName + OUTPUT: + valueFrom: $( inputs.name + ".collectMultipleMetrics.txt") + REFERNCE_SEQUENCE: + valueFrom: reference_genome + ASSUME_SORTED: + valueFrom: $( 1==1 ) + VALIDATION_STRINGENCY: + valueFrom: SILENT + PROGRAMsToRun: + valueFrom: $( 
["CollectAlignmentSummaryMetrics", "CollectInsertSizeMetrics", "QualityScoreDistribution", "MeanQualityByCycle"] ) + src: [picardMarkDuplicates/METRICS_FILE_output, picardMarkDuplicates/OUTPUT_output] + out: + - summaryFiles +# - AlignmentSummarymetrics +# - InsertSizemetrics +# - QualityByCyclemetrics +# - QualityDistributionmetrics +# - QualityByCyclemetricsTwo +# - QualityDistributionmetricsTwo diff --git a/test/bamFile.collectMultipleMetrics.txt.alignment_summary_metrics b/test/bamFile.collectMultipleMetrics.txt.alignment_summary_metrics new file mode 100644 index 0000000..3cd11d3 --- /dev/null +++ b/test/bamFile.collectMultipleMetrics.txt.alignment_summary_metrics @@ -0,0 +1,12 @@ +## htsjdk.samtools.metrics.StringHeader +# CollectMultipleMetrics INPUT=/tmp/tmpIELU1C/stg5ed07f2e-3a31-4e1b-bdd8-b4420e17058f/bamFile.bam ASSUME_SORTED=true OUTPUT=bamFile.collectMultipleMetrics.txt VALIDATION_STRINGENCY=SILENT STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Mar 16 16:36:47 CET 2018 + +## METRICS CLASS picard.analysis.AlignmentSummaryMetrics +CATEGORY TOTAL_READS PF_READS PCT_PF_READS PF_NOISE_READS PF_READS_ALIGNED PCT_PF_READS_ALIGNED PF_ALIGNED_BASES PF_HQ_ALIGNED_READS PF_HQ_ALIGNED_BASES PF_HQ_ALIGNED_Q20_BASES PF_HQ_MEDIAN_MISMATCHES PF_MISMATCH_RATE PF_HQ_ERROR_RATE PF_INDEL_RATE MEAN_READ_LENGTH READS_ALIGNED_IN_PAIRS PCT_READS_ALIGNED_IN_PAIRS PF_READS_IMPROPER_PAIRS PCT_PF_READS_IMPROPER_PAIRS BAD_CYCLES STRAND_BALANCE PCT_CHIMERAS PCT_ADAPTER SAMPLE LIBRARY READ_GROUP +FIRST_OF_PAIR 250 250 1 0 0 0 0 0 0 0 0 0 0 0 51 0 0 0 0 0 0 0 
0 +SECOND_OF_PAIR 250 250 1 0 0 0 0 0 0 0 0 0 0 0 51 0 0 0 0 0 0 0 0 +PAIR 500 500 1 0 0 0 0 0 0 0 0 0 0 0 51 0 0 0 0 0 0 0 0 + + diff --git a/test/bamFile.collectMultipleMetrics.txt.base_distribution_by_cycle.pdf b/test/bamFile.collectMultipleMetrics.txt.base_distribution_by_cycle.pdf new file mode 100644 index 0000000..47a1758 Binary files /dev/null and b/test/bamFile.collectMultipleMetrics.txt.base_distribution_by_cycle.pdf differ diff --git a/test/bamFile.collectMultipleMetrics.txt.base_distribution_by_cycle_metrics b/test/bamFile.collectMultipleMetrics.txt.base_distribution_by_cycle_metrics new file mode 100644 index 0000000..0557483 --- /dev/null +++ b/test/bamFile.collectMultipleMetrics.txt.base_distribution_by_cycle_metrics @@ -0,0 +1,111 @@ +## htsjdk.samtools.metrics.StringHeader +# CollectMultipleMetrics INPUT=/tmp/tmpIELU1C/stg5ed07f2e-3a31-4e1b-bdd8-b4420e17058f/bamFile.bam ASSUME_SORTED=true OUTPUT=bamFile.collectMultipleMetrics.txt VALIDATION_STRINGENCY=SILENT STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Mar 16 16:36:47 CET 2018 + +## METRICS CLASS picard.analysis.BaseDistributionByCycleMetrics +READ_END CYCLE PCT_A PCT_C PCT_G PCT_T PCT_N +1 1 25.2 21.2 27.6 26 0 +1 2 35.6 20.4 14 30 0 +1 3 30.4 26.4 16.8 26.4 0 +1 4 32.8 21.6 19.6 26 0 +1 5 26.4 23.2 18.8 31.6 0 +1 6 29.6 19.6 22.8 28 0 +1 7 25.6 16.4 22.8 35.2 0 +1 8 31.2 21.2 18.8 28.8 0 +1 9 32.4 17.2 24.8 25.6 0 +1 10 28.4 22.8 18.8 30 0 +1 11 32.4 18 20.4 29.2 0 +1 12 30 20 20.8 29.2 0 +1 13 34 19.6 19.2 27.2 0 +1 14 27.6 25.2 23.6 23.6 0 +1 15 31.2 20 22.4 26.4 0 +1 
16 27.6 19.6 24.8 28 0 +1 17 29.2 26.4 16.4 28 0 +1 18 27.6 22.4 21.6 28.4 0 +1 19 33.2 16 18.4 32.4 0 +1 20 23.6 22 26.4 28 0 +1 21 26.8 18.8 21.6 32.8 0 +1 22 32.4 18.4 24.8 24.4 0 +1 23 30.8 22 23.6 23.6 0 +1 24 26.8 20.8 25.2 27.2 0 +1 25 30 19.6 22.4 28 0 +1 26 27.6 19.2 25.2 28 0 +1 27 25.6 17.6 22.8 34 0 +1 28 28.8 23.2 26 22 0 +1 29 30.4 20.4 21.6 27.6 0 +1 30 25.2 21.2 22.8 30.8 0 +1 31 27.2 22.8 21.2 28.8 0 +1 32 22.8 22 25.2 30 0 +1 33 24.8 23.2 21.6 30.4 0 +1 34 32 21.6 20.4 26 0 +1 35 29.2 21.2 25.2 24.4 0 +1 36 30 19.2 27.6 23.2 0 +1 37 29.2 21.6 20.8 28.4 0 +1 38 31.6 22 19.2 27.2 0 +1 39 28.8 22 22 27.2 0 +1 40 28.4 27.6 19.2 24.8 0 +1 41 28.8 24 20.4 26.8 0 +1 42 26 24.4 21.2 28.4 0 +1 43 29.6 19.6 20.4 30.4 0 +1 44 31.2 17.2 19.2 32.4 0 +1 45 26 20.4 26 27.6 0 +1 46 27.6 18.8 25.2 28.4 0 +1 47 32.4 24 14.8 28.8 0 +1 48 28 26.4 22.4 23.2 0 +1 49 26 25.2 21.2 27.6 0 +1 50 32.4 20.8 19.2 26 1.6 +1 51 34 18 22 17.6 8.4 +2 52 22.4 18.4 20.8 23.6 14.8 +2 53 23.6 13.6 14.4 29.6 18.8 +2 54 24.4 12.4 14.8 19.2 29.2 +2 55 21.2 12.8 16.8 24.8 24.4 +2 56 23.2 16.4 14.4 18 28 +2 57 23.6 14.8 19.2 23.2 19.2 +2 58 20.4 17.6 16 20.8 25.2 +2 59 24 13.6 15.2 20 27.2 +2 60 20 14 12 26 28 +2 61 28 14 12.4 24 21.6 +2 62 28.4 18.8 17.2 21.6 14 +2 63 31.2 21.2 17.2 28.4 2 +2 64 25.6 23.6 18.8 29.6 2.4 +2 65 35.6 18.8 20.8 24.8 0 +2 66 32.4 19.2 21.6 25.2 1.6 +2 67 35.2 18.4 22 24.4 0 +2 68 26.8 26.4 20.4 26.4 0 +2 69 28.4 23.6 22 26 0 +2 70 27.2 21.6 22.8 28.4 0 +2 71 28.4 22.4 18.8 30.4 0 +2 72 26.4 18.8 29.6 25.2 0 +2 73 30 17.6 24.4 28 0 +2 74 23.6 22 22.4 32 0 +2 75 24.4 22 23.2 30.4 0 +2 76 26.4 20 26 27.6 0 +2 77 25.6 21.6 23.6 29.2 0 +2 78 30 17.2 26.8 26 0 +2 79 28.8 23.2 18.8 29.2 0 +2 80 29.6 20.8 20.4 29.2 0 +2 81 26.4 21.6 25.2 26.8 0 +2 82 26 21.6 22 30.4 0 +2 83 29.6 20.4 24.8 25.2 0 +2 84 28.4 18 28.8 24.8 0 +2 85 26.4 22.4 24.4 26.8 0 +2 86 28 24 22.4 25.6 0 +2 87 35.6 23.6 19.6 21.2 0 +2 88 30.8 24 21.6 23.6 0 +2 89 33.2 21.6 18.8 26.4 0 +2 90 27.6 22 
23.2 27.2 0 +2 91 29.6 21.2 19.6 29.6 0 +2 92 29.6 18.8 25.6 26 0 +2 93 28 22.4 22.4 27.2 0 +2 94 24.4 24.4 22 29.2 0 +2 95 25.6 21.2 21.2 32 0 +2 96 31.2 20.8 21.6 26.4 0 +2 97 27.6 21.6 22.4 28.4 0 +2 98 28.4 24 21.6 26 0 +2 99 27.6 21.6 21.2 29.6 0 +2 100 23.2 20 21.6 35.2 0 +2 101 28 22.4 20 29.6 0 +2 102 26.4 28.8 20 24.8 0 + + diff --git a/test/bamFile.collectMultipleMetrics.txt.insert_size_histogram.pdf b/test/bamFile.collectMultipleMetrics.txt.insert_size_histogram.pdf new file mode 100644 index 0000000..0324c40 Binary files /dev/null and b/test/bamFile.collectMultipleMetrics.txt.insert_size_histogram.pdf differ diff --git a/test/bamFile.collectMultipleMetrics.txt.insert_size_metrics b/test/bamFile.collectMultipleMetrics.txt.insert_size_metrics new file mode 100644 index 0000000..c242698 --- /dev/null +++ b/test/bamFile.collectMultipleMetrics.txt.insert_size_metrics @@ -0,0 +1,193 @@ +## htsjdk.samtools.metrics.StringHeader +# CollectMultipleMetrics INPUT=/tmp/tmpIELU1C/stg5ed07f2e-3a31-4e1b-bdd8-b4420e17058f/bamFile.bam ASSUME_SORTED=true OUTPUT=bamFile.collectMultipleMetrics.txt VALIDATION_STRINGENCY=SILENT STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Mar 16 16:36:47 CET 2018 + +## METRICS CLASS picard.analysis.InsertSizeMetrics +MEDIAN_INSERT_SIZE MODE_INSERT_SIZE MEDIAN_ABSOLUTE_DEVIATION MIN_INSERT_SIZE MAX_INSERT_SIZE MEAN_INSERT_SIZE STANDARD_DEVIATION READ_PAIRS PAIR_ORIENTATION WIDTH_OF_10_PERCENT WIDTH_OF_20_PERCENT WIDTH_OF_30_PERCENT WIDTH_OF_40_PERCENT WIDTH_OF_50_PERCENT WIDTH_OF_60_PERCENT WIDTH_OF_70_PERCENT 
WIDTH_OF_80_PERCENT WIDTH_OF_90_PERCENT WIDTH_OF_95_PERCENT WIDTH_OF_99_PERCENT SAMPLE LIBRARY READ_GROUP +252.5 168 76.5 105 1187357 271.693617 110.300288 236 FR 35 65 97 131 153 181 227 275 389 475 629 + +## HISTOGRAM java.lang.Integer +insert_size All_Reads.fr_count +105 1 +109 1 +110 2 +111 1 +112 1 +114 1 +121 1 +123 1 +124 1 +125 1 +127 1 +128 1 +130 1 +132 2 +133 1 +135 1 +139 2 +140 2 +141 1 +143 2 +146 1 +151 2 +153 2 +155 1 +158 1 +160 1 +161 1 +162 1 +163 1 +164 1 +166 2 +167 2 +168 4 +170 1 +171 1 +172 1 +173 1 +175 1 +176 4 +178 1 +179 2 +180 1 +181 1 +183 1 +184 1 +185 2 +186 2 +187 2 +191 1 +192 2 +193 3 +194 1 +196 1 +197 1 +198 1 +200 2 +201 2 +202 1 +206 1 +207 2 +208 1 +209 1 +211 1 +213 1 +214 1 +216 1 +217 2 +219 1 +220 2 +221 1 +222 1 +223 1 +225 1 +226 2 +232 1 +234 1 +235 1 +238 1 +239 2 +242 2 +244 2 +246 1 +248 1 +249 1 +250 1 +252 2 +253 2 +255 1 +256 1 +261 1 +263 1 +265 2 +266 1 +269 1 +270 1 +271 1 +273 1 +274 1 +275 1 +276 2 +277 1 +278 1 +279 1 +280 1 +282 2 +284 1 +286 2 +287 1 +289 2 +291 1 +292 1 +293 2 +295 1 +300 1 +308 1 +309 2 +310 1 +313 3 +315 1 +317 1 +320 1 +322 1 +324 3 +328 2 +329 1 +330 1 +331 1 +332 1 +334 1 +338 1 +339 1 +342 1 +351 1 +354 1 +355 1 +358 1 +359 1 +361 1 +364 1 +367 2 +368 1 +374 1 +376 1 +381 2 +382 1 +386 3 +389 1 +391 2 +394 1 +396 1 +402 1 +408 1 +409 2 +410 2 +416 1 +417 1 +430 2 +434 1 +444 1 +446 1 +447 1 +457 1 +458 1 +462 1 +463 1 +467 1 +468 1 +473 1 +477 1 +480 1 +484 1 +489 1 +492 1 +493 1 +494 1 +498 1 +505 2 +512 1 +548 1 +566 1 +610 1 + diff --git a/test/bamFile.collectMultipleMetrics.txt.quality_by_cycle.pdf b/test/bamFile.collectMultipleMetrics.txt.quality_by_cycle.pdf new file mode 100644 index 0000000..f621269 Binary files /dev/null and b/test/bamFile.collectMultipleMetrics.txt.quality_by_cycle.pdf differ diff --git a/test/bamFile.collectMultipleMetrics.txt.quality_by_cycle_metrics b/test/bamFile.collectMultipleMetrics.txt.quality_by_cycle_metrics new file mode 100644 index 
0000000..ce2faa7 --- /dev/null +++ b/test/bamFile.collectMultipleMetrics.txt.quality_by_cycle_metrics @@ -0,0 +1,111 @@ +## htsjdk.samtools.metrics.StringHeader +# CollectMultipleMetrics INPUT=/tmp/tmpIELU1C/stg5ed07f2e-3a31-4e1b-bdd8-b4420e17058f/bamFile.bam ASSUME_SORTED=true OUTPUT=bamFile.collectMultipleMetrics.txt VALIDATION_STRINGENCY=SILENT STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Mar 16 16:36:47 CET 2018 + + +## HISTOGRAM java.lang.Integer +CYCLE MEAN_QUALITY +1 32.86 +2 32.856 +3 32.86 +4 36.808 +5 36.896 +6 36.72 +7 36.88 +8 36.832 +9 36.912 +10 36.896 +11 36.952 +12 36.832 +13 36.936 +14 39.7 +15 39.612 +16 39.512 +17 39.484 +18 39.484 +19 39.38 +20 39.508 +21 39.42 +22 39.336 +23 39.44 +24 39.532 +25 39.616 +26 39.488 +27 39.452 +28 39.396 +29 39.204 +30 39.344 +31 39.196 +32 39.084 +33 39.228 +34 39.252 +35 39.372 +36 39.196 +37 39.168 +38 39.312 +39 38.98 +40 39.02 +41 39.084 +42 39.228 +43 39.048 +44 38.984 +45 38.952 +46 38.884 +47 38.776 +48 38.776 +49 38.772 +50 38.132 +51 35.244 +52 27.024 +53 26.532 +54 23.784 +55 27.284 +56 26.856 +57 28.064 +58 27.704 +59 27.304 +60 27.012 +61 27.772 +62 29.232 +63 31.308 +64 32.604 +65 36.62 +66 36.776 +67 37.152 +68 37.252 +69 37.388 +70 37.5 +71 37.524 +72 37.532 +73 37.388 +74 37.396 +75 37.296 +76 37.272 +77 37.316 +78 37.248 +79 37.156 +80 37.288 +81 37.112 +82 37.204 +83 37.1 +84 37.148 +85 37.124 +86 36.848 +87 36.928 +88 37.248 +89 37.184 +90 36.964 +91 36.864 +92 36.928 +93 36.92 +94 36.912 +95 37.036 +96 37.116 +97 36.892 +98 37.08 +99 36.92 +100 36.82 +101 36.82 +102 
36.284 + diff --git a/test/bamFile.collectMultipleMetrics.txt.quality_distribution.pdf b/test/bamFile.collectMultipleMetrics.txt.quality_distribution.pdf new file mode 100644 index 0000000..fb9d7ad Binary files /dev/null and b/test/bamFile.collectMultipleMetrics.txt.quality_distribution.pdf differ diff --git a/test/bamFile.collectMultipleMetrics.txt.quality_distribution_metrics b/test/bamFile.collectMultipleMetrics.txt.quality_distribution_metrics new file mode 100644 index 0000000..f5362bd --- /dev/null +++ b/test/bamFile.collectMultipleMetrics.txt.quality_distribution_metrics @@ -0,0 +1,16 @@ +## htsjdk.samtools.metrics.StringHeader +# CollectMultipleMetrics INPUT=/tmp/tmpIELU1C/stg5ed07f2e-3a31-4e1b-bdd8-b4420e17058f/bamFile.bam ASSUME_SORTED=true OUTPUT=bamFile.collectMultipleMetrics.txt VALIDATION_STRINGENCY=SILENT STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Mar 16 16:36:47 CET 2018 + + +## HISTOGRAM java.lang.Byte +QUALITY COUNT_OF_Q +6 69 +15 258 +22 84 +27 407 +33 4095 +37 6320 +40 13601 + diff --git a/test/file_R1.fastq.gz b/test/file_R1.fastq.gz new file mode 100644 index 0000000..46bccf6 Binary files /dev/null and b/test/file_R1.fastq.gz differ diff --git a/test/file_R2.fastq.gz b/test/file_R2.fastq.gz new file mode 100644 index 0000000..05b2638 Binary files /dev/null and b/test/file_R2.fastq.gz differ diff --git a/test/log.txt b/test/log.txt new file mode 100644 index 0000000..3d76534 --- /dev/null +++ b/test/log.txt @@ -0,0 +1,8 @@ +{ + "QualityDistributionmetrics": null, + "QualityByCyclemetricsTwo": null, + 
"InsertSizemetrics": null, + "AlignmentSummarymetrics": null, + "QualityByCyclemetrics": null, + "QualityDistributionmetricsTwo": null +}