The Nextflow language

The main file

We can use GitPod to browse and experiment with the code. We need to log in with a GitHub account and open the project [… test this …].

Then we go to the workspace and open the main.nf file.

#!/usr/bin/env nextflow

/*
 * Copyright (c) 2021, Centre for Genomic Regulation (CRG).
 *
 *   This file is part of 'CRG_course_NextFlow'.
 *
 *   CRG_course_NextFlow is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   CRG_course_NextFlow is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with CRG_course_NextFlow.  If not, see <http://www.gnu.org/licenses/>.
 */


/*
 * Enable DSL2, the second version of the Nextflow domain-specific language.
 * The `include` and `workflow` statements used further down are DSL2 syntax.
 */

nextflow.enable.dsl=2


/* 
 * Nextflow test pipeline (blocks delimited like this one are comments)
 * @authors
 * Luca Cozzuto <lucacozzuto@gmail.com>
 * 
 */

/*
 * Input parameters: read pairs, reference and output
 * The configuration is in the nextflow.config file.
 * The parameters are stored in the params.config file.
 */

// Give params.help a default value so that reading it below does not
// trigger an "undefined parameter" warning when --help is not passed.
params.help             = false

// Print a banner listing the input parameters. This statement comes before
// the help check, so the banner is logged in both cases.
log.info """
BIOCORE@CRG - N F TESTPIPE  
=============================================
reads                           : ${params.reads}
reference                       : ${params.reference}
output				: ${params.output}
"""

// When the --help parameter is given on the command line, print a short
// usage message and stop the pipeline.
// NOTE(review): the help path exits with status 1 (a failure code) — confirm this is intended.
if (params.help) {
    log.info 'This is the Biocore\'s Nextflow test pipeline'
    log.info 'Please define reads, reference and output!\n'
    log.info 'Enjoy!'
    log.info '\n'
    exit 1
}

/*
 * Define the output folders, one per pipeline step, all located under params.output.
 * These values are passed to the included modules as their OUTPUT parameter, and
 * multiqcOutputFolder is also used in the completion message.
 * NOTE(review): the sub-folder names are spelled "ouptut_*" (probably a typo for
 * "output_*"); they are left unchanged here because they are runtime paths.
 */
fastqcOutputFolder    = "${params.output}/ouptut_fastqc"
alnOutputFolder       = "${params.output}/ouptut_aln"
multiqcOutputFolder   = "${params.output}/ouptut_multiQC"

/*
 * Define the module / subworkflow path and include the elements.
 * subwork_folder carries no trailing slash, so joining it with "/<name>"
 * below does not produce a double slash in the module paths.
 * addParams passes each included element its own OUTPUT folder
 * (and, for BOWTIE, a LABEL value).
 */
subwork_folder = "${projectDir}/subworkflows"
include { fastqc } from "${subwork_folder}/fastqc" addParams(OUTPUT: fastqcOutputFolder)
include { BOWTIE } from "${subwork_folder}/bowtie" addParams(OUTPUT: alnOutputFolder, LABEL:'twocpus')
include { multiqc } from "${subwork_folder}/multiqc" addParams(OUTPUT: multiqcOutputFolder)


/*
 * Build the "reads" channel from the files matching the params.reads wildcard.
 * If nothing matches, the pipeline stops with an error.
 */
reads = Channel
    .fromPath(params.reads)
    .ifEmpty { error "Cannot find any reads matching: ${params.reads}" }

/*
 * Resolve the reference file and stop with an error if it does not exist.
 */
reference = file(params.reference)
if (!reference.exists()) {
    error "Cannot find any reference file matching: ${params.reference}"
}

/*
 * MAIN workflow definition.
 * 1) Run fastqc on the input reads
 * 2) Run the BOWTIE sub-workflow on the reference and the reads
 * 3) Mix the fastqc output with the BOWTIE logs, collect everything into
 *    a single emission and run multiqc on it
 * Finally, print the BOWTIE sam and log outputs.
 */
workflow {
    qc_reports = fastqc(reads)
    alignments = BOWTIE(reference, reads)

    // multiqc receives all the reports at once
    multiqc_input = qc_reports.mix(alignments.logs).collect()
    multiqc(multiqc_input)

    // view the output sam files
    alignments.sam.view()
    // view the output log files
    alignments.logs.view()
}


/*
 * When the run completes, print a message: the location of the MultiQC
 * report on success, a short failure notice otherwise.
 */
workflow.onComplete {
    if (workflow.success) {
        println "\nDone! Open the following report in your browser --> ${multiqcOutputFolder}/multiqc_report.html\n"
    }
    else {
        println "Oops .. something went wrong"
    }
}

The params.config file

This file contains the definitions of the pipeline parameters, together with their default values. Each default can be overridden on the command line by prefixing the parameter name with two hyphens (e.g. --reads or --reference).

// Default values of the pipeline parameters.
params {
    reads     = "$projectDir/data/*.fastq.gz"
    reference = "$projectDir/data/chr19.fasta.gz"
    output    = "./output"
}