1 /** 2 This is the `collectPileUps` command of `dentist`. 3 4 Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de> 5 License: Subject to the terms of the MIT license, as written in the 6 included LICENSE file. 7 Authors: Arne Ludwig <arne.ludwig@posteo.de> 8 */ 9 module dentist.commands.collectPileUps; 10 11 import dentist.commandline : DentistCommand, OptionsFor; 12 import dentist.commands.collectPileUps.filter : 13 AmbiguousAlignmentChainsFilter, 14 ImproperAlignmentChainsFilter, 15 RedundantAlignmentChainsFilter, 16 WeaklyAnchoredAlignmentChainsFilter; 17 import dentist.common : 18 DentistException, 19 ReferenceInterval, 20 ReferenceRegion; 21 import dentist.common.alignments : 22 AlignmentChain, 23 getType, 24 PileUp; 25 import dentist.common.binio : writePileUpsDb; 26 import dentist.dazzler : 27 getAlignments, 28 getNumContigs, 29 readMask; 30 import dentist.util.log; 31 import dentist.util.math : NaturalNumberSet; 32 import std.algorithm : count, isSorted, map, sum; 33 import std.array : array; 34 import std.conv : to; 35 import std.exception : enforce; 36 import std.typecons : tuple; 37 import vibe.data.json : toJson = serializeToJson; 38 39 /// Options for the `collectPileUps` command. 40 alias Options = OptionsFor!(DentistCommand.collectPileUps); 41 42 /// Execute the `collectPileUps` command with `options`. 43 void execute(in Options options) 44 { 45 auto collector = new PileUpCollector(options); 46 47 collector.run(); 48 } 49 50 /// This class comprises the `collectPileUps` step of the `dentist` algorithm 51 class PileUpCollector 52 { 53 protected const Options options; 54 protected size_t numReferenceContigs; 55 protected size_t numReads; 56 protected AlignmentChain[] readsAlignment; 57 protected ReferenceRegion repetitiveRegions; 58 protected NaturalNumberSet unusedReads; 59 60 this(in ref Options options) 61 { 62 this.options = options; 63 } 64 65 void run() 66 { 67 mixin(traceExecution); 68 69 readInputs(); 70 initUnusedReads(); 71 filterAlignments(); 72 auto pileUps = buildPileUps(); 73 writePileUps(pileUps); 74 } 75 76 protected void readInputs() 77 { 78 mixin(traceExecution); 79 80 numReferenceContigs = getNumContigs(options.refDb, options.workdir); 81 numReads = getNumContigs(options.readsDb, options.workdir); 82 logJsonInfo( 83 "numReferenceContigs", numReferenceContigs, 84 "numReads", numReads, 85 ); 86 readsAlignment = getAlignments( 87 options.refDb, 88 options.readsDb, 89 options.readsAlignmentFile, 90 options.workdir, 91 ); 92 repetitiveRegions = ReferenceRegion(readMask!ReferenceInterval( 93 options.refDb, 94 options.repeatMask, 95 options.workdir, 96 )); 97 98 foreach (mask; options.additionalMasks) 99 repetitiveRegions |= ReferenceRegion(readMask!ReferenceInterval( 100 options.refDb, 101 mask, 102 options.workdir, 103 )); 104 105 enforce!DentistException(readsAlignment.length > 0, "empty ref vs. reads alignment"); 106 } 107 108 protected void initUnusedReads() 109 { 110 mixin(traceExecution); 111 112 unusedReads.reserveFor(numReads); 113 foreach (readId; 1 .. numReads + 1) 114 { 115 unusedReads.add(readId); 116 } 117 } 118 119 protected void filterAlignments() 120 { 121 mixin(traceExecution); 122 123 auto filters = tuple( 124 new WeaklyAnchoredAlignmentChainsFilter(repetitiveRegions, options.minAnchorLength), 125 new ImproperAlignmentChainsFilter(), 126 new AmbiguousAlignmentChainsFilter(&unusedReads), 127 new RedundantAlignmentChainsFilter(&unusedReads), 128 ); 129 logJsonDiagnostic( 130 "filterStage", "Input", 131 "readsAlignment", shouldLog(LogLevel.debug_) 132 ? readsAlignment.toJson 133 : toJson(null), 134 "numAlignmentChains", readsAlignment.count!"!a.flags.disabled", 135 ); 136 137 foreach (i, filter; filters) 138 { 139 readsAlignment = filter(readsAlignment); 140 assert(isSorted(readsAlignment)); 141 142 logJsonDiagnostic( 143 "filterStage", typeof(filter).stringof, 144 "readsAlignment", shouldLog(LogLevel.debug_) 145 ? readsAlignment.toJson 146 : toJson(null), 147 "numAlignmentChains", readsAlignment.count!"!a.flags.disabled", 148 ); 149 } 150 } 151 152 protected PileUp[] buildPileUps() 153 { 154 mixin(traceExecution); 155 156 import dentist.commands.collectPileUps.pileups : build; 157 auto pileUps = build( 158 numReferenceContigs, 159 readsAlignment, 160 options, 161 ); 162 163 logJsonDebug("pileUps", pileUps 164 .map!(pileUp => toJson([ 165 "type": toJson(pileUp.getType.to!string), 166 "readAlignments": pileUp.map!"a[]".array.toJson, 167 ])) 168 .array 169 .toJson); 170 logJsonInfo( 171 "numPileUps", pileUps.length, 172 "numAlignmentChains", pileUps.map!"a[].length".sum, 173 ); 174 175 return pileUps; 176 } 177 178 protected void writePileUps(PileUp[] pileUps) 179 { 180 mixin(traceExecution); 181 182 writePileUpsDb(pileUps, options.pileUpsFile); 183 } 184 }