1 /**
2     This is the `collectPileUps` command of `dentist`.
3 
4     Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de>
5     License: Subject to the terms of the MIT license, as written in the
6              included LICENSE file.
7     Authors: Arne Ludwig <arne.ludwig@posteo.de>
8 */
9 module dentist.commands.collectPileUps;
10 
11 import dentist.commandline : DentistCommand, OptionsFor;
12 import dentist.commands.collectPileUps.filter :
13     AmbiguousAlignmentChainsFilter,
14     ImproperAlignmentChainsFilter,
15     RedundantAlignmentChainsFilter,
16     WeaklyAnchoredAlignmentChainsFilter;
17 import dentist.common :
18     DentistException,
19     ReferenceInterval,
20     ReferenceRegion;
21 import dentist.common.alignments :
22     AlignmentChain,
23     getType,
24     PileUp;
25 import dentist.common.binio : writePileUpsDb;
26 import dentist.dazzler :
27     getAlignments,
28     getNumContigs,
29     readMask;
30 import dentist.util.log;
31 import dentist.util.math : NaturalNumberSet;
32 import std.algorithm : count, isSorted, map, sum;
33 import std.array : array;
34 import std.conv : to;
35 import std.exception : enforce;
36 import std.typecons : tuple;
37 import vibe.data.json : toJson = serializeToJson;
38 
39 /// Options for the `collectPileUps` command.
40 alias Options = OptionsFor!(DentistCommand.collectPileUps);
41 
42 /// Execute the `collectPileUps` command with `options`.
43 void execute(in Options options)
44 {
45     auto collector = new PileUpCollector(options);
46 
47     collector.run();
48 }
49 
50 /// This class comprises the `collectPileUps` step of the `dentist` algorithm
51 class PileUpCollector
52 {
53     protected const Options options;
54     protected size_t numReferenceContigs;
55     protected size_t numReads;
56     protected AlignmentChain[] readsAlignment;
57     protected ReferenceRegion repetitiveRegions;
58     protected NaturalNumberSet unusedReads;
59 
60     this(in ref Options options)
61     {
62         this.options = options;
63     }
64 
65     void run()
66     {
67         mixin(traceExecution);
68 
69         readInputs();
70         initUnusedReads();
71         filterAlignments();
72         auto pileUps = buildPileUps();
73         writePileUps(pileUps);
74     }
75 
76     protected void readInputs()
77     {
78         mixin(traceExecution);
79 
80         numReferenceContigs = getNumContigs(options.refDb, options.workdir);
81         numReads = getNumContigs(options.readsDb, options.workdir);
82         logJsonInfo(
83             "numReferenceContigs", numReferenceContigs,
84             "numReads", numReads,
85         );
86         readsAlignment = getAlignments(
87             options.refDb,
88             options.readsDb,
89             options.readsAlignmentFile,
90             options.workdir,
91         );
92         repetitiveRegions = ReferenceRegion(readMask!ReferenceInterval(
93             options.refDb,
94             options.repeatMask,
95             options.workdir,
96         ));
97 
98         foreach (mask; options.additionalMasks)
99             repetitiveRegions |= ReferenceRegion(readMask!ReferenceInterval(
100                 options.refDb,
101                 mask,
102                 options.workdir,
103             ));
104 
105         enforce!DentistException(readsAlignment.length > 0, "empty ref vs. reads alignment");
106     }
107 
108     protected void initUnusedReads()
109     {
110         mixin(traceExecution);
111 
112         unusedReads.reserveFor(numReads);
113         foreach (readId; 1 .. numReads + 1)
114         {
115             unusedReads.add(readId);
116         }
117     }
118 
119     protected void filterAlignments()
120     {
121         mixin(traceExecution);
122 
123         auto filters = tuple(
124             new WeaklyAnchoredAlignmentChainsFilter(repetitiveRegions, options.minAnchorLength),
125             new ImproperAlignmentChainsFilter(),
126             new AmbiguousAlignmentChainsFilter(&unusedReads),
127             new RedundantAlignmentChainsFilter(&unusedReads),
128         );
129         logJsonDiagnostic(
130             "filterStage", "Input",
131             "readsAlignment", shouldLog(LogLevel.debug_)
132                 ? readsAlignment.toJson
133                 : toJson(null),
134             "numAlignmentChains", readsAlignment.count!"!a.flags.disabled",
135         );
136 
137         foreach (i, filter; filters)
138         {
139             readsAlignment = filter(readsAlignment);
140             assert(isSorted(readsAlignment));
141 
142             logJsonDiagnostic(
143                 "filterStage", typeof(filter).stringof,
144                 "readsAlignment", shouldLog(LogLevel.debug_)
145                     ? readsAlignment.toJson
146                     : toJson(null),
147                 "numAlignmentChains", readsAlignment.count!"!a.flags.disabled",
148             );
149         }
150     }
151 
152     protected PileUp[] buildPileUps()
153     {
154         mixin(traceExecution);
155 
156         import dentist.commands.collectPileUps.pileups : build;
157         auto pileUps = build(
158             numReferenceContigs,
159             readsAlignment,
160             options,
161         );
162 
163         logJsonDebug("pileUps", pileUps
164             .map!(pileUp => toJson([
165                 "type": toJson(pileUp.getType.to!string),
166                 "readAlignments": pileUp.map!"a[]".array.toJson,
167             ]))
168             .array
169             .toJson);
170         logJsonInfo(
171             "numPileUps", pileUps.length,
172             "numAlignmentChains", pileUps.map!"a[].length".sum,
173         );
174 
175         return pileUps;
176     }
177 
178     protected void writePileUps(PileUp[] pileUps)
179     {
180         mixin(traceExecution);
181 
182         writePileUpsDb(pileUps, options.pileUpsFile);
183     }
184 }