1 /**
2     This is the `findClosableGaps` command of `dentist`.
3 
4     Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de>
5     License: Subject to the terms of the MIT license, as written in the
6              included LICENSE file.
7     Authors: Arne Ludwig <arne.ludwig@posteo.de>
8 */
9 module dentist.commands.findClosableGaps;
10 
11 import dentist.commandline : OptionsFor, TestingCommand;
12 import dentist.common :
13     isTesting,
14     ReferenceInterval,
15     ReferenceRegion;
16 import dentist.common.alignments :
17     AlignmentChain,
18     coord_t,
19     id_t;
20 import dentist.dazzler :
21     ContigSegment,
22     GapSegment,
23     getScaffoldStructure,
24     readMask,
25     ScaffoldSegment;
26 import dentist.util.algorithm : cmpLexicographically;
27 import dentist.util.log;
28 import std.algorithm :
29     copy,
30     filter,
31     map,
32     sort,
33     swap;
34 import std.array : array;
35 import std.format : format, formattedRead;
36 import std.range : assumeSorted, enumerate;
37 import std.stdio : File, writeln;
38 import std..string : capitalize;
39 import std.typecons : PhobosFlag = Flag, tuple;
40 import vibe.data.json : toJson = serializeToJson, serializeToJsonString;
41 
42 
43 static if (isTesting):
44 
45 alias Options = OptionsFor!(TestingCommand.findClosableGaps);
46 
47 /// Execute the `findClosableGaps` command with `options`.
48 void execute(in Options options)
49 {
50     auto finder = ClosableGapsFinder(options);
51 
52     finder.run();
53 }
54 
55 struct ClosableGapsFinder
56 {
57     const(Options) options;
58     const(ScaffoldSegment)[] trueAssemblyScaffoldStructure;
59     ReferenceRegion mappedRegionsMask;
60     TrueAlignment[] trueAlignments;
61     ClosableGap[] closableGaps;
62 
63     void run()
64     {
65         init();
66         findClosableGaps();
67 
68         writeln(serializeToJsonString(closableGaps));
69     }
70 
71     void init()
72     {
73         trueAssemblyScaffoldStructure = getScaffoldStructure(options.trueAssemblyDb).array;
74         mappedRegionsMask = ReferenceRegion(readMask!ReferenceInterval(
75             options.trueAssemblyDb,
76             options.mappedRegionsMask,
77             options.workdir,
78         ));
79         trueAlignments = readTrueAlignments(options.readsMap);
80 
81         logJsonDebug(
82             "trueAssemblyScaffoldStructure", trueAssemblyScaffoldStructure
83                 .map!(scaffoldPart => scaffoldPart.peek!GapSegment !is null
84                     ? scaffoldPart.get!GapSegment.toJson
85                     : scaffoldPart.get!ContigSegment.toJson)
86                 .array
87                 .toJson,
88             "mappedRegionsMask", mappedRegionsMask.intervals.toJson,
89             "trueAlignments", trueAlignments.toJson,
90         );
91     }
92 
93     void findClosableGaps()
94     {
95         alias isContigPart = contigPart => contigPart.peek!ContigSegment !is null;
96         alias mkContigPart = contigPart => contigPart.get!ContigSegment;
97 
98         id_t currentContigId;
99         foreach (contigPart; trueAssemblyScaffoldStructure.filter!isContigPart.map!mkContigPart)
100         {
101             // `contigId` is 1-based
102             ++currentContigId;
103             auto trueContigRegion = ReferenceRegion(ReferenceInterval(
104                 contigPart.globalContigId,
105                 0,
106                 contigPart.length,
107             ));
108             auto scaffoldGaps = trueContigRegion - mappedRegionsMask;
109             this.closableGaps.length += scaffoldGaps.intervals.length;
110             auto closableGaps = this.closableGaps[$ - scaffoldGaps.intervals.length .. $];
111 
112             auto trueAlignments = this.trueAlignments
113                 .assumeSorted!"a.scaffoldId < b.scaffoldId"
114                 .equalRange(TrueAlignment(cast(id_t) contigPart.scaffoldId))
115                 .map!(read => TrueAlignment(
116                     read.scaffoldId,
117                     cast(coord_t) (read.begin - contigPart.begin),
118                     cast(coord_t) (read.end - contigPart.begin),
119                     read.flags,
120                     read.readId,
121                 ))
122                 .array;
123 
124             foreach (i, gap; scaffoldGaps.intervals)
125             {
126                 closableGaps[i].fromContig = cast(id_t) (currentContigId);
127                 closableGaps[i].toContig = cast(id_t) (currentContigId + 1);
128                 closableGaps[i].gapSize = cast(coord_t) gap.size;
129                 closableGaps[i].mappedInterval = gap;
130 
131                 if (gap.begin == 0 || gap.end == contigPart.length)
132                     // Skip gaps at the beginning or end of a true scaffold
133                     continue;
134 
135                 foreach (read; trueAlignments)
136                     if (
137                         read.begin < gap.begin &&
138                         (gap.begin - read.begin) >= options.minAnchorLength &&
139                         gap.end < read.end &&
140                         (read.end - gap.end) >= options.minAnchorLength
141                     )
142                         closableGaps[i].spanningReads ~= read.readId;
143                 closableGaps[i].spanningReads.sort;
144                 ++currentContigId;
145             }
146         }
147 
148         auto bufferRest = closableGaps
149             .filter!(closableGap => closableGap.spanningReads.length >= options.minSpanningReads)
150             .copy(closableGaps);
151         closableGaps = closableGaps[0 .. $ - bufferRest.length];
152     }
153 }
154 
155 struct ClosableGap
156 {
157     id_t fromContig;
158     id_t toContig;
159     coord_t gapSize;
160     ReferenceInterval mappedInterval;
161     id_t[] spanningReads;
162 }
163 
164 struct TrueAlignment
165 {
166     static alias Flag = AlignmentChain.Flag;
167     static alias Flags = AlignmentChain.Flags;
168 
169     id_t scaffoldId;
170     coord_t begin;
171     coord_t end;
172     Flags flags;
173     id_t readId;
174 
175     static foreach(flagName; __traits(allMembers, Flag))
176     {
177         mixin(format!(q"<
178             static alias %1$s = PhobosFlag!"%2$s";
179 
180             @property PhobosFlag!"%2$s" %2$s() pure const nothrow @trusted
181             {
182                 return cast(PhobosFlag!"%2$s") flags.%2$s;
183             }
184 
185             @property void %2$s(PhobosFlag!"%2$s" %2$s) pure nothrow
186             {
187                 flags.%2$s = %2$s;
188             }
189         >")(flagName.capitalize, flagName));
190     }
191 
192     int opCmp(in TrueAlignment other) const pure nothrow
193     {
194         return cmpLexicographically!(
195             TrueAlignment,
196             "a.scaffoldId",
197             "a.begin",
198             "a.end",
199             "a.readId",
200         )(this, other);
201     }
202 }
203 
204 TrueAlignment[] readTrueAlignments(in string readsMap)
205 {
206     auto trueAlignments = File(readsMap)
207         .byLine
208         .enumerate(1)
209         .map!parseTrueAlignment
210         .array;
211     trueAlignments.sort;
212 
213     return trueAlignments;
214 }
215 
216 // Not meant for public usage.
217 TrueAlignment parseTrueAlignment(EnumLine)(EnumLine enumLine)
218 {
219     auto line = enumLine.value;
220     TrueAlignment trueAlignment;
221     trueAlignment.readId = enumLine.index;
222 
223     line.formattedRead!" %d %d %d"(
224         trueAlignment.scaffoldId,
225         trueAlignment.begin,
226         trueAlignment.end,
227     );
228 
229     if (trueAlignment.begin > trueAlignment.end)
230     {
231         swap(trueAlignment.begin, trueAlignment.end);
232         trueAlignment.flags |= TrueAlignment.Flag.complement;
233     }
234 
235     return trueAlignment;
236 }