1 /**
2     This is the `translateCoords` command of `dentist`.
3 
4     Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de>
5     License: Subject to the terms of the MIT license, as written in the
6              included LICENSE file.
7     Authors: Arne Ludwig <arne.ludwig@posteo.de>
8 */
9 module dentist.commands.translateCoords;
10 
11 import dentist.common : isTesting;
12 
13 static if (isTesting):
14 
15 import dentist.commandline : OptionsFor, TestingCommand;
16 import dentist.common :
17     DentistException,
18     OutputCoordinate;
19 import dentist.common.alignments : arithmetic_t, coord_t, id_t;
20 import dentist.common.binio : InsertionDb;
21 import dentist.common.insertions :
22     getInfoForExistingContig,
23     getInfoForGap,
24     getInfoForNewSequenceInsertion,
25     Insertion,
26     InsertionInfo,
27     isValidInsertion,
28     isOutputGap,
29     OutputScaffold;
30 import dentist.common.scaffold :
31     ContigNode,
32     isAntiParallel,
33     isDefault,
34     linearWalk,
35     scaffoldStarts;
36 import dentist.util.algorithm : uniqInPlace;
37 import dentist.util.log;
38 import std.algorithm :
39     find,
40     map,
41     maxElement,
42     min,
43     sort,
44     uniq;
45 import std.array : array;
46 import std.conv : to;
47 import std.exception : enforce;
48 import std.format : format;
49 import std.range :
50     chain,
51     dropExactly,
52     only;
53 import std.range.primitives;
54 import std.stdio : writefln, writeln;
55 import vibe.data.json :
56     toJson = serializeToJson,
57     toJsonString = serializeToPrettyJson;
58 
59 
60 /// Options for the `translateCoords` command.
61 alias Options = OptionsFor!(TestingCommand.translateCoords);
62 
63 /// Execute the `translateCoords` command with `options`.
64 void execute(in Options options)
65 {
66     auto translator = new CoordinateTranslator(options);
67 
68     translator.run();
69 }
70 
71 enum SequenceType
72 {
73     existing,
74     insertion,
75     unkown,
76 }
77 
78 struct ReferenceCoordinate
79 {
80     id_t contigId;
81     arithmetic_t contigCoord;
82     coord_t contigLength;
83     id_t[] contigIds;
84     bool isReverseComplement;
85     SequenceType sequenceType;
86 }
87 
88 class CoordinateTranslator
89 {
90     alias OriginType = OutputCoordinate.OriginType;
91 
92     const(Options) options;
93     OutputScaffold assemblyGraph;
94     OutputScaffold.IncidentEdgesCache incidentEdgesCache;
95     ContigNode[] scaffoldStartNodes;
96 
97     this(in Options options)
98     {
99         this.options = options;
100     }
101 
102     void run()
103     {
104         mixin(traceExecution);
105 
106         init();
107 
108         foreach (i, outputCoordinate; options.outputCoordinates)
109         {
110             try
111             {
112                 auto walker = Walker(
113                     outputCoordinate,
114                     assemblyGraph,
115                     incidentEdgesCache,
116                     scaffoldStartNodes,
117                 );
118 
119                 walker.walkToCoordinate();
120 
121                 if (options.useJson)
122                     writeJson(walker.refCoord);
123                 else
124                 {
125                     if (i > 0)
126                         writeln();
127                     writeTabular(walker.refCoord);
128                 }
129             }
130             catch (DentistException e)
131             {
132                 logJsonError(
133                     "info", e.msg,
134                     "coord", outputCoordinate.toString(),
135                 );
136             }
137         }
138     }
139 
140     protected void init()
141     {
142         mixin(traceExecution);
143 
144         auto insertionDb = InsertionDb.parse(options.assemblyGraphFile);
145         auto insertions = insertionDb[];
146         auto nodes = chain(
147             insertions.map!"a.start".uniq,
148             insertions.map!"a.end".uniq,
149         )
150             .array
151             .sort
152             .release;
153         uniqInPlace(nodes);
154 
155         assemblyGraph = OutputScaffold(nodes);
156         assemblyGraph.bulkAddForce(insertions);
157         incidentEdgesCache = assemblyGraph.allIncidentEdges();
158         scaffoldStartNodes = scaffoldStarts!InsertionInfo(assemblyGraph, incidentEdgesCache).array;
159     }
160 
161     protected static void writeTabular(ReferenceCoordinate refCoord)
162     {
163         auto stats = [
164             format!`%d`(refCoord.contigId),
165             format!`%d`(refCoord.contigCoord),
166             format!`%d`(refCoord.contigLength),
167             format!`%-(%d,%)`(refCoord.contigIds),
168             refCoord.isReverseComplement ? "yes" : "no",
169             refCoord.sequenceType.to!string,
170         ];
171         auto columnWidth = stats.map!"a.length".maxElement;
172 
173         writefln!"refContigId:        %*s"(columnWidth, stats[0]);
174         writefln!"refContigCoord:     %*s"(columnWidth, stats[1]);
175         writefln!"refContigLength:    %*s"(columnWidth, stats[2]);
176         writefln!"flankingRefContigs: %*s"(columnWidth, stats[3]);
177         writefln!"reverseComplement:  %*s"(columnWidth, stats[4]);
178         writefln!"sequenceType:       %*s"(columnWidth, stats[5]);
179     }
180 
181     protected static void writeJson(ReferenceCoordinate refCoord)
182     {
183         writeln(toJsonString([
184             "refContig": [
185                 "id": refCoord.contigId,
186                 "coord": refCoord.contigCoord,
187                 "length": refCoord.contigLength,
188             ].toJson,
189             "flankingRefContigs": refCoord.contigIds.toJson,
190             "isReverseComplement": refCoord.isReverseComplement.toJson,
191             "sequenceType": refCoord.sequenceType.to!string.toJson,
192         ]));
193     }
194 }
195 
196 private struct Walker
197 {
198     alias OriginType = OutputCoordinate.OriginType;
199 
200     const(OutputCoordinate) outCoord;
201     OutputScaffold assemblyGraph;
202     OutputScaffold.IncidentEdgesCache incidentEdgesCache;
203     ContigNode[] scaffoldStartNodes;
204 
205     coord_t numWalkedBasePairs;
206     id_t lastContigId;
207     arithmetic_t lastContigStart;
208     coord_t lastContigLength;
209     SequenceType lastWalkedSequenceType;
210     ReferenceCoordinate refCoord;
211 
212     protected void walkToCoordinate()
213     {
214         final switch (outCoord.originType)
215         {
216         case OriginType.scaffold:
217             auto scaffoldStart = getStartOfScaffold(outCoord.scaffoldId);
218 
219             walkScaffold(scaffoldStart);
220             break;
221         case OriginType.scaffoldContig:
222             assert(0, "unimplemented");
223         case OriginType.global:
224             assert(0, "unimplemented");
225         case OriginType.contig:
226             assert(0, "unimplemented");
227         }
228     }
229 
230     protected ContigNode getStartOfScaffold(id_t scaffoldId)
231     {
232         alias byContigId = (node, needleId) => node.contigId == needleId;
233 
234         auto candidates = scaffoldStartNodes.find!byContigId(scaffoldId);
235 
236         enforce!DentistException(!candidates.empty, "scaffold does not exist");
237 
238         return candidates.front;
239     }
240 
241     protected void walkScaffold(ContigNode startNode)
242     {
243         mixin(traceExecution);
244 
245         ContigNode insertionBegin = startNode;
246         Insertion lastInsertion;
247         auto globalComplement = false;
248 
249         foreach (currentInsertion; linearWalk!InsertionInfo(assemblyGraph, startNode, incidentEdgesCache))
250         {
251             lastInsertion = currentInsertion;
252 
253             walkInsertion(
254                 insertionBegin,
255                 currentInsertion,
256                 globalComplement,
257             );
258 
259             if (numWalkedBasePairs >= outCoord.coord)
260                 break;
261 
262             insertionBegin = currentInsertion.target(insertionBegin);
263             if (currentInsertion.isAntiParallel)
264                 globalComplement = !globalComplement;
265         }
266 
267         enforce!DentistException(
268             numWalkedBasePairs == outCoord.coord,
269             "requested coordinate is out of bounds",
270         );
271 
272         refCoord.contigId = lastContigId;
273         refCoord.contigLength = lastContigLength;
274         refCoord.contigCoord = numWalkedBasePairs - lastContigStart;
275         if (globalComplement)
276             refCoord.contigCoord = lastContigLength - refCoord.contigCoord;
277         refCoord.contigIds = only(
278             cast(id_t) lastInsertion.start.contigId,
279             cast(id_t) lastInsertion.end.contigId,
280         ).uniq.array;
281         refCoord.sequenceType = lastWalkedSequenceType;
282         refCoord.isReverseComplement = globalComplement;
283     }
284 
285     protected void walkInsertion(
286         in ContigNode begin,
287         in Insertion insertion,
288         in bool globalComplement,
289     )
290     {
291         assert(insertion.isValidInsertion, "invalid insertion");
292         size_t length;
293 
294         if (insertion.isDefault)
295         {
296             auto info = getInfoForExistingContig(begin, insertion, globalComplement);
297 
298             lastContigId = info.contigId;
299             lastContigStart = numWalkedBasePairs;
300             lastContigLength = cast(coord_t) info.contigLength;
301             length = info.length;
302             lastWalkedSequenceType = SequenceType.existing;
303         }
304         else if (insertion.isOutputGap)
305         {
306             length = getInfoForGap(insertion).length;
307             lastWalkedSequenceType = SequenceType.unkown;
308         }
309         else
310         {
311             length = getInfoForNewSequenceInsertion(begin, insertion, globalComplement).length;
312             lastWalkedSequenceType = SequenceType.insertion;
313         }
314 
315         numWalkedBasePairs = min(
316             numWalkedBasePairs + cast(coord_t) length,
317             outCoord.coord,
318         );
319     }
320 }