1 /** 2 This is the `translateCoords` command of `dentist`. 3 4 Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de> 5 License: Subject to the terms of the MIT license, as written in the 6 included LICENSE file. 7 Authors: Arne Ludwig <arne.ludwig@posteo.de> 8 */ 9 module dentist.commands.translateCoords; 10 11 import dentist.common : isTesting; 12 13 static if (isTesting): 14 15 import dentist.commandline : OptionsFor, TestingCommand; 16 import dentist.common : 17 DentistException, 18 OutputCoordinate; 19 import dentist.common.alignments : arithmetic_t, coord_t, id_t; 20 import dentist.common.binio : InsertionDb; 21 import dentist.common.insertions : 22 getInfoForExistingContig, 23 getInfoForGap, 24 getInfoForNewSequenceInsertion, 25 Insertion, 26 InsertionInfo, 27 isValidInsertion, 28 isOutputGap, 29 OutputScaffold; 30 import dentist.common.scaffold : 31 ContigNode, 32 isAntiParallel, 33 isDefault, 34 linearWalk, 35 scaffoldStarts; 36 import dentist.util.algorithm : uniqInPlace; 37 import dentist.util.log; 38 import std.algorithm : 39 find, 40 map, 41 maxElement, 42 min, 43 sort, 44 uniq; 45 import std.array : array; 46 import std.conv : to; 47 import std.exception : enforce; 48 import std.format : format; 49 import std.range : 50 chain, 51 dropExactly, 52 only; 53 import std.range.primitives; 54 import std.stdio : writefln, writeln; 55 import vibe.data.json : 56 toJson = serializeToJson, 57 toJsonString = serializeToPrettyJson; 58 59 60 /// Options for the `translateCoords` command. 61 alias Options = OptionsFor!(TestingCommand.translateCoords); 62 63 /// Execute the `translateCoords` command with `options`. 64 void execute(in Options options) 65 { 66 auto translator = new CoordinateTranslator(options); 67 68 translator.run(); 69 } 70 71 enum SequenceType 72 { 73 existing, 74 insertion, 75 unkown, 76 } 77 78 struct ReferenceCoordinate 79 { 80 id_t contigId; 81 arithmetic_t contigCoord; 82 coord_t contigLength; 83 id_t[] contigIds; 84 bool isReverseComplement; 85 SequenceType sequenceType; 86 } 87 88 class CoordinateTranslator 89 { 90 alias OriginType = OutputCoordinate.OriginType; 91 92 const(Options) options; 93 OutputScaffold assemblyGraph; 94 OutputScaffold.IncidentEdgesCache incidentEdgesCache; 95 ContigNode[] scaffoldStartNodes; 96 97 this(in Options options) 98 { 99 this.options = options; 100 } 101 102 void run() 103 { 104 mixin(traceExecution); 105 106 init(); 107 108 foreach (i, outputCoordinate; options.outputCoordinates) 109 { 110 try 111 { 112 auto walker = Walker( 113 outputCoordinate, 114 assemblyGraph, 115 incidentEdgesCache, 116 scaffoldStartNodes, 117 ); 118 119 walker.walkToCoordinate(); 120 121 if (options.useJson) 122 writeJson(walker.refCoord); 123 else 124 { 125 if (i > 0) 126 writeln(); 127 writeTabular(walker.refCoord); 128 } 129 } 130 catch (DentistException e) 131 { 132 logJsonError( 133 "info", e.msg, 134 "coord", outputCoordinate.toString(), 135 ); 136 } 137 } 138 } 139 140 protected void init() 141 { 142 mixin(traceExecution); 143 144 auto insertionDb = InsertionDb.parse(options.assemblyGraphFile); 145 auto insertions = insertionDb[]; 146 auto nodes = chain( 147 insertions.map!"a.start".uniq, 148 insertions.map!"a.end".uniq, 149 ) 150 .array 151 .sort 152 .release; 153 uniqInPlace(nodes); 154 155 assemblyGraph = OutputScaffold(nodes); 156 assemblyGraph.bulkAddForce(insertions); 157 incidentEdgesCache = assemblyGraph.allIncidentEdges(); 158 scaffoldStartNodes = scaffoldStarts!InsertionInfo(assemblyGraph, incidentEdgesCache).array; 159 } 160 161 protected static void writeTabular(ReferenceCoordinate refCoord) 162 { 163 auto stats = [ 164 format!`%d`(refCoord.contigId), 165 format!`%d`(refCoord.contigCoord), 166 format!`%d`(refCoord.contigLength), 167 format!`%-(%d,%)`(refCoord.contigIds), 168 refCoord.isReverseComplement ? "yes" : "no", 169 refCoord.sequenceType.to!string, 170 ]; 171 auto columnWidth = stats.map!"a.length".maxElement; 172 173 writefln!"refContigId: %*s"(columnWidth, stats[0]); 174 writefln!"refContigCoord: %*s"(columnWidth, stats[1]); 175 writefln!"refContigLength: %*s"(columnWidth, stats[2]); 176 writefln!"flankingRefContigs: %*s"(columnWidth, stats[3]); 177 writefln!"reverseComplement: %*s"(columnWidth, stats[4]); 178 writefln!"sequenceType: %*s"(columnWidth, stats[5]); 179 } 180 181 protected static void writeJson(ReferenceCoordinate refCoord) 182 { 183 writeln(toJsonString([ 184 "refContig": [ 185 "id": refCoord.contigId, 186 "coord": refCoord.contigCoord, 187 "length": refCoord.contigLength, 188 ].toJson, 189 "flankingRefContigs": refCoord.contigIds.toJson, 190 "isReverseComplement": refCoord.isReverseComplement.toJson, 191 "sequenceType": refCoord.sequenceType.to!string.toJson, 192 ])); 193 } 194 } 195 196 private struct Walker 197 { 198 alias OriginType = OutputCoordinate.OriginType; 199 200 const(OutputCoordinate) outCoord; 201 OutputScaffold assemblyGraph; 202 OutputScaffold.IncidentEdgesCache incidentEdgesCache; 203 ContigNode[] scaffoldStartNodes; 204 205 coord_t numWalkedBasePairs; 206 id_t lastContigId; 207 arithmetic_t lastContigStart; 208 coord_t lastContigLength; 209 SequenceType lastWalkedSequenceType; 210 ReferenceCoordinate refCoord; 211 212 protected void walkToCoordinate() 213 { 214 final switch (outCoord.originType) 215 { 216 case OriginType.scaffold: 217 auto scaffoldStart = getStartOfScaffold(outCoord.scaffoldId); 218 219 walkScaffold(scaffoldStart); 220 break; 221 case OriginType.scaffoldContig: 222 assert(0, "unimplemented"); 223 case OriginType.global: 224 assert(0, "unimplemented"); 225 case OriginType.contig: 226 assert(0, "unimplemented"); 227 } 228 } 229 230 protected ContigNode getStartOfScaffold(id_t scaffoldId) 231 { 232 alias byContigId = (node, needleId) => node.contigId == needleId; 233 234 auto candidates = scaffoldStartNodes.find!byContigId(scaffoldId); 235 236 enforce!DentistException(!candidates.empty, "scaffold does not exist"); 237 238 return candidates.front; 239 } 240 241 protected void walkScaffold(ContigNode startNode) 242 { 243 mixin(traceExecution); 244 245 ContigNode insertionBegin = startNode; 246 Insertion lastInsertion; 247 auto globalComplement = false; 248 249 foreach (currentInsertion; linearWalk!InsertionInfo(assemblyGraph, startNode, incidentEdgesCache)) 250 { 251 lastInsertion = currentInsertion; 252 253 walkInsertion( 254 insertionBegin, 255 currentInsertion, 256 globalComplement, 257 ); 258 259 if (numWalkedBasePairs >= outCoord.coord) 260 break; 261 262 insertionBegin = currentInsertion.target(insertionBegin); 263 if (currentInsertion.isAntiParallel) 264 globalComplement = !globalComplement; 265 } 266 267 enforce!DentistException( 268 numWalkedBasePairs == outCoord.coord, 269 "requested coordinate is out of bounds", 270 ); 271 272 refCoord.contigId = lastContigId; 273 refCoord.contigLength = lastContigLength; 274 refCoord.contigCoord = numWalkedBasePairs - lastContigStart; 275 if (globalComplement) 276 refCoord.contigCoord = lastContigLength - refCoord.contigCoord; 277 refCoord.contigIds = only( 278 cast(id_t) lastInsertion.start.contigId, 279 cast(id_t) lastInsertion.end.contigId, 280 ).uniq.array; 281 refCoord.sequenceType = lastWalkedSequenceType; 282 refCoord.isReverseComplement = globalComplement; 283 } 284 285 protected void walkInsertion( 286 in ContigNode begin, 287 in Insertion insertion, 288 in bool globalComplement, 289 ) 290 { 291 assert(insertion.isValidInsertion, "invalid insertion"); 292 size_t length; 293 294 if (insertion.isDefault) 295 { 296 auto info = getInfoForExistingContig(begin, insertion, globalComplement); 297 298 lastContigId = info.contigId; 299 lastContigStart = numWalkedBasePairs; 300 lastContigLength = cast(coord_t) info.contigLength; 301 length = info.length; 302 lastWalkedSequenceType = SequenceType.existing; 303 } 304 else if (insertion.isOutputGap) 305 { 306 length = getInfoForGap(insertion).length; 307 lastWalkedSequenceType = SequenceType.unkown; 308 } 309 else 310 { 311 length = getInfoForNewSequenceInsertion(begin, insertion, globalComplement).length; 312 lastWalkedSequenceType = SequenceType.insertion; 313 } 314 315 numWalkedBasePairs = min( 316 numWalkedBasePairs + cast(coord_t) length, 317 outCoord.coord, 318 ); 319 } 320 }