1 /** 2 This is the `findClosableGaps` command of `dentist`. 3 4 Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de> 5 License: Subject to the terms of the MIT license, as written in the 6 included LICENSE file. 7 Authors: Arne Ludwig <arne.ludwig@posteo.de> 8 */ 9 module dentist.commands.findClosableGaps; 10 11 import dentist.commandline : OptionsFor, TestingCommand; 12 import dentist.common : 13 isTesting, 14 ReferenceInterval, 15 ReferenceRegion; 16 import dentist.common.alignments : 17 AlignmentChain, 18 coord_t, 19 id_t; 20 import dentist.dazzler : 21 ContigSegment, 22 GapSegment, 23 getScaffoldStructure, 24 readMask, 25 ScaffoldSegment; 26 import dentist.util.algorithm : cmpLexicographically; 27 import dentist.util.log; 28 import std.algorithm : 29 copy, 30 filter, 31 map, 32 sort, 33 swap; 34 import std.array : array; 35 import std.format : format, formattedRead; 36 import std.range : assumeSorted, enumerate; 37 import std.stdio : File, writeln; 38 import std..string : capitalize; 39 import std.typecons : PhobosFlag = Flag, tuple; 40 import vibe.data.json : toJson = serializeToJson, serializeToJsonString; 41 42 43 static if (isTesting): 44 45 alias Options = OptionsFor!(TestingCommand.findClosableGaps); 46 47 /// Execute the `findClosableGaps` command with `options`. 48 void execute(in Options options) 49 { 50 auto finder = ClosableGapsFinder(options); 51 52 finder.run(); 53 } 54 55 struct ClosableGapsFinder 56 { 57 const(Options) options; 58 const(ScaffoldSegment)[] trueAssemblyScaffoldStructure; 59 ReferenceRegion mappedRegionsMask; 60 TrueAlignment[] trueAlignments; 61 ClosableGap[] closableGaps; 62 63 void run() 64 { 65 init(); 66 findClosableGaps(); 67 68 writeln(serializeToJsonString(closableGaps)); 69 } 70 71 void init() 72 { 73 trueAssemblyScaffoldStructure = getScaffoldStructure(options.trueAssemblyDb).array; 74 mappedRegionsMask = ReferenceRegion(readMask!ReferenceInterval( 75 options.trueAssemblyDb, 76 options.mappedRegionsMask, 77 options.workdir, 78 )); 79 trueAlignments = readTrueAlignments(options.readsMap); 80 81 logJsonDebug( 82 "trueAssemblyScaffoldStructure", trueAssemblyScaffoldStructure 83 .map!(scaffoldPart => scaffoldPart.peek!GapSegment !is null 84 ? scaffoldPart.get!GapSegment.toJson 85 : scaffoldPart.get!ContigSegment.toJson) 86 .array 87 .toJson, 88 "mappedRegionsMask", mappedRegionsMask.intervals.toJson, 89 "trueAlignments", trueAlignments.toJson, 90 ); 91 } 92 93 void findClosableGaps() 94 { 95 alias isContigPart = contigPart => contigPart.peek!ContigSegment !is null; 96 alias mkContigPart = contigPart => contigPart.get!ContigSegment; 97 98 id_t currentContigId; 99 foreach (contigPart; trueAssemblyScaffoldStructure.filter!isContigPart.map!mkContigPart) 100 { 101 // `contigId` is 1-based 102 ++currentContigId; 103 auto trueContigRegion = ReferenceRegion(ReferenceInterval( 104 contigPart.globalContigId, 105 0, 106 contigPart.length, 107 )); 108 auto scaffoldGaps = trueContigRegion - mappedRegionsMask; 109 this.closableGaps.length += scaffoldGaps.intervals.length; 110 auto closableGaps = this.closableGaps[$ - scaffoldGaps.intervals.length .. $]; 111 112 auto trueAlignments = this.trueAlignments 113 .assumeSorted!"a.scaffoldId < b.scaffoldId" 114 .equalRange(TrueAlignment(cast(id_t) contigPart.scaffoldId)) 115 .map!(read => TrueAlignment( 116 read.scaffoldId, 117 cast(coord_t) (read.begin - contigPart.begin), 118 cast(coord_t) (read.end - contigPart.begin), 119 read.flags, 120 read.readId, 121 )) 122 .array; 123 124 foreach (i, gap; scaffoldGaps.intervals) 125 { 126 closableGaps[i].fromContig = cast(id_t) (currentContigId); 127 closableGaps[i].toContig = cast(id_t) (currentContigId + 1); 128 closableGaps[i].gapSize = cast(coord_t) gap.size; 129 closableGaps[i].mappedInterval = gap; 130 131 if (gap.begin == 0 || gap.end == contigPart.length) 132 // Skip gaps at the beginning or end of a true scaffold 133 continue; 134 135 foreach (read; trueAlignments) 136 if ( 137 read.begin < gap.begin && 138 (gap.begin - read.begin) >= options.minAnchorLength && 139 gap.end < read.end && 140 (read.end - gap.end) >= options.minAnchorLength 141 ) 142 closableGaps[i].spanningReads ~= read.readId; 143 closableGaps[i].spanningReads.sort; 144 ++currentContigId; 145 } 146 } 147 148 auto bufferRest = closableGaps 149 .filter!(closableGap => closableGap.spanningReads.length >= options.minSpanningReads) 150 .copy(closableGaps); 151 closableGaps = closableGaps[0 .. $ - bufferRest.length]; 152 } 153 } 154 155 struct ClosableGap 156 { 157 id_t fromContig; 158 id_t toContig; 159 coord_t gapSize; 160 ReferenceInterval mappedInterval; 161 id_t[] spanningReads; 162 } 163 164 struct TrueAlignment 165 { 166 static alias Flag = AlignmentChain.Flag; 167 static alias Flags = AlignmentChain.Flags; 168 169 id_t scaffoldId; 170 coord_t begin; 171 coord_t end; 172 Flags flags; 173 id_t readId; 174 175 static foreach(flagName; __traits(allMembers, Flag)) 176 { 177 mixin(format!(q"< 178 static alias %1$s = PhobosFlag!"%2$s"; 179 180 @property PhobosFlag!"%2$s" %2$s() pure const nothrow @trusted 181 { 182 return cast(PhobosFlag!"%2$s") flags.%2$s; 183 } 184 185 @property void %2$s(PhobosFlag!"%2$s" %2$s) pure nothrow 186 { 187 flags.%2$s = %2$s; 188 } 189 >")(flagName.capitalize, flagName)); 190 } 191 192 int opCmp(in TrueAlignment other) const pure nothrow 193 { 194 return cmpLexicographically!( 195 TrueAlignment, 196 "a.scaffoldId", 197 "a.begin", 198 "a.end", 199 "a.readId", 200 )(this, other); 201 } 202 } 203 204 TrueAlignment[] readTrueAlignments(in string readsMap) 205 { 206 auto trueAlignments = File(readsMap) 207 .byLine 208 .enumerate(1) 209 .map!parseTrueAlignment 210 .array; 211 trueAlignments.sort; 212 213 return trueAlignments; 214 } 215 216 // Not meant for public usage. 217 TrueAlignment parseTrueAlignment(EnumLine)(EnumLine enumLine) 218 { 219 auto line = enumLine.value; 220 TrueAlignment trueAlignment; 221 trueAlignment.readId = enumLine.index; 222 223 line.formattedRead!" %d %d %d"( 224 trueAlignment.scaffoldId, 225 trueAlignment.begin, 226 trueAlignment.end, 227 ); 228 229 if (trueAlignment.begin > trueAlignment.end) 230 { 231 swap(trueAlignment.begin, trueAlignment.end); 232 trueAlignment.flags |= TrueAlignment.Flag.complement; 233 } 234 235 return trueAlignment; 236 }