1 /** 2 This is the `translocateGaps` command of `dentist`. 3 4 Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de> 5 License: Subject to the terms of the MIT license, as written in the 6 included LICENSE file. 7 Authors: Arne Ludwig <arne.ludwig@posteo.de> 8 */ 9 module dentist.commands.translocateGaps; 10 11 import dentist.common : isTesting; 12 13 static if (isTesting): 14 15 import dentist.commandline : TestingCommand, OptionsFor; 16 import dentist.common : 17 ReferenceInterval, 18 ReferenceRegion, 19 to; 20 import dentist.common.alignments : 21 AlignmentChain, 22 id_t; 23 import dentist.dazzler : 24 getAlignments, 25 getNumContigs, 26 getFastaSequence, 27 writeMask; 28 import dentist.util.log; 29 import dentist.util.range : wrapLines; 30 import std.algorithm : 31 cache, 32 copy, 33 filter, 34 joiner, 35 map; 36 import std.array : array; 37 import std.format : format; 38 import std.range : 39 assumeSorted, 40 chain, 41 chunks, 42 enumerate, 43 iota, 44 only, 45 repeat, 46 slide, 47 takeExactly; 48 import std.stdio : File, stdout; 49 import std.typecons : No; 50 import vibe.data.json : toJson = serializeToJson; 51 52 53 /// Options for the `collectPileUps` command. 54 alias Options = OptionsFor!(TestingCommand.translocateGaps); 55 56 /// Execute the `translocateGaps` command with `options`. 57 void execute(in Options options) 58 { 59 auto translocator = Translocator(options); 60 61 return translocator.run(); 62 } 63 64 private struct Translocator 65 { 66 alias FastaWriter = typeof(wrapLines(stdout.lockingTextWriter, 0)); 67 68 const(Options) options; 69 size_t numContigsAssembly2; 70 AlignmentChain[] alignments; 71 ReferenceRegion mappedRegions; 72 File assemblyFile; 73 FastaWriter writer; 74 75 this(in Options options) 76 { 77 this.options = options; 78 this.assemblyFile = options.assemblyFile is null 79 ? stdout 80 : File(options.assemblyFile, "w"); 81 this.writer = wrapLines(assemblyFile.lockingTextWriter, options.fastaLineWidth); 82 } 83 84 void run() 85 { 86 mixin(traceExecution); 87 88 init(); 89 writeOutputAssembly(); 90 } 91 92 protected void init() 93 { 94 mixin(traceExecution); 95 96 alignments = getAlignments( 97 options.trueAssemblyDb, 98 options.shortReadAssemblyDb, 99 options.shortReadAssemblyAlignmentFile, 100 options.workdir, 101 ); 102 mappedRegions = ReferenceRegion(alignments 103 .filter!"a.isProper" 104 .map!(to!(ReferenceRegion, "contigA")) 105 .map!"a.intervals.dup" 106 .joiner 107 .array); 108 writeMask( 109 options.trueAssemblyDb, 110 options.mappedRegionsMask, 111 mappedRegions.intervals, 112 options.workdir, 113 ); 114 } 115 116 protected void writeOutputAssembly() 117 { 118 mixin(traceExecution); 119 120 enum dchar unknownBase = 'n'; 121 auto numRefContigs = getNumContigs(options.trueAssemblyDb, options.workdir); 122 auto mappedRegions = mappedRegions.intervals.assumeSorted!"a.contigId < b.contigId"; 123 ReferenceInterval needle; 124 125 foreach (id_t contigId; 1 .. numRefContigs + 1) 126 { 127 getScaffoldHeader(contigId).copy(writer); 128 129 auto contigSequence = getFastaSequence( 130 options.trueAssemblyDb, 131 contigId, 132 options.workdir, 133 ); 134 needle.contigId = contigId; 135 auto contigMappedRegions = chain( 136 only(needle), 137 mappedRegions.equalRange(needle) 138 ); 139 140 foreach (keepRegions; contigMappedRegions.slide!(No.withPartial)(2)) 141 { 142 if (keepRegions[0].end > 0) 143 repeat(unknownBase) 144 .takeExactly(keepRegions[1].begin - keepRegions[0].end) 145 .copy(writer); 146 147 contigSequence[keepRegions[1].begin .. keepRegions[1].end].copy(writer); 148 } 149 150 "\n".copy(writer); 151 } 152 } 153 154 static protected string getScaffoldHeader(in size_t scaffoldId) 155 { 156 return format!">translocated_gaps_%d\n"(scaffoldId); 157 } 158 }