1 /**
2     This is the `translocateGaps` command of `dentist`.
3 
4     Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de>
5     License: Subject to the terms of the MIT license, as written in the
6              included LICENSE file.
7     Authors: Arne Ludwig <arne.ludwig@posteo.de>
8 */
9 module dentist.commands.translocateGaps;
10 
11 import dentist.common : isTesting;
12 
13 static if (isTesting):
14 
15 import dentist.commandline : TestingCommand, OptionsFor;
16 import dentist.common :
17     ReferenceInterval,
18     ReferenceRegion,
19     to;
20 import dentist.common.alignments :
21     AlignmentChain,
22     id_t;
23 import dentist.dazzler :
24     getAlignments,
25     getNumContigs,
26     getFastaSequence,
27     writeMask;
28 import dentist.util.log;
29 import dentist.util.range : wrapLines;
30 import std.algorithm :
31     cache,
32     copy,
33     filter,
34     joiner,
35     map;
36 import std.array : array;
37 import std.format : format;
38 import std.range :
39     assumeSorted,
40     chain,
41     chunks,
42     enumerate,
43     iota,
44     only,
45     repeat,
46     slide,
47     takeExactly;
48 import std.stdio : File, stdout;
49 import std.typecons : No;
50 import vibe.data.json : toJson = serializeToJson;
51 
52 
53 /// Options for the `collectPileUps` command.
54 alias Options = OptionsFor!(TestingCommand.translocateGaps);
55 
56 /// Execute the `translocateGaps` command with `options`.
57 void execute(in Options options)
58 {
59     auto translocator = Translocator(options);
60 
61     return translocator.run();
62 }
63 
64 private struct Translocator
65 {
66     alias FastaWriter = typeof(wrapLines(stdout.lockingTextWriter, 0));
67 
68     const(Options) options;
69     size_t numContigsAssembly2;
70     AlignmentChain[] alignments;
71     ReferenceRegion mappedRegions;
72     File assemblyFile;
73     FastaWriter writer;
74 
75     this(in Options options)
76     {
77         this.options = options;
78         this.assemblyFile = options.assemblyFile is null
79             ? stdout
80             : File(options.assemblyFile, "w");
81         this.writer = wrapLines(assemblyFile.lockingTextWriter, options.fastaLineWidth);
82     }
83 
84     void run()
85     {
86         mixin(traceExecution);
87 
88         init();
89         writeOutputAssembly();
90     }
91 
92     protected void init()
93     {
94         mixin(traceExecution);
95 
96         alignments = getAlignments(
97             options.trueAssemblyDb,
98             options.shortReadAssemblyDb,
99             options.shortReadAssemblyAlignmentFile,
100             options.workdir,
101         );
102         mappedRegions = ReferenceRegion(alignments
103             .filter!"a.isProper"
104             .map!(to!(ReferenceRegion, "contigA"))
105             .map!"a.intervals.dup"
106             .joiner
107             .array);
108         writeMask(
109             options.trueAssemblyDb,
110             options.mappedRegionsMask,
111             mappedRegions.intervals,
112             options.workdir,
113         );
114     }
115 
116     protected void writeOutputAssembly()
117     {
118         mixin(traceExecution);
119 
120         enum dchar unknownBase = 'n';
121         auto numRefContigs = getNumContigs(options.trueAssemblyDb, options.workdir);
122         auto mappedRegions = mappedRegions.intervals.assumeSorted!"a.contigId < b.contigId";
123         ReferenceInterval needle;
124 
125         foreach (id_t contigId; 1 .. numRefContigs + 1)
126         {
127             getScaffoldHeader(contigId).copy(writer);
128 
129             auto contigSequence = getFastaSequence(
130                 options.trueAssemblyDb,
131                 contigId,
132                 options.workdir,
133             );
134             needle.contigId = contigId;
135             auto contigMappedRegions = chain(
136                 only(needle),
137                 mappedRegions.equalRange(needle)
138             );
139 
140             foreach (keepRegions; contigMappedRegions.slide!(No.withPartial)(2))
141             {
142                 if (keepRegions[0].end > 0)
143                     repeat(unknownBase)
144                         .takeExactly(keepRegions[1].begin - keepRegions[0].end)
145                         .copy(writer);
146 
147                 contigSequence[keepRegions[1].begin .. keepRegions[1].end].copy(writer);
148             }
149 
150             "\n".copy(writer);
151         }
152     }
153 
154     static protected string getScaffoldHeader(in size_t scaffoldId)
155     {
156         return format!">translocated_gaps_%d\n"(scaffoldId);
157     }
158 }