1 /**
2     Everything to handle insertions.
3 
4     Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de>
5     License: Subject to the terms of the MIT license, as written in the
6              included LICENSE file.
7     Authors: Arne Ludwig <arne.ludwig@posteo.de>
8 */
9 module dentist.common.insertions;
10 
11 import dentist.common : ReferencePoint;
12 import dentist.common.alignments :
13     AlignmentChain,
14     AlignmentLocationSeed,
15     id_t;
16 import dentist.common.binio : CompressedSequence;
17 import dentist.common.scaffold :
18     ContigNode,
19     ContigPart,
20     isDefault,
21     isExtension,
22     isGap,
23     Scaffold;
24 import dentist.util.math : add;
25 import std.algorithm :
26     canFind,
27     filter,
28     swap;
29 import std.array : array;
30 import std.typecons : Flag, tuple, Tuple;
31 
32 /// Information about the point where the two sequences should be spliced.
33 struct SpliceSite
34 {
35     ReferencePoint croppingRefPosition;
36     AlignmentLocationSeed alignmentSeed;
37     AlignmentChain.Flags flags;
38 }
39 
40 /// This characterizes an insertion.
41 struct InsertionInfo
42 {
43     CompressedSequence sequence;
44     size_t contigLength;
45     SpliceSite[] spliceSites;
46 }
47 
48 /// This is used to collect all sub-sequences of the output.
49 alias OutputScaffold = Scaffold!InsertionInfo;
50 
51 /// This characterizes an insertion.
52 alias Insertion = OutputScaffold.Edge;
53 
54 /// Returns true iff a sequence of `n`s should be written.
55 bool isOutputGap(in Insertion insertion)
56 {
57     return !insertion.hasSequence && !insertion.isDefault;
58 }
59 
60 bool isValidInsertion(in Insertion insertion)
61 {
62     return
63         insertion.isDefault ^
64         insertion.isOutputGap ^
65         (insertion.isGap && insertion.hasSequence) ^
66         (insertion.isExtension && insertion.hasSequence);
67 }
68 
69 bool hasSequence(in Insertion insertion)
70 {
71     return insertion.payload.sequence.length > 0;
72 }
73 
74 Insertion concatenateSpliceSites(Insertion existingJoin, Insertion newJoin)
75 {
76     if (existingJoin.payload.sequence.length == 0)
77     {
78         existingJoin.payload.sequence = newJoin.payload.sequence;
79     }
80     existingJoin.payload.spliceSites ~= newJoin.payload.spliceSites;
81 
82     return existingJoin;
83 }
84 
85 /// Remove contig cropping where no new sequence is to be inserted.
86 OutputScaffold fixContigCropping(OutputScaffold scaffold)
87 {
88     alias replace = OutputScaffold.ConflictStrategy.replace;
89     auto contigJoins = scaffold.edges.filter!isDefault;
90 
91     foreach (contigJoin; contigJoins)
92     {
93         bool insertionUpdated;
94 
95         foreach (contigNode; [contigJoin.start, contigJoin.end])
96         {
97             auto shouldInsertNewSequence = scaffold
98                 .incidentEdges(contigNode)
99                 .canFind!(insertion => !insertion.isOutputGap && (insertion.isGap || insertion.isExtension));
100 
101             if (!shouldInsertNewSequence)
102             {
103                 auto contigLength = contigJoin.payload.contigLength;
104                 auto newSpliceSites = contigJoin
105                     .payload
106                     .spliceSites
107                     .filter!(spliceSite => contigNode.contigPart == ContigPart.begin
108                         ? !(spliceSite.croppingRefPosition.value < contigLength / 2)
109                         : !(spliceSite.croppingRefPosition.value >= contigLength / 2))
110                     .array;
111                 if (newSpliceSites.length < contigJoin.payload.spliceSites.length)
112                 {
113                     contigJoin.payload.spliceSites = newSpliceSites;
114                     insertionUpdated = true;
115                 }
116             }
117         }
118 
119         if (insertionUpdated)
120         {
121             scaffold.add!replace(contigJoin);
122         }
123     }
124 
125     return scaffold;
126 }
127 
128 auto getInfoForExistingContig(in ContigNode begin, in Insertion insertion, in bool globalComplement)
129 {
130     auto spliceSites = insertion.payload.spliceSites;
131     auto contigId = cast(id_t) begin.contigId;
132     auto contigLength = insertion.payload.contigLength;
133     size_t spliceStart;
134     size_t spliceEnd;
135 
136     assert(contigLength > 0);
137 
138     switch (spliceSites.length)
139     {
140     case 0:
141         spliceStart = 0;
142         spliceEnd = contigLength;
143         break;
144     case 1:
145         auto splicePosition = spliceSites[0].croppingRefPosition.value;
146         auto spliceSeed = spliceSites[0].alignmentSeed;
147 
148         if (spliceSeed == AlignmentLocationSeed.front)
149         {
150             spliceStart = splicePosition;
151             spliceEnd = contigLength;
152         }
153         else
154         {
155             spliceStart = 0;
156             spliceEnd = splicePosition;
157         }
158         break;
159     case 2:
160         assert(spliceSites[0].croppingRefPosition.contigId
161                 == spliceSites[1].croppingRefPosition.contigId);
162 
163         spliceStart = spliceSites[0].croppingRefPosition.value;
164         spliceEnd = spliceSites[1].croppingRefPosition.value;
165 
166         assert(
167             spliceStart == spliceEnd ||
168             (spliceStart < spliceEnd)
169             ==
170             (spliceSites[0].alignmentSeed < spliceSites[1].alignmentSeed)
171         );
172         if (spliceEnd < spliceStart)
173             swap(spliceStart, spliceEnd);
174         break;
175     default:
176         assert(0, "too many spliceSites");
177     }
178 
179     assert(globalComplement != (begin < insertion.target(begin)));
180 
181     return tuple!(
182         "contigId",
183         "contigLength",
184         "spliceStart",
185         "spliceEnd",
186         "length",
187         "complement",
188     )(
189         contigId,
190         contigLength,
191         spliceStart,
192         spliceEnd,
193         spliceEnd - spliceStart,
194         globalComplement,
195     );
196 }
197 
198 auto getInfoForGap(in Insertion insertion)
199 {
200     return tuple!("length")(insertion.payload.contigLength);
201 }
202 
203 auto getInfoForNewSequenceInsertion(
204     in ContigNode begin,
205     in Insertion insertion,
206     in bool globalComplement,
207 )
208 {
209     auto spliceSites = insertion.payload.spliceSites;
210     auto effectiveComplement = spliceSites[0].flags.complement ^ globalComplement;
211 
212     assert(
213         (insertion.isExtension && spliceSites.length == 1) ^
214         (insertion.isGap && spliceSites.length == 2)
215     );
216 
217     return tuple!(
218         "sequence",
219         "length",
220         "complement",
221     )(
222         insertion.payload.sequence,
223         insertion.payload.sequence.length,
224         effectiveComplement,
225     );
226 }