1 /** 2 Everything to handle insertions. 3 4 Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de> 5 License: Subject to the terms of the MIT license, as written in the 6 included LICENSE file. 7 Authors: Arne Ludwig <arne.ludwig@posteo.de> 8 */ 9 module dentist.common.insertions; 10 11 import dentist.common : ReferencePoint; 12 import dentist.common.alignments : 13 AlignmentChain, 14 AlignmentLocationSeed, 15 id_t; 16 import dentist.common.binio : CompressedSequence; 17 import dentist.common.scaffold : 18 ContigNode, 19 ContigPart, 20 isDefault, 21 isExtension, 22 isGap, 23 Scaffold; 24 import dentist.util.math : add; 25 import std.algorithm : 26 canFind, 27 filter, 28 swap; 29 import std.array : array; 30 import std.typecons : Flag, tuple, Tuple; 31 32 /// Information about the point where the two sequences should be spliced. 33 struct SpliceSite 34 { 35 ReferencePoint croppingRefPosition; 36 AlignmentLocationSeed alignmentSeed; 37 AlignmentChain.Flags flags; 38 } 39 40 /// This characterizes an insertion. 41 struct InsertionInfo 42 { 43 CompressedSequence sequence; 44 size_t contigLength; 45 SpliceSite[] spliceSites; 46 } 47 48 /// This is used to collect all sub-sequences of the output. 49 alias OutputScaffold = Scaffold!InsertionInfo; 50 51 /// This characterizes an insertion. 52 alias Insertion = OutputScaffold.Edge; 53 54 /// Returns true iff a sequence of `n`s should be written. 55 bool isOutputGap(in Insertion insertion) 56 { 57 return !insertion.hasSequence && !insertion.isDefault; 58 } 59 60 bool isValidInsertion(in Insertion insertion) 61 { 62 return 63 insertion.isDefault ^ 64 insertion.isOutputGap ^ 65 (insertion.isGap && insertion.hasSequence) ^ 66 (insertion.isExtension && insertion.hasSequence); 67 } 68 69 bool hasSequence(in Insertion insertion) 70 { 71 return insertion.payload.sequence.length > 0; 72 } 73 74 Insertion concatenateSpliceSites(Insertion existingJoin, Insertion newJoin) 75 { 76 if (existingJoin.payload.sequence.length == 0) 77 { 78 existingJoin.payload.sequence = newJoin.payload.sequence; 79 } 80 existingJoin.payload.spliceSites ~= newJoin.payload.spliceSites; 81 82 return existingJoin; 83 } 84 85 /// Remove contig cropping where no new sequence is to be inserted. 86 OutputScaffold fixContigCropping(OutputScaffold scaffold) 87 { 88 alias replace = OutputScaffold.ConflictStrategy.replace; 89 auto contigJoins = scaffold.edges.filter!isDefault; 90 91 foreach (contigJoin; contigJoins) 92 { 93 bool insertionUpdated; 94 95 foreach (contigNode; [contigJoin.start, contigJoin.end]) 96 { 97 auto shouldInsertNewSequence = scaffold 98 .incidentEdges(contigNode) 99 .canFind!(insertion => !insertion.isOutputGap && (insertion.isGap || insertion.isExtension)); 100 101 if (!shouldInsertNewSequence) 102 { 103 auto contigLength = contigJoin.payload.contigLength; 104 auto newSpliceSites = contigJoin 105 .payload 106 .spliceSites 107 .filter!(spliceSite => contigNode.contigPart == ContigPart.begin 108 ? !(spliceSite.croppingRefPosition.value < contigLength / 2) 109 : !(spliceSite.croppingRefPosition.value >= contigLength / 2)) 110 .array; 111 if (newSpliceSites.length < contigJoin.payload.spliceSites.length) 112 { 113 contigJoin.payload.spliceSites = newSpliceSites; 114 insertionUpdated = true; 115 } 116 } 117 } 118 119 if (insertionUpdated) 120 { 121 scaffold.add!replace(contigJoin); 122 } 123 } 124 125 return scaffold; 126 } 127 128 auto getInfoForExistingContig(in ContigNode begin, in Insertion insertion, in bool globalComplement) 129 { 130 auto spliceSites = insertion.payload.spliceSites; 131 auto contigId = cast(id_t) begin.contigId; 132 auto contigLength = insertion.payload.contigLength; 133 size_t spliceStart; 134 size_t spliceEnd; 135 136 assert(contigLength > 0); 137 138 switch (spliceSites.length) 139 { 140 case 0: 141 spliceStart = 0; 142 spliceEnd = contigLength; 143 break; 144 case 1: 145 auto splicePosition = spliceSites[0].croppingRefPosition.value; 146 auto spliceSeed = spliceSites[0].alignmentSeed; 147 148 if (spliceSeed == AlignmentLocationSeed.front) 149 { 150 spliceStart = splicePosition; 151 spliceEnd = contigLength; 152 } 153 else 154 { 155 spliceStart = 0; 156 spliceEnd = splicePosition; 157 } 158 break; 159 case 2: 160 assert(spliceSites[0].croppingRefPosition.contigId 161 == spliceSites[1].croppingRefPosition.contigId); 162 163 spliceStart = spliceSites[0].croppingRefPosition.value; 164 spliceEnd = spliceSites[1].croppingRefPosition.value; 165 166 assert( 167 spliceStart == spliceEnd || 168 (spliceStart < spliceEnd) 169 == 170 (spliceSites[0].alignmentSeed < spliceSites[1].alignmentSeed) 171 ); 172 if (spliceEnd < spliceStart) 173 swap(spliceStart, spliceEnd); 174 break; 175 default: 176 assert(0, "too many spliceSites"); 177 } 178 179 assert(globalComplement != (begin < insertion.target(begin))); 180 181 return tuple!( 182 "contigId", 183 "contigLength", 184 "spliceStart", 185 "spliceEnd", 186 "length", 187 "complement", 188 )( 189 contigId, 190 contigLength, 191 spliceStart, 192 spliceEnd, 193 spliceEnd - spliceStart, 194 globalComplement, 195 ); 196 } 197 198 auto getInfoForGap(in Insertion insertion) 199 { 200 return tuple!("length")(insertion.payload.contigLength); 201 } 202 203 auto getInfoForNewSequenceInsertion( 204 in ContigNode begin, 205 in Insertion insertion, 206 in bool globalComplement, 207 ) 208 { 209 auto spliceSites = insertion.payload.spliceSites; 210 auto effectiveComplement = spliceSites[0].flags.complement ^ globalComplement; 211 212 assert( 213 (insertion.isExtension && spliceSites.length == 1) ^ 214 (insertion.isGap && spliceSites.length == 2) 215 ); 216 217 return tuple!( 218 "sequence", 219 "length", 220 "complement", 221 )( 222 insertion.payload.sequence, 223 insertion.payload.sequence.length, 224 effectiveComplement, 225 ); 226 }