1 /** 2 Everything to handle local alignments and friends. 3 4 Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de> 5 License: Subject to the terms of the MIT license, as written in the 6 included LICENSE file. 7 Authors: Arne Ludwig <arne.ludwig@posteo.de> 8 */ 9 module dentist.common.alignments; 10 11 import dentist.common.scaffold : 12 concatenatePayloads, 13 ContigNode, 14 ContigPart, 15 Join; 16 import dentist.util.algorithm : cmpLexicographically, orderLexicographically; 17 import dentist.util.log; 18 import dentist.util.math : absdiff, ceildiv, floor, RoundingMode; 19 import core.exception : AssertError; 20 import std.algorithm : 21 all, 22 among, 23 any, 24 canFind, 25 chunkBy, 26 cumulativeFold, 27 equal, 28 filter, 29 find, 30 isSorted, 31 joiner, 32 map, 33 mean, 34 min, 35 sort, 36 sum, 37 swap, 38 SwapStrategy; 39 import std.array : appender, array, minimallyInitializedArray; 40 import std.conv : to; 41 import std.exception : assertNotThrown, assertThrown, enforce, ErrnoException; 42 import std.format : format; 43 import std.math : sgn; 44 import std.range : 45 assumeSorted, 46 chain, 47 chunks, 48 enumerate, 49 InputRange, 50 inputRangeObject, 51 iota, 52 only, 53 radial, 54 slide, 55 takeNone, 56 zip; 57 import std..string : capitalize, split; 58 import std.stdio : File, LockType; 59 import std.typecons : BitFlags, PhobosFlag = Flag, No, tuple, Tuple, Yes; 60 import std.traits : isArray, TemplateArgsOf, TemplateOf; 61 import vibe.data.json : toJson = serializeToJson; 62 63 debug import std.stdio : writefln, writeln; 64 65 alias arithmetic_t = int; 66 alias coord_t = uint; 67 alias diff_t = uint; 68 alias id_t = uint; 69 alias trace_point_t = ushort; 70 71 /** 72 Holds a chain of local alignments that form a compound alignment. An AlignmentChain should 73 contain at least one element. 74 */ 75 struct AlignmentChain 76 { 77 static enum Flag : ubyte 78 { 79 complement = 1 << 0, 80 disabled = 1 << 1, 81 } 82 83 static alias Flags = BitFlags!Flag; 84 static alias TranslatedTracePoint = Tuple!( 85 coord_t, "contigA", 86 coord_t, "contigB", 87 ); 88 89 static struct LocalAlignment 90 { 91 static struct Locus 92 { 93 coord_t begin; 94 coord_t end; 95 96 @property coord_t length() const pure nothrow 97 { 98 return end - begin; 99 } 100 } 101 102 static struct TracePoint 103 { 104 trace_point_t numDiffs; 105 trace_point_t numBasePairs; 106 } 107 108 Locus contigA; 109 Locus contigB; 110 diff_t numDiffs; 111 TracePoint[] tracePoints; 112 113 TranslatedTracePoint translateTracePoint(string contig = "contigA")( 114 in coord_t contigPos, 115 trace_point_t tracePointDistance, 116 RoundingMode roundingMode, 117 ) const pure if (contig.among("contigA", "contigB")) 118 { 119 assert(mixin(contig ~ `.begin <= contigPos && contigPos <= ` ~ contig ~ `.end`)); 120 121 auto tracePointIndex = tracePointsUpTo!contig(contigPos, tracePointDistance, roundingMode); 122 auto contigBPos = contigB.begin + tracePoints[0 .. tracePointIndex] 123 .map!"a.numBasePairs" 124 .sum; 125 auto contigAPos = tracePointIndex == 0 126 ? contigA.begin 127 : tracePointIndex < tracePoints.length 128 ? floor(contigA.begin, tracePointDistance) + cast(coord_t) (tracePointIndex * tracePointDistance) 129 : contigA.end; 130 131 return TranslatedTracePoint(contigAPos, contigBPos); 132 } 133 134 auto tracePointsUpTo(string contig)( 135 in coord_t contigAPos, 136 trace_point_t tracePointDistance, 137 RoundingMode roundingMode, 138 ) const pure nothrow if (contig == "contigA") 139 { 140 assert(contigA.begin <= contigAPos && contigAPos <= contigA.end); 141 142 auto firstTracePointRefPos = contigA.begin; 143 auto secondTracePointRefPos = floor(firstTracePointRefPos, tracePointDistance) + tracePointDistance; 144 auto secondFromLastTracePointRefPos = floor(contigA.end - 1, tracePointDistance); 145 146 final switch (roundingMode) 147 { 148 case RoundingMode.floor: 149 if (contigAPos < secondTracePointRefPos) 150 return 0; 151 if (contigAPos < contigA.end) 152 return 1 + (contigAPos - secondTracePointRefPos) / tracePointDistance; 153 else 154 return tracePoints.length; 155 case RoundingMode.round: 156 assert(0, "unimplemented"); 157 case RoundingMode.ceil: 158 if (firstTracePointRefPos == contigAPos) 159 return 0; 160 if (contigAPos <= secondTracePointRefPos) 161 return 1; 162 else if (contigAPos <= secondFromLastTracePointRefPos) 163 return 1 + ceildiv(contigAPos - secondTracePointRefPos, tracePointDistance); 164 else 165 return tracePoints.length; 166 } 167 } 168 169 unittest 170 { 171 auto localAlignment = LocalAlignment( 172 Locus(50, 2897), 173 Locus(50, 2905), 174 8, 175 [TracePoint(1, 50), TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), 176 TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), 177 TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), 178 TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), 179 TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), 180 TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), 181 TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), TracePoint(0, 100), 182 TracePoint(7, 105)], 183 ); 184 coord_t contigPos = 79; 185 trace_point_t tracePointDistance = 100; 186 auto roundingMode = RoundingMode.ceil; 187 auto tpsUpTo = localAlignment.tracePointsUpTo!"contigA"( 188 contigPos, 189 tracePointDistance, 190 roundingMode, 191 ); 192 193 assert(0 <= tpsUpTo && tpsUpTo <= localAlignment.tracePoints.length); 194 } 195 196 auto tracePointsUpTo(string contig)( 197 in coord_t contigBPos, 198 trace_point_t tracePointDistance, 199 RoundingMode roundingMode, 200 ) const pure nothrow if (contig == "contigB") 201 { 202 assert(contigB.begin <= contigBPos && contigBPos <= contigB.end); 203 204 if (contigBPos == contigB.begin) 205 return 0; 206 else if (contigBPos == contigB.end) 207 return tracePoints.length; 208 209 auto tracePointPositions = chain(only(contigB.begin), tracePoints.map!"a.numBasePairs") 210 .cumulativeFold!"a + b" 211 .enumerate; 212 213 final switch (roundingMode) 214 { 215 case RoundingMode.floor: 216 return tracePointPositions 217 .find!(pair => contigBPos < pair.value) 218 .front 219 .index - 1; 220 case RoundingMode.round: 221 assert(0, "unimplemented"); 222 case RoundingMode.ceil: 223 return tracePointPositions 224 .find!(pair => contigBPos <= pair.value) 225 .front 226 .index; 227 } 228 } 229 } 230 231 static struct Contig 232 { 233 id_t id; 234 coord_t length; 235 } 236 237 static enum maxScore = 2 ^^ 16; 238 static enum emptyFlags = Flags(); 239 240 id_t id; 241 Contig contigA; 242 Contig contigB; 243 Flags flags; 244 LocalAlignment[] localAlignments; 245 trace_point_t tracePointDistance; 246 247 static foreach(flagName; __traits(allMembers, Flag)) 248 { 249 mixin(format!(q"< 250 static alias %1$s = PhobosFlag!"%2$s"; 251 252 @property PhobosFlag!"%2$s" %2$s() pure const nothrow @trusted 253 { 254 return cast(PhobosFlag!"%2$s") flags.%2$s; 255 } 256 257 @property void %2$s(PhobosFlag!"%2$s" %2$s) pure nothrow 258 { 259 flags.%2$s = %2$s; 260 } 261 >")(flagName.capitalize, flagName)); 262 } 263 264 invariant 265 { 266 assert(localAlignments.length >= 1, "empty chain is forbidden"); 267 foreach (la; localAlignments) 268 { 269 assert(0 <= la.contigA.begin && la.contigA.begin < la.contigA.end 270 && (contigA.length == 0 || la.contigA.end <= contigA.length), "non-sense alignment of contigA"); 271 assert(0 <= la.contigB.begin && la.contigB.begin < la.contigB.end 272 && (contigB.length == 0 || la.contigB.end <= contigB.length), "non-sense alignment of contigB"); 273 274 assert(tracePointDistance == 0 || la.tracePoints.length > 0, "missing trace points"); 275 if (tracePointDistance > 0) 276 { 277 coord_t traceLength = la.tracePoints.map!(tp => tp.numBasePairs.to!coord_t).sum; 278 coord_t traceDiffs = la.tracePoints.map!(tp => tp.numDiffs.to!coord_t).sum; 279 280 // TODO remove logging if fixed in LAdump (issue #23) 281 if (la.numDiffs != traceDiffs) 282 { 283 debug logJsonDebug( 284 "contigA", contigA.id, 285 "contigB", contigB.id, 286 "la.numDiffs", la.numDiffs, 287 "traceDiffs", traceDiffs, 288 ); 289 } 290 // TODO make equality assertion if fixed in LAdump (issue #23) 291 assert(la.numDiffs <= traceDiffs, "missing trace points"); 292 assert(la.contigB.end - la.contigB.begin == traceLength, 293 "trace distance does not match alignment"); 294 } 295 } 296 } 297 298 unittest 299 { 300 with (LocalAlignment) 301 { 302 auto acZeroLength = AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, []); 303 auto ac1 = AlignmentChain(1, Contig(1, 10), Contig(1, 10), emptyFlags, 304 [LocalAlignment(Locus(1, 1), Locus(1, 9), 0)]); 305 auto ac2 = AlignmentChain(2, Contig(1, 10), Contig(1, 10), emptyFlags, 306 [LocalAlignment(Locus(1, 11), Locus(1, 9), 0)]); 307 auto ac3 = AlignmentChain(3, Contig(1, 10), Contig(1, 10), emptyFlags, 308 [LocalAlignment(Locus(1, 9), Locus(1, 1), 0)]); 309 auto ac4 = AlignmentChain(4, Contig(1, 10), Contig(1, 10), emptyFlags, 310 [LocalAlignment(Locus(1, 9), Locus(1, 11), 0)]); 311 auto acFine = AlignmentChain(5, Contig(1, 10), Contig(1, 10), 312 emptyFlags, [LocalAlignment(Locus(1, 9), Locus(1, 9), 0)]); 313 314 assertThrown!AssertError(acZeroLength.totalLength); 315 assertThrown!AssertError(ac1.totalLength); 316 assertThrown!AssertError(ac2.totalLength); 317 assertThrown!AssertError(ac3.totalLength); 318 assertThrown!AssertError(ac4.totalLength); 319 assertNotThrown!AssertError(acFine.totalLength); 320 } 321 } 322 323 @property ref const(LocalAlignment) first() const pure nothrow 324 { 325 return localAlignments[0]; 326 } 327 328 unittest 329 { 330 with (Complement) with (LocalAlignment) 331 { 332 auto firstLA = LocalAlignment(Locus(1, 9), Locus(1, 9), 0); 333 auto otherLA = LocalAlignment(Locus(9, 10), Locus(9, 10), 0); 334 auto ac = AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [firstLA, otherLA]); 335 336 assert(ac.first == firstLA); 337 } 338 } 339 340 @property ref const(LocalAlignment) last() const pure nothrow 341 { 342 return localAlignments[$ - 1]; 343 } 344 345 unittest 346 { 347 with (Complement) with (LocalAlignment) 348 { 349 auto lastLA = LocalAlignment(Locus(1, 9), Locus(1, 9), 0); 350 auto otherLA = LocalAlignment(Locus(9, 10), Locus(9, 10), 0); 351 auto ac = AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [otherLA, lastLA]); 352 353 assert(ac.last == lastLA); 354 } 355 } 356 357 PhobosFlag!"disabled" disableIf(lazy bool disable) pure 358 { 359 if (!flags.disabled) 360 { 361 flags.disabled = disable; 362 } 363 364 365 return disabled; 366 } 367 368 /// This alignment is called proper iff it starts and ends at a read boundary. 369 @property bool isProper() const pure nothrow 370 { 371 return ( 372 first.contigA.begin == 0 || 373 first.contigB.begin == 0 374 ) 375 && 376 ( 377 last.contigA.end == contigA.length || 378 last.contigB.end == contigB.length 379 ); 380 } 381 382 /** 383 Returns true if the aligned read `contigB` (with extensions on either 384 end) is fully contained in the reference `contigA`. 385 386 According to the following 'definition' `contigA` is fully contained 387 in `contigB` iff `x >= 0` and `y <= l_a`. 388 --- 389 0 x ua va y la 390 contigA |------------------+---+-+---+-+---+-------------------------| 391 / / / | | | \ \ \ 392 / / / | | | \ \ \ 393 / / / | | | \ \ \ 394 / / / | | | \ \ \ 395 contigB |---+------+---+------+---| 396 0 ub vb lb 397 398 x = ua - (ub - 0) = ua - ub 399 y = va + (lb - vb) 400 --- 401 */ 402 bool isFullyContained() const 403 { 404 if (first.contigB.begin > first.contigA.begin) 405 { 406 // x < 0; return early to avoid negative numbers in unsigned integers 407 return false; 408 } 409 410 auto x = first.contigA.begin - first.contigB.begin; 411 auto y = last.contigA.end + contigB.length - last.contigB.end; 412 413 return 0 <= x && y < contigA.length; 414 } 415 416 unittest 417 { 418 with (Complement) with (LocalAlignment) 419 { 420 auto la = LocalAlignment(Locus(30, 35), Locus(5, 10), 1); 421 auto ac = AlignmentChain(0, Contig(1, 50), Contig(1, 15), emptyFlags, [la]); 422 423 // read with extension align an contigA from 25 to 40 424 assert(ac.isFullyContained); 425 } 426 427 with (Complement) with (LocalAlignment) 428 { 429 auto la1 = LocalAlignment(Locus(10, 20), Locus(5, 10), 1); 430 auto la2 = LocalAlignment(Locus(30, 40), Locus(5, 10), 1); 431 auto ac = AlignmentChain(0, Contig(1, 50), Contig(1, 15), emptyFlags, [la1, la2]); 432 433 // read with extension align an contigA from 5 to 45 434 assert(ac.isFullyContained); 435 } 436 437 with (Complement) with (LocalAlignment) 438 { 439 auto la = LocalAlignment(Locus(0, 10), Locus(5, 10), 1); 440 auto ac = AlignmentChain(0, Contig(1, 50), Contig(1, 15), emptyFlags, [la]); 441 442 // read with extension align an contigA from -5 to 15 443 assert(!ac.isFullyContained); 444 } 445 446 with (Complement) with (LocalAlignment) 447 { 448 auto la = LocalAlignment(Locus(40, 50), Locus(5, 10), 1); 449 auto ac = AlignmentChain(0, Contig(1, 50), Contig(1, 15), emptyFlags, [la]); 450 451 // read with extension align an contigA from 35 to 55 452 assert(!ac.isFullyContained); 453 } 454 455 with (Complement) with (LocalAlignment) 456 { 457 auto la1 = LocalAlignment(Locus(0, 20), Locus(5, 10), 1); 458 auto la2 = LocalAlignment(Locus(30, 50), Locus(5, 10), 1); 459 auto ac = AlignmentChain(0, Contig(1, 50), Contig(1, 15), emptyFlags, [la1, la2]); 460 461 // read with extension align an contigA from -5 to 55 462 assert(!ac.isFullyContained); 463 } 464 } 465 466 @property size_t totalLength() const pure 467 { 468 return last.contigA.end - first.contigA.begin; 469 } 470 471 @property size_t coveredBases(string contig)() const pure 472 { 473 return localAlignments.map!("a." ~ contig ~ ".end - a." ~ contig ~ ".begin").sum; 474 } 475 476 unittest 477 { 478 with (Complement) with (LocalAlignment) 479 { 480 auto la1 = LocalAlignment(Locus(1, 3), Locus(1, 3), 1); 481 auto la2 = LocalAlignment(Locus(5, 10), Locus(5, 10), 2); 482 auto ac = AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [la1, la2]); 483 484 assert(ac.totalLength == 9); 485 } 486 } 487 488 @property size_t totalDiffs() const pure 489 { 490 return localAlignments.map!"a.numDiffs".sum; 491 } 492 493 unittest 494 { 495 with (Complement) with (LocalAlignment) 496 { 497 auto la1 = LocalAlignment(Locus(1, 3), Locus(1, 3), 1); 498 auto la2 = LocalAlignment(Locus(5, 10), Locus(5, 10), 2); 499 auto ac = AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [la1, la2]); 500 501 assert(ac.totalDiffs == 3); 502 } 503 } 504 505 @property size_t totalGapLength() const pure 506 { 507 return localAlignments 508 .chunks(2) 509 .map!(las => las.length < 2 ? 0 : absdiff(las[1].contigA.begin, las[0].contigA.end)) 510 .sum; 511 } 512 513 unittest 514 { 515 with (Complement) with (LocalAlignment) 516 { 517 auto la1 = LocalAlignment(Locus(1, 3), Locus(1, 3), 1); 518 auto la2 = LocalAlignment(Locus(5, 10), Locus(5, 10), 2); 519 auto ac = AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [la1, la2]); 520 521 assert(ac.totalGapLength == 2); 522 } 523 } 524 525 @property size_t numMatchingBps() const pure 526 { 527 return totalLength - (totalDiffs + totalGapLength); 528 } 529 530 unittest 531 { 532 with (Complement) with (LocalAlignment) 533 { 534 auto la1 = LocalAlignment(Locus(1, 3), Locus(1, 3), 1); 535 auto la2 = LocalAlignment(Locus(5, 10), Locus(5, 10), 2); 536 auto ac = AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [la1, la2]); 537 538 assert(ac.numMatchingBps == 9 - (3 + 2)); 539 } 540 } 541 542 @property size_t score() const pure 543 { 544 return numMatchingBps * maxScore / totalLength; 545 } 546 547 unittest 548 { 549 with (Complement) with (LocalAlignment) 550 { 551 auto la1 = LocalAlignment(Locus(1, 3), Locus(1, 3), 1); 552 auto la2 = LocalAlignment(Locus(5, 10), Locus(5, 10), 2); 553 auto ac = AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [la1, la2]); 554 555 assert(ac.score == 4 * maxScore / 9); 556 } 557 } 558 559 int compareIds(ref const AlignmentChain other) const pure nothrow 560 { 561 return cmpLexicographically!( 562 typeof(this), 563 ac => ac.contigA.id, 564 ac => ac.contigB.id, 565 )(this, other); 566 } 567 568 unittest 569 { 570 with (Complement) with (LocalAlignment) 571 { 572 auto la = LocalAlignment(Locus(0, 1), Locus(0, 1), 1); 573 auto acs = [ 574 AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [la]), 575 AlignmentChain(1, Contig(1, 10), Contig(2, 10), emptyFlags, [la]), 576 AlignmentChain(2, Contig(2, 10), Contig(1, 10), emptyFlags, [la]), 577 AlignmentChain(3, Contig(2, 10), Contig(2, 10), emptyFlags, [la]), 578 ]; 579 580 foreach (i; 0 .. acs.length) 581 foreach (j; 0 .. acs.length) 582 { 583 auto compareValue = acs[i].compareIds(acs[j]); 584 alias errorMessage = (cmp) => format!"expected %s and %s to compare %s 0 but got %d"( 585 acs[i], acs[j], cmp, compareValue); 586 587 if (i < j) 588 assert(compareValue < 0, errorMessage("<")); 589 else if (i > j) 590 assert(compareValue > 0, errorMessage(">")); 591 else 592 assert(compareValue == 0, errorMessage("==")); 593 } 594 } 595 } 596 597 int opCmp(ref const AlignmentChain other) const pure nothrow 598 { 599 return cmpLexicographically!( 600 typeof(this), 601 ac => ac.contigA.id, 602 ac => ac.contigB.id, 603 ac => ac.first.contigA.begin, 604 ac => ac.first.contigB.begin, 605 ac => ac.last.contigA.end, 606 ac => ac.last.contigB.end, 607 )(this, other); 608 } 609 610 unittest 611 { 612 // see compareIds 613 with (Complement) with (LocalAlignment) 614 { 615 auto la = LocalAlignment(Locus(0, 1), Locus(0, 1), 1); 616 auto acs = [ 617 AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [la]), 618 AlignmentChain(1, Contig(1, 10), Contig(2, 10), emptyFlags, [la]), 619 AlignmentChain(2, Contig(2, 10), Contig(1, 10), emptyFlags, [la]), 620 AlignmentChain(3, Contig(2, 10), Contig(2, 10), emptyFlags, [la]), 621 ]; 622 623 foreach (i; 0 .. acs.length) 624 foreach (j; 0 .. acs.length) 625 { 626 auto compareValue = acs[i].opCmp(acs[j]); 627 alias errorMessage = (cmp) => format!"expected %s and %s to compare %s 0 but got %d"( 628 acs[i], acs[j], cmp, compareValue); 629 630 if (i < j) 631 assert(compareValue < 0, errorMessage("<")); 632 else if (i > j) 633 assert(compareValue > 0, errorMessage(">")); 634 else 635 assert(compareValue == 0, errorMessage("==")); 636 } 637 } 638 639 // test non-id-related comparison 640 with (Complement) with (LocalAlignment) 641 { 642 auto acs = [ 643 AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [ 644 LocalAlignment(Locus(0, 2), Locus(0, 2), 1), 645 LocalAlignment(Locus(3, 5), Locus(3, 5), 1) 646 ]), 647 AlignmentChain(1, Contig(1, 10), Contig(1, 10), emptyFlags, [ 648 LocalAlignment(Locus(0, 2), Locus(1, 2), 1), 649 LocalAlignment(Locus(3, 5), Locus(3, 5), 1) 650 ]), 651 AlignmentChain(2, Contig(1, 10), Contig(1, 10), emptyFlags, [ 652 LocalAlignment(Locus(1, 2), Locus(0, 2), 1), 653 LocalAlignment(Locus(3, 5), Locus(3, 5), 1) 654 ]), 655 AlignmentChain(3, Contig(1, 10), Contig(1, 10), emptyFlags, [ 656 LocalAlignment(Locus(1, 2), Locus(1, 2), 1), 657 LocalAlignment(Locus(3, 5), Locus(3, 5), 1) 658 ]), 659 AlignmentChain(4, Contig(1, 10), Contig(1, 10), emptyFlags, [ 660 LocalAlignment(Locus(1, 2), Locus(1, 2), 1), 661 LocalAlignment(Locus(3, 5), Locus(3, 6), 1) 662 ]), 663 AlignmentChain(5, Contig(1, 10), Contig(1, 10), emptyFlags, [ 664 LocalAlignment(Locus(1, 2), Locus(1, 2), 1), 665 LocalAlignment(Locus(3, 6), Locus(3, 5), 1) 666 ]), 667 AlignmentChain(6, Contig(1, 10), Contig(1, 10), emptyFlags, [ 668 LocalAlignment(Locus(1, 2), Locus(1, 2), 1), 669 LocalAlignment(Locus(3, 6), Locus(3, 6), 1) 670 ]), 671 ]; 672 673 foreach (i; 0 .. acs.length) 674 foreach (j; 0 .. acs.length) 675 { 676 auto compareValue = acs[i].opCmp(acs[j]); 677 alias errorMessage = (cmp) => format!"expected %s and %s to compare %s 0 but got %d"( 678 acs[i], acs[j], cmp, compareValue); 679 680 if (i < j) 681 assert(compareValue < 0, errorMessage("<")); 682 else if (i > j) 683 assert(compareValue > 0, errorMessage(">")); 684 else 685 assert(compareValue == 0, errorMessage("==")); 686 } 687 } 688 } 689 690 TranslatedTracePoint translateTracePoint(string contig = "contigA")( 691 in coord_t contigPos, 692 RoundingMode roundingMode, 693 ) const pure if (contig.among("contigA", "contigB")) 694 { 695 bool coversContigPos(in AlignmentChain.LocalAlignment localAlignment) 696 { 697 return mixin(`localAlignment.` ~ contig ~ `.begin <= contigPos 698 && contigPos <= localAlignment.` ~ contig ~ `.end`); 699 } 700 701 enum otherContig = contig == "contigA" ? "contigB" : "contigA"; 702 auto coveringLocalAlignments = localAlignments.find!coversContigPos; 703 enforce!Exception( 704 coveringLocalAlignments.length > 0, 705 "cannot translate coordinate due to lack of alignment coverage", 706 ); 707 auto coveringLocalAlignment = coveringLocalAlignments[0]; 708 709 return coveringLocalAlignment.translateTracePoint!contig( 710 contigPos, 711 tracePointDistance, 712 roundingMode, 713 ); 714 } 715 716 unittest 717 { 718 alias Locus = LocalAlignment.Locus; 719 alias TracePoint = LocalAlignment.TracePoint; 720 enum tracePointDistance = 100; 721 auto ac = AlignmentChain( 722 0, 723 Contig(1, 2584), 724 Contig(58024, 10570), 725 Flags(Flag.complement), 726 [LocalAlignment( 727 Locus(579, 2584), 728 Locus(0, 2158), 729 292, 730 [ 731 TracePoint( 2, 23), 732 TracePoint(11, 109), 733 TracePoint(13, 109), 734 TracePoint(15, 107), 735 TracePoint(18, 107), 736 TracePoint(14, 103), 737 TracePoint(16, 106), 738 TracePoint(17, 106), 739 TracePoint( 9, 106), 740 TracePoint(14, 112), 741 TracePoint(16, 105), 742 TracePoint(16, 114), 743 TracePoint(10, 103), 744 TracePoint(14, 110), 745 TracePoint(15, 110), 746 TracePoint(15, 101), 747 TracePoint(17, 108), 748 TracePoint(17, 109), 749 TracePoint(15, 111), 750 TracePoint(17, 111), 751 TracePoint(11, 88), 752 ], 753 )], 754 tracePointDistance, 755 ); 756 757 assert(ac.translateTracePoint(579, RoundingMode.floor) == tuple(579, 0)); 758 assert(ac.translateTracePoint(599, RoundingMode.floor) == tuple(579, 0)); 759 assert(ac.translateTracePoint(600, RoundingMode.floor) == tuple(600, 23)); 760 assert(ac.translateTracePoint(699, RoundingMode.floor) == tuple(600, 23)); 761 assert( 762 ac.translateTracePoint(699, RoundingMode.ceil) 763 == 764 ac.translateTracePoint(701, RoundingMode.floor) 765 ); 766 assert(ac.translateTracePoint(700, RoundingMode.floor) == tuple(700, 23 + 109)); 767 assert(ac.translateTracePoint(799, RoundingMode.floor) == tuple(700, 23 + 109)); 768 assert(ac.translateTracePoint(800, RoundingMode.floor) == tuple(800, 23 + 109 + 109)); 769 assert(ac.translateTracePoint(899, RoundingMode.floor) == tuple(800, 23 + 109 + 109)); 770 assert(ac.translateTracePoint(2583, RoundingMode.floor) == tuple(2500, 2070)); 771 assert(ac.translateTracePoint(2584, RoundingMode.floor) == tuple(2584, 2158)); 772 assertThrown!Exception(ac.translateTracePoint(578, RoundingMode.floor)); 773 assertThrown!Exception(ac.translateTracePoint(2585, RoundingMode.floor)); 774 } 775 } 776 777 bool idsPred(in AlignmentChain ac1, in AlignmentChain ac2) pure 778 { 779 auto cmpValue = ac1.compareIds(ac2); 780 781 return 0 != cmpValue && cmpValue < 0; 782 } 783 784 unittest 785 { 786 with (AlignmentChain) with (LocalAlignment) with (Complement) 787 { 788 auto la = LocalAlignment(Locus(0, 1), Locus(0, 1), 1); 789 auto acs = [ 790 AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [la]), 791 AlignmentChain(1, Contig(1, 10), Contig(2, 10), emptyFlags, [la]), 792 AlignmentChain(2, Contig(2, 10), Contig(1, 10), emptyFlags, [la]), 793 AlignmentChain(3, Contig(2, 10), Contig(2, 10), emptyFlags, [la]), 794 ]; 795 796 foreach (i; 0 .. acs.length) 797 foreach (j; 0 .. acs.length) 798 { 799 auto compareValue = idsPred(acs[i], acs[j]); 800 alias errorMessage = (expValue) => format!"expected idsPred(%s, %s) to be %s but got %s"( 801 acs[i], acs[j], expValue, compareValue); 802 803 if (i < j) 804 assert(compareValue, errorMessage(true)); 805 else 806 assert(!compareValue, errorMessage(false)); 807 } 808 } 809 } 810 811 auto haveEqualIds(in AlignmentChain ac1, in AlignmentChain ac2) pure 812 { 813 auto cmpValue = ac1.compareIds(ac2); 814 815 return 0 == cmpValue; 816 } 817 818 unittest 819 { 820 with (AlignmentChain) with (Complement) with (LocalAlignment) 821 { 822 auto sortedTestChains = [ 823 AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [LocalAlignment(Locus(0, 10), Locus(0, 1), 0)]), 824 AlignmentChain(1, Contig(1, 10), Contig(2, 20), emptyFlags, [LocalAlignment(Locus(0, 10), Locus(0, 2), 0)]), 825 AlignmentChain(2, Contig(1, 10), Contig(3, 30), emptyFlags, [LocalAlignment(Locus(0, 10), Locus(0, 3), 0)]), 826 AlignmentChain(3, Contig(2, 20), Contig(1, 10), emptyFlags, [LocalAlignment(Locus(0, 20), Locus(0, 4), 0)]), 827 AlignmentChain(4, Contig(2, 20), Contig(1, 10), emptyFlags, [LocalAlignment(Locus(0, 20), Locus(0, 5), 0)]), 828 AlignmentChain(5, Contig(2, 20), Contig(3, 30), emptyFlags, [LocalAlignment(Locus(0, 20), Locus(0, 6), 0)]), 829 AlignmentChain(6, Contig(3, 30), Contig(1, 10), emptyFlags, [LocalAlignment(Locus(0, 30), Locus(0, 7), 0)]), 830 AlignmentChain(7, Contig(3, 30), Contig(2, 20), emptyFlags, [LocalAlignment(Locus(0, 30), Locus(0, 8), 0)]), 831 AlignmentChain(8, Contig(3, 30), Contig(3, 30), emptyFlags, [LocalAlignment(Locus(0, 30), Locus(0, 9), 0)]), 832 ]; 833 834 assert(sortedTestChains.chunkBy!haveEqualIds.equal!equal([ 835 sortedTestChains[0..1], 836 sortedTestChains[1..2], 837 sortedTestChains[2..3], 838 sortedTestChains[3..5], 839 sortedTestChains[5..6], 840 sortedTestChains[6..7], 841 sortedTestChains[7..8], 842 sortedTestChains[8..9], 843 ])); 844 } 845 } 846 847 auto equalIdsRange(in AlignmentChain[] acList, in id_t contigAID, in id_t contigBID) pure 848 { 849 assert(isSorted!idsPred(acList)); 850 AlignmentChain needle = { 851 contigA: AlignmentChain.Contig(contigAID, 1), 852 contigB: AlignmentChain.Contig(contigBID, 1), 853 localAlignments: [AlignmentChain.LocalAlignment(AlignmentChain.LocalAlignment.Locus(0, 1), AlignmentChain.LocalAlignment.Locus(0, 1))], 854 }; 855 856 return acList.assumeSorted!idsPred.equalRange(needle); 857 } 858 859 unittest 860 { 861 with (AlignmentChain) with (Complement) with (LocalAlignment) 862 { 863 auto sortedTestChains = [ 864 AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [LocalAlignment(Locus(0, 1), Locus(0, 1), 0)]), 865 AlignmentChain(1, Contig(1, 10), Contig(2, 20), emptyFlags, [LocalAlignment(Locus(0, 2), Locus(0, 2), 0)]), 866 AlignmentChain(2, Contig(1, 10), Contig(3, 30), emptyFlags, [LocalAlignment(Locus(0, 3), Locus(0, 3), 0)]), 867 AlignmentChain(3, Contig(2, 20), Contig(1, 10), emptyFlags, [LocalAlignment(Locus(0, 4), Locus(0, 4), 0)]), 868 AlignmentChain(4, Contig(2, 20), Contig(1, 10), emptyFlags, [LocalAlignment(Locus(0, 5), Locus(0, 5), 0)]), 869 AlignmentChain(5, Contig(2, 20), Contig(3, 30), emptyFlags, [LocalAlignment(Locus(0, 6), Locus(0, 6), 0)]), 870 AlignmentChain(6, Contig(3, 30), Contig(1, 10), emptyFlags, [LocalAlignment(Locus(0, 7), Locus(0, 7), 0)]), 871 AlignmentChain(7, Contig(3, 30), Contig(2, 20), emptyFlags, [LocalAlignment(Locus(0, 8), Locus(0, 8), 0)]), 872 AlignmentChain(8, Contig(3, 30), Contig(3, 30), emptyFlags, [LocalAlignment(Locus(0, 9), Locus(0, 9), 0)]), 873 ]; 874 875 assert(sortedTestChains.equalIdsRange(1, 1).equal(sortedTestChains[0 .. 1])); 876 assert(sortedTestChains.equalIdsRange(2, 1).equal(sortedTestChains[3 .. 5])); 877 assert(sortedTestChains.equalIdsRange(3, 1).equal(sortedTestChains[6 .. 7])); 878 assert(sortedTestChains.equalIdsRange(42, 1337).equal(sortedTestChains[0 .. 0])); 879 } 880 } 881 882 /** 883 Returns true iff ac1 begins before ac2 on the given contig (in). 884 */ 885 bool isBefore(string contig)(in AlignmentChain ac1, in AlignmentChain ac2) pure 886 if (contig == "contigA" || contig == "contigB") 887 { 888 assert(__traits(getMember, ac1, contig) == __traits(getMember, ac2, contig), 889 "alignment chains do not belong to the same contig"); 890 891 static if (contig == "contigA") 892 { 893 return __traits(getMember, ac1.first, contig).begin < __traits(getMember, 894 ac2.first, contig).begin; 895 } 896 else 897 { 898 899 } 900 } 901 902 unittest 903 { 904 with (AlignmentChain) with (Flag) with (LocalAlignment) 905 { 906 auto acs = [ 907 AlignmentChain(0, Contig(1, 10), Contig(1, 10), emptyFlags, [LocalAlignment(Locus(1, 6), Locus(0, 1), 0)]), 908 AlignmentChain(1, Contig(1, 10), Contig(2, 10), Flags(complement), [LocalAlignment(Locus(2, 6), Locus(0, 1), 0)]), 909 AlignmentChain(2, Contig(1, 10), Contig(3, 10), emptyFlags, [LocalAlignment(Locus(3, 6), Locus(0, 1), 0)]), 910 AlignmentChain(3, Contig(1, 10), Contig(4, 10), Flags(complement), [LocalAlignment(Locus(4, 6), Locus(0, 1), 0)]), 911 AlignmentChain(4, Contig(1, 10), Contig(5, 10), emptyFlags, [LocalAlignment(Locus(5, 6), Locus(0, 1), 0)]), 912 ]; 913 914 foreach (i; 0 .. acs.length) 915 foreach (j; 0 .. acs.length) 916 { 917 auto compareValue = isBefore!"contigA"(acs[i], acs[j]); 918 alias errorMessage = (expValue) => format!"expected isBefore!\"contigA\"(%s, %s) to be %s but got %s"( 919 acs[i], acs[j], expValue, compareValue); 920 921 if (i < j) 922 assert(compareValue, errorMessage(true)); 923 else 924 assert(!compareValue, errorMessage(false)); 925 } 926 } 927 } 928 929 /// Calculates the coverage of the contigs by the given alignments. Only 930 /// contigs involved in the alignments are regarded. 931 double alignmentCoverage(in AlignmentChain[] alignments) 932 { 933 static double coveredBases(T)(T alignmentsPerContig) 934 { 935 return alignmentsPerContig.map!(ac => ac.coveredBases!"contigA").sum; 936 } 937 938 auto alignmentsPerContig = alignments.chunkBy!"a.contigA.id == b.contigA.id"; 939 auto totalContigLength = alignmentsPerContig 940 .save 941 .map!"a.front.contigA.length" 942 .sum; 943 auto totalCoveredBases = alignmentsPerContig 944 .save 945 .map!coveredBases 946 .sum; 947 948 return totalCoveredBases.to!double / totalContigLength.to!double; 949 } 950 951 unittest 952 { 953 auto alignments = [ 954 AlignmentChain( 955 0, 956 AlignmentChain.Contig(1, 100), 957 AlignmentChain.Contig(1, 50), 958 AlignmentChain.emptyFlags, 959 [ 960 AlignmentChain.LocalAlignment( 961 AlignmentChain.LocalAlignment.Locus(0, 10), 962 AlignmentChain.LocalAlignment.Locus(40, 50), 963 0 964 ), 965 ], 966 ), 967 AlignmentChain( 968 1, 969 AlignmentChain.Contig(1, 100), 970 AlignmentChain.Contig(2, 30), 971 AlignmentChain.Flags(AlignmentChain.Flag.complement), 972 [ 973 AlignmentChain.LocalAlignment( 974 AlignmentChain.LocalAlignment.Locus(10, 20), 975 AlignmentChain.LocalAlignment.Locus(0, 10), 976 0 977 ), 978 AlignmentChain.LocalAlignment( 979 AlignmentChain.LocalAlignment.Locus(30, 40), 980 AlignmentChain.LocalAlignment.Locus(20, 30), 981 0 982 ), 983 ], 984 ), 985 AlignmentChain( 986 2, 987 AlignmentChain.Contig(1, 100), 988 AlignmentChain.Contig(3, 20), 989 AlignmentChain.emptyFlags, 990 [ 991 AlignmentChain.LocalAlignment( 992 AlignmentChain.LocalAlignment.Locus(40, 60), 993 AlignmentChain.LocalAlignment.Locus(0, 20), 994 0 995 ), 996 ], 997 ), 998 AlignmentChain( 999 3, 1000 AlignmentChain.Contig(1, 100), 1001 AlignmentChain.Contig(4, 50), 1002 AlignmentChain.Flags(AlignmentChain.Flag.complement), 1003 [ 1004 AlignmentChain.LocalAlignment( 1005 AlignmentChain.LocalAlignment.Locus(70, 100), 1006 AlignmentChain.LocalAlignment.Locus(0, 30), 1007 0 1008 ), 1009 ], 1010 ), 1011 ]; 1012 1013 assert(alignmentCoverage(alignments) == 80.0 / 100.0); 1014 } 1015 1016 /// Type of the read alignment. 1017 enum AlignmentLocationSeed : ubyte 1018 { 1019 front, 1020 back, 1021 } 1022 1023 /** 1024 An alignment chain with a "seed", ie. hint for it's intended location. 1025 */ 1026 struct SeededAlignment 1027 { 1028 AlignmentChain alignment; 1029 AlignmentLocationSeed seed; 1030 1031 alias alignment this; 1032 1033 invariant 1034 { 1035 assert(seed != AlignmentLocationSeed.front || isFrontExtension(alignment)); 1036 assert(seed != AlignmentLocationSeed.back || isBackExtension(alignment)); 1037 } 1038 1039 int opCmp(ref const SeededAlignment other) const pure nothrow 1040 { 1041 return cmpLexicographically!( 1042 typeof(this), 1043 "a.alignment", 1044 "a.seed", 1045 )(this, other); 1046 } 1047 1048 static InputRange!SeededAlignment from(AlignmentChain alignmentChain) 1049 { 1050 alias Seed = AlignmentLocationSeed; 1051 1052 if (isFrontExtension(alignmentChain) && isBackExtension(alignmentChain)) 1053 { 1054 return inputRangeObject(only( 1055 SeededAlignment(alignmentChain, Seed.front), 1056 SeededAlignment(alignmentChain, Seed.back), 1057 )); 1058 } 1059 else if (isFrontExtension(alignmentChain) && !isBackExtension(alignmentChain)) 1060 { 1061 return inputRangeObject(only(SeededAlignment(alignmentChain, Seed.front))); 1062 } 1063 else if (isBackExtension(alignmentChain) && !isFrontExtension(alignmentChain)) 1064 { 1065 return inputRangeObject(only(SeededAlignment(alignmentChain, Seed.back))); 1066 } 1067 else 1068 { 1069 return inputRangeObject(takeNone!(SeededAlignment[])); 1070 } 1071 } 1072 } 1073 1074 /// Type of the read alignment. 1075 static enum ReadAlignmentType 1076 { 1077 front = 0, 1078 gap = 1, 1079 back = 2, 1080 } 1081 1082 private bool isExtension(in AlignmentChain alignment) pure nothrow 1083 { 1084 return isFrontExtension(alignment) ^ isBackExtension(alignment); 1085 } 1086 1087 private bool isFrontExtension(in AlignmentChain alignment) pure nothrow 1088 { 1089 auto readExtensionLength = alignment.first.contigB.begin; 1090 auto referenceExtensionLength = alignment.first.contigA.begin; 1091 1092 return readExtensionLength > referenceExtensionLength; 1093 } 1094 1095 private bool isBackExtension(in AlignmentChain alignment) pure nothrow 1096 { 1097 auto readExtensionLength = alignment.contigB.length - alignment.last.contigB.end; 1098 auto referenceExtensionLength = alignment.contigA.length - alignment.last.contigA.end; 1099 1100 return readExtensionLength > referenceExtensionLength; 1101 } 1102 1103 /** 1104 Alignment of a read against the reference. This is either one or two 1105 alignment chains which belong to the same read and one or two reference 1106 contig(s). 1107 */ 1108 struct ReadAlignment 1109 { 1110 SeededAlignment[2] _alignments; 1111 size_t _length; 1112 1113 this(SeededAlignment[] alignments...) 1114 { 1115 if (1 <= alignments.length && alignments.length <= 2) 1116 { 1117 this._length = alignments.length; 1118 this._alignments[0 .. _length] = alignments[0 .. _length]; 1119 } 1120 else 1121 { 1122 logJsonDebug( 1123 "info", format!"creating invalid read alignment with %d local alignments"(alignments.length), 1124 "alignments", alignments.toJson, 1125 ); 1126 1127 this._length = 0; 1128 } 1129 } 1130 1131 @property size_t length() pure const nothrow 1132 { 1133 return _length; 1134 } 1135 1136 @property size_t opDollar() pure const nothrow 1137 { 1138 return _length; 1139 } 1140 1141 inout(SeededAlignment[]) opIndex() inout pure nothrow 1142 { 1143 return _alignments[0 .. _length]; 1144 } 1145 1146 inout(SeededAlignment) opIndex(T)(T idx) inout pure nothrow 1147 { 1148 return this[][idx]; 1149 } 1150 1151 unittest 1152 { 1153 auto ac1 = SeededAlignment(AlignmentChain(1), AlignmentLocationSeed.front); 1154 auto ac2 = SeededAlignment(AlignmentChain(2), AlignmentLocationSeed.back); 1155 1156 auto ra1 = ReadAlignment(ac1); 1157 1158 assert(ra1.length == 1); 1159 assert(ra1[] == [ac1]); 1160 assert(ra1[0] == ac1); 1161 assert(ra1[$ - 1] == ac1); 1162 1163 auto ra2 = ReadAlignment(ac1, ac2); 1164 1165 assert(ra2.length == 2); 1166 assert(ra2[] == [ac1, ac2]); 1167 assert(ra2[0] == ac1); 1168 assert(ra2[1] == ac2); 1169 assert(ra2[$ - 1] == ac2); 1170 } 1171 1172 /** 1173 If readAlignment is a gap return true iff the first alignment's 1174 `contigA.id` is lower than the second alignment's; otherwise returns 1175 true. 1176 */ 1177 @property bool isInOrder() const pure nothrow 1178 { 1179 return !isGap || _alignments[0].contigA.id < _alignments[1].contigA.id; 1180 } 1181 1182 ReadAlignment getInOrder() pure nothrow 1183 { 1184 if (isGap && !isInOrder) 1185 { 1186 swap(_alignments[0], _alignments[1]); 1187 } 1188 1189 return this; 1190 } 1191 1192 /** 1193 Returns true iff the read alignment is valid, ie. it is either an 1194 extension or gap. 1195 */ 1196 @property bool isValid() const pure nothrow 1197 { 1198 return isExtension ^ isGap; 1199 } 1200 1201 /** 1202 Get the type of the read alignment. 1203 1204 See_Also: `isFrontExtension`, `isBackExtension`, `isGap` 1205 */ 1206 @property ReadAlignmentType type() const pure nothrow 1207 { 1208 assert(isValid, "invalid read alignment"); 1209 1210 if (isGap) 1211 { 1212 return ReadAlignmentType.gap; 1213 } 1214 else if (isFrontExtension) 1215 { 1216 return ReadAlignmentType.front; 1217 } 1218 else 1219 { 1220 return ReadAlignmentType.back; 1221 } 1222 } 1223 1224 /** 1225 Returns true iff the read alignment is an extension, ie. it is a front or 1226 back extension. 1227 1228 See_Also: `isFrontExtension`, `isBackExtension` 1229 */ 1230 @property bool isExtension() const pure nothrow 1231 { 1232 // A single SeededAlignment is always a valid extension. 1233 return _length == 1; 1234 } 1235 1236 /** 1237 Returns true iff the read alignment is an front extension, ie. it is an 1238 extension and reaches over the front of the reference contig. 1239 1240 --- 1241 Case 1 (complement alignment): 1242 1243 0 rx 1244 ref |--+->-+->-->-->--| 1245 | | | 1246 read |--<--<--<-+-<-+--| 1247 0 ax 1248 1249 Case 2 (non-complement alignment): 1250 1251 0 rx 1252 ref |--+->-+->-->-->--| 1253 | | | 1254 read |-->-->-->-+->-+--| 1255 0 ax 1256 --- 1257 */ 1258 @property bool isFrontExtension() const pure nothrow 1259 { 1260 return _length == 1 && _alignments[0].seed == AlignmentLocationSeed.front; 1261 } 1262 1263 /** 1264 Returns true iff the read alignment is an back extension, ie. it is an 1265 extension and reaches over the back of the reference contig. 1266 1267 --- 1268 Case 1 (complement alignment): 1269 1270 0 ry lr 1271 ref |-->-->-->-+->-+--| 1272 | | | 1273 read |--+-<-+-<--<--<--| 1274 0 ay la 1275 1276 Case 2 (non-complement alignment): 1277 1278 0 ry lr 1279 ref |-->-->-->-+->-+--| 1280 | | | 1281 read |--+->-+->-->-->--| 1282 0 ay la 1283 --- 1284 */ 1285 @property bool isBackExtension() const pure nothrow 1286 { 1287 return _length == 1 && _alignments[0].seed == AlignmentLocationSeed.back; 1288 } 1289 1290 /** 1291 Returns true iff the read alignment spans a gap, ie. two alignments of 1292 the same read on different reference contigs are involved. 1293 */ 1294 @property bool isGap() const pure nothrow 1295 { 1296 return _length == 2 && 1297 _alignments[0].contigA.id != _alignments[1].contigA.id && 1298 _alignments[0].contigB.id == _alignments[1].contigB.id; 1299 } 1300 1301 /** 1302 Returns true iff the read alignment spans a gap and the flanking 1303 contigs have in the same orientation (according to this alignment). 1304 */ 1305 @property bool isParallel() const pure nothrow 1306 { 1307 return isGap && 1308 _alignments[0].seed != _alignments[1].seed && 1309 _alignments[0].complement == _alignments[1].complement; 1310 } 1311 1312 /** 1313 Returns true iff the read alignment spans a gap and the flanking 1314 contigs have in different orientation (according to this alignment). 1315 */ 1316 @property bool isAntiParallel() const pure nothrow 1317 { 1318 return isGap && 1319 _alignments[0].seed == _alignments[1].seed && 1320 _alignments[0].complement != _alignments[1].complement; 1321 } 1322 1323 unittest 1324 { 1325 with (AlignmentChain) with (LocalAlignment) with (Flag) 1326 { 1327 auto testData = [ 1328 // TODO drop this case? 1329 //"innerAlignment": ReadAlignment( 1330 // SeededAlignment.from(AlignmentChain( 1331 // 1, 1332 // Contig(1, 100), 1333 // Contig(1, 10), 1334 // no, 1335 // [ 1336 // LocalAlignment( 1337 // Locus(10, 11), 1338 // Locus(0, 1), 1339 // 0, 1340 // ), 1341 // LocalAlignment( 1342 // Locus(19, 20), 1343 // Locus(9, 10), 1344 // 0, 1345 // ), 1346 // ], 1347 // )).front, 1348 //), 1349 // TODO drop this case? 1350 //"innerAlignmentComplement": ReadAlignment( 1351 // SeededAlignment.from(AlignmentChain( 1352 // 2, 1353 // Contig(1, 100), 1354 // Contig(1, 10), 1355 // yes, 1356 // [ 1357 // LocalAlignment( 1358 // Locus(10, 11), 1359 // Locus(0, 1), 1360 // 0, 1361 // ), 1362 // LocalAlignment( 1363 // Locus(19, 20), 1364 // Locus(9, 10), 1365 // 0, 1366 // ), 1367 // ], 1368 // )).front, 1369 //), 1370 "frontExtension": ReadAlignment( 1371 SeededAlignment.from(AlignmentChain( 1372 3, 1373 Contig(1, 100), 1374 Contig(1, 10), 1375 emptyFlags, 1376 [ 1377 LocalAlignment( 1378 Locus(2, 3), 1379 Locus(5, 6), 1380 0, 1381 ), 1382 LocalAlignment( 1383 Locus(5, 6), 1384 Locus(9, 10), 1385 0, 1386 ), 1387 ], 1388 )).front, 1389 ), 1390 "frontExtensionComplement": ReadAlignment( 1391 SeededAlignment.from(AlignmentChain( 1392 4, 1393 Contig(1, 100), 1394 Contig(1, 10), 1395 Flags(complement), 1396 [ 1397 LocalAlignment( 1398 Locus(2, 3), 1399 Locus(5, 6), 1400 0, 1401 ), 1402 LocalAlignment( 1403 Locus(5, 6), 1404 Locus(9, 10), 1405 0, 1406 ), 1407 ], 1408 )).front, 1409 ), 1410 "backExtension": ReadAlignment( 1411 SeededAlignment.from(AlignmentChain( 1412 5, 1413 Contig(1, 100), 1414 Contig(1, 10), 1415 emptyFlags, 1416 [ 1417 LocalAlignment( 1418 Locus(94, 95), 1419 Locus(0, 1), 1420 0, 1421 ), 1422 LocalAlignment( 1423 Locus(97, 98), 1424 Locus(4, 5), 1425 0, 1426 ), 1427 ], 1428 )).front, 1429 ), 1430 "backExtensionComplement": ReadAlignment( 1431 SeededAlignment.from(AlignmentChain( 1432 6, 1433 Contig(1, 100), 1434 Contig(1, 10), 1435 Flags(complement), 1436 [ 1437 LocalAlignment( 1438 Locus(94, 95), 1439 Locus(0, 1), 1440 0, 1441 ), 1442 LocalAlignment( 1443 Locus(97, 98), 1444 Locus(4, 5), 1445 0, 1446 ), 1447 ], 1448 )).front, 1449 ), 1450 "gapEnd2Front": ReadAlignment( 1451 SeededAlignment.from(AlignmentChain( 1452 7, 1453 Contig(1, 100), 1454 Contig(1, 10), 1455 emptyFlags, 1456 [ 1457 LocalAlignment( 1458 Locus(94, 95), 1459 Locus(0, 1), 1460 0, 1461 ), 1462 LocalAlignment( 1463 Locus(97, 98), 1464 Locus(4, 5), 1465 0, 1466 ), 1467 ], 1468 )).front, 1469 SeededAlignment.from(AlignmentChain( 1470 8, 1471 Contig(2, 100), 1472 Contig(1, 10), 1473 emptyFlags, 1474 [ 1475 LocalAlignment( 1476 Locus(2, 3), 1477 Locus(5, 6), 1478 0, 1479 ), 1480 LocalAlignment( 1481 Locus(5, 6), 1482 Locus(9, 10), 1483 0, 1484 ), 1485 ], 1486 )).front, 1487 ), 1488 "gapFront2EndComplement": ReadAlignment( 1489 SeededAlignment.from(AlignmentChain( 1490 9, 1491 Contig(2, 100), 1492 Contig(1, 10), 1493 Flags(complement), 1494 [ 1495 LocalAlignment( 1496 Locus(2, 3), 1497 Locus(5, 6), 1498 0, 1499 ), 1500 LocalAlignment( 1501 Locus(5, 6), 1502 Locus(9, 10), 1503 0, 1504 ), 1505 ], 1506 )).front, 1507 SeededAlignment.from(AlignmentChain( 1508 10, 1509 Contig(1, 100), 1510 Contig(1, 10), 1511 Flags(complement), 1512 [ 1513 LocalAlignment( 1514 Locus(94, 95), 1515 Locus(0, 1), 1516 0, 1517 ), 1518 LocalAlignment( 1519 Locus(97, 98), 1520 Locus(4, 5), 1521 0, 1522 ), 1523 ], 1524 )).front, 1525 ), 1526 "gapEnd2End": ReadAlignment( 1527 SeededAlignment.from(AlignmentChain( 1528 11, 1529 Contig(1, 100), 1530 Contig(1, 10), 1531 Flags(complement), 1532 [ 1533 LocalAlignment( 1534 Locus(94, 95), 1535 Locus(0, 1), 1536 0, 1537 ), 1538 LocalAlignment( 1539 Locus(97, 98), 1540 Locus(4, 5), 1541 0, 1542 ), 1543 ], 1544 )).front, 1545 SeededAlignment.from(AlignmentChain( 1546 12, 1547 Contig(2, 100), 1548 Contig(1, 10), 1549 emptyFlags, 1550 [ 1551 LocalAlignment( 1552 Locus(94, 95), 1553 Locus(0, 1), 1554 0, 1555 ), 1556 LocalAlignment( 1557 Locus(97, 98), 1558 Locus(4, 5), 1559 0, 1560 ), 1561 ], 1562 )).front, 1563 ), 1564 "gapBegin2Begin": ReadAlignment( 1565 SeededAlignment.from(AlignmentChain( 1566 13, 1567 Contig(2, 100), 1568 Contig(1, 10), 1569 emptyFlags, 1570 [ 1571 LocalAlignment( 1572 Locus(2, 3), 1573 Locus(5, 6), 1574 0, 1575 ), 1576 LocalAlignment( 1577 Locus(5, 6), 1578 Locus(9, 10), 1579 0, 1580 ), 1581 ], 1582 )).front, 1583 SeededAlignment.from(AlignmentChain( 1584 14, 1585 Contig(1, 100), 1586 Contig(1, 10), 1587 Flags(complement), 1588 [ 1589 LocalAlignment( 1590 Locus(2, 3), 1591 Locus(5, 6), 1592 0, 1593 ), 1594 LocalAlignment( 1595 Locus(5, 6), 1596 Locus(9, 10), 1597 0, 1598 ), 1599 ], 1600 )).front, 1601 ), 1602 ]; 1603 // +--------- isInOrder 1604 // |+-------- isValid 1605 // ||+------- type 1606 // |||+------ isExtension 1607 // ||||+----- isFrontExt 1608 // |||||+---- isBackExt 1609 // ||||||+--- isGap 1610 // |||||||+-- isParallel 1611 // ||||||||+- isAntiParallel 1612 // ||||||||| 1613 // ||||||||| 1614 auto testCases = [ 1615 //"innerAlignment": "+.F......", 1616 //"innerAlignmentComplement": "+.F......", 1617 "frontExtension": "++F++....", 1618 "frontExtensionComplement": "++F++....", 1619 "backExtension": "++B+.+...", 1620 "backExtensionComplement": "++B+.+...", 1621 "gapEnd2Front": "++G...++.", 1622 "gapFront2EndComplement": ".+G...++.", 1623 "gapEnd2End": "++G...+.+", 1624 "gapBegin2Begin": ".+G...+.+", 1625 ]; 1626 1627 alias getFailureMessage = (testCase, testFunction, expectedValue) => format!"expected %s.%s to be %s"( 1628 testCase, testFunction, expectedValue); 1629 alias toBool = (c) => c == '+' ? true : false; 1630 alias toRAT = (c) => c == 'F' 1631 ? ReadAlignmentType.front 1632 : c == 'B' 1633 ? ReadAlignmentType.back 1634 : ReadAlignmentType.gap; 1635 1636 foreach (testCase, expectations; testCases) 1637 { 1638 auto readAlignment = testData[testCase]; 1639 1640 auto expIsInOrder = toBool(expectations[0]); 1641 auto expIsValid = toBool(expectations[1]); 1642 auto expType = toRAT(expectations[2]); 1643 auto expIsExtension = toBool(expectations[3]); 1644 auto expIsFrontExt = toBool(expectations[4]); 1645 auto expIsBackExt = toBool(expectations[5]); 1646 auto expIsGap = toBool(expectations[6]); 1647 auto expIsParallel = toBool(expectations[7]); 1648 auto expIsAntiParallel = toBool(expectations[8]); 1649 1650 assert(expIsInOrder == readAlignment.isInOrder, 1651 getFailureMessage(testCase, "isInOrder", expIsInOrder)); 1652 assert(expIsValid == readAlignment.isValid, 1653 getFailureMessage(testCase, "isValid", expIsValid)); 1654 if (readAlignment.isValid) 1655 assert(expType == readAlignment.type, 1656 getFailureMessage(testCase, "type", expType)); 1657 else 1658 assertThrown!AssertError(readAlignment.type, 1659 format!"expected type(%s) to throw"(testCase)); 1660 assert(expIsExtension == readAlignment.isExtension, 1661 getFailureMessage(testCase, "isExtension", expIsExtension)); 1662 assert(expIsFrontExt == readAlignment.isFrontExtension, 1663 getFailureMessage(testCase, "isFrontExtension", expIsFrontExt)); 1664 assert(expIsBackExt == readAlignment.isBackExtension, 1665 getFailureMessage(testCase, "isBackExtension", expIsBackExt)); 1666 assert(expIsGap == readAlignment.isGap, 1667 getFailureMessage(testCase, "isGap", expIsGap)); 1668 assert(expIsParallel == readAlignment.isParallel, 1669 getFailureMessage(testCase, "isParallel", expIsParallel)); 1670 assert(expIsAntiParallel == readAlignment.isAntiParallel, 1671 getFailureMessage(testCase, "isAntiParallel", expIsAntiParallel)); 1672 } 1673 } 1674 } 1675 1676 double meanScore() const pure 1677 { 1678 return this[].map!"a.score".mean; 1679 } 1680 } 1681 1682 /// Generate basic join from read alignment. 1683 J makeJoin(J)(ReadAlignment readAlignment) 1684 { 1685 final switch (readAlignment.type) 1686 { 1687 case ReadAlignmentType.front: 1688 return J( 1689 ContigNode( 1690 readAlignment[0].contigA.id, 1691 ContigPart.pre, 1692 ), 1693 ContigNode( 1694 readAlignment[0].contigA.id, 1695 ContigPart.begin, 1696 ), 1697 ); 1698 case ReadAlignmentType.gap: 1699 alias contigPart = (locationSeed) => locationSeed == AlignmentLocationSeed.front 1700 ? ContigPart.begin 1701 : ContigPart.end; 1702 1703 return J( 1704 ContigNode( 1705 readAlignment[0].contigA.id, 1706 contigPart(readAlignment[0].seed), 1707 ), 1708 ContigNode( 1709 readAlignment[1].contigA.id, 1710 contigPart(readAlignment[1].seed), 1711 ), 1712 ); 1713 case ReadAlignmentType.back: 1714 return J( 1715 ContigNode( 1716 readAlignment[0].contigA.id, 1717 ContigPart.end, 1718 ), 1719 ContigNode( 1720 readAlignment[0].contigA.id, 1721 ContigPart.post, 1722 ), 1723 ); 1724 } 1725 } 1726 1727 /// Generate join from read alignment. 1728 J to(J : Join!(ReadAlignment[]))(ReadAlignment readAlignment) 1729 { 1730 auto join = makeJoin!J(readAlignment); 1731 join.payload = [readAlignment]; 1732 1733 return join; 1734 } 1735 1736 /// 1737 unittest 1738 { 1739 with (AlignmentChain) with (LocalAlignment) with (Flag) 1740 { 1741 auto frontExtension = ReadAlignment( 1742 SeededAlignment.from(AlignmentChain( 1743 3, 1744 Contig(1, 100), 1745 Contig(1, 10), 1746 emptyFlags, 1747 [ 1748 LocalAlignment( 1749 Locus(2, 3), 1750 Locus(5, 6), 1751 0, 1752 ), 1753 LocalAlignment( 1754 Locus(5, 6), 1755 Locus(9, 10), 1756 0, 1757 ), 1758 ], 1759 )).front, 1760 ); 1761 auto backExtension = ReadAlignment( 1762 SeededAlignment.from(AlignmentChain( 1763 5, 1764 Contig(1, 100), 1765 Contig(1, 10), 1766 emptyFlags, 1767 [ 1768 LocalAlignment( 1769 Locus(94, 95), 1770 Locus(0, 1), 1771 0, 1772 ), 1773 LocalAlignment( 1774 Locus(97, 98), 1775 Locus(4, 5), 1776 0, 1777 ), 1778 ], 1779 )).front, 1780 ); 1781 auto gap = ReadAlignment( 1782 SeededAlignment.from(AlignmentChain( 1783 11, 1784 Contig(1, 100), 1785 Contig(1, 10), 1786 Flags(complement), 1787 [ 1788 LocalAlignment( 1789 Locus(94, 95), 1790 Locus(0, 1), 1791 0, 1792 ), 1793 LocalAlignment( 1794 Locus(97, 98), 1795 Locus(4, 5), 1796 0, 1797 ), 1798 ], 1799 )).front, 1800 SeededAlignment.from(AlignmentChain( 1801 12, 1802 Contig(2, 100), 1803 Contig(1, 10), 1804 emptyFlags, 1805 [ 1806 LocalAlignment( 1807 Locus(94, 95), 1808 Locus(0, 1), 1809 0, 1810 ), 1811 LocalAlignment( 1812 Locus(97, 98), 1813 Locus(4, 5), 1814 0, 1815 ), 1816 ], 1817 )).front, 1818 ); 1819 1820 auto join1 = frontExtension.to!(Join!(ReadAlignment[])); 1821 auto join2 = backExtension.to!(Join!(ReadAlignment[])); 1822 auto join3 = gap.to!(Join!(ReadAlignment[])); 1823 1824 assert(join1.start == ContigNode(1, ContigPart.pre)); 1825 assert(join1.end == ContigNode(1, ContigPart.begin)); 1826 assert(join1.payload == [frontExtension]); 1827 1828 assert(join2.start == ContigNode(1, ContigPart.end)); 1829 assert(join2.end == ContigNode(1, ContigPart.post)); 1830 assert(join2.payload == [backExtension]); 1831 1832 assert(join3.start == ContigNode(1, ContigPart.end)); 1833 assert(join3.end == ContigNode(2, ContigPart.end)); 1834 assert(join3.payload == [gap]); 1835 } 1836 } 1837 1838 /** 1839 A pile of read alignments belonging to the same gap/contig end. 1840 1841 See_Also: Hit 1842 */ 1843 alias PileUp = ReadAlignment[]; 1844 1845 /** 1846 Get the type of the read alignment. 1847 1848 See_Also: `isFrontExtension`, `isBackExtension`, `isGap` 1849 */ 1850 ReadAlignmentType getType(in PileUp pileUp) pure nothrow 1851 { 1852 assert(pileUp.isValid, "invalid read alignment"); 1853 1854 if (pileUp.isGap) 1855 { 1856 return ReadAlignmentType.gap; 1857 } 1858 else 1859 { 1860 assert(pileUp.isExtension); 1861 return pileUp[0].isFrontExtension 1862 ? ReadAlignmentType.front 1863 : ReadAlignmentType.back; 1864 } 1865 } 1866 1867 bool isValid(in PileUp pileUp) pure nothrow 1868 { 1869 return pileUp.isExtension ^ pileUp.isGap; 1870 } 1871 1872 bool isExtension(in PileUp pileUp) pure nothrow 1873 { 1874 if (pileUp[0].isFrontExtension) 1875 { 1876 return pileUp.all!(readAlignment => readAlignment.isFrontExtension); 1877 } 1878 else if (pileUp[0].isBackExtension) 1879 { 1880 return pileUp.all!(readAlignment => readAlignment.isBackExtension); 1881 } 1882 else 1883 { 1884 return false; 1885 } 1886 } 1887 1888 bool isGap(in PileUp pileUp) pure nothrow 1889 { 1890 return pileUp.any!(readAlignment => readAlignment.isGap); 1891 } 1892 1893 auto isParallel(in PileUp pileUp) pure nothrow 1894 { 1895 return pileUp.isGap && pileUp 1896 .filter!(readAlignment => readAlignment.isGap) 1897 .front 1898 .isParallel; 1899 } 1900 1901 auto isAntiParallel(in PileUp pileUp) pure nothrow 1902 { 1903 return pileUp.isGap && pileUp 1904 .filter!(readAlignment => readAlignment.isGap) 1905 .front 1906 .isAntiParallel; 1907 } 1908 1909 /// Returns a list of pointers to all involved alignment chains. 1910 AlignmentChain*[] getAlignmentRefs(PileUp pileUp) pure nothrow 1911 { 1912 auto alignmentChainsAcc = appender!(AlignmentChain*[]); 1913 alignmentChainsAcc.reserve(pileUp.map!"a.length".sum); 1914 1915 foreach (ref readAlignment; pileUp) 1916 { 1917 foreach (ref seededAlignment; readAlignment) 1918 { 1919 alignmentChainsAcc ~= &(seededAlignment.alignment); 1920 } 1921 } 1922 1923 return alignmentChainsAcc.data; 1924 } 1925 1926 /// 1927 unittest 1928 { 1929 auto pileUp = [ 1930 ReadAlignment(SeededAlignment(), SeededAlignment()), ReadAlignment(SeededAlignment()) 1931 ]; 1932 auto allAlignmentChains = pileUp.getAlignmentRefs(); 1933 1934 assert(allAlignmentChains.length == 3); 1935 assert(pileUp[0][0].id == 0); 1936 assert(pileUp[0][1].id == 0); 1937 assert(pileUp[1][0].id == 0); 1938 1939 allAlignmentChains[0].id = 1; 1940 allAlignmentChains[1].id = 2; 1941 allAlignmentChains[2].id = 3; 1942 1943 assert(pileUp[0][0].id == 1); 1944 assert(pileUp[0][1].id == 2); 1945 assert(pileUp[1][0].id == 3); 1946 }