1 /**
2     This is the `checkResults` command of `dentist`.
3 
4     Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de>
5     License: Subject to the terms of the MIT license, as written in the
6              included LICENSE file.
7     Authors: Arne Ludwig <arne.ludwig@posteo.de>
8 */
9 module dentist.commands.checkResults;
10 
11 import dentist.common : isTesting;
12 
13 static if (isTesting):
14 
15 import core.atomic : atomicOp;
16 import dentist.commandline : TestingCommand, OptionsFor;
17 import dentist.common :
18     ReferenceInterval,
19     ReferenceRegion,
20     to;
21 import dentist.common.alignments :
22     AlignmentChain,
23     coord_t,
24     id_t;
25 import dentist.dazzler :
26     ContigSegment,
27     fingerprint,
28     GapSegment,
29     getAlignments,
30     getExactAlignment,
31     getScaffoldStructure,
32     readMask,
33     ScaffoldSegment;
34 import dentist.util.algorithm :
35     cmpLexicographically,
36     first,
37     last,
38     sliceBy;
39 import dentist.util.log;
40 import dentist.util.math :
41     absdiff,
42     ceildiv,
43     longestIncreasingSubsequence,
44     mean,
45     median,
46     N,
47     NaturalNumberSet;
48 import dentist.util.range : tupleMap;
49 import std.algorithm :
50     all,
51     among,
52     chunkBy,
53     copy,
54     count,
55     filter,
56     find,
57     fold,
58     joiner,
59     map,
60     max,
61     maxElement,
62     min,
63     remove,
64     sort,
65     sum,
66     uniq;
67 import std.array : appender, array;
68 import std.format : format;
69 import std.math :
70     ceil,
71     log10;
72 import std.parallelism : parallel;
73 import std.range :
74     assumeSorted,
75     chain,
76     enumerate,
77     only,
78     StoppingPolicy,
79     zip;
80 import std.range.primitives;
81 import std.regex : ctRegex, replaceAll;
82 import std.stdio : File, writeln;
83 import std..string : join;
84 import std.typecons : Tuple;
85 import vibe.data.json : Json, toJson = serializeToJson, toJsonString = serializeToPrettyJson;
86 
87 
88 /// Options for the `collectPileUps` command.
89 alias Options = OptionsFor!(TestingCommand.checkResults);
90 
91 /// Execute the `checkResults` command with `options`.
92 void execute(in Options options)
93 {
94     auto analyzer = ResultAnalyzer(options);
95     auto stats = analyzer.collect();
96 
97     if (options.useJson)
98         writeln(stats.toJsonString());
99     else
100         writeln(stats.toTabular());
101 }
102 
103 private struct ResultAnalyzer
104 {
105     alias ExactAlignment = typeof(getExactAlignment("", "", resultAlignment[0], ""));
106     alias Fingerprint = typeof(fingerprint(&resultAlignment[0]));
107     static enum identityLevels = Stats.identityLevels;
108 
109     const(Options) options;
110     protected const(ScaffoldSegment)[] trueAssemblyScaffoldStructure;
111     protected const(ScaffoldSegment)[] resultScaffoldStructure;
112     protected coord_t referenceOffset;
113     protected AlignmentChain[] resultAlignment;
114     protected ExactAlignment[Fingerprint] exactResultAlignments;
115     protected ReferenceRegion mappedRegionsMask;
116     protected ReferenceRegion referenceGaps;
117     protected ReferenceRegion reconstructedRegions;
118     protected ReferenceRegion[] reconstructedGaps;
119     protected size_t[][identityLevels.length] correctContigsPerIdentityLevel;
120     protected size_t[][identityLevels.length] correctGapsPerIdentityLevel;
121 
122     Stats collect()
123     {
124         mixin(traceExecution);
125 
126         init();
127 
128         if (options.alignmentTabular !is null)
129             writeAlignmentTabular(
130                 File(options.alignmentTabular, "w"),
131                 resultAlignment,
132             );
133 
134         Stats stats;
135 
136         stats.numBpsExpected = getNumBpsExpected();
137         stats.numBpsKnown = getNumBpsKnown();
138         stats.numBpsResult = getNumBpsResult();
139         stats.numTranslocatedGaps = getNumTranslocatedGaps();
140         stats.numBpsCorrect = getNumBpsCorrect();
141         stats.numContigsExpected = getNumContigsExpected();
142         stats.numCorrectContigs = getNumCorrectContigs();
143         stats.numCorrectGaps = getNumCorrectGaps();
144         stats.numClosedGaps = getNumClosedGaps();
145         stats.numPartlyClosedGaps = getNumPartlyClosedGaps();
146         stats.numUnaddressedGaps = getNumUnaddressedGaps();
147         stats.numBpsInClosedGaps = getNumBpsInClosedGaps();
148         stats.numBpsInGaps = getNumBpsInGaps();
149         stats.maximumN50 = getMaximumN50();
150         stats.inputN50 = getInputN50();
151         stats.resultN50 = getResultN50();
152         stats.gapMedian = getGapMedian();
153         stats.closedGapMedian = getClosedGapMedian();
154         stats.maxClosedGap = getMaxClosedGap();
155         if (options.bucketSize > 0)
156         {
157             stats.correctGapLengthHistograms = getCorrectGapLengthHistograms();
158             stats.closedGapLengthHistogram = getClosedGapLengthHistogram();
159             stats.gapLengthHistogram = getGapLengthHistogram();
160         }
161 
162         return stats;
163     }
164 
165     void init()
166     {
167         trueAssemblyScaffoldStructure = getScaffoldStructure(options.trueAssemblyDb).array;
168         resultScaffoldStructure = getScaffoldStructure(options.resultDb).array;
169         if (options.onlyContigId == 0)
170             resultAlignment = cleanUpAlignments(getAlignments(
171                 options.trueAssemblyDb,
172                 options.resultDb,
173                 options.resultsAlignmentFile,
174                 options.workdir,
175                 options.tracePointDistance,
176             ));
177         else
178             resultAlignment = cleanUpAlignments(getAlignments(
179                 options.trueAssemblyDb,
180                 options.resultDb,
181                 options.resultsAlignmentFile,
182                 options.workdir,
183                 options.tracePointDistance,
184             ).filter!(ac => ac.contigA.id == options.onlyContigId).array);
185         mappedRegionsMask = ReferenceRegion(readMask!ReferenceInterval(
186             options.trueAssemblyDb,
187             options.mappedRegionsMask,
188             options.workdir,
189         ));
190         referenceOffset = cast(coord_t) mappedRegionsMask.intervals[0].begin;
191         debug logJsonDebug("referenceOffset", referenceOffset);
192         referenceGaps = getReferenceGaps();
193         reconstructedRegions = getReconstructedRegions();
194         reconstructedGaps = getReconstructedGaps();
195         debug logJsonDebug(
196             "referenceGaps", referenceGaps.toJson,
197             "reconstructedRegions", reconstructedRegions.toJson,
198             "reconstructedGaps", reconstructedGaps.toJson,
199         );
200         calculateExactResultAlignments();
201         correctContigsPerIdentityLevel = getCorrectRegions(mappedRegionsMask.intervals);
202         if (options.gapDetailsTabular !is null)
203             correctGapsPerIdentityLevel = getCorrectRegions(
204                 referenceGaps
205                     .intervals
206                     .array,
207                 File(options.gapDetailsTabular, "w"),
208             );
209         else
210             correctGapsPerIdentityLevel = getCorrectRegions(
211                 referenceGaps
212                     .intervals
213                     .array,
214             );
215     }
216 
217     AlignmentChain[] cleanUpAlignments(AlignmentChain[] localAlignments)
218     {
219         mixin(traceExecution);
220 
221         static struct AlignmentEvent
222         {
223             size_t contigId;
224             size_t position;
225             int diff;
226             AlignmentChain* alignmentChain;
227 
228             int opCmp(in AlignmentEvent other) const pure nothrow
229             {
230                 return cmpLexicographically!(
231                     const(AlignmentEvent),
232                     e => e.contigId,
233                     e => e.position,
234                     e => -e.diff,
235                 )(this, other);
236             }
237         }
238 
239         // 1. Prepare alignmentEvents
240         auto alignmentEvents = localAlignments
241             .map!((ref alignment) => only(
242                 AlignmentEvent(alignment.contigA.id, alignment.first.contigA.begin, 1, &alignment),
243                 AlignmentEvent(alignment.contigA.id, alignment.last.contigA.end, -1, &alignment),
244             ))
245             .joiner;
246         auto contigBoundaryEvents = localAlignments
247             .map!"a.contigA"
248             .uniq
249             .map!(contig => only(
250                 AlignmentEvent(contig.id, 0, 0, null),
251                 AlignmentEvent(contig.id, contig.length, 0, null),
252             ))
253             .joiner;
254         auto changeEvents = chain(alignmentEvents, contigBoundaryEvents).array;
255         changeEvents.sort();
256 
257         if (changeEvents.length == 0)
258             return localAlignments;
259 
260         // 2. Find overlapping alignments
261         int currentCoverage;
262         auto overlappingAlignments = appender!(AlignmentChain*[]);
263         auto goodAlignments = appender!(AlignmentChain*[]);
264         foreach (changeEvent; changeEvents)
265         {
266             currentCoverage += changeEvent.diff;
267 
268             if (currentCoverage == 0 && overlappingAlignments.data.length > 0)
269             {
270                 // Collected local group of overlapping alignments
271 
272                 auto locallyCleanedUpAlignments = overlappingAlignments.data.length == 1
273                     ? overlappingAlignments.data
274                     : locallyCleanUpAlignments(overlappingAlignments.data);
275                 logJsonDebug(
276                     "overlappingAlignments", overlappingAlignments.data.map!"*a".array.toJson,
277                     "locallyCleanedUpAlignments", locallyCleanedUpAlignments.map!"*a".array.toJson,
278                 );
279                 goodAlignments ~= locallyCleanedUpAlignments;
280 
281                 overlappingAlignments.clear();
282             }
283             else if (currentCoverage >= 1 && changeEvent.diff > 0)
284             {
285                 // Found overlapping alignments
286                 assert(changeEvent.alignmentChain !is null);
287                 overlappingAlignments ~= changeEvent.alignmentChain;
288             }
289         }
290 
291         logJsonDiagnostic(
292             "numRawLocalAlignments", localAlignments.length,
293             "numCleanedLocalAlignments", goodAlignments.data.length,
294         );
295 
296         return goodAlignments.data.map!"*a".array;
297     }
298 
299     AlignmentChain*[] locallyCleanUpAlignments(AlignmentChain*[] overlappingAlignments)
300     {
301         ReferenceInterval interval(in AlignmentChain* ac) pure
302         {
303             return ReferenceInterval(
304                 ac.contigA.id,
305                 ac.first.contigA.begin,
306                 ac.last.contigA.end,
307             );
308         }
309 
310         alias containedWithin = (lhs, rhs) => interval(lhs) in interval(rhs);
311         alias overlapEachOther = (lhs, rhs) =>
312                 interval(lhs).intersects(interval(rhs));
313 
314         // 1. Remove LAs completely included in others
315         foreach (ac; overlappingAlignments)
316             foreach (other; overlappingAlignments)
317                 ac.disableIf(!other.flags.disabled && containedWithin(ac, other));
318 
319         // 2. Find combination of alignments, s.t.
320         //     - they are not disabled
321         //     - they do not overlap
322         //     - if more than one: cover maximum possible area
323         //     - if more than one: have best quality
324 
325         static struct ViableCombination
326         {
327             ReferenceRegion region;
328             AlignmentChain*[] acs;
329         }
330 
331         auto viableCombinationsAcc = appender!(ViableCombination[]);
332 
333         void findNonOverlappingCombinations(AlignmentChain*[] candidates, ReferenceRegion region, AlignmentChain*[] included)
334         {
335             size_t numExpansions;
336             foreach (i, candidate; candidates)
337             {
338                 auto candidateInterval = interval(candidate);
339                 auto newRegion = region | candidateInterval;
340 
341                 if (newRegion.size == region.size + candidateInterval.size)
342                 {
343                     findNonOverlappingCombinations(
344                         candidates.remove(i),
345                         newRegion,
346                         included ~ [candidate],
347                     );
348                     ++numExpansions;
349                 }
350             }
351 
352             if (numExpansions == 0)
353                 viableCombinationsAcc ~= ViableCombination(
354                     region,
355                     included,
356                 );
357         }
358 
359         findNonOverlappingCombinations(
360             overlappingAlignments.filter!"!a.flags.disabled".array,
361             ReferenceRegion(),
362             [],
363         );
364 
365         assert(viableCombinationsAcc.data.length > 0);
366 
367         return viableCombinationsAcc
368             .data
369             .maxElement!(vc => vc.region.size - vc.acs.map!"a.totalDiffs".sum.to!double)
370             .acs;
371     }
372 
373     void calculateExactResultAlignments()
374     {
375         mixin(traceExecution);
376 
377         foreach (alignmentChain; parallel(resultAlignment))
378         {
379             auto acFingerprint = fingerprint(&alignmentChain);
380             auto exactAlignment = getExactAlignment(
381                 options.trueAssemblyDb,
382                 options.resultDb,
383                 alignmentChain,
384                 options.workdir,
385                 10*2^^30,
386             );
387 
388             synchronized
389                 exactResultAlignments[acFingerprint] = exactAlignment;
390         }
391     }
392 
393     ReferenceRegion getReferenceGaps()
394     {
395         mixin(traceExecution);
396 
397         alias scaffoldRegion = (contigPart) => ReferenceRegion(ReferenceInterval(
398             contigPart.globalContigId,
399             0,
400             contigPart.length,
401         ));
402         alias mappedGapsMask = (contigPart) => scaffoldRegion(contigPart) - mappedRegionsMask;
403 
404         return ReferenceRegion(trueAssemblyScaffoldStructure
405             .filter!(contigPart => contigPart.peek!ContigSegment !is null)
406             .map!(contigPart => contigPart.get!ContigSegment)
407             .map!(contigPart => cast(ReferenceInterval[]) mappedGapsMask(contigPart).intervals)
408             .joiner
409             .filter!(gap => isInnerGap(gap))
410             .array
411         );
412     }
413 
414     ReferenceRegion getReconstructedRegions()
415     {
416         mixin(traceExecution);
417 
418         return ReferenceRegion(resultAlignment
419             .map!(ac => ac
420                 .localAlignments
421                 .map!(la => ReferenceInterval(
422                     ac.contigA.id,
423                     la.contigA.begin,
424                     la.contigA.end,
425                 )))
426             .joiner
427             .array);
428     }
429 
430     ReferenceRegion[] getReconstructedGaps()
431     {
432         mixin(traceExecution);
433 
434         return referenceGaps
435             .intervals
436             .map!(gap => reconstructedRegions & gap)
437             .array;
438     }
439 
440     void writeAlignmentTabular(File tabularFile, AlignmentChain[] alignmentChains)
441     {
442         coord_t contigBId;
443         coord_t contigBOffset;
444         foreach (alignmentChain; alignmentChains)
445         {
446             if (contigBId != alignmentChain.contigB.id)
447             {
448                 contigBId = alignmentChain.contigB.id;
449                 contigBOffset = getResultContigBegin(contigBId);
450             }
451 
452             alias percentSimilarity = (la) => 1.0 - cast(double) la.numDiffs /
453                                                     cast(double) (la.contigA.end - la.contigA.begin);
454 
455             foreach (i, localAlignment; alignmentChain.localAlignments)
456             {
457                 tabularFile.writefln!"%12d %12d %12d %12d %12f %d"(
458                     alignmentChain.contigA.id,
459                     alignmentChain.contigB.id,
460                     localAlignment.contigA.begin,
461                     alignmentChain.flags.complement
462                         ? alignmentChain.contigB.length - (contigBOffset + localAlignment.contigB.end)
463                         : contigBOffset + localAlignment.contigB.begin,
464                     0.0,
465                     alignmentChain.flags.complement ? -1 : 1,
466                 );
467                 tabularFile.writefln!"%12d %12d %12d %12d %12f %d"(
468                     alignmentChain.contigA.id,
469                     alignmentChain.contigB.id,
470                     localAlignment.contigA.end,
471                     alignmentChain.flags.complement
472                         ? alignmentChain.contigB.length - (contigBOffset + localAlignment.contigB.begin)
473                         : contigBOffset + localAlignment.contigB.end,
474                     percentSimilarity(localAlignment),
475                     alignmentChain.flags.complement ? -1 : 1,
476                 );
477             }
478             tabularFile.writeln();
479         }
480     }
481 
482     coord_t getResultContigBegin(id_t contigId)
483     {
484         return cast(coord_t) (resultScaffoldStructure
485             .filter!(contigPart => contigPart.peek!ContigSegment !is null)
486             .map!(contigPart => contigPart.peek!ContigSegment)
487             .find!(contigPart => contigPart.globalContigId == contigId)
488             .map!(contigPart => contigPart.begin)
489             .front + 0);
490     }
491 
492     size_t getNumBpsExpected()
493     {
494         mixin(traceExecution);
495 
496         return mappedRegionsMask
497             .intervals
498             .sliceBy!"a.contigId == b.contigId"
499             .map!(trueScaffoldSlice => trueScaffoldSlice[$ - 1].end - trueScaffoldSlice[0].begin)
500             .sum;
501     }
502 
503     size_t getNumBpsKnown()
504     {
505         mixin(traceExecution);
506 
507         return mappedRegionsMask.size;
508     }
509 
510     size_t getNumBpsResult()
511     {
512         mixin(traceExecution);
513 
514         return resultScaffoldStructure
515             .filter!(contigPart => contigPart.peek!ContigSegment !is null)
516             .map!(contigPart => contigPart.peek!ContigSegment.length)
517             .sum;
518     }
519 
520     size_t getNumBpsCorrect()
521     {
522         mixin(traceExecution);
523 
524         return resultAlignment
525             .map!(ac => ac
526                 .localAlignments
527                 .map!(la => la.contigA.end - la.contigA.begin - la.numDiffs))
528             .joiner
529             .sum;
530     }
531 
532     size_t getNumTranslocatedGaps()
533     {
534         mixin(traceExecution);
535 
536         version (assert)
537         {
538             foreach (pair; zip(referenceGaps.intervals, reconstructedGaps))
539                 assert(
540                     isGapClosed(pair) ^ isGapPartlyClosed(pair) ^ isGapUnaddressed(pair),
541                     format!"%s-%s-%s"(
542                         isGapClosed(pair),
543                         isGapPartlyClosed(pair),
544                         isGapUnaddressed(pair),
545                     ),
546                 );
547         }
548 
549         return referenceGaps.intervals.length;
550     }
551 
552     size_t getNumContigsExpected()
553     {
554         mixin(traceExecution);
555 
556         return mappedRegionsMask.intervals.length;
557     }
558 
559     size_t getNumCorrectContigs()
560     {
561         return correctContigsPerIdentityLevel[0].length;
562     }
563 
564     size_t getNumCorrectGaps()
565     {
566         return correctGapsPerIdentityLevel[0].length;
567     }
568 
569     size_t[][identityLevels.length] getCorrectRegions(in ReferenceInterval[] intervals, File detailsTabular = File())
570     {
571         if (detailsTabular.isOpen)
572             detailsTabular.writefln!"%10s %10s %10s %10s %10s %s"(
573                 "contigId",
574                 "begin",
575                 "end",
576                 "numDiffs",
577                 "percentIdentity",
578                 "sequenceAlignment",
579             );
580 
581         auto sortedResultAlignment = resultAlignment.assumeSorted!"a.contigA.id < b.contigA.id";
582 
583         size_t[][identityLevels.length] identicalLengthsPerLevel;
584         foreach (identicalLengths; identicalLengthsPerLevel)
585             identicalLengths.reserve(intervals.length);
586 
587         foreach (interval; parallel(intervals))
588         {
589             auto overlappingAlignments = sortedResultAlignment
590                 .equalRange(getDummyAC(interval));
591 
592             auto overlappedRegion = overlappingAlignments
593                 .map!(to!(ReferenceRegion, "contigA"))
594                 .fold!"a | b"(ReferenceRegion());
595             size_t numDiffs = (ReferenceRegion(interval) - overlappedRegion).size;
596             auto alignmentString = appender!string;
597 
598             foreach (overlappingAlignment; overlappingAlignments)
599             {
600                 auto alignmentBegin = overlappingAlignment.first.contigA.begin;
601                 auto alignmentEnd = overlappingAlignment.last.contigA.end;
602                 if (interval.end <= alignmentBegin || alignmentEnd <= interval.begin)
603                     continue; // Ignore non-overlaps
604                 auto overlapBegin = interval.begin > alignmentBegin
605                     ? interval.begin - alignmentBegin
606                     : 0;
607                 auto overlapEnd = interval.end < alignmentEnd
608                     ? interval.end - alignmentBegin
609                     : alignmentEnd - alignmentBegin;
610 
611                 if (overlappingAlignment.flags.complement)
612                 {
613                     // Inversions are not allowed
614                     numDiffs += overlapEnd - overlapBegin;
615                 }
616                 else
617                 {
618                     auto exactAlignment = getOverlapAlignment(overlappingAlignment);
619                     auto overlappingExactAlignment = exactAlignment[overlapBegin..min(overlapEnd, $)];
620 
621                     numDiffs += overlappingExactAlignment.score;
622 
623                     if (detailsTabular.isOpen)
624                         alignmentString ~= overlappingExactAlignment
625                             .toString()
626                             .replaceAll(ctRegex!(r"\n", "g"), "\\n");
627                 }
628                 if (detailsTabular.isOpen)
629                     alignmentString ~= "\\n\\n";
630             }
631 
632             if (detailsTabular.isOpen)
633                 synchronized detailsTabular.writefln!"%10d %10d %10d %10d %.8f %s"(
634                     interval.contigId,
635                     interval.begin,
636                     interval.end,
637                     numDiffs,
638                     1.0 - (numDiffs.to!double / interval.size.to!double),
639                     alignmentString.data,
640                 );
641 
642             foreach (i, identityLevel; identityLevels)
643                 if (numDiffs <= (1.0 - identityLevel) * interval.size)
644                     synchronized identicalLengthsPerLevel[i] ~= interval.size;
645         }
646 
647         return identicalLengthsPerLevel;
648     }
649 
650     auto getOverlapAlignment(in AlignmentChain alignmentChain)
651     {
652         auto acFingerprint = fingerprint(&alignmentChain);
653 
654         return exactResultAlignments[acFingerprint];
655     }
656 
657     static AlignmentChain getDummyAC(in ReferenceInterval refInterval)
658     {
659         return AlignmentChain(
660             0,
661             AlignmentChain.Contig(
662                 cast(id_t) refInterval.contigId,
663                 cast(coord_t) refInterval.end,
664             ),
665             AlignmentChain.Contig(0, 1),
666             AlignmentChain.emptyFlags,
667             [AlignmentChain.LocalAlignment(
668                 AlignmentChain.LocalAlignment.Locus(
669                     cast(coord_t) refInterval.begin,
670                     cast(coord_t) refInterval.end,
671                 ),
672                 AlignmentChain.LocalAlignment.Locus(
673                     cast(coord_t) 0,
674                     cast(coord_t) 1,
675                 ),
676             )],
677         );
678     }
679 
680     bool isInnerGap(in ReferenceInterval gap)
681     {
682         return 0 < gap.begin && gap.end < trueScaffoldLength(cast(id_t) gap.contigId);
683     }
684 
685     coord_t trueScaffoldLength(in id_t contigId)
686     {
687         return cast(coord_t) trueAssemblyScaffoldStructure
688             .filter!(contigPart => contigPart.peek!ContigSegment !is null)
689             .map!(contigPart => contigPart.get!ContigSegment)
690             .find!(contigPart => contigPart.globalContigId == contigId)
691             .first
692             .length;
693     }
694 
695     size_t getNumClosedGaps()
696     {
697         mixin(traceExecution);
698 
699         assert(referenceGaps.intervals.length == reconstructedGaps.length);
700         logJsonDebug("closedGaps", zip(referenceGaps.intervals, reconstructedGaps)
701             .filter!(pair => isGapClosed(pair))
702             .map!"a[1].intervals"
703             .array
704             .toJson);
705 
706         return zip(referenceGaps.intervals, reconstructedGaps).count!(pair => isGapClosed(pair));
707     }
708 
709     size_t getNumPartlyClosedGaps()
710     {
711         mixin(traceExecution);
712 
713         assert(referenceGaps.intervals.length == reconstructedGaps.length);
714         logJsonDebug("partlyClosedGaps", zip(referenceGaps.intervals, reconstructedGaps)
715             .filter!(pair => isGapPartlyClosed(pair))
716             .map!"a[1].intervals"
717             .array
718             .toJson);
719 
720         return zip(referenceGaps.intervals, reconstructedGaps).count!(pair => isGapPartlyClosed(pair));
721     }
722 
723     size_t getNumUnaddressedGaps()
724     {
725         mixin(traceExecution);
726 
727         assert(referenceGaps.intervals.length == reconstructedGaps.length);
728         logJsonDebug("unaddressedGaps", zip(referenceGaps.intervals, reconstructedGaps)
729             .filter!(pair => isGapUnaddressed(pair))
730             .map!"a[1].intervals"
731             .array
732             .toJson);
733 
734         return zip(referenceGaps.intervals, reconstructedGaps).count!(pair => isGapUnaddressed(pair));
735     }
736 
737     bool isGapClosed(Pair)(Pair gapAndInsertion)
738     {
739         auto gap = gapAndInsertion[0];
740         auto insertion = gapAndInsertion[1];
741 
742         return insertion.size == gap.size;
743     }
744 
745     bool isGapPartlyClosed(Pair)(Pair gapAndInsertion)
746     {
747         auto gap = gapAndInsertion[0];
748         auto insertion = gapAndInsertion[1];
749 
750         return options.minInsertionLength <= insertion.size && insertion.size < gap.size;
751     }
752 
753     bool isGapUnaddressed(Pair)(Pair gapAndInsertion)
754     {
755         auto gap = gapAndInsertion[0];
756         auto insertion = gapAndInsertion[1];
757 
758         return insertion.size < min(options.minInsertionLength, gap.size);
759     }
760 
761     size_t getNumBpsInClosedGaps()
762     {
763         mixin(traceExecution);
764 
765         return zip(referenceGaps.intervals, reconstructedGaps)
766             .filter!(pair => isGapClosed(pair))
767             .map!"a[1].size"
768             .sum;
769     }
770 
771     size_t getNumBpsInGaps()
772     {
773         mixin(traceExecution);
774 
775         return referenceGaps
776             .intervals
777             .map!(gap => gap.size)
778             .sum;
779     }
780 
781     size_t getMaximumN50()
782     {
783         mixin(traceExecution);
784 
785         return trueAssemblyScaffoldStructure
786             .filter!(contigPart => contigPart.peek!ContigSegment !is null)
787             .map!(contigPart => contigPart.peek!ContigSegment.length)
788             .array
789             .N!50(getNumBpsExpected);
790     }
791 
792     size_t getInputN50()
793     {
794         mixin(traceExecution);
795 
796         return mappedRegionsMask
797             .intervals
798             .map!(mappedInterval => mappedInterval.size)
799             .array
800             .N!50(getNumBpsExpected);
801     }
802 
803     size_t getResultN50()
804     {
805         mixin(traceExecution);
806 
807         return resultScaffoldStructure
808             .filter!(contigPart => contigPart.peek!ContigSegment !is null)
809             .map!(contigPart => contigPart.peek!ContigSegment.length)
810             .array
811             .N!50(getNumBpsExpected);
812     }
813 
814     size_t getGapMedian()
815     {
816         mixin(traceExecution);
817 
818         auto values = referenceGaps
819             .intervals
820             .map!(gap => gap.size)
821             .array;
822 
823         if (values.length == 0)
824             return size_t.max;
825         else
826             return median(values);
827     }
828 
829     size_t getClosedGapMedian()
830     {
831         mixin(traceExecution);
832 
833         auto values = zip(referenceGaps.intervals, reconstructedGaps)
834             .filter!(pair => isGapClosed(pair))
835             .map!"a[0].size"
836             .array;
837 
838         if (values.length == 0)
839             return size_t.max;
840         else
841             return median(values);
842     }
843 
844     ReferenceInterval getMaxClosedGap()
845     {
846         mixin(traceExecution);
847 
848         return zip(referenceGaps.intervals, reconstructedGaps)
849             .filter!(pair => isGapClosed(pair))
850             .map!"a[0]"
851             .maxElement!"a.size"(ReferenceInterval(0, 0, 0));
852     }
853 
854     Histogram!coord_t[identityLevels.length] getCorrectGapLengthHistograms()
855     {
856         mixin(traceExecution);
857 
858         typeof(return) correctGapLengthHistograms;
859 
860         foreach (i; 0 .. identityLevels.length)
861         {
862             correctGapLengthHistograms[i] = histogram(
863                 options.bucketSize,
864                 correctGapsPerIdentityLevel[i],
865             );
866             // NOTE: for some reason bucketSize is not correctly set; force it
867             correctGapLengthHistograms[i].bucketSize = options.bucketSize;
868         }
869 
870         return correctGapLengthHistograms;
871     }
872 
873     Histogram!coord_t getClosedGapLengthHistogram()
874     {
875         mixin(traceExecution);
876 
877         return histogram(
878             options.bucketSize,
879             zip(referenceGaps.intervals, reconstructedGaps)
880                 .filter!(pair => isGapClosed(pair))
881                 .map!(pair => cast(coord_t) pair[0].size)
882                 .array,
883         );
884     }
885 
886     Histogram!coord_t getGapLengthHistogram()
887     {
888         mixin(traceExecution);
889 
890         return histogram(
891             options.bucketSize,
892             referenceGaps
893                 .intervals
894                 .map!(gap => cast(coord_t) gap.size)
895                 .array,
896         );
897     }
898 }
899 
900 private struct Histogram(value_t)
901 {
902     alias Bucket = Tuple!(
903         value_t, "begin",
904         value_t, "end",
905         size_t, "count",
906     );
907 
908     value_t bucketSize;
909     size_t[] histogram;
910     alias histogram this;
911 
912     this(in value_t bucketSize, in value_t[] values)
913     {
914         this.bucketSize = bucketSize + 0;
915 
916         if (values.length == 0)
917             return; // empty histogram
918 
919         auto largestValue = maxElement(values);
920         this.length = largestValue / bucketSize + 1;
921 
922         foreach (value; values)
923             ++this[value / bucketSize];
924     }
925 
926     auto buckets() const pure nothrow
927     {
928         return histogram
929             .enumerate
930             .map!(rawBucket => Bucket(
931                 bucketSize * cast(value_t) rawBucket.index,
932                 bucketSize * cast(value_t) (rawBucket.index + 1),
933                 rawBucket.value + 0,
934             ));
935     }
936 }
937 
938 auto histogram(value_t)(in value_t bucketSize, in value_t[] values)
939 {
940     return Histogram!value_t(bucketSize, values);
941 }
942 
943 private struct Stats
944 {
945     static enum identityLevels = [.999, .99, .95, .90];
946 
947     size_t numBpsExpected;
948     size_t numBpsKnown;
949     size_t numBpsResult;
950     size_t numBpsCorrect;
951     size_t numTranslocatedGaps;
952     size_t numCorrectGaps;
953     size_t numContigsExpected;
954     size_t numCorrectContigs;
955     size_t numClosedGaps;
956     size_t numPartlyClosedGaps;
957     size_t numUnaddressedGaps;
958     size_t numBpsInClosedGaps;
959     size_t numBpsInGaps;
960     size_t maximumN50;
961     size_t inputN50;
962     size_t resultN50;
963     size_t gapMedian;
964     size_t closedGapMedian;
965     ReferenceInterval maxClosedGap;
966     Histogram!coord_t[identityLevels.length] correctGapLengthHistograms;
967     Histogram!coord_t closedGapLengthHistogram;
968     Histogram!coord_t gapLengthHistogram;
969 
970     string toJsonString() const
971     {
972         return [
973             "numBpsExpected": numBpsExpected.toJson,
974             "numBpsKnown": numBpsKnown.toJson,
975             "numBpsResult": numBpsResult.toJson,
976             "numBpsCorrect": numBpsCorrect.toJson,
977             "numTranslocatedGaps": numTranslocatedGaps.toJson,
978             "numCorrectGaps": numCorrectGaps.toJson,
979             "numContigsExpected": numContigsExpected.toJson,
980             "numCorrectContigs": numCorrectContigs.toJson,
981             "numClosedGaps": numClosedGaps.toJson,
982             "numPartlyClosedGaps": numPartlyClosedGaps.toJson,
983             "numUnaddressedGaps": numUnaddressedGaps.toJson,
984             "numBpsInClosedGaps": numBpsInClosedGaps.toJson,
985             "numBpsInGaps": numBpsInGaps.toJson,
986             "maximumN50": maximumN50.toJson,
987             "inputN50": inputN50.toJson,
988             "resultN50": resultN50.toJson,
989             "gapMedian": gapMedian.toJson,
990             "closedGapMedian": closedGapMedian.toJson,
991             "maxClosedGap": maxClosedGap.toJson,
992             "gapLengthHistogram": histsToJson(
993                 correctGapLengthHistograms[0],
994                 correctGapLengthHistograms[1],
995                 correctGapLengthHistograms[2],
996                 correctGapLengthHistograms[3],
997                 closedGapLengthHistogram,
998                 gapLengthHistogram,
999             ),
1000         ].toJsonString;
1001     }
1002 
1003     string toTabular() const
1004     {
1005         auto columnWidth = columnWidth();
1006         auto rows =[
1007             format!"numContigsExpected:    %*d"(columnWidth, numContigsExpected),
1008             format!"numCorrectContigs:     %*d"(columnWidth, numCorrectContigs),
1009 
1010             format!"numTranslocatedGaps:   %*d"(columnWidth, numTranslocatedGaps),
1011             format!"numClosedGaps:         %*d"(columnWidth, numClosedGaps),
1012             format!"numPartlyClosedGaps:   %*d"(columnWidth, numPartlyClosedGaps),
1013             format!"numUnaddressedGaps:    %*d"(columnWidth, numUnaddressedGaps),
1014             format!"numCorrectGaps:        %*d"(columnWidth, numCorrectGaps),
1015 
1016             format!"numBpsExpected:        %*d"(columnWidth, numBpsExpected),
1017             format!"numBpsKnown:           %*d"(columnWidth, numBpsKnown),
1018             format!"numBpsResult:          %*d"(columnWidth, numBpsResult),
1019             format!"numBpsCorrect:         %*d"(columnWidth, numBpsCorrect),
1020             format!"numBpsInGaps:          %*d"(columnWidth, numBpsInGaps),
1021             format!"numBpsInClosedGaps:    %*d"(columnWidth, numBpsInClosedGaps),
1022             format!"numBpsInNonClosedGaps: %*d"(columnWidth, numBpsInGaps - numBpsInClosedGaps),
1023 
1024             format!"maximumN50:            %*d"(columnWidth, maximumN50),
1025             format!"inputN50:              %*d"(columnWidth, inputN50),
1026             format!"resultN50:             %*d"(columnWidth, resultN50),
1027             format!"gapMedian:             %*d"(columnWidth, gapMedian),
1028             format!"closedGapMedian:       %*d"(columnWidth, closedGapMedian),
1029             format!"maxClosedGap:          %*s"(columnWidth, toString(maxClosedGap)),
1030             gapLengthHistogram.length > 0
1031                 ? format!"gapLengthHistogram:\n    \n%s"(histsToString(
1032                     correctGapLengthHistograms[0],
1033                     correctGapLengthHistograms[1],
1034                     correctGapLengthHistograms[2],
1035                     correctGapLengthHistograms[3],
1036                     closedGapLengthHistogram,
1037                     gapLengthHistogram,
1038                 ))
1039                 : null,
1040         ];
1041 
1042         return format!"%-(%s\n%)"(rows.filter!"a !is null");
1043     }
1044 
1045     const string toString(in ReferenceInterval interval)
1046     {
1047         return format!"[%d, %d) | len=%d"(interval.begin, interval.end, interval.size);
1048     }
1049 
1050     const string histsToString(Hists...)(in Hists hists)
1051     {
1052         assert(hists.length >= 1);
1053         assert([hists].all!(h => h.bucketSize == hists[0].bucketSize));
1054         auto limitsWidth = numWidth(hists[0].bucketSize * [hists].map!"a.length".maxElement);
1055         auto countWidths = [hists]
1056             .map!(h => numWidth(max(1, h.histogram.length == 0 ? 1 : maxElement(h.histogram))))
1057             .array;
1058 
1059         auto bucketsList = tupleMap!(h => h.buckets.array)(hists);
1060         auto rows = zip(StoppingPolicy.longest, bucketsList.expand)
1061             .map!(buckets => format!"    %*d%s"(
1062                 limitsWidth,
1063                 max(tupleMap!"a.end"(buckets.expand).expand),
1064                 zip(countWidths, [buckets.expand])
1065                     .map!(pair => format!" %*d"(pair[0], pair[1].count))
1066                     .join,
1067             ))
1068             .array;
1069 
1070         return format!"%-(%s\n%)"(rows);
1071     }
1072 
1073     const Json histsToJson(Hists...)(in Hists hists)
1074     {
1075         assert(hists.length >= 1);
1076         assert([hists].all!(h => h.bucketSize == hists[0].bucketSize));
1077 
1078         auto bucketsList = tupleMap!(h => h.buckets.array)(hists);
1079         auto rows = zip(StoppingPolicy.longest, bucketsList.expand)
1080             .map!(buckets => [
1081                 "limit": max(tupleMap!"a.end"(buckets.expand).expand).toJson,
1082                 "counts": [buckets.expand]
1083                     .map!"a.count"
1084                     .array
1085                     .toJson,
1086             ])
1087             .array;
1088 
1089         return rows.toJson;
1090     }
1091 
1092     protected size_t columnWidth() const
1093     {
1094         auto numWidth = numWidth(max(
1095             numBpsExpected,
1096             numBpsKnown,
1097             numBpsResult,
1098             numTranslocatedGaps,
1099             numBpsCorrect,
1100             numCorrectGaps,
1101             numContigsExpected,
1102             numCorrectContigs,
1103             numClosedGaps,
1104             numPartlyClosedGaps,
1105             numUnaddressedGaps,
1106             numBpsInClosedGaps,
1107             numBpsInGaps,
1108             maximumN50,
1109             inputN50,
1110             resultN50,
1111             gapMedian,
1112             closedGapMedian,
1113         ));
1114         auto strWidth = max(
1115             0, // dummy
1116             toString(maxClosedGap).length,
1117         );
1118 
1119         return max(numWidth, strWidth);
1120     }
1121 
1122     static protected size_t numWidth(Int)(Int i) nothrow
1123     {
1124         auto numWidth = cast(size_t) ceil(log10(i));
1125 
1126         return numWidth;
1127     }
1128 }