1 /** 2 This is the `mergeInsertions` command of `dentist`. 3 4 Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de> 5 License: Subject to the terms of the MIT license, as written in the 6 included LICENSE file. 7 Authors: Arne Ludwig <arne.ludwig@posteo.de> 8 */ 9 module dentist.commands.mergeInsertions; 10 11 import dentist.common.binio : InsertionDb; 12 import dentist.common.insertions : Insertion; 13 import std.algorithm : 14 isSorted, 15 map, 16 sort; 17 import dentist.util.log; 18 import std.array : array; 19 20 21 /// Execute the `mergeInsertions` command with `options`. 22 void execute(Options)(in Options options) 23 { 24 mixin(traceExecution); 25 26 auto mergedInsertions = options 27 .insertionsFiles 28 .map!readFromFile 29 .map!ensureSorted 30 .array 31 .mergeAll; 32 33 logJsonInfo( 34 "numInputFiles", options.insertionsFiles.length, 35 "totalNumInsertions", mergedInsertions.length, 36 ); 37 38 InsertionDb.write(options.mergedInsertionsFile, mergedInsertions); 39 } 40 41 Insertion[] readFromFile(in string fileName) 42 { 43 auto insertionDb = InsertionDb.parse(fileName); 44 45 return insertionDb[]; 46 } 47 48 Insertion[] ensureSorted(Insertion[] insertions) 49 { 50 if (!insertions.isSorted) 51 insertions.sort; 52 53 return insertions; 54 } 55 56 private auto mergeAll(T)(T[][] insertionsList) 57 { 58 return InsertionsMerger!T(insertionsList); 59 } 60 61 unittest 62 { 63 import std.algorithm.comparison : equal; 64 import std.range : retro; 65 66 int[] a = [1, 3, 5]; 67 int[] b = [2, 3, 4]; 68 int[] c = [3, 5, 6]; 69 70 assert(mergeAll([a, b, c]).equal([1, 2, 3, 3, 3, 4, 5, 5, 6])); 71 } 72 73 import std.conv; 74 import std.stdio; 75 76 private struct InsertionsMerger(T) 77 { 78 public T[][] sources; 79 private size_t _lastFrontIndex = size_t.max; 80 81 this(T[][] sources) 82 { 83 this.sources = sources; 84 this._lastFrontIndex = frontIndex; 85 } 86 87 this(this) 88 { 89 this.sources = this.sources.dup; 90 } 91 92 @property bool empty() const pure nothrow 93 { 94 return _lastFrontIndex == size_t.max; 95 } 96 97 @property auto ref front() pure nothrow 98 { 99 debug assert(sources[_lastFrontIndex].length > 0); 100 101 return sources[_lastFrontIndex][0]; 102 } 103 104 private size_t frontIndex() pure nothrow 105 { 106 size_t bestIndex = size_t.max; // indicate undefined 107 T bestElement; 108 foreach (i, source; sources) 109 { 110 if (source.length == 0) continue; 111 if (bestIndex == size_t.max || // either this is the first or 112 source[0] < bestElement) 113 { 114 bestIndex = i; 115 bestElement = source[0]; 116 } 117 } 118 assert(bestIndex == size_t.max || sources[bestIndex].length > 0); 119 120 return bestIndex; 121 } 122 123 void popFront() pure nothrow 124 { 125 sources[_lastFrontIndex] = sources[_lastFrontIndex][1 .. $]; 126 _lastFrontIndex = frontIndex; 127 } 128 129 @property auto save() pure nothrow 130 { 131 return this; 132 } 133 134 @property size_t length() const pure nothrow 135 { 136 size_t result; 137 foreach (source; sources) 138 result += source.length; 139 140 return result; 141 } 142 143 alias opDollar = length; 144 }