1 /**
2     This is the `mergeInsertions` command of `dentist`.
3 
4     Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de>
5     License: Subject to the terms of the MIT license, as written in the
6              included LICENSE file.
7     Authors: Arne Ludwig <arne.ludwig@posteo.de>
8 */
9 module dentist.commands.mergeInsertions;
10 
11 import dentist.common.binio : InsertionDb;
12 import dentist.common.insertions : Insertion;
13 import std.algorithm :
14     isSorted,
15     map,
16     sort;
17 import dentist.util.log;
18 import std.array : array;
19 
20 
21 /// Execute the `mergeInsertions` command with `options`.
22 void execute(Options)(in Options options)
23 {
24     mixin(traceExecution);
25 
26     auto mergedInsertions = options
27         .insertionsFiles
28         .map!readFromFile
29         .map!ensureSorted
30         .array
31         .mergeAll;
32 
33     logJsonInfo(
34         "numInputFiles", options.insertionsFiles.length,
35         "totalNumInsertions", mergedInsertions.length,
36     );
37 
38     InsertionDb.write(options.mergedInsertionsFile, mergedInsertions);
39 }
40 
41 Insertion[] readFromFile(in string fileName)
42 {
43     auto insertionDb = InsertionDb.parse(fileName);
44 
45     return insertionDb[];
46 }
47 
48 Insertion[] ensureSorted(Insertion[] insertions)
49 {
50     if (!insertions.isSorted)
51         insertions.sort;
52 
53     return insertions;
54 }
55 
56 private auto mergeAll(T)(T[][] insertionsList)
57 {
58     return InsertionsMerger!T(insertionsList);
59 }
60 
61 unittest
62 {
63     import std.algorithm.comparison : equal;
64     import std.range : retro;
65 
66     int[] a = [1, 3, 5];
67     int[] b = [2, 3, 4];
68     int[] c = [3, 5, 6];
69 
70     assert(mergeAll([a, b, c]).equal([1, 2, 3, 3, 3, 4, 5, 5, 6]));
71 }
72 
73         import std.conv;
74 import std.stdio;
75 
76 private struct InsertionsMerger(T)
77 {
78     public T[][] sources;
79     private size_t _lastFrontIndex = size_t.max;
80 
81     this(T[][] sources)
82     {
83         this.sources = sources;
84         this._lastFrontIndex = frontIndex;
85     }
86 
87     this(this)
88     {
89         this.sources = this.sources.dup;
90     }
91 
92     @property bool empty() const pure nothrow
93     {
94         return _lastFrontIndex == size_t.max;
95     }
96 
97     @property auto ref front() pure nothrow
98     {
99         debug assert(sources[_lastFrontIndex].length > 0);
100 
101         return sources[_lastFrontIndex][0];
102     }
103 
104     private size_t frontIndex() pure nothrow
105     {
106         size_t bestIndex = size_t.max; // indicate undefined
107         T bestElement;
108         foreach (i, source; sources)
109         {
110             if (source.length == 0) continue;
111             if (bestIndex == size_t.max || // either this is the first or
112                 source[0] < bestElement)
113             {
114                 bestIndex = i;
115                 bestElement = source[0];
116             }
117         }
118         assert(bestIndex == size_t.max || sources[bestIndex].length > 0);
119 
120         return bestIndex;
121     }
122 
123     void popFront() pure nothrow
124     {
125         sources[_lastFrontIndex] = sources[_lastFrontIndex][1 .. $];
126         _lastFrontIndex = frontIndex;
127     }
128 
129     @property auto save() pure nothrow
130     {
131         return this;
132     }
133 
134     @property size_t length() const pure nothrow
135     {
136         size_t result;
137         foreach (source; sources)
138             result += source.length;
139 
140         return result;
141     }
142 
143     alias opDollar = length;
144 }