/**
    This package contains methods to handle the proprietary binary data
    container for `Insertion`s.

    Copyright: © 2018 Arne Ludwig <arne.ludwig@posteo.de>
    License: Subject to the terms of the MIT license, as written in the
             included LICENSE file.
    Authors: Arne Ludwig <arne.ludwig@posteo.de>
*/
module dentist.common.binio.insertiondb;

import core.exception : AssertError;
import dentist.common : ReferencePoint;
import dentist.common.alignments : AlignmentChain;
import dentist.common.binio._base :
    ArrayStorage,
    CompressedBaseQuad,
    CompressedSequence,
    DbIndex,
    lockIfPossible,
    readRecord,
    readRecordAt,
    readRecords;
import dentist.common.insertions :
    Insertion,
    InsertionInfo,
    SpliceSite;
import dentist.common.scaffold :
    ContigNode,
    ContigPart;
import std.array : minimallyInitializedArray;
import std.conv : to;
import std.exception : assertThrown, enforce, ErrnoException;
import std.format : format;
import std.range :
    ElementType,
    empty,
    front,
    hasLength,
    isForwardRange,
    isInputRange,
    popFront,
    save;
import std.stdio : File;
import std.traits : isArray;
import std.typecons : tuple, Tuple;

version (unittest) import dentist.common.binio._testdata :
    getInsertionsTestData,
    numCompressedBaseQuads,
    numInsertions,
    numSpliceSites;


/// Thrown when a requested record or slice lies outside of the insertion DB.
class InsertionDbException : Exception
{
    pure nothrow @nogc @safe this(string msg, string file = __FILE__,
            size_t line = __LINE__, Throwable nextInChain = null)
    {
        super(msg, file, line, nextInChain);
    }
}

/**
    Reader for the binary insertion DB. The file starts with an
    `InsertionDbIndex` followed by three blocks: `InsertionStorage` records,
    compressed base quads and splice sites (see `InsertionDbFileWriter`).

    Records are accessed with index/slice syntax, e.g. `db[]`, `db[i]` or
    `db[from .. to]`.
*/
struct InsertionDb
{
    /// File regions of the three blocks that cover one requested slice.
    private alias DbSlices = Tuple!(
        ArrayStorage!(StorageType!Insertion), "insertions",
        ArrayStorage!(StorageType!CompressedBaseQuad), "compressedBaseQuads",
        ArrayStorage!(StorageType!SpliceSite), "spliceSites",
    );

    private File file;
    private InsertionDbIndex index;
    /// Regions of the slice that is currently being read; set by `readSlice`
    /// and consumed by the private `parse` overloads.
    private DbSlices slices;

    /// Returns the index entry describing the whole insertions block.
    @property auto insertions() const pure nothrow
    {
        return index.insertions;
    }

    /// Returns the index entry describing the whole compressed base quads block.
    @property auto compressedBaseQuads() const pure nothrow
    {
        return index.compressedBaseQuads;
    }

    /// Returns the index entry describing the whole splice sites block.
    @property auto spliceSites() const pure nothrow
    {
        return index.spliceSites;
    }

    /**
        Opens `dbFile` for binary reading and loads its index.

        Params:
            dbFile = path of the insertion DB file
        Returns: a DB handle whose index has been read; the records
                 themselves are only read on access.
    */
    static InsertionDb parse(in string dbFile)
    {
        auto file = File(dbFile, "rb");
        // NOTE(review): presumably acquires a file lock where the platform
        // supports it -- confirm against `dentist.common.binio._base`.
        lockIfPossible(file);
        auto db = InsertionDb(file);
        db.ensureDbIndex();

        return db;
    }

    /// Closes the underlying file.
    void releaseDb()
    {
        file.close();
    }

    /// Reads all insertions of the DB (`db[]`).
    Insertion[] opIndex()
    {
        ensureDbIndex();

        return readSlice(0, length);
    }

    /**
        Reads the single insertion at position `i` (`db[i]`).

        Throws: `InsertionDbException` if `i` is not in `[0, length)`.
    */
    Insertion opIndex(size_t i)
    {
        ensureDbIndex();
        enforce!InsertionDbException(
            i < length,
            format!"cannot read block %d in `%s`: out of bounds [0, %d)"(
                    i, file.name, length)
        );

        return readSlice(i, i + 1)[0];
    }

    /**
        Reads the insertions in `[slice[0], slice[1])` (`db[from .. to]`).

        Throws: `InsertionDbException` if the bounds are reversed or reach
                beyond `length`.
    */
    Insertion[] opIndex(size_t[2] slice)
    {
        auto from = slice[0];
        auto to = slice[1];
        ensureDbIndex();
        // Check both bounds here: `readSlice` only guards them with an
        // `assert`, which is compiled out in release builds.
        enforce!InsertionDbException(
            from <= to && to <= length,
            format!"cannot read blocks %d-%d in `%s`: out of bounds [0, %d]"(
                    from, to, file.name, length)
        );

        return readSlice(from, to);
    }

    /**
        Packs slice bounds for `opIndex(size_t[2])`. Empty slices
        (`from == to`) are allowed, like for built-in arrays; `readSlice`
        returns an empty array for them.
    */
    size_t[2] opSlice(size_t dim)(size_t from, size_t to)
            if (dim == 0)
    {
        assert(from <= to, "invalid slice");

        return [from, to];
    }

    /// Number of insertions stored in the DB; loads the index if necessary.
    @property size_t length()
    {
        ensureDbIndex();

        return index.insertions.length;
    }

    alias opDollar = length;

    /// Loads the index from `file` unless it has been loaded before.
    private void ensureDbIndex()
    {
        // A loaded index never equals `.init` because `insertionsPtr` is set
        // to `InsertionDbIndex.sizeof` by `InsertionDbIndex.from`.
        if (index != index.init)
            return;

        // NOTE(review): no offset is passed, so this presumably reads at the
        // current file position, i.e. the file must be positioned at the
        // start of the DB on first use -- confirm.
        index = file.readRecord!InsertionDbIndex();
    }

    /// Reads the insertions in `[from, to)` including their sequences and
    /// splice sites. Bounds must already be validated by the caller.
    private Insertion[] readSlice(size_t from, size_t to)
    {
        assert(from <= to && to <= length);

        if (from == to)
            return [];

        // Step 1: determine memory requirements and DB slices
        slices = getSlices(from, to);

        // Step 2: allocate minimally initialized memory for all blocks
        auto insertions = minimallyInitializedArray!(Insertion[])(slices.insertions.length);
        auto compressedBaseQuads = minimallyInitializedArray!(CompressedBaseQuad[])(slices.compressedBaseQuads.length);
        auto spliceSites = minimallyInitializedArray!(SpliceSite[])(slices.spliceSites.length);

        // Step 3: parse each record for each block assigning already
        //         allocated array slices to the array fields
        parse(insertions, compressedBaseQuads, spliceSites);
        parse(compressedBaseQuads);
        parse(spliceSites);

        return insertions;
    }

    /// Determines the file regions of all three blocks that belong to the
    /// insertions in `[from, to)`; requires `from < to`.
    private DbSlices getSlices(size_t from, size_t to)
    {
        auto insertions = index.insertions[from .. to];
        // The first and last record delimit the regions of the dependent
        // blocks, which are stored in the same order as the insertions.
        auto firstInsertion = file.readRecordAt!(StorageType!Insertion)(insertions[0]);
        auto lastInsertion = file.readRecordAt!(StorageType!Insertion)(insertions[$ - 1]);

        auto compressedBaseQuads = ArrayStorage!(StorageType!CompressedBaseQuad).fromPtrs(
            firstInsertion.sequence[0],
            lastInsertion.sequence[$],
        );

        auto spliceSites = ArrayStorage!(StorageType!SpliceSite).fromPtrs(
            firstInsertion.spliceSites[0],
            lastInsertion.spliceSites[$],
        );

        return DbSlices(
            insertions,
            compressedBaseQuads,
            spliceSites,
        );
    }

    /// Reads the insertion records of the current slice and wires each one
    /// to its sub-slice of the given (not yet filled) payload buffers.
    private void parse(
        ref Insertion[] insertions,
        CompressedBaseQuad[] compressedBaseQuads,
        SpliceSite[] spliceSites,
    )
    {
        file.seek(slices.insertions.ptr);

        // Running [begin, end) windows into the payload buffers.
        size_t[2] compressedBaseQuadsSlice;
        size_t[2] spliceSitesSlice;

        foreach (ref insertion; insertions)
        {
            auto insertionStorage = file.readRecord!(StorageType!Insertion);

            compressedBaseQuadsSlice[0] = compressedBaseQuadsSlice[1];
            compressedBaseQuadsSlice[1] += insertionStorage.sequence.length;

            spliceSitesSlice[0] = spliceSitesSlice[1];
            spliceSitesSlice[1] += insertionStorage.spliceSites.length;

            insertion = Insertion(
                insertionStorage.start,
                insertionStorage.end,
                InsertionInfo(
                    CompressedSequence(
                        compressedBaseQuads[
                            compressedBaseQuadsSlice[0] .. compressedBaseQuadsSlice[1]
                        ],
                        insertionStorage.baseOffset,
                        insertionStorage.sequenceLength,
                    ),
                    insertionStorage.contigLength,
                    spliceSites[spliceSitesSlice[0] .. spliceSitesSlice[1]],
                ),
            );
        }
    }

    /// Reads the splice sites of the current slice into `spliceSites`.
    private void parse(
        ref SpliceSite[] spliceSites,
    )
    {
        static assert(SpliceSite.sizeof == StorageType!SpliceSite.sizeof);
        file.seek(slices.spliceSites.ptr);
        spliceSites = file.readRecords(spliceSites);
    }

    /// Reads the compressed base quads of the current slice into
    /// `compressedBaseQuads`.
    private void parse(
        ref CompressedBaseQuad[] compressedBaseQuads,
    )
    {
        static assert(CompressedBaseQuad.sizeof == StorageType!CompressedBaseQuad.sizeof);
        file.seek(slices.compressedBaseQuads.ptr);
        compressedBaseQuads = file.readRecords(compressedBaseQuads);
    }

    /**
        Writes `insertions` to a new DB file at `dbFile`; the file is opened
        with mode "wb".

        Params:
            dbFile     = path of the DB file to write
            insertions = forward range of insertions with known length
    */
    static void write(R)(in string dbFile, R insertions)
            if (isForwardRange!R && hasLength!R && is(ElementType!R : const(Insertion)))
    {
        auto writer = InsertionDbFileWriter!R(File(dbFile, "wb"), insertions);

        lockIfPossible(writer.file);
        writer.writeToFile();
    }
}

// Round trip: write the test data, check the file size and read it back.
unittest
{
    import dentist.util.tempfile : mkstemp;
    import std.file : remove;

    auto insertions = getInsertionsTestData();

    enum totalDbSize =
        InsertionDbIndex.sizeof +
        StorageType!Insertion.sizeof * numInsertions +
        StorageType!CompressedBaseQuad.sizeof * numCompressedBaseQuads +
        StorageType!SpliceSite.sizeof * numSpliceSites;

    auto tmpDb = mkstemp("./.unittest-XXXXXX");
    scope (exit)
    {
        tmpDb.file.close();
        remove(tmpDb.name);
    }

    InsertionDbFileWriter!(Insertion[])(tmpDb.file, insertions).writeToFile();
    tmpDb.file.sync();

    assert(tmpDb.file.size == totalDbSize);

    tmpDb.file.rewind();
    auto insertionDb = InsertionDb(tmpDb.file);

    assert(insertionDb[] == insertions);
}

/// Serializes a range of insertions as index + three consecutive blocks.
/// The range is traversed several times, hence the forward range requirement.
private struct InsertionDbFileWriter(R)
        if (isForwardRange!R && hasLength!R && is(ElementType!R : const(Insertion)))
{
    File file;
    R insertions;
    InsertionDbIndex index;

    /// Computes the index and writes index, insertion records, compressed
    /// base quads and splice sites in that order.
    void writeToFile()
    {
        index = InsertionDbIndex.from(insertions.save);

        file.rawWrite([index]);
        writeBlock!Insertion();
        writeBlock!CompressedBaseQuad();
        writeBlock!SpliceSite();
    }

    /// Writes one `InsertionStorage` record per insertion. The payload
    /// arrays are stored as references into the two following blocks.
    void writeBlock(T : Insertion)()
    {
        // Cursors that walk through the payload blocks; each starts at the
        // beginning of its block with zero length.
        auto compressedBaseQuads = index.compressedBaseQuads;
        compressedBaseQuads.length = 0;
        auto spliceSites = index.spliceSites;
        spliceSites.length = 0;

        version (assert)
        {
            // Track the expected file position for sanity checks.
            auto insertions = index.insertions;
            insertions.length = 0;
            assert(insertions.ptr == file.tell());
        }
        foreach (insertion; this.insertions.save)
        {
            compressedBaseQuads.length = insertion.payload.sequence.compressedLength;
            spliceSites.length = insertion.payload.spliceSites.length;
            auto insertionStorage = InsertionStorage(
                insertion.start,
                insertion.end,
                insertion.payload.sequence.baseOffset,
                insertion.payload.sequence.length,
                compressedBaseQuads,
                insertion.payload.contigLength,
                spliceSites,
            );

            file.rawWrite([insertionStorage]);

            // Advance the cursors to the end of the regions just referenced.
            compressedBaseQuads.ptr = compressedBaseQuads[$];
            spliceSites.ptr = spliceSites[$];
            version (assert)
            {
                ++insertions.length;
                assert(insertions[$] == file.tell());
            }
        }
    }

    /// Writes the compressed sequence data of all insertions back to back.
    void writeBlock(T : CompressedBaseQuad)()
    {
        version (assert)
        {
            auto compressedBaseQuads = index.compressedBaseQuads;
            compressedBaseQuads.length = 0;
            assert(compressedBaseQuads.ptr == file.tell());
        }
        foreach (insertion; this.insertions.save)
        {
            static assert(CompressedBaseQuad.sizeof == StorageType!CompressedBaseQuad.sizeof);
            file.rawWrite(insertion.payload.sequence.data);

            version (assert)
            {
                compressedBaseQuads.length += insertion.payload.sequence.compressedLength;
                assert(compressedBaseQuads[$] == file.tell());
            }
        }
    }

    /// Writes the splice sites of all insertions back to back.
    void writeBlock(T : SpliceSite)()
    {
        version (assert)
        {
            auto spliceSites = index.spliceSites;
            spliceSites.length = 0;
            assert(spliceSites.ptr == file.tell());
        }
        foreach (insertion; this.insertions.save)
        {
            static assert(SpliceSite.sizeof == StorageType!SpliceSite.sizeof);
            file.rawWrite(insertion.payload.spliceSites);

            version (assert)
            {
                spliceSites.length += insertion.payload.spliceSites.length;
                assert(spliceSites[$] == file.tell());
            }
        }
    }
}

/// Header of the DB file: the start offsets of the three blocks plus the
/// end-of-file offset.
private struct InsertionDbIndex
{
    // NOTE(review): `beginPtr`, `endPtr`, `arrayStorage` and `EOF` are not
    // declared in this file; presumably they come from this mixin, which in
    // turn uses `NextType` and `fieldPtr` below -- confirm in `_base`.
    mixin DbIndex;

    /// Maps a block's element type to the type of the block that follows it.
    private static template NextType(T)
    {
        static if (is(T == Insertion))
            alias NextType = CompressedBaseQuad;
        else static if (is(T == CompressedBaseQuad))
            alias NextType = SpliceSite;
        else static if (is(T == SpliceSite))
            alias NextType = EOF;
    }

    /// Maps a block's element type to the field holding its start offset.
    private static template fieldPtr(T)
    {
        static if (is(T == Insertion))
            alias fieldPtr = insertionsPtr;
        else static if (is(T == CompressedBaseQuad))
            alias fieldPtr = compressedBaseQuadsPtr;
        else static if (is(T == SpliceSite))
            alias fieldPtr = spliceSitesPtr;
        else static if (is(T == EOF))
            alias fieldPtr = eofPtr;
    }

    size_t insertionsPtr;
    size_t compressedBaseQuadsPtr;
    size_t spliceSitesPtr;
    size_t eofPtr;

    @property alias insertions = arrayStorage!Insertion;
    @property alias compressedBaseQuads = arrayStorage!CompressedBaseQuad;
    @property alias spliceSites = arrayStorage!SpliceSite;

    /// Computes the index (file layout) for the given insertions. The range
    /// is consumed, so pass a `.save`d copy if it is needed afterwards.
    static InsertionDbIndex from(R)(R insertions) nothrow pure
            if (isInputRange!R && hasLength!R && is(ElementType!R : const(Insertion)))
    {
        InsertionDbIndex index;

        index.beginPtr!Insertion = InsertionDbIndex.sizeof;
        // The end pointer of a block is the begin pointer of the next one.
        // Each of them first receives the byte size of its block ...
        index.endPtr!Insertion = StorageType!Insertion.sizeof * insertions.length;
        foreach (insertion; insertions)
        {
            index.endPtr!CompressedBaseQuad += StorageType!CompressedBaseQuad.sizeof *
                    insertion.payload.sequence.compressedLength;
            index.endPtr!SpliceSite += StorageType!SpliceSite.sizeof *
                    insertion.payload.spliceSites.length;
        }

        // ... and the running sum turns the sizes into absolute offsets.
        index.compressedBaseQuadsPtr += index.insertionsPtr;
        index.spliceSitesPtr += index.compressedBaseQuadsPtr;
        index.eofPtr += index.spliceSitesPtr;

        return index;
    }

    unittest
    {
        auto dbIndex = InsertionDbIndex.from(getInsertionsTestData());

        assert(dbIndex.insertionsPtr == InsertionDbIndex.sizeof);
        assert(dbIndex.compressedBaseQuadsPtr ==
                dbIndex.insertionsPtr +
                StorageType!Insertion.sizeof * numInsertions);
        assert(dbIndex.spliceSitesPtr ==
                dbIndex.compressedBaseQuadsPtr +
                StorageType!CompressedBaseQuad.sizeof * numCompressedBaseQuads);
        assert(dbIndex.eofPtr ==
                dbIndex.spliceSitesPtr +
                StorageType!SpliceSite.sizeof * numSpliceSites);
    }
}

// `beginPtr!T`/`endPtr!T` must read and write the start offset of block `T`
// and of the block following it, respectively.
unittest
{
    enum begin = 1;
    enum end = 2;
    enum modified = 3;

    {
        InsertionDbIndex dbIndex;

        dbIndex.insertionsPtr = begin;
        dbIndex.compressedBaseQuadsPtr = end;

        assert(dbIndex.beginPtr!Insertion == begin);
        assert(dbIndex.endPtr!Insertion == end);

        dbIndex.beginPtr!Insertion = modified;
        dbIndex.endPtr!Insertion = modified;

        assert(dbIndex.insertionsPtr == modified);
        assert(dbIndex.compressedBaseQuadsPtr == modified);
    }
    {
        InsertionDbIndex dbIndex;

        dbIndex.compressedBaseQuadsPtr = begin;
        dbIndex.spliceSitesPtr = end;

        assert(dbIndex.beginPtr!CompressedBaseQuad == begin);
        assert(dbIndex.endPtr!CompressedBaseQuad == end);

        dbIndex.beginPtr!CompressedBaseQuad = modified;
        dbIndex.endPtr!CompressedBaseQuad = modified;

        assert(dbIndex.compressedBaseQuadsPtr == modified);
        assert(dbIndex.spliceSitesPtr == modified);
    }
    {
        InsertionDbIndex dbIndex;

        dbIndex.spliceSitesPtr = begin;
        dbIndex.eofPtr = end;

        assert(dbIndex.beginPtr!SpliceSite == begin);
        assert(dbIndex.endPtr!SpliceSite == end);

        dbIndex.beginPtr!SpliceSite = modified;
        dbIndex.endPtr!SpliceSite = modified;

        assert(dbIndex.spliceSitesPtr == modified);
        assert(dbIndex.eofPtr == modified);
    }
}

/// Maps an in-memory type to the type used for it in the DB file. Arrays
/// are stored as `ArrayStorage` references to their elements.
private template StorageType(T)
{
    static if (is(T == Insertion))
        alias StorageType = InsertionStorage;
    else static if (is(T == CompressedBaseQuad[]))
        alias StorageType = ArrayStorage!(StorageType!CompressedBaseQuad);
    else static if (is(T == CompressedBaseQuad))
        alias StorageType = CompressedBaseQuad;
    else static if (is(T == SpliceSite))
        alias StorageType = SpliceSite;
    else static if (is(T == SpliceSite[]))
        alias StorageType = ArrayStorage!(StorageType!SpliceSite);
}

/// On-disk record of a single `Insertion`. The field order defines the
/// binary layout of the record.
private struct InsertionStorage
{
    ContigNode start;
    ContigNode end;
    ubyte baseOffset;
    size_t sequenceLength;
    /// Reference to this insertion's data in the compressed base quads block.
    StorageType!(CompressedBaseQuad[]) sequence;
    size_t contigLength;
    /// Reference to this insertion's entries in the splice sites block.
    StorageType!(SpliceSite[]) spliceSites;
}