//////////////////////////////////////////////////////////////////////////////
// NCList
//////////////////////////////////////////////////////////////////////////////
/**
* The NCList is a container for intervals that allows fast searching of overlaping regions.
*
* - Nested Containment List (NCList): A new algorithm for accelerating
* - interval query of genome alignment and interval databases.
* - Alekseyenko, A., and Lee, C. (2007).
* - Bioinformatics, doi:10.1093/bioinformatics/btl647
* - https://academic.oup.com/bioinformatics/article/23/11/1386/199545/Nested-Containment-List-NCList-a-new-algorithm-for
* - Code adapted from
* https://searchcode.com/codesearch/view/17093141
*/
class NCList {
/**
* Each interval should have a start and stop property.
* @param {Array} intervals - Array of Intervals used to create the NCList.
* @param {Object} options -
* @return {NCList}
* @private
*/
constructor(intervals = [], options = {}) {
this.intervals = [];
this.circularLength = options.circularLength;
this.startProperty = options.startProperty || 'start';
this.stopProperty = options.stopProperty || 'stop';
this.fill(intervals);
}
/**
* @member {Number} - The number of intervals in the NCList
*/
get length() {
return this._length;
}
/**
* Splits intervals that span the Origin of cicular sequences
*/
_normalize(intervals) {
let interval;
const nomalizedIntervals = [];
for (let i = 0, len = intervals.length; i < len; i++) {
interval = intervals[i];
// if (interval.start <= interval.stop) {
if (interval[this.startProperty] <= interval[this.stopProperty]) {
nomalizedIntervals.push( {interval: interval, index: i});
} else {
nomalizedIntervals.push({
interval: interval,
index: i,
// start: interval.start,
// start: this.start(interval),
// stop: this.circularLength,
[this.startProperty]: this.start(interval),
[this.stopProperty]: this.circularLength,
crossesOrigin: true
});
nomalizedIntervals.push({
interval: interval,
index: i,
// start: 1,
// stop: this.end(interval),
[this.startProperty]: 1,
[this.stopProperty]: this.end(interval),
crossesOrigin: true
});
}
}
return nomalizedIntervals;
}
/**
* Fils the NCList with the given intervals
* @param {Array} intervals - Array of intervals
*/
fill(intervals) {
this._length = intervals.length;
if (intervals.length === 0) {
this.topList = [];
return;
}
// const start = this.start;
// const end = this.end;
const sublist = this.sublist;
intervals = this._normalize(intervals);
this.intervals = intervals;
// Sort by overlap
// intervals.sort(function(a, b) {
intervals.sort( (a, b) => {
if (this.start(a) !== this.start(b)) return this.start(a) - this.start(b);
else return this.end(b) - this.end(a);
});
const sublistStack = [];
let curList = [];
this.topList = curList;
curList.push(intervals[0]);
if (intervals.length === 1) return;
let curInterval, topSublist;
for (let i = 1, len = intervals.length; i < len; i++) {
curInterval = intervals[i];
// if this interval is contained in the previous interval,
if (this.end(curInterval) < this.end(intervals[i - 1])) {
// create a new sublist starting with this interval
sublistStack.push(curList);
curList = new Array(curInterval);
sublist(intervals[i - 1], curList);
} else {
// find the right sublist for this interval
while (true) {
if (0 === sublistStack.length) {
curList.push(curInterval);
break;
} else {
topSublist = sublistStack[sublistStack.length - 1];
if (this.end(topSublist[topSublist.length - 1])
> this.end(curInterval)) {
// curList is the first (deepest) sublist that
// curInterval fits into
curList.push(curInterval);
break;
} else {
curList = sublistStack.pop();
}
}
}
}
}
}
/**
* Method to retrieve the stop coordinate of the interval
*/
end(interval) {
// return interval.stop || interval.interval.stop;
// return interval.stop || interval.interval[this.stopProperty];
return interval[this.stopProperty] || interval.interval[this.stopProperty];
}
/**
* Method to retrieve the start coordinate of the interval
*/
start(interval) {
// return interval.start || interval.interval.start;
// return interval.start || interval.interval[this.startProperty];
return interval[this.startProperty] || interval.interval[this.startProperty];
}
/**
* Method to set the sublist for the given interval.
*/
sublist(interval, list) {
interval.sublist = list;
}
_run(start, stop = start, step = 1, callback = function() {}, list = this.topList) {
let skip;
const len = list.length;
let i, direction;
if (step > 0) {
direction = 1;
i = this._binarySearch(list, start, true, 'end');
} else if (step < 0) {
direction = -1;
i = this._binarySearch(list, stop, false, 'start');
}
while (i >= 0 && i < len &&
( (direction === 1) ? (this.start(list[i]) <= stop) : (this.end(list[i]) >= start) ) ) {
skip = false;
if (list[i].crossesOrigin) {
if (this._runIntervalsCrossingOrigin.indexOf(list[i].interval) !== -1) {
skip = true;
} else {
this._runIntervalsCrossingOrigin.push(list[i].interval);
}
}
if (!skip && list[i].index % step === 0) {
callback.call(list[i].interval, list[i].interval);
}
if (list[i].sublist) {
this._run(start, stop, step, callback, list[i].sublist);
}
i += direction;
}
}
/*
* Run the callback for each interval that overlaps with the given range.
* @param {Number} start - Start position of the range
* @param {Number} stop - Stop position of the range [Default: same as start]
* @param {Number} step - Skip intervals by increasing the step [Default: 1]
*/
run(start, stop = start, step = 1, callback = function() {}) {
this._runIntervalsCrossingOrigin = [];
if (this.circularLength && stop < start) {
this._run(start, this.circularLength, step, callback);
this._run(1, stop, step, callback);
} else {
this._run(start, stop, step, callback);
}
}
/*
* Count the number of intervals that overlaps with the given range.
* @param {Number} start - Start position of the range
* @param {Number} stop - Stop position of the range [Default: same as start]
* @param {Number} step - Skip intervals by increasing the step [Default: 1]
* @return {Number}
*/
count(start, stop, step) {
let count = 0;
this.run(start, stop, step, () => {
count++;
});
return count;
}
/*
* Return intervals that overlaps with the given range.
* @param {Number} start - Start position of the range
* @param {Number} stop - Stop position of the range [Default: same as start]
* @param {Number} step - Skip intervals by increasing the step [Default: 1]
* @return {Array}
*/
find(start, stop, step) {
const overlaps = [];
this.run(start, stop, step, (i) => {
overlaps.push(i);
});
return overlaps;
}
_binarySearch(data, searchValue, upper, getter) {
let minIndex = -1;
let maxIndex = data.length;
let currentIndex, currentValue;
while (maxIndex - minIndex > 1) {
currentIndex = (minIndex + maxIndex) / 2 | 0;
currentValue = this[getter](data[currentIndex]);
if (currentValue < searchValue) {
minIndex = currentIndex;
} else if (currentValue > searchValue) {
maxIndex = currentIndex;
} else {
return currentIndex;
}
}
return (upper ? maxIndex : minIndex);
}
/*
* Test that the correct intervals are returned especially for circular sequences
*/
static test() {
function testInterval(nc, start, stop, expected) {
const result = nc.find(start, stop).map( n => n.name ).sort().join(', ');
expected = expected.sort().join(', ');
let testOut = `${start}..${stop}: ${expected} - `;
testOut += (result === expected) ? 'Pass' : `${'FAIL' + ' - '}${result}`;
console.log(testOut);
}
const intervals = [
{name: 'A', start: 1, stop: 20},
{name: 'B', start: 10, stop: 15},
{name: 'C', start: 10, stop: 20},
{name: 'D', start: 15, stop: 30},
{name: 'E', start: 20, stop: 30},
{name: 'F', start: 20, stop: 50},
{name: 'G', start: 80, stop: 100},
{name: 'H', start: 90, stop: 95},
{name: 'I', start: 90, stop: 5},
{name: 'J', start: 95, stop: 15},
{name: 'K', start: 95, stop: 2},
{name: 'L', start: 92, stop: 50}
];
const nc = new NCList(intervals, { circularLength: 100 });
testInterval(nc, 10, 20, ['A', 'B', 'C', 'D', 'E', 'F', 'J', 'L']);
testInterval(nc, 40, 85, ['F', 'G', 'L']);
testInterval(nc, 40, 95, ['F', 'G', 'H', 'I', 'J', 'K', 'L']);
testInterval(nc, 95, 10, ['A', 'B', 'C', 'G', 'H', 'I', 'J', 'K', 'L']);
return nc;
}
static testMapStarts() {
function testInterval(nc, start, stop, expected) {
const result = nc.find(start, stop).map( n => n.name ).sort().join(', ');
expected = expected.sort().join(', ');
let testOut = `${start}..${stop}: ${expected} - `;
testOut += (result === expected) ? 'Pass' : `${'FAIL' + ' - '}${result}`;
console.log(testOut);
}
const intervals = [
{name: 'A', mapStart: 10, mapStop: 10},
{name: 'B', mapStart: 20, mapStop: 21},
{name: 'C', mapStart: 950, mapStop: 5},
];
// const nc = new NCList(intervals, { circularLength: 1000 });
const nc = new NCList(intervals, { circularLength: 1000, startProperty: 'mapStart', stopProperty: 'mapStop'});
testInterval(nc, 900, 200, ['A', 'B', 'C']);
return nc;
}
static testMapStarts2() {
function testInterval(nc, start, stop, expected) {
const result = nc.find(start, stop).map( n => n.name ).sort().join(', ');
expected = expected.sort().join(', ');
let testOut = `${start}..${stop}: ${expected} - `;
testOut += (result === expected) ? 'Pass' : `${'FAIL' + ' - '}${result}`;
console.log(testOut);
}
const intervals = [
{name: 'A', start: 10, stop: 10},
{name: 'B', start: 20, stop: 21},
{name: 'C', start: 950, stop: 5},
];
// const nc = new NCList(intervals, { circularLength: 1000 });
const nc = new NCList(intervals, { circularLength: 1000, startProperty: 'start', stopProperty: 'stop'});
testInterval(nc, 900, 200, ['A', 'B', 'C']);
return nc;
}
}
export default NCList;