//////////////////////////////////////////////////////////////////////////////
// SequenceExtractor
//////////////////////////////////////////////////////////////////////////////
import WorkerFeatureExtraction from './WorkerFeatureExtraction';
import WorkerBaseContent from './WorkerPlotExtraction';
import utils from './Utils';
/**
* The SequenceExtractor creates features (start/stop codons, ORFs) or plots
* (GC content, GC skew) based on the sequence.
*/
class SequenceExtractor {

  /**
   * Create a Sequence Extractor
   * @param {Sequence} sequence - The sequence to extract from. It must expose a truthy `seq` property.
   * @param {Object} options - Currently unused; kept for interface compatibility.
   * @throws {Error} If the sequence is missing or has no `seq`.
   * @private
   */
  constructor(sequence, options = {}) {
    // Validate before dereferencing so a missing sequence yields the intended
    // message instead of an unrelated TypeError.
    if (!sequence || !sequence.seq) {
      throw new Error('Sequence invalid. The sequence must be provided.');
    }
    this.sequence = sequence;
  }

  //////////////////////////////////////////////////////////////////////////
  // MEMBERS
  //////////////////////////////////////////////////////////////////////////

  /**
   * @member {Sequence} - Get or set the sequence. Falsy values are ignored by the setter.
   */
  get sequence() {
    return this._sequence;
  }

  set sequence(value) {
    if (value) {
      this._sequence = value;
    }
  }

  /**
   * @member {Viewer} - Get the viewer (read from the sequence).
   */
  get viewer() {
    return this.sequence.viewer;
  }

  /**
   * @member {Number} - Get the sequence length.
   */
  get length() {
    return this.sequence.length;
  }

  //////////////////////////////////////////////////////////////////////////
  // METHODS
  //////////////////////////////////////////////////////////////////////////

  /**
   * Wrap a function's source in an immediately-invoked expression, store it in
   * a Blob and return an object URL for it (suitable for `new Worker(url)`).
   * The caller is responsible for revoking the URL.
   * @param {Function} fn - Function whose source becomes the worker script.
   * @return {String} Object URL pointing at the generated script.
   */
  fn2workerURL(fn) {
    const blob = new Blob([`(${fn.toString()})()`], {type: 'application/javascript'});
    return URL.createObjectURL(blob);
  }

  /**
   * Build the sequence payload sent to the workers.
   * @param {Boolean} concatenate - When true, always send the single full
   *   sequence, even if the sequence has multiple contigs.
   * @return {Object} `{type, data}` where type is 'contigs' (data is the JSON
   *   of each contig) or 'sequence' (data is `[ {seq} ]`).
   */
  sequenceInput(concatenate = false) {
    if (this.sequence.hasMultipleContigs && !concatenate) {
      return {
        type: 'contigs',
        data: this.sequence.contigs().map( c => c.toJSON() )
      };
    }
    return {
      type: 'sequence',
      data: [ { seq: this.sequence.seq } ]
    };
  }

  /**
   * Extract data for a track, dispatching on the extract type. Sets
   * `track.dataType` to 'feature' or 'plot' and starts the matching extraction.
   * Does nothing when `utils.validate` rejects the extract type.
   * @param {Track} track - Track that receives the extracted data.
   * @param {String} extractType - One of 'start-stop-codons', 'orfs', 'gc-skew', 'gc-content'.
   * @param {Object} options - Passed through to generateFeatures/generatePlot.
   */
  extractTrackData(track, extractType, options = {}) {
    if (!utils.validate(extractType, ['start-stop-codons', 'orfs', 'gc-skew', 'gc-content'])) { return; }
    switch (extractType) {
    case 'start-stop-codons':
    case 'orfs':
      track.dataType = 'feature';
      this.generateFeatures(track, extractType, options);
      break;
    case 'gc-skew':
    case 'gc-content':
      track.dataType = 'plot';
      this.generatePlot(track, extractType, options);
      break;
    }
  }

  /**
   * Start a worker from the given function, forward progress to the track and
   * invoke `onComplete` with the worker's final message data. The worker is
   * terminated and its object URL revoked once it completes or errors, so
   * repeated extractions do not leak workers or Blob URLs.
   * NOTE(review): assumes 'complete' is the last message a worker sends.
   * @param {Function} workerFn - Function whose source is run as the worker.
   * @param {Object} message - Message posted to the worker.
   * @param {Track} track - Track whose loadProgress is updated.
   * @param {Function} onComplete - Called with `e.data` of the 'complete' message.
   * @return {Worker} The started worker.
   * @private
   */
  _runExtractionWorker(workerFn, message, track, onComplete) {
    const viewer = this.viewer;
    const url = this.fn2workerURL(workerFn);
    const worker = new Worker(url);
    const cleanup = () => {
      worker.terminate();
      URL.revokeObjectURL(url);
    };

    // Attach handlers before posting so no message can be missed.
    worker.onmessage = (e) => {
      const messageType = e.data.messageType;
      if (messageType === 'progress') {
        track.update({loadProgress: e.data.progress});
        viewer.layout.drawProgress();
      } else if (messageType === 'complete') {
        try {
          track.update({loadProgress: 100});
          onComplete(e.data);
        } finally {
          cleanup();
        }
      }
    };
    worker.onerror = (e) => {
      cleanup();
      const reason = (e && e.message) ? e.message : 'unknown error';
      console.error(`Extraction '${message.type}' worker failed: ${reason}`);
    };

    worker.postMessage(message);
    return worker;
  }

  /**
   * Extract features (start/stop codons or ORFs) in a worker and add them to
   * the viewer and the track. Does nothing when `utils.validate` rejects the
   * extract type.
   * @param {Track} track - Track that receives the features.
   * @param {String} extractType - 'start-stop-codons' or 'orfs'.
   * @param {Object} options - Extraction options:
   *   - start: comma separated start codons (default: Genetic Code Table 11 starts)
   *   - stop: comma separated stop codons (default: 'TAA,TAG,TGA')
   *   - minORFLength: minimum ORF length (default: 100)
   */
  generateFeatures(track, extractType, options = {}) {
    if (!utils.validate(extractType, ['start-stop-codons', 'orfs'])) { return; }
    let startTime = new Date().getTime();
    const viewer = this.viewer;
    const seqInput = this.sequenceInput();
    const message = {
      type: extractType,
      seqType: seqInput.type,
      seqData: seqInput.data,
      seqTotalLength: this.sequence.length,
      options: {
        // These are start/stop codons for Genetic Code Table 11
        startPattern: utils.defaultFor(options.start, 'ATG, TTG, CTG, ATT, ATC, ATA, GTG'),
        stopPattern: utils.defaultFor(options.stop, 'TAA,TAG,TGA'),
        minORFLength: utils.defaultFor(options.minORFLength, 100)
      }
    };

    this._runExtractionWorker(WorkerFeatureExtraction, message, track, (data) => {
      const featureDataArray = data.featureDataArray;
      console.log(`Features '${extractType}' Worker Time: ${utils.elapsedTime(startTime)}` );
      startTime = new Date().getTime();

      // Attach the legend item matching each feature's type
      const legends = this.createLegendItems(extractType);
      for (let i = 0, len = featureDataArray.length; i < len; i++) {
        featureDataArray[i].legend = legends[featureDataArray[i].type];
      }
      const features = viewer.addFeatures(featureDataArray);
      console.log(`Features '${extractType}' Creation Time: ${utils.elapsedTime(startTime)}` );
      startTime = new Date().getTime();

      track._features = features;
      track.updateSlots();
      track.triggerUpdate();
      console.log(`Features '${extractType}' Update Time: ${utils.elapsedTime(startTime)}` );
      viewer.drawFull();
    });
  }

  /**
   * Extract a plot (GC content or GC skew) in a worker and add it to the
   * viewer and the track. Does nothing when `utils.validate` rejects the
   * extract type.
   * @param {Track} track - Track that receives the plot.
   * @param {String} extractType - 'gc-content' or 'gc-skew'.
   * @param {Object} options - Extraction options:
   *   - window: window size (default: from getWindowStep)
   *   - step: step size (default: from getWindowStep)
   *   - deviation: 'scale' or 'average' (default: 'scale')
   */
  generatePlot(track, extractType, options = {}) {
    if (!utils.validate(extractType, ['gc-content', 'gc-skew'])) { return; }
    const startTime = new Date().getTime();
    const viewer = this.viewer;
    // FIXME: concatenate set to true; should come from the user
    const seqInput = this.sequenceInput(true);
    const windowStep = this.getWindowStep();
    const message = {
      type: extractType,
      seqType: seqInput.type,
      seqData: seqInput.data,
      seqTotalLength: this.sequence.length,
      options: {
        window: utils.defaultFor(options.window, windowStep.window),
        step: utils.defaultFor(options.step, windowStep.step),
        deviation: utils.defaultFor(options.deviation, 'scale') // 'scale' or 'average'
      }
    };

    this._runExtractionWorker(WorkerBaseContent, message, track, (workerData) => {
      const baseContent = workerData.baseContent;
      const data = { positions: baseContent.positions, scores: baseContent.scores, baseline: baseContent.average };
      data.legendPositive = this.getLegendItem(extractType, '+').name;
      data.legendNegative = this.getLegendItem(extractType, '-').name;
      data.name = extractType;
      data.extractedFromSequence = true;

      const plots = viewer.addPlots(data);
      track._plot = plots[0];
      track.updateSlots();
      track.triggerUpdate();
      console.log(`Plot '${extractType}' Worker Time: ${utils.elapsedTime(startTime)}` );
      viewer.drawFull();
    });
  }

  /**
   * Create the legend items needed for a feature extract type.
   * @param {String} extractType - 'orfs' or 'start-stop-codons'.
   * @return {Object} Legend items keyed by feature type ('ORF', or
   *   'start-codon' and 'stop-codon'). Empty for any other extract type.
   */
  createLegendItems(extractType) {
    let legends = {};
    if (extractType === 'orfs') {
      legends = {
        'ORF': this.getLegendItem('ORF')
      };
    } else if (extractType === 'start-stop-codons') {
      legends = {
        'start-codon': this.getLegendItem('start-codon'),
        'stop-codon': this.getLegendItem('stop-codon')
      };
    }
    return legends;
  }

  /**
   * Find (or create) the viewer legend item for a feature/plot type.
   * @param {String} extractType - 'start-codon', 'stop-codon', 'ORF',
   *   'gc-content' or 'gc-skew'. Anything else yields the 'Unknown' item.
   * @param {String} sign - Only used for 'gc-skew': '+' selects 'GC Skew+',
   *   any other value selects 'GC Skew-'.
   * @return {LegendItem} Whatever `legend.findLegendItemOrCreate` returns.
   */
  getLegendItem(extractType, sign) {
    const legend = this.viewer.legend;
    let item;
    switch (extractType) {
    case 'start-codon':
      item = legend.findLegendItemOrCreate('Start', 'blue', 'arc');
      break;
    case 'stop-codon':
      item = legend.findLegendItemOrCreate('Stop', 'red', 'arc');
      break;
    case 'ORF':
      item = legend.findLegendItemOrCreate('ORF', 'green', 'arc');
      break;
    case 'gc-content': {
      // Use the inverse of the background color
      const color = this.viewer.settings.backgroundColor.copy().invert();
      item = legend.findLegendItemOrCreate('GC Content', color);
      break;
    }
    case 'gc-skew': {
      const color = (sign === '+') ? 'rgb(0,153,0)' : 'rgb(153,0,153)';
      const name = (sign === '+') ? 'GC Skew+' : 'GC Skew-';
      item = legend.findLegendItemOrCreate(name, color);
      break;
    }
    default:
      item = legend.findLegendItemOrCreate('Unknown', 'grey');
    }
    return item;
  }

  /**
   * Choose a default window and step size for plots based on sequence length.
   * Sequences of 1e8 bp or longer use the largest tier so that window and
   * step are never undefined.
   * @return {Object} `{step, window}`
   */
  getWindowStep() {
    let windowSize, step;
    const length = this.length;
    if (length < 1e3 ) {
      windowSize = 10;
      step = 1;
    } else if (length < 1e4) {
      windowSize = 50;
      step = 1;
    } else if (length < 1e5) {
      windowSize = 500;
      step = 1;
    } else if (length < 1e6) {
      windowSize = 1000;
      step = 10;
    } else if (length < 1e7) {
      windowSize = 10000;
      step = 100;
    } else {
      windowSize = 50000;
      step = 1000;
    }
    return { step: step, window: windowSize };
  }

}
export default SequenceExtractor;
// extractFeatures(options = {}) {
// let features = new CGV.CGArray();
// if (options.sequence === 'start-stop-codons') {
// features = this.extractStartStops(options);
// } else if (options.sequence === 'orfs') {
// features = this.extractORFs(options);
// }
// return features
// }
// generateFeatures(track, options) {
// if (options.sequence === 'start-stop-codons') {
// features = this.generateStartStops(options);
// } else if (options.sequence === 'orfs') {
// features = this.extractORFs(options);
// }
// }
//
//
// extractPlot(options = {}) {
// if (options.sequence === 'gc-content') {
// return this.extractBaseContentPlot('gc-content', options);
// } else if (options.sequence === 'gc-skew') {
// return this.extractBaseContentPlot('gc-skew', options);
// }
// }
//
// // PLOTS should be bp: [1,23,30,45], score: [0, 0.4, 1]
// // score must be between 0 and 1
// extractBaseContentPlot(type, options = {}) {
// let startTime = new Date().getTime();
// if (!CGV.validate(type, ['gc-content', 'gc-skew'])) { return }
// this.viewer.flash("Creating '" + type + "' Plot...");
//
//
// options.window = CGV.defaultFor(options.window, this.getWindowStep().window);
// options.step = CGV.defaultFor(options.step, this.getWindowStep().step);
// let step = options.step
// let deviation = CGV.defaultFor(options.deviation, 'scale'); // 'scale' or 'average'
// // let deviation = CGV.defaultFor(options.deviation, 'average'); // 'scale' or 'average'
//
// let baseContent = this.calculateBaseContent(type, options);
// let positions = [];
// let position;
//
// // The current position marks the middle of the calculated window.
// // Adjust the bp position to mark where the plot changes,
// // NOT the center point of the window.
// // i.e. half way between the current position and the last
// for (let i = 0, len = baseContent.positions.length; i < len; i++) {
// position = baseContent.positions[i];
// if (i === 0) {
// positions.push(1);
// } else {
// positions.push(position - step/2);
// }
// }
// let data = { positions: positions, scores: baseContent.scores, baseline: baseContent.average };
// data.legendPositive = this.getLegendItem(type, '+').text;
// data.legendNegative = this.getLegendItem(type, '-').text;
//
// let plot = new CGV.Plot(this.viewer, data);
// console.log("Plot '" + type + "' Extraction Time: " + CGV.elapsedTime(startTime) );
// return plot
// }
// calculateBaseContent(type, options) {
// let windowSize = CGV.defaultFor(options.window, this.getWindowStep().window);
// let step = CGV.defaultFor(options.step, this.getWindowStep().step);
// let deviation = CGV.defaultFor(options.deviation, 'scale'); // 'scale' or 'average'
// // let deviation = CGV.defaultFor(options.deviation, 'average'); // 'scale' or 'average'
//
// let positions = [];
// let scores = [];
// let average = CGV.Sequence.baseCalculation(type, this.seqString);
// // Starting points for min and max
// let min = 1;
// let max = 0;
// let halfWindowSize = windowSize / 2;
// let start, stop;
//
// // FIXME: not set up for linear sequences
// // position marks the middle of the calculated window
// for (let position = 1, len = this.length; position < len; position += step) {
// // Extract DNA for window and calculate score
// start = this.sequence.subtractBp(position, halfWindowSize);
// stop = this.sequence.addBp(position, halfWindowSize);
// let range = new CGV.CGRange(this.sequence, start, stop);
// let seq = this.sequence.forRange(range);
// let score = CGV.Sequence.baseCalculation(type, seq);
//
// if (score > max) {
// max = score;
// }
// if (score < min) {
// min = score;
// }
//
// positions.push(position);
// scores.push(score);
// }
//
// // Adjust scores if scaled
// // Min value becomes 0
// // Max value becomes 1
// // Average becomes 0.5
// if (deviation === 'scale') {
// scores = scores.map( (score) => {
// if (score >= average) {
// return CGV.scaleValue(score, {min: average, max: max}, {min: 0.5, max: 1});
// } else {
// return CGV.scaleValue(score, {min: min, max: average}, {min: 0, max: 0.5});
// }
// });
// min = 0;
// max = 1;
// average = 0.5;
// }
// return { positions: positions, scores: scores, min: min, max: max, average: average }
// }
// extractORFs(options = {}) {
// this.viewer.flash('Finding ORFs...');
// let startTime = new Date().getTime();
// let features = new CGV.CGArray();
// let type = 'ORF'
// let source = 'orfs'
// let minORFLength = CGV.defaultFor(options.minORFLength, 100)
// // Get start features by reading frame
// let startPattern = CGV.defaultFor(options.start, 'ATG')
// let startFeatures = this.createFeaturesFromPattern(startPattern, 'start-codon', 'start-stop-codons');
// let startsByRF = this.sequence.featuresByReadingFrame(startFeatures);
// // Get stop features by reading frame
// let stopPattern = CGV.defaultFor(options.stop, 'TAA,TAG,TGA');
// let stopFeatures = this.createFeaturesFromPattern(stopPattern, 'start-codon', 'start-stop-codons');
// let stopsByRF = this.sequence.featuresByReadingFrame(stopFeatures);
// // Get forward ORFs
// let position, orfLength, range, readingFrames;
// readingFrames = ['rfPlus1', 'rfPlus2', 'rfPlus3'];
// let start, stop, stopIndex;
// for (let rf of readingFrames) {
// position = 1;
// stopIndex = 0;
// for (let i = 0, len_i = startsByRF[rf].length; i < len_i; i++) {
// start = startsByRF[rf][i];
// if (start.start < position) {
// continue;
// }
// for (let j = stopIndex, len_j = stopsByRF[rf].length; j < len_j; j++) {
// stop = stopsByRF[rf][j];
// orfLength = stop.stop - start.start;
// if (orfLength >= minORFLength) {
// position = stop.stop;
// range = new CGV.CGRange(this.sequence, start.start, stop.stop);
// features.push( this.createFeature(range, type, 1, source ) );
// stopIndex = j;
// break;
// }
// }
// }
// }
// // Get reverse ORFs
// readingFrames = ['rfMinus1', 'rfMinus2', 'rfMinus3'];
// for (let rf of readingFrames) {
// stopIndex = 0;
// position = this.sequence.length;
// let startsByRFSorted = startsByRF[rf].order_by('start', true);
// let stopsByRFSorted = stopsByRF[rf].order_by('start', true);
// for (let i = 0, len_i = startsByRF[rf].length; i < len_i; i++) {
// start = startsByRF[rf][i];
// if (start.start > position) {
// continue;
// }
// for (let j = stopIndex, len_j = stopsByRF[rf].length; j < len_j; j++) {
// stop = stopsByRF[rf][j];
// orfLength = start.stop - stop.start;
// if (orfLength >= minORFLength) {
// position = stop.start;
// range = new CGV.CGRange(this.sequence, stop.start, start.stop);
// features.push( this.createFeature(range, type, -1, source ) );
// stopIndex = j;
// break;
// }
// }
// }
// }
// console.log('ORF Extraction Time: ' + CGV.elapsedTime(startTime) );
// return features
// }
// extractStartStops(options = {}) {
// this.viewer.flash('Finding Start/Stop Codons...');
// let startTime = new Date().getTime();
// // Forward and Reverse Starts
// let startPattern = CGV.defaultFor(options.start, 'ATG')
// let features = this.createFeaturesFromPattern(startPattern, 'start-codon', 'start-stop-codons');
// // Forward and Reverse Stops
// let stopPattern = CGV.defaultFor(options.stop, 'TAA,TAG,TGA');
// features.merge( this.createFeaturesFromPattern(stopPattern, 'stop-codon', 'start-stop-codons'))
// console.log('Start/Stop Extraction Time: ' + CGV.elapsedTime(startTime) );
// return features
// }
//
// createFeaturesFromPattern(pattern, type, source) {
// let features = new CGV.CGArray();
// pattern = pattern.toUpperCase().split(',').map( (s) => { return s.trim() }).join('|')
// for (let strand of [1, -1]) {
// // let startTime = new Date().getTime();
// let ranges = this.sequence.findPattern(pattern, strand)
// // console.log("Find Pattern '" + pattern + "' Strand " + strand + " Time: " + CGV.elapsedTime(startTime) );
// // let startTime = new Date().getTime();
// for (let i = 0, len = ranges.length; i < len; i++) {
// features.push( this.createFeature(ranges[i], type, strand, source ) );
// }
// // console.log("Features for Pattern '" + pattern + "' Strand " + strand + " Time: " + CGV.elapsedTime(startTime) );
// }
// return features.order_by('start')
// }
// createFeature(range, type, strand, source) {
// let featureData = {
// type: type,
// start: range.start,
// stop: range.stop,
// strand: strand,
// source: source,
// extractedFromSequence: true
// }
// featureData.legend = this.getLegendItem(type).text;
// return new CGV.Feature(this.viewer, featureData)
// }