// Permutation.cpp : Responsible for inserting permuted DStrings into dTable //Coded by: Nicholas Urrea (nurrea@uci.edu) unless otherwise stated //ICS 175A, Winter 2004 //Project: Motif detection in yeast //Purpose: Computes all substrings for a set of genes and stores them into a customized hash table (Dtable). #include "Permutation.h" // count initialization & boundary checking void Permutation::initialize() { // bounds checking if( SUBSTR_MIN_LENGTH > SUBSTR_MAX_LENGTH || SUBSTR_MAX_LENGTH < SUBSTR_MIN_LENGTH ) cout << "Minimum and maximum length improperly defined in Permutation.h" << endl; count = 0; } // initialize a permutation Permutation::Permutation () { initialize(); } // passes in a DString through the constructor // and immediately permutes the DString (calls permute string function) // where each substring is stored into Dtable's hash table. Permutation::Permutation(DString str) { initialize(); permute(str); } // stores DString substrings of length [MIN,MAX] into dtable. void Permutation::permute(DString str) { // traverse index from 0 to n // hash each valid substring from cnt index to min and max length // #subsequences per genome = ((size*5) - 15) for( unsigned int i = 0; i < str.size(); i++ ) for( unsigned int j = SUBSTR_MIN_LENGTH; j <= SUBSTR_MAX_LENGTH; j++ ) { if ( i+j > str.size() ) j = SUBSTR_MAX_LENGTH+1; else { // add both a subsequence and its reverse complement w/ a filter // that will not add the reverse complement if it is the same string // as the non-reverse complement. dTable.add( str.substr(i,i+j) ); if ( str.substr(i,i+j) == getComplement(str.substr(i, i+j) ) ) ; else dTable.add( getComplement(str.substr(i, i+j) ) ); count++; } } } // stores dString subsequences from a DGFile in FASTA format into dTable (including subsequence lengths) void Permutation::permute(DGFile &file) { DString * string; CGene * cgene; // impt: genome sequence size needs to be passed to SequenceFinder // thus, dtable must store the entire genome sequence length for ( unsigned long i = 0; i < file.num_genes; i++ ) { string = file.GetGeneData(i); cgene = file.GetGene(i); permute( string->data() ); dTable.setSequenceRepresentedLength( dTable.getSequenceRepresentedLength() + cgene->sequence.size() ); } } //returns total # of permutation sequences int Permutation::getCount() { return count; } //prints dTable; hash table of genomic subsequences void Permutation::printDTable() { dTable.print(); } // PURPOSE: returns the reverse complement of a ACGT string // EDITED BY: Nicholas Urrea, Vishakh DString Permutation::getComplement(DString str) { int j = str.size()-1; DString moo = str; for(int i=0; i