// fileio.cpp (166 lines) // // Coded by: Joseph Bertolami // Email: jbertola@uci.edu // // ICS 175A, Winter 2004 // #include "fileio.h" // Author: Joseph Bertolami // Email: jbertola@uci.edu int DFile::ReadFile(char *filename) { if ( !filename ) return 0; long f_size = 0; FILE *f = fopen(filename, "r"); if ( !f ) printf("Error: Unable to open %s\n", filename); // read file size (avoid Win32!) fseek(f, 0, SEEK_END); f_size = ftell(f); fclose(f); f = fopen(filename, "r"); if ( !f ) printf("Error: Unable to open %s\n", filename); char *data = (char *) malloc(f_size+1); memset(data, 0, f_size); int read = (int) fread(data, 1, (size_t) f_size, f); if ( read == 0 ) printf("Error: Unable to read bytes from %s\n", filename); else printf("Read %i bytes\n", read); data[f_size] = '\0'; fclose(f); if ( buffer ) delete buffer; buffer = new DString(data); return buffer->length; } // Author: Joseph Bertolami // Email: jbertola@uci.edu int DFile::WriteFile(char *filename) { if ( !filename || !buffer ) return 0; FILE *f = fopen(filename, "w"); if ( !f ) printf("Error: Unable to open file %s\n", filename); int wrote = (int) fwrite(buffer->buffer, 1, (size_t) buffer->length, f); if ( wrote == 0 ) printf("Unable to write bytes to %s\n", filename); else printf("Wrote %i bytes\n", wrote); fclose(f); return wrote; } // Author: Joseph Bertolami // Email: jbertola@uci.edu // Modified by: Nicholas Urrea (just added case insensivitiy for reading in genes) int DGFile::ReadGeneFile(char *filename) { if ( ReadFile(filename) == 0 ) return 0; unsigned long cur_gene = -1; // now buffer is valid and contains full file data, we simply must parse // tokenize by newline, a line is either a gene header, or gene data // if it is a gene header, parse it, if it is data, add it to current gene's data char * backup = strdup(buffer->buffer); char * token = strtok(backup, "\r\n"); // first lets gather the number of genomic sequences since we lack a good // data structure basecode do { if ( token[0] == '>' ) num_genes++; } while ( token = strtok(NULL, "\r\n") ); printf("Number of genes in file: %i\n", num_genes); genes = new CGene[num_genes]; if ( backup ) free(backup); backup = strdup(buffer->buffer); token = strtok(backup, "\r\n"); do { if ( token[0] == '>' ) { // parsing header cur_gene++; if ( cur_gene >= num_genes ) { printf("Critical error: gene buffer overrun\n"); return -1; } char gname[80]; memset(gname, 0, 80); int ret = sscanf(token, "> %s from %i to %i, size %i", gname, genes[cur_gene].neg_range, genes[cur_gene].pos_range, genes[cur_gene].size); genes[cur_gene].name.set(gname); printf("Read Gene %i: %s\n", cur_gene, genes[cur_gene].name.buffer); } // this check really isnt necessary... but for security else if ( token[0] == 'A' || token[0] == 'T' || token[0] == 'G' || token[0] == 'C' || token[0] == 'a' || token[0] == 't' || token[0] == 'g' || token[0] == 'c' ) { // if speed becomes an issue, pre-size the sequence buffers as specified // by the size parameter in the gene header.. this isnt done for now b/c // we arent assuming the header is necessarily correct // Case insensitivity added by Nicholas Urrea: // convert all chars to upper case !! for(unsigned int i=0; i= 0 && gene < num_genes ) return &genes[gene]; } // Author: Joseph Bertolami // Email: jbertola@uci.edu DString * DGFile::GetGeneData(int gene) { if ( gene >= 0 && gene < num_genes ) return &genes[gene].sequence; }