diff --git a/src/tools/make_kernel_string_pool.c b/src/tools/make_kernel_string_pool.c index 5e10f3e..87a0cb9 100755 --- a/src/tools/make_kernel_string_pool.c +++ b/src/tools/make_kernel_string_pool.c @@ -29,17 +29,48 @@ #include #include #include +#include +#include +#include "..\grayspace-misc\gsdefines.h" + +typedef struct tagSUBSTR +{ + // string + char * p_str; + + // frequency + int freq; + + // index + int idx; +} +SUBSTR; + typedef struct tagSUBSTRLIST { - char ** pp_substrs; + SUBSTR * p_substrsmem; + SUBSTR ** pp_substrs; int numsubstrs; } SUBSTRLIST; -// current open sub string file +typedef struct tagSTRLIST +{ + char ** pp_strs; + int numstrs; +} +STRLIST; + +// current open string file FILE * g_p_file_substrs = NULL; +// main/current list of strings +STRLIST g_mainstrs = { NULL, 0 }; + +// current open sub string file +FILE * g_p_file_strs = NULL; + // main/current list of sub strings SUBSTRLIST g_mainsubstrs = { NULL, 0 }; @@ -47,31 +78,90 @@ SUBSTRLIST g_tempsubstrs1 = { NULL, 0 }; SUBSTRLIST g_tempsubstrs2 = { NULL, 0 }; +// free all memory associated with the string list +static void Free_StringList( STRLIST * p_sl ) +{ + int i; + if( p_sl->pp_strs != NULL ) + { + for( i = 0; i < p_sl->numstrs; i++ ) + { + if( p_sl->pp_strs[i] != NULL ) + { + free( p_sl->pp_strs[i] ); + p_sl->pp_strs[i] = NULL; + } + } + + free( p_sl->pp_strs ); + p_sl->pp_strs = NULL; + p_sl->numstrs = 0; + } +} + // free all memory associated with the sub string list static void Free_SubStringList( SUBSTRLIST * p_ssl ) { int i; if( p_ssl->pp_substrs != NULL ) { + free( p_ssl->pp_substrs ); + p_ssl->pp_substrs = NULL; + } + + if( p_ssl->p_substrsmem != NULL ) + { for( i = 0; i < p_ssl->numsubstrs; i++ ) { - if( p_ssl->pp_substrs[i] != NULL ) + if( p_ssl->p_substrsmem[i].p_str != NULL ) { - free( p_ssl->pp_substrs[i] ); - p_ssl->pp_substrs[i] = NULL; + free( p_ssl->p_substrsmem[i].p_str ); + p_ssl->p_substrsmem[i].p_str = NULL; } } - free( p_ssl->pp_substrs ); - p_ssl->pp_substrs = NULL; + free( p_ssl->p_substrsmem ); + p_ssl->p_substrsmem = NULL; p_ssl->numsubstrs = 0; } } -// compare two pointers to strings -static int CompareStringPtr( const void * p1, const void * p2 ) +// allocates only memory for sub string structures +// and pointers *not* the strings +// also: +// - assigns sub-string pointers +// - sets string pointers to NULL for safety +// - sets all frequencies to 0 +// - sets all idxs to -1 +// note: +// - destroys previous contents if exists +static void MakeNew_SubStringList( SUBSTRLIST * p_ssl, int numsubstrs ) { - return strcmp( *((const char **) p1), *((const char **) p2) ); + int i; + + Free_SubStringList( p_ssl ); + p_ssl->numsubstrs = numsubstrs; + p_ssl->pp_substrs + = (SUBSTR **) malloc( sizeof(SUBSTR *) * p_ssl->numsubstrs ); + p_ssl->p_substrsmem + = (SUBSTR *) malloc( sizeof(SUBSTR) * p_ssl->numsubstrs ); + + for( i = 0; i < p_ssl->numsubstrs; i++ ) + { + p_ssl->pp_substrs[i] = p_ssl->p_substrsmem + i; + p_ssl->pp_substrs[i]->p_str = NULL; + p_ssl->pp_substrs[i]->freq = 0; + p_ssl->pp_substrs[i]->idx = -1; + } +} + +// compare two pointers to strings +static int CompareSubStringPtr( const void * p1, const void * p2 ) +{ + const SUBSTR * pss1 = *((const SUBSTR **) p1); + const SUBSTR * pss2 = *((const SUBSTR **) p2); + + return strcmp( pss1->p_str, pss2->p_str ); } // sorts a sub string list into alphabetical order @@ -84,8 +174,8 @@ // sort by pointer qsort( p_ssl->pp_substrs, p_ssl->numsubstrs, - sizeof(char *), - CompareStringPtr ); + sizeof(SUBSTR *), + CompareSubStringPtr ); return 1; } @@ -93,6 +183,133 @@ return 0; } +// compare two pointers to strings +static int CompareSubStringPtrByFreq( const void * p1, const void * p2 ) +{ + const SUBSTR * pss1 = *((const SUBSTR **) p1); + const SUBSTR * pss2 = *((const SUBSTR **) p2); + + return pss2->freq - pss1->freq; +} + +// sorts a sub string list into alphabetical order +// list cannot be empty for success +// returns 1 on success, 0 on failure +static int Sort_SubStringListByFreq( SUBSTRLIST * p_ssl ) +{ + if( p_ssl->numsubstrs && p_ssl->pp_substrs ) + { + // sort by pointer + qsort( p_ssl->pp_substrs, + p_ssl->numsubstrs, + sizeof(SUBSTR *), + CompareSubStringPtrByFreq ); + + return 1; + } + + return 0; +} + +static SUBSTR * Find_SubStringByStr( SUBSTRLIST * p_ssl, char * p_str ) +{ + SUBSTR key; + SUBSTR * p_key; + SUBSTR ** pp_retval; + + p_key = &key; + key.p_str = p_str; + + if( p_ssl->numsubstrs && p_ssl->pp_substrs ) + { + pp_retval + = bsearch( &p_key, + p_ssl->pp_substrs, + p_ssl->numsubstrs, + sizeof(SUBSTR *), + CompareSubStringPtr ); + + if( pp_retval ) + { + return *pp_retval; + } + } + + return NULL; +} + + + +// given an open string file, creates a 'STRLIST' with its +// contents (the file should contain a list of strings, one on each +// line and in plain text) +// +// on failure: +// - frees/empties the given list +// - returns 0 +// on success: +// - returns 1 +static int Create_StringList( STRLIST * p_sl, FILE * p_file ) +{ + int i; + int numlines; + char buff[8192]; + char * p_curbuff; + + // destroy the current list + Free_StringList( p_sl ); + + // go to beginning of file + if( fseek( p_file, 0L, SEEK_SET ) ) + { + goto failbail; + } + + // count our lines: + numlines = 0; + while( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) ) + { + numlines++; + } + + // go to beginning of file + if( fseek( p_file, 0L, SEEK_SET ) ) + { + goto failbail; + } + + // read each string from file + p_sl->numstrs = numlines; + p_sl->pp_strs = (char **) malloc( sizeof(char *) * p_sl->numstrs ); + for( i = 0; i < p_sl->numstrs; i++ ) + { + if( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) ) + { + // strip newline + p_curbuff = strtok( buff, "\n" ); + if( p_curbuff ) + { + // allocate string + p_sl->pp_strs[i] + = (char *) malloc( sizeof(char) * (strlen( p_curbuff ) + 1) ); + // copy + strcpy( p_sl->pp_strs[i], p_curbuff ); + continue; + } + } + + goto failbail; + } + + // success + return 1; + +failbail: + Free_StringList( p_sl ); + printf( "[Create_StringList] Error" ); + return 0; +} + // given an open sub string file, creates a 'SUBSTRLIST' with its // contents (the file should contain a list of unique substrings // in alphabetical order) @@ -106,7 +323,8 @@ { int i; int numlines; - char buff[1025]; + char buff[1024]; + char * p_curbuff; // go to beginning of file if( fseek( p_file, 0L, SEEK_SET ) ) @@ -116,7 +334,7 @@ // count our lines: numlines = 0; - while( fgets( buff, 1024, p_file ) != NULL ) + while( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) != NULL ) { numlines++; } @@ -128,19 +346,23 @@ } // re-allocate our substring list: - Free_SubStringList( p_ssl ); - p_ssl->numsubstrs = numlines; - p_ssl->pp_substrs = (char **) malloc( sizeof(char *) * p_ssl->numsubstrs ); + MakeNew_SubStringList( p_ssl, numlines ); // read each substring for( i = 0; i < p_ssl->numsubstrs; i++ ) { - if( fgets( buff, 1024, p_file ) != NULL ) + if( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) != NULL ) { - p_ssl->pp_substrs[i] - = (char *) malloc( sizeof(char) * (strlen( buff ) + 1) ); - strcpy( p_ssl->pp_substrs[i], buff ); - continue; + // strip newline + p_curbuff = strtok( buff, "\n" ); + if( p_curbuff ) + { + // allocate and copy + p_ssl->pp_substrs[i]->p_str + = (char *) malloc( sizeof(char) * (strlen( p_curbuff ) + 1) ); + strcpy( p_ssl->pp_substrs[i]->p_str, p_curbuff ); + continue; + } } goto failbail; @@ -168,7 +390,7 @@ { int i; int numwords; - char buff[1025]; + char buff[1024]; char * p_curbuff; // destroy the current list @@ -182,7 +404,10 @@ // count our words: numwords = 0; - while( (p_curbuff = fgets( buff, 1024, p_file )) != NULL ) + while( ( p_curbuff + = fgets( buff, + sizeof(buff)/sizeof(char) - 1, + p_file ) ) != NULL ) { p_curbuff = strtok( p_curbuff, g_whitespace ); while( p_curbuff ) @@ -199,18 +424,20 @@ } // read each substring from words in file - p_ssl->numsubstrs = numwords; - p_ssl->pp_substrs = (char **) malloc( sizeof(char *) * p_ssl->numsubstrs ); + MakeNew_SubStringList( p_ssl, numwords ); for( i = 0; i < p_ssl->numsubstrs; ) { - if( (p_curbuff = fgets( buff, 1024, p_file )) != NULL ) + if( ( p_curbuff + = fgets( buff, + sizeof(buff)/sizeof(char) - 1, + p_file ) ) != NULL ) { p_curbuff = strtok( p_curbuff, g_whitespace ); while( p_curbuff ) { - p_ssl->pp_substrs[i] + p_ssl->pp_substrs[i]->p_str = (char *) malloc( sizeof(char) * (strlen( p_curbuff ) + 1) ); - strcpy( p_ssl->pp_substrs[i], p_curbuff ); + strcpy( p_ssl->pp_substrs[i]->p_str, p_curbuff ); i++; p_curbuff = strtok( NULL, g_whitespace ); } @@ -229,10 +456,109 @@ failbail: Free_SubStringList( p_ssl ); - printf( "[Create_SubStringList] Error" ); + printf( "[BuildFromFile_SubStrings] Error" ); return 0; } + +// given a sub string list and a string list +// refreshes the frequency of each sub string from the strings +// in the string list +static int UpdateFrequencies_SubStrings( SUBSTRLIST * p_ssl, + const STRLIST * p_sl ) +{ + int i; + char buff[1024]; + char * p_curbuff; + SUBSTR * p_curss; + + // clear all current frequencies + for( i = 0; i < p_ssl->numsubstrs; i++ ) + { + p_ssl->p_substrsmem[i].freq = 0; + } + + // parse every token in the string list + // and update the frequency count of the + // related sub string + for( i = 0; i < p_sl->numstrs; i++ ) + { + if( p_sl->pp_strs[i] ) + { + // copy working string + strcpy( buff, p_sl->pp_strs[i] ); + p_curbuff = strtok( buff, g_whitespace ); + while( p_curbuff ) + { + // get related substring (*MUST* exist) + p_curss = Find_SubStringByStr( p_ssl, p_curbuff ); + if( p_curss ) + { + // update frequency + p_curss->freq++; + + // next token + p_curbuff = strtok( NULL, g_whitespace ); + + // continue + continue; + } + + goto failbail; + } + + continue; + } + + goto failbail; + } + + // success: + return 1; + +failbail: + // clear all current frequencies (again) + for( i = 0; i < p_ssl->numsubstrs; i++ ) + { + p_ssl->p_substrsmem[i].freq = 0; + } + return 0; +} + +// given a sub string list +// assigns each sub string an index +// according to the current sorting scheme +static int UpdateIndices_SubStrings( SUBSTRLIST * p_ssl ) +{ + int i; + + // re-assign all current indices + for( i = 0; i < p_ssl->numsubstrs; i++ ) + { + if( p_ssl->pp_substrs[i] ) + { + p_ssl->pp_substrs[i]->idx = i; + continue; + } + + goto failbail; + } + + // success + return 1; + +failbail: + // clear all indices + for( i = 0; i < p_ssl->numsubstrs; i++ ) + { + p_ssl->p_substrsmem[i].idx = -1; + } + + // fail + return 0; +} + + // creates a new 'SUBSTRLIST' into 'p_ssl_dst' by // combining the sub-strings in 'p_ssl_src1' and 'p_ssl_src2' // then it clears/frees both 'p_ssl_src1' and 'p_ssl_src2' @@ -248,15 +574,10 @@ int dst_idx; SUBSTRLIST * p_ssl_src; - // free the destination list - Free_SubStringList( p_ssl_dst ); - - // set number of strings in dest list - p_ssl_dst->numsubstrs = p_ssl_src1->numsubstrs + p_ssl_src2->numsubstrs; - - // allocate strings pointers - p_ssl_dst->pp_substrs - = (char **) malloc( sizeof(char *) * p_ssl_dst->numsubstrs ); + // (re)allocate the destination list + MakeNew_SubStringList( p_ssl_dst, + p_ssl_src1->numsubstrs + + p_ssl_src2->numsubstrs ); // combine lists, preserving alphabetical order src_idx_1 = 0; @@ -265,22 +586,22 @@ while( src_idx_1 < p_ssl_src1->numsubstrs && src_idx_2 < p_ssl_src2->numsubstrs ) { - if( strcmp( p_ssl_src1->pp_substrs[src_idx_1], - p_ssl_src2->pp_substrs[src_idx_2] ) <= 0 ) + if( strcmp( p_ssl_src1->pp_substrs[src_idx_1]->p_str, + p_ssl_src2->pp_substrs[src_idx_2]->p_str ) <= 0 ) { // move string from source 1 - p_ssl_dst->pp_substrs[dst_idx] - = p_ssl_src1->pp_substrs[src_idx_1]; - p_ssl_src1->pp_substrs[src_idx_1] = NULL; + p_ssl_dst->pp_substrs[dst_idx]->p_str + = p_ssl_src1->pp_substrs[src_idx_1]->p_str; + p_ssl_src1->pp_substrs[src_idx_1]->p_str = NULL; dst_idx++; src_idx_1++; continue; } // move strings from source 2 - p_ssl_dst->pp_substrs[dst_idx] - = p_ssl_src2->pp_substrs[src_idx_2]; - p_ssl_src2->pp_substrs[src_idx_2] = NULL; + p_ssl_dst->pp_substrs[dst_idx]->p_str + = p_ssl_src2->pp_substrs[src_idx_2]->p_str; + p_ssl_src2->pp_substrs[src_idx_2]->p_str = NULL; dst_idx++; src_idx_2++; } @@ -298,9 +619,9 @@ while( src_idx < p_ssl_src->numsubstrs ) { // move strings from source - p_ssl_dst->pp_substrs[dst_idx] - = p_ssl_src->pp_substrs[src_idx]; - p_ssl_src->pp_substrs[src_idx] = NULL; + p_ssl_dst->pp_substrs[dst_idx]->p_str + = p_ssl_src->pp_substrs[src_idx]->p_str; + p_ssl_src->pp_substrs[src_idx]->p_str = NULL; dst_idx++; src_idx++; } @@ -328,17 +649,17 @@ while( idx < p_ssl->numsubstrs ) { // get new check string - p_checkstr = p_ssl->pp_substrs[idx]; + p_checkstr = p_ssl->pp_substrs[idx]->p_str; idx++; // remove all duplicates of check string while( idx < p_ssl->numsubstrs ) { - if( strcmp( p_checkstr, p_ssl->pp_substrs[idx] ) == 0 ) + if( strcmp( p_checkstr, p_ssl->pp_substrs[idx]->p_str ) == 0 ) { // remove duplicate - free( p_ssl->pp_substrs[idx] ); - p_ssl->pp_substrs[idx] = NULL; + free( p_ssl->pp_substrs[idx]->p_str ); + p_ssl->pp_substrs[idx]->p_str = NULL; strsremoved++; idx++; @@ -359,7 +680,7 @@ // find next empty string section while( dst_idx < p_ssl->numsubstrs ) { - if( p_ssl->pp_substrs[dst_idx] ) + if( p_ssl->pp_substrs[dst_idx]->p_str ) { dst_idx++; continue; @@ -371,7 +692,7 @@ src_idx = dst_idx + 1; while( src_idx < p_ssl->numsubstrs ) { - if( !p_ssl->pp_substrs[src_idx] ) + if( !p_ssl->pp_substrs[src_idx]->p_str ) { src_idx++; continue; @@ -384,11 +705,11 @@ while( dst_idx < src_idx && idx < p_ssl->numsubstrs ) { - if( p_ssl->pp_substrs[idx] ) + if( p_ssl->pp_substrs[idx]->p_str ) { // move string - p_ssl->pp_substrs[dst_idx] = p_ssl->pp_substrs[idx]; - p_ssl->pp_substrs[idx] = NULL; + p_ssl->pp_substrs[dst_idx]->p_str = p_ssl->pp_substrs[idx]->p_str; + p_ssl->pp_substrs[idx]->p_str = NULL; dst_idx++; idx++; @@ -419,15 +740,13 @@ Free_SubStringList( p_ssl_dst ); // allocate new dest sub-strings - p_ssl_dst->numsubstrs = p_ssl_src->numsubstrs; - p_ssl_dst->pp_substrs - = (char **) malloc( sizeof(char *) * p_ssl_dst->numsubstrs ); + MakeNew_SubStringList( p_ssl_dst, p_ssl_src->numsubstrs ); // move the strings over for( i = 0; i < p_ssl_dst->numsubstrs; i++ ) { - p_ssl_dst->pp_substrs[i] = p_ssl_src->pp_substrs[i]; - p_ssl_src->pp_substrs[i] = NULL; + p_ssl_dst->pp_substrs[i]->p_str = p_ssl_src->pp_substrs[i]->p_str; + p_ssl_src->pp_substrs[i]->p_str = NULL; } // clear/free the source string @@ -435,6 +754,45 @@ Free_SubStringList( p_ssl_src ); } +// writes given string list into given file +// on success: +// - old file contents are destroyed +// - returns 1 +// on failure: +// - old file contents are destroyed or mangled +// - returns 0 +static int Write_Strings( STRLIST * p_sl, + FILE * p_file ) +{ + int i; + + // go to beginning of file + if( fseek( p_file, 0L, SEEK_SET ) ) + { + goto failbail; + } + + // write out all strings + for( i = 0; i < p_sl->numstrs; i++ ) + { + if( fputs( p_sl->pp_strs[i], p_file ) != EOF ) + { + if( fputs( "\n", p_file ) != EOF ) + { + continue; + } + } + + goto failbail; + } + + // success: + return 1; + +failbail: + printf( "[Write_Strings] Error, cannot write to file\n" ); + return 0; +} // writes given sub string list into given file // on success: @@ -457,7 +815,7 @@ // write out all strings for( i = 0; i < p_ssl->numsubstrs; i++ ) { - if( fputs( p_ssl->pp_substrs[i], p_file ) != EOF ) + if( fputs( p_ssl->pp_substrs[i]->p_str, p_file ) != EOF ) { if( fputs( "\n", p_file ) != EOF ) { @@ -476,6 +834,41 @@ return 0; } +// writes out and closes the current string file if possible +// and frees the current string list +// returns 1 on success, 0 on failure +int WriteAndClose_StringFile() +{ + if( g_p_file_strs ) + { + if( Write_Strings( &g_mainstrs, g_p_file_strs ) ) + { + // close the file + if( fclose( g_p_file_strs ) == 0 ) + { + // success + g_p_file_strs = NULL; + return 1; + } + + printf( "[WriteAndClose_StringFile] Error closing file\n" ); + goto failbail; + } + + printf( "[WriteAndClose_StringFile] Error, cannot write to file\n" ); + goto failbail; + } + + printf( "[WriteAndClose_StringFile] Error, never opened \n" ); + +failbail: + if( fclose( g_p_file_strs ) ) + { + g_p_file_strs = NULL; + } + return 0; +} + // writes out and closes the current sub string file if possible // and frees the current sub string list // returns 1 on success, 0 on failure @@ -511,6 +904,29 @@ return 0; } +// closes the current open string file and frees +// the current string list +// returns 1 on success, 0 on failure +int Close_StringFile() +{ + if( g_p_file_strs ) + { + // close the file + if( fclose( g_p_file_strs ) == 0 ) + { + // success + g_p_file_strs = NULL; + return 1; + } + + printf( "[Close_StringFile] Error closing file\n" ); + return 0; + } + + printf( "[Close_StringFile] Error, never opened \n" ); + return 0; +} + // closes the current open sub string file and frees // the current sub string list // returns 1 on success, 0 on failure @@ -534,21 +950,123 @@ return 0; } -#if 0 - -void OpenStringFile( const char * p_path ) +// given a path, opens an existing string file +// (a simple line by line list of strings) +// reads in the contents as the string list and sets +// the file as the current open string file +// +// returns 1 on success, 0 on failure +int Open_StringFile( const char * p_path ) { + g_p_file_strs = fopen( p_path, "a+t" ); + if( g_p_file_strs ) + { + if( Create_StringList( &g_mainstrs, g_p_file_strs ) ) + { + // success + return 1; + } + + fclose( g_p_file_strs ); + printf( "[Open_StringFile] Error" ); + return 0; + } + + printf( "[Open_StringFile] Error, cannot open file: %s.\n", p_path ); + return 0; } -void CreateStringFile( const char * p_path ) +// given a path, opens an existing string file +// (a simple line by line list of strings) +// reads in the contents and appends them to the +// current string list and the current string file +// +// returns 1 on success, 0 on failure +// p_startidx_o <- beginning index of strings within the pool +// p_size_o <- number of strings appended +int AppendStringsFromFile( int * p_startidx_o, + int * p_size_o, + const char * p_path ) { -} + char buff[8192]; + FILE * p_srcfile; + + if( g_p_file_strs ) + { + p_srcfile = fopen( p_path, "rt" ); + if( p_srcfile ) + { + // go to beginning of source file + if( fseek( p_srcfile, 0L, SEEK_SET ) ) + { + goto failbail; + } -void AddStringsFromFile( const char * p_path ) -{ -} + // go to beginning of dest file + if( fseek( g_p_file_strs, 0L, SEEK_SET ) ) + { + goto failbail; + } -#endif + // count our lines: + (*p_startidx_o) = 0; + while( fgets( buff, sizeof(buff)/sizeof(char) - 1, g_p_file_strs ) ) + { + (*p_startidx_o)++; + } + + // go to end of dest file + if( fseek( g_p_file_strs, 0L, SEEK_END ) ) + { + goto failbail; + } + + // copy each line from + // the source file to the dest file + (*p_size_o) = 0; + while( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_srcfile ) ) + { + if( fputs( buff, g_p_file_strs ) != EOF ) + { + (*p_size_o)++; + continue; + } + + goto failbail; + } + + // flush all IO operations + if( fflush( g_p_file_strs ) != EOF ) + { + // cause current string list to be reloaded + if( Create_StringList( &g_mainstrs, g_p_file_strs ) ) + { + // close source file + if( fclose( p_srcfile ) == 0 ) + { + // success + return 1; + } + + printf( "[AppendStringsFromFile] Error closing file: %s\n", p_path ); + goto failbail; + } + } + } + // if( p_srcfile ) + printf( "[AppendStringsFromFile] Error, cannot open file: %s\n", p_path ); + return 0; + + } + // if( g_p_file_strs ) + printf( "[AppendStringsFromFile] Error, String file never opened\n" ); + return 0; + +failbail: + fclose( p_srcfile ); + printf( "[AppendStringsFromFile] Error\n" ); + return 0; +} // given a plain text files, extracts any unique sub strings // and adds them into the current sub string list @@ -620,6 +1138,26 @@ return 0; } +// given a path, creates a string file for writing to +// as the current open string file +// +// returns 1 on success, 0 on failure +int Create_StringFile( const char * p_path ) +{ + g_p_file_strs = fopen( p_path, "w+t" ); + if( g_p_file_strs ) + { + // free current list + Free_StringList( &g_mainstrs ); + + // success + return 1; + } + + printf( "[Create_StringFile] Error, cannot create file: %s.\n", p_path ); + return 0; +} + // given a path, creates a sub string file for writing to // as the current open sub string file // @@ -640,9 +1178,828 @@ return 0; } +static char * GetTimeStr( char * p_dest ) +{ + struct tm * p_time; + time_t thetime; + p_dest[0] = 0; + time( &thetime ); + p_time = localtime( &thetime ); + strcpy( p_dest, asctime( p_time ) ); + return p_dest; +} + +// write the file header for the source file +static int KSSC_WriteSourceHeader( FILE * p_file, + const char * p_src_name ) +{ + char buff[1024]; + +/* form of source header */ +#if 0 +/* + "" + + auto-generated by: "make_kernel_string_pool.c" + for: UbixOS Project + date: + + purpose: - contains constant data for a string pool +*/ + +#endif + fprintf( p_file, "/*\n" ); + fprintf( p_file, " \"%s\"\n", p_src_name ); + fprintf( p_file, "\n" ); + fprintf( p_file, " auto-generated by: \"make_kernel_string_pool.c\"\n" ); + fprintf( p_file, " for: UbixOS Project\n" ); + fprintf( p_file, " date: %s", GetTimeStr( buff ) ); + fprintf( p_file, "\n" ); + fprintf( p_file, " purpose: - contains constant data for a string pool\n" ); + fprintf( p_file, "*/\n" ); + fprintf( p_file, "\n" ); + + return 1; +} + +// write the include lines +static int KSSC_WriteSourceIncludes( FILE * p_file, + const char ** pp_includes, + int numincludes ) +{ + int i; + + for( i = 0; i < numincludes; i++ ) + { + fprintf( p_file, "#include %s\n", pp_includes[i] ); + } + fprintf( p_file, "\n" ); + + return 1; +} + +static char * KSSC_GetSubStringID( char * p_dst, + const char * p_pool_name ) +{ + sprintf( p_dst, "%s_substrs", p_pool_name ); + return p_dst; +} + +#define KSSC_BYTETYPESTR ("BYTEg") + +static int KSSC_WriteByteVal( FILE * p_file, + int bytevalidx, + int byteval, + int withcomma ) +{ + if( (bytevalidx & 15) == 0 ) + { + fprintf( p_file, "\n" ); + } + + fprintf( p_file, "%d", byteval ); + + if( withcomma ) + { + fprintf( p_file, ", " ); + } + + return bytevalidx + 1; +} + +// write the sub strings array +// return size of memory included into source +static int KSSC_WriteSubStrings( FILE * p_file, + const char * p_pool_name ) +{ + char buff[4096]; + int i, ii; + int bytevalidx; + int numchars; + int memsize; + + // write out sub strings to source file + memsize = 0; + +/* form of sub strings */ +#if 0 + static [] = + { + , , , ... + , , , ... + , , , ... + . + . + . + }; +#endif + + fprintf( p_file, + "static %s %s[] =\n", + KSSC_BYTETYPESTR, + KSSC_GetSubStringID( buff, p_pool_name ) ); + fprintf( p_file, "{" ); + bytevalidx = 0; + for( i = 0; i < g_mainsubstrs.numsubstrs - 1; i++ ) + { + // get number of characters in sub string + numchars = strlen( g_mainsubstrs.pp_substrs[i]->p_str ); + if( numchars >= 1 ) + { + if( numchars <= 256 ) + { + // write number of characters in byte string + numchars--; + bytevalidx + = KSSC_WriteByteVal( p_file, + bytevalidx, + numchars, + 1 ); + memsize++; + numchars++; + + // write each character of byte string + for( ii = 0; ii < numchars; ii++ ) + { + bytevalidx + = KSSC_WriteByteVal( p_file, + bytevalidx, + (int) (g_mainsubstrs.\ + pp_substrs[i]->p_str)[ii], + 1 ); + memsize++; + } + + continue; + } + } + + printf( "[KSSC_WriteSubStrings], ERROR! invalid substring: \"%s\"\n", g_mainsubstrs.pp_substrs[i] ); + return 0; + } + + // get number of characters in final sub string + numchars = strlen( g_mainsubstrs.pp_substrs[i]->p_str ); + if( numchars >= 1 ) + { + if( numchars <= 256 ) + { + // write number of characters in byte string + numchars--; + bytevalidx + = KSSC_WriteByteVal( p_file, + bytevalidx, + numchars, + 1 ); + memsize++; + numchars++; + + // write each character of byte string + for( ii = 0; ii < numchars - 1; ii++ ) + { + bytevalidx + = KSSC_WriteByteVal( p_file, + bytevalidx, + (int) (g_mainsubstrs.\ + pp_substrs[i]->p_str)[ii], + 1 ); + memsize++; + } + + // write final character + KSSC_WriteByteVal( p_file, + bytevalidx, + (int) (g_mainsubstrs.\ + pp_substrs[i]->p_str)[ii], + 0 ); + memsize++; + + // write closing braces etc. + fprintf( p_file, "\n};\n\n" ); + + // success + return memsize; + } + } + + printf( "[KSSC_WriteSubStrings], ERROR! invalid substring: \"%s\"\n", g_mainsubstrs.pp_substrs[i] ); + return 0; +} + +static char * KSSC_GetSubStringOffsetID( char * p_dst, + const char * p_pool_name ) +{ + sprintf( p_dst, "%s_substroffs", p_pool_name ); + return p_dst; +} + +static int KSSC_WriteOffsetVal( FILE * p_file, + int offsetidx, + unsigned int offset, + int withcomma ) +{ + if( (offsetidx & 7) == 0 ) + { + fprintf( p_file, "\n" ); + } + + fprintf( p_file, "%u", offset ); + + if( withcomma ) + { + fprintf( p_file, ", " ); + } + + return offsetidx + 1; +} + + +// 86'd to save memory at cost of speed +#if 0 + +// write the sub strings offset array +// return size of memory included into source +static int KSSC_WriteSubStringOffsets( FILE * p_file, + const char * p_pool_name ) +{ + char buff[4096]; + unsigned int curoffset; + int i; + int numchars; + int offsetidx; + int memsize; + + memsize = 0; + +/* form of sub string offsets */ +#if 0 + static unsigned int [] = + { + , , , ... + , , , ... + , , , ... + . + . + . + }; +#endif + + fprintf( p_file, + "static unsigned int %s[] =\n", + KSSC_GetSubStringOffsetID( buff, p_pool_name ) ); + fprintf( p_file, "{" ); + offsetidx = 0; + curoffset = 0; + for( i = 0; i < g_mainsubstrs.numsubstrs - 1; i++ ) + { + // get number of characters in sub string + numchars = strlen( g_mainsubstrs.pp_substrs[i]->p_str ); + + // write the offset + offsetidx + = KSSC_WriteOffsetVal( p_file, + offsetidx, + curoffset, + 1 ); + memsize += sizeof(unsigned int); + + // update the offset + curoffset += numchars + 1; + } + + // write final offset + KSSC_WriteOffsetVal( p_file, + offsetidx, + curoffset, + 0 ); + memsize += sizeof(unsigned int); + + // write closing braces etc. + fprintf( p_file, "\n};\n\n" ); + + // success + return memsize; +} + +#endif // #if 0 + +static int KSSC_GetStringProfile( int * p_numwords_o, + int * p_numwordbytes_o, + BYTEg * p_wordbytes_o, + const char * p_str ) +{ + char buff[4096]; + char * p_curbuff; + int idx; + int idxmask; + SUBSTR * p_curss; + + // copy working string + strcpy( buff, p_str ); + (*p_numwords_o) = 0; + (*p_numwordbytes_o) = 1; + if( (p_curbuff = strtok( buff, g_whitespace )) ) + { + do + { + // get related substring (*MUST* exist) + p_curss = Find_SubStringByStr( &g_mainsubstrs, p_curbuff ); + if( p_curss ) + { + // update number of words + (*p_numwords_o)++; + + // update bytes in KString (if needed) + if( p_wordbytes_o ) + { + // assume normal expression + idxmask = 0; + idx = p_curss->idx; + if( p_curss->idx > 63 ) + { + // use escape sequence to express + idxmask = 0x40; + if( p_curss->idx > 16383 ) + { + // use larger escape sequence to express + idxmask = 0x80; + p_wordbytes_o[(*p_numwordbytes_o) + 2] + = (BYTEg) (idx&0xFF); + idx >>= 8; + } + p_wordbytes_o[(*p_numwordbytes_o) + 1] + = (BYTEg) (idx&0xFF); + idx >>= 8; + } + p_wordbytes_o[(*p_numwordbytes_o)] = (BYTEg) (idx|idxmask); + } + + // update total bytes + (*p_numwordbytes_o)++; + if( p_curss->idx > 63 ) + { + (*p_numwordbytes_o)++; + if( p_curss->idx > 16383 ) + { + (*p_numwordbytes_o)++; + } + } + + // next token + p_curbuff = strtok( NULL, g_whitespace ); + + // continue + continue; + } + + // fail + return 0; + } + while( p_curbuff ); + + if( (*p_numwordbytes_o) >= 1 && (*p_numwordbytes_o) <= 256 ) + { + // update bytes in KString, number of bytes, (if needed) + if( p_wordbytes_o ) + { + p_wordbytes_o[0] = (BYTEg) ((*p_numwordbytes_o) - 1); + } + + // success + return 1; + } + } + + // fail + return 0; +} + +static char * KSSC_GetKStringID( char * p_dst, + const char * p_pool_name ) +{ + sprintf( p_dst, "%s_kstrs", p_pool_name ); + return p_dst; +} + +// write the kernel strings array +// returns size of memory included into source +static int KSSC_WriteStrings( FILE * p_file, + const char * p_pool_name ) +{ + char buff[4096]; + BYTEg buff2[4096]; + int i, ii; + int bytevalidx; + int numwords; + int numwordbytes; + int memsize; + + memsize = 0; + +/* form of kernel strings */ +#if 0 + static [] = + { + , , , ... + , , , ... + , , , ... + . + . + . + }; +#endif + + fprintf( p_file, + "static %s %s[] =\n", + KSSC_BYTETYPESTR, + KSSC_GetKStringID( buff, p_pool_name ) ); + fprintf( p_file, "{" ); + bytevalidx = 0; + for( i = 0; i < g_mainstrs.numstrs - 1; i++ ) + { + // get profile of string + if( KSSC_GetStringProfile( &numwords, + &numwordbytes, + buff2, + g_mainstrs.pp_strs[i] ) ) + { + // write out each byte in kstring + for( ii = 0; ii < numwordbytes; ii++ ) + { + bytevalidx + = KSSC_WriteByteVal( p_file, + bytevalidx, + (int) buff2[ii], + 1 ); + } + memsize += numwordbytes; + + // continue + continue; + } + + // fail + printf( "[KSSC_WriteStrings], Error, invalid string? -> %s\n", g_mainstrs.pp_strs[i] ); + return 0; + } + + // write final kstring + if( KSSC_GetStringProfile( &numwords, + &numwordbytes, + buff2, + g_mainstrs.pp_strs[i] ) ) + { + // write out each byte in kstring + for( ii = 0; ii < numwordbytes - 1; ii++ ) + { + bytevalidx + = KSSC_WriteByteVal( p_file, + bytevalidx, + (int) buff2[ii], + 1 ); + } + + // write final byte + KSSC_WriteByteVal( p_file, + bytevalidx, + (int) buff2[ii], + 0 ); + memsize += numwordbytes; + + // write closing braces etc. + fprintf( p_file, "\n};\n\n" ); + + // success + return memsize; + } + + // fail + printf( "[KSSC_WriteStrings], Error, invalid string? -> %s\n", g_mainstrs.pp_strs[i] ); + return 0; +} + +// write the string pool structure itself +static int KSSC_WriteStringPool( FILE * p_file, + const char * p_pool_name ) +{ + char buff1[2048]; + char buff2[2048]; + +/* form of kernel string pool */ +#if 0 + /* kernel string pool */ + KSTR_POOL = { , }; +#endif + + fprintf( p_file, + "/* kernel string pool */\n" ); + fprintf( p_file, + "KSTR_POOL %s = { %s, %s };\n", + p_pool_name, + KSSC_GetKStringID( buff1, p_pool_name ), + KSSC_GetSubStringID( buff2, p_pool_name ) ); + + return 1; +} + +// converts a filename to a define symbol +static char * KSSC_GetHeaderDefineSymbol( char * p_dst, + const char * p_hdr_name ) +{ + char * p_curdst; + + sprintf( p_dst, "_%s", p_hdr_name ); + p_curdst = p_dst; + while( (*p_curdst) != 0 ) + { + if( (*p_curdst) != '.' ) + { + if( islower( (int) (*p_curdst) ) ) + { + (*p_curdst) = (char) toupper( (int) (*p_curdst) ); + } + + p_curdst++; + continue; + } + + (*p_curdst) = '_'; + p_curdst++; + continue; + } + + return p_dst; +} + +// write the file header for the header file +static int KSSC_WriteHeaderHeader( FILE * p_file, + const char * p_hdr_name ) +{ + char hdrsym[1024]; + char buff[1024]; + +/* form of header header */ +#if 0 +/* + "" + + auto-generated by: "make_kernel_string_pool.c" + for: UbixOS Project + date: + + purpose: - header file for a string pool +*/ + +/* #ifndef _ +#define _ */ + +#endif + fprintf( p_file, "/*\n" ); + fprintf( p_file, " \"%s\"\n", p_hdr_name ); + fprintf( p_file, "\n" ); + fprintf( p_file, " auto-generated by: \"make_kernel_string_pool.c\"\n" ); + fprintf( p_file, " for: UbixOS Project\n" ); + fprintf( p_file, " date: %s", GetTimeStr( buff ) ); + fprintf( p_file, "\n" ); + fprintf( p_file, " purpose: - header file for a string pool\n" ); + fprintf( p_file, "*/\n" ); + fprintf( p_file, "\n" ); + KSSC_GetHeaderDefineSymbol( hdrsym, p_hdr_name ); + fprintf( p_file, "#ifndef %s\n", hdrsym ); + fprintf( p_file, "#define %s\n", hdrsym ); + fprintf( p_file, "\n" ); + + return 1; +} + +// write the string pool structure declaration +static int KSSC_WriteStringPoolDecl( FILE * p_file, + const char * p_pool_name ) +{ +/* form of kernel string pool declaration */ +#if 0 + /* kernel string pool */ + extern KSTR_POOL ; + +#endif + + fprintf( p_file, + "/* kernel string pool */\n" ); + fprintf( p_file, + "extern KSTR_POOL %s;\n", + p_pool_name ); + + return 1; +} + +// write the file footer for the header file +static int KSSC_WriteHeaderFooter( FILE * p_file, + const char * p_hdr_name ) +{ + char buff[1024]; + +/* form of header footer */ +#if 0 + +'#endif' /* _ */ +#endif + fprintf( p_file, "\n" ); + fprintf( p_file, + "#endif /* %s */\n", + KSSC_GetHeaderDefineSymbol( buff, p_hdr_name ) ); + + return 1; +} + +/* + Input: + - p_hdr_path <- full path to header file to write + relative to current working directory + - p_hdr_name <- name of header file + - p_src_path <- full path to source file to write + relativeto current working directory + - p_src_name <- name of source file + - pp_includes <- list of files to include eg. , "h4.h", ... + - numincludes <- number of files in include list + - p_pool_name <- name of identifier to use for string pool + generated ( NOTE: sub-arrays of string pool + will use this name to base their names ) + Output: + - generates both a header file and a source code file which + can be used to embed compressed strings into an object module + based on the currently open string list and sub string list + - returns 1 on success, 0 on failure + + Assumptions: + - a string list is current open + - a sub string list is current open + - the strings in the open string file are a superset of the sub strings + in the open string file +*/ +int Create_KStringSourceCode( const char * p_hdr_path, + const char * p_hdr_name, + const char * p_src_path, + const char * p_src_name, + const char ** pp_includes, + int numincludes, + const char * p_pool_name ) +{ + FILE * p_srccode; + FILE * p_hdr; + int memsize; + int totalmemsize; + + memsize = 0; + totalmemsize = 0; + + // build full path names + + // open source code file + p_srccode = fopen( p_src_path, "w+t" ); + if( p_srccode ) + { + // open header file + p_hdr = fopen( p_hdr_path, "w+t" ); + if( p_hdr ) + { + // write the file header for the source file + if( !KSSC_WriteSourceHeader( p_srccode, p_src_name ) ) + { + goto failbail; + } + + // write the include lines + if( !KSSC_WriteSourceIncludes( p_srccode, + pp_includes, + numincludes ) ) + { + goto failbail; + } + + // sort by frequency + if( !UpdateFrequencies_SubStrings( &g_mainsubstrs, &g_mainstrs ) ) + { + // fail + printf( "[Create_KStringSourceCode], Error assigning substring frequencies.\n" ); + goto failbail; + } + if( !Sort_SubStringListByFreq( &g_mainsubstrs ) ) + { + // fail, but attempt to restore sorting to alphabetic + Sort_SubStringList( &g_mainsubstrs ); + printf( "[Create_KStringSourceCode], Error sorting substrings by frequency.\n" ); + goto failbail; + } + + // assign indices to sub strings based on frequency + if( !UpdateIndices_SubStrings( &g_mainsubstrs ) ) + { + // fail, but attempt to restore sorting to alphabetic + Sort_SubStringList( &g_mainsubstrs ); + printf( "[Create_KStringSourceCode], Error assigning indices to sub strings based on frequency.\n" ); + goto failbail; + } + + // write the sub strings array + memsize = KSSC_WriteSubStrings( p_srccode, p_pool_name ); + if( !memsize ) + { + goto failbail; + } + printf( "KSSC_WriteSubStrings->MemSize = %d\n", memsize ); + totalmemsize += memsize; + + +// 86'd to save memory at cost of speed +#if 0 + // write the sub strings offset array + memsize = KSSC_WriteSubStringOffsets( p_srccode, p_pool_name ); + if( !memsize ) + { + goto failbail; + } + printf( "KSSC_WriteSubStringOffsets->MemSize = %d\n", memsize ); + totalmemsize += memsize; +#endif + + // restore sorting to alphabetic + if( !Sort_SubStringList( &g_mainsubstrs ) ) + { + // fail + printf( "[Create_KStringSourceCode], Error restoring alphabetic ordering to sub strings.\n" ); + goto failbail; + } + + // write the kernel strings array + memsize = KSSC_WriteStrings( p_srccode, p_pool_name ); + if( !memsize ) + { + goto failbail; + } + printf( "KSSC_WriteStrings->MemSize = %d\n", memsize ); + totalmemsize += memsize; + + // write the string pool structure itself + if( !KSSC_WriteStringPool( p_srccode, p_pool_name ) ) + { + goto failbail; + } + + // write the file header for the header file + if( !KSSC_WriteHeaderHeader( p_hdr, p_hdr_name ) ) + { + goto failbail; + } + + // write the string pool structure declaration + if( !KSSC_WriteStringPoolDecl( p_hdr, p_pool_name ) ) + { + goto failbail; + } + + // write the file footer for the header file + if( !KSSC_WriteHeaderFooter( p_hdr, p_hdr_name ) ) + { + goto failbail; + } + + // close both files + if( fclose( p_hdr ) == 0 ) + { + p_hdr = NULL; + } + if( fclose( p_srccode ) == 0 ) + { + p_srccode = NULL; + } + + if( !p_hdr && !p_srccode ) + { + printf( "Create_KStringSourceCode->MemSize = %d Bytes, %d KBytes\n", totalmemsize, ((totalmemsize + 1023)>>10) ); + + // success + return 1; + } + + // close header file + fclose( p_hdr ); + } + // if( p_hdr ) + + // close source code file + fclose( p_srccode ); + } + // if( p_src_path ) + +failbail: + return 0; +} + + // Resets *everything*, frees all memory in use, closes all open files void MakeKSP_ResetAll() { + Free_StringList( &g_mainstrs ); Free_SubStringList( &g_mainsubstrs ); Free_SubStringList( &g_tempsubstrs1 ); Free_SubStringList( &g_tempsubstrs2 ); @@ -652,4 +2009,10 @@ fclose( g_p_file_substrs ); g_p_file_substrs = NULL; } + + if( g_p_file_strs ) + { + fclose( g_p_file_strs ); + g_p_file_strs = NULL; + } } \ No newline at end of file diff --git a/src/tools/make_kernel_string_pool.h b/src/tools/make_kernel_string_pool.h index 2cee201..2291dec 100755 --- a/src/tools/make_kernel_string_pool.h +++ b/src/tools/make_kernel_string_pool.h @@ -29,6 +29,43 @@ // Resets *everything*, frees all memory in use, closes all open files void MakeKSP_ResetAll(); +// given a path, opens an existing string file +// (a simple line by line list of strings) +// reads in the contents and appends them to the +// current string list and the current string file +// +// returns 1 on success, 0 on failure +// p_startidx_o <- beginning index of strings within the pool +// p_size_o <- number of strings appended +int AppendStringsFromFile( int * p_startidx_o, + int * p_size_o, + const char * p_path ); + +// given a path, opens an existing string file +// (a simple line by line list of strings) +// reads in the contents as the string list and sets +// the file as the current open string file +// +// returns 1 on success, 0 on failure +int Open_StringFile( const char * p_path ); + +// closes the current open string file and frees +// the current string list +// returns 1 on success, 0 on failure +int Close_StringFile(); + +// given a path, creates a string file for writing to +// as the current open string file +// +// returns 1 on success, 0 on failure +int Create_StringFile( const char * p_path ); + +// writes out and closes the current string file if possible +// and frees the current string list +// returns 1 on success, 0 on failure +int WriteAndClose_StringFile(); + + // given a path, creates a sub string file for writing to // as the current open sub string file // @@ -59,6 +96,37 @@ // returns 1 on success, 0 on failure int Add_SubStringsFromFile( const char * p_path ); +/* + Input: + - p_hdr_path <- full path to header file to write + relative to current working directory + - p_hdr_name <- name of header file + - p_src_path <- full path to source file to write + relativeto current working directory + - p_src_name <- name of source file + - pp_includes <- list of files to include eg. , "h4.h", ... + - numincludes <- number of files in include list + - p_pool_name <- name of identifier to use for string pool + generated ( NOTE: sub-arrays of string pool + will use this name to base their names ) + Output: + - generates both a header file and a source code file which + can be used to embed compressed strings into an object module + based on the currently open string list and sub string list + - returns 1 on success, 0 on failure + Assumptions: + - a string list is current open + - a sub string list is current open + - the strings in the open string file are a superset of the sub strings + in the open string file +*/ +int Create_KStringSourceCode( const char * p_hdr_path, + const char * p_hdr_name, + const char * p_src_path, + const char * p_src_name, + const char ** pp_includes, + int numincludes, + const char * p_pool_name ); #endif // _MAKE_KERNEL_STRING_POOL_H