/*
"make_kernel_string_pool.c"
created by: grayspace aka J. Leveille
for: UbixOS Project
date: July 3, 2002
purpose: - tool to create string pools for in kernel use
- will generate the source for including a global
set of ASCII (for now) strings into the kernel
to avoid memory wastage for kernel output strings
for long lists of things such as device descriptions
NOTEs:
- for now only ASCII is supported
TODO:
- expand to support unicode
- use huffman encoding instead
$Id$
*/
/*
SubString file format:
- each unique substring on its own line
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <ctype.h>
#include "..\grayspace-misc\gsdefines.h"
// one entry of a sub string list: the text of a single sub string
// plus the bookkeeping values kept for it
typedef struct tagSUBSTR
{
// string: heap allocated text of the sub string, or NULL while the
// entry is empty ('MakeNew_SubStringList' starts every entry at NULL)
char * p_str;
// frequency: occurrence count filled in by
// 'UpdateFrequencies_SubStrings' (starts at 0)
int freq;
// index: position assigned by 'UpdateIndices_SubStrings'
// (starts at -1, meaning "not assigned")
int idx;
}
SUBSTR;
// a list of sub strings
// the entries live in one array ('p_substrsmem'); a second array of
// pointers ('pp_substrs') refers to them and is the one that gets
// sorted and searched
typedef struct tagSUBSTRLIST
{
// storage for the 'numsubstrs' entries themselves
SUBSTR * p_substrsmem;
// 'numsubstrs' pointers into 'p_substrsmem'
SUBSTR ** pp_substrs;
// number of entries in the list
int numsubstrs;
}
SUBSTRLIST;
// a list of complete strings (one per line of a string file)
typedef struct tagSTRLIST
{
// array of 'numstrs' heap allocated strings
char ** pp_strs;
// number of strings in 'pp_strs'
int numstrs;
}
STRLIST;
// current open sub string file (NULL while none is open)
FILE * g_p_file_substrs = NULL;
// main/current list of strings
STRLIST g_mainstrs = { NULL, 0 };
// current open string file (NULL while none is open)
FILE * g_p_file_strs = NULL;
// main/current list of sub strings
// (members not named in the initializer start out zeroed)
SUBSTRLIST g_mainsubstrs = { NULL, 0 };
// temporary lists of substrings
// (scratch space used by 'Add_SubStringsFromFile')
SUBSTRLIST g_tempsubstrs1 = { NULL, 0 };
SUBSTRLIST g_tempsubstrs2 = { NULL, 0 };
// releases every string owned by the list, then the pointer array,
// and marks the list as empty
// note: when the pointer array is already NULL nothing is touched,
//       not even the string count
static void Free_StringList( STRLIST * p_sl )
{
    char ** pp_all = p_sl->pp_strs;
    int n;

    if( pp_all == NULL )
    {
        return;
    }

    for( n = 0; n < p_sl->numstrs; n++ )
    {
        // free( NULL ) is a no-op, so unused slots need no special case
        free( pp_all[n] );
        pp_all[n] = NULL;
    }

    free( pp_all );
    p_sl->pp_strs = NULL;
    p_sl->numstrs = 0;
}
// releases everything owned by a sub string list:
// - the pointer array
// - the text of every entry
// - the entry storage itself
// note: the entry count is only reset to 0 when entry storage
//       existed; with no storage the count is left as it was
static void Free_SubStringList( SUBSTRLIST * p_ssl )
{
    SUBSTR * p_mem = p_ssl->p_substrsmem;
    int n;

    // the pointer array owns nothing itself
    free( p_ssl->pp_substrs );
    p_ssl->pp_substrs = NULL;

    if( p_mem == NULL )
    {
        return;
    }

    for( n = 0; n < p_ssl->numsubstrs; n++ )
    {
        // free( NULL ) is a no-op, so empty entries need no special case
        free( p_mem[n].p_str );
        p_mem[n].p_str = NULL;
    }

    free( p_mem );
    p_ssl->p_substrsmem = NULL;
    p_ssl->numsubstrs = 0;
}
// allocates only memory for sub string structures
// and pointers *not* the strings
// also:
// - assigns sub-string pointers (entry i points at storage slot i)
// - sets string pointers to NULL for safety
// - sets all frequencies to 0
// - sets all idxs to -1
// note:
// - destroys previous contents if exists
// - when 'numsubstrs' is not positive, or when memory cannot be
//   allocated, the list is left empty (count 0, both arrays NULL);
//   the allocation failure is also reported on stdout
static void MakeNew_SubStringList( SUBSTRLIST * p_ssl, int numsubstrs )
{
    int i;

    Free_SubStringList( p_ssl );

    // start from a well defined empty state; the count only becomes
    // non-zero once both arrays really exist
    p_ssl->pp_substrs = NULL;
    p_ssl->p_substrsmem = NULL;
    p_ssl->numsubstrs = 0;
    if( numsubstrs <= 0 )
    {
        return;
    }

    p_ssl->pp_substrs
        = (SUBSTR **) malloc( sizeof(SUBSTR *) * (size_t) numsubstrs );
    p_ssl->p_substrsmem
        = (SUBSTR *) malloc( sizeof(SUBSTR) * (size_t) numsubstrs );
    if( p_ssl->pp_substrs == NULL || p_ssl->p_substrsmem == NULL )
    {
        free( p_ssl->pp_substrs );
        free( p_ssl->p_substrsmem );
        p_ssl->pp_substrs = NULL;
        p_ssl->p_substrsmem = NULL;
        printf( "[MakeNew_SubStringList] Error, out of memory\n" );
        return;
    }

    p_ssl->numsubstrs = numsubstrs;
    for( i = 0; i < p_ssl->numsubstrs; i++ )
    {
        p_ssl->pp_substrs[i] = p_ssl->p_substrsmem + i;
        p_ssl->pp_substrs[i]->p_str = NULL;
        p_ssl->pp_substrs[i]->freq = 0;
        p_ssl->pp_substrs[i]->idx = -1;
    }
}
// qsort/bsearch comparator for arrays of 'SUBSTR *'
// each argument is the address of one array element; the two
// entries are ordered by their text using strcmp
static int CompareSubStringPtr( const void * p1, const void * p2 )
{
    const SUBSTR * const * pp_lhs = (const SUBSTR * const *) p1;
    const SUBSTR * const * pp_rhs = (const SUBSTR * const *) p2;

    return strcmp( (*pp_lhs)->p_str, (*pp_rhs)->p_str );
}
// puts the pointer array of a sub string list into strcmp order
// (the entry storage itself is not reordered)
// an empty list, or one without a pointer array, is a failure
// returns 1 on success, 0 on failure
static int Sort_SubStringList( SUBSTRLIST * p_ssl )
{
    if( !p_ssl->numsubstrs || !p_ssl->pp_substrs )
    {
        return 0;
    }

    qsort( p_ssl->pp_substrs,
           p_ssl->numsubstrs,
           sizeof(SUBSTR *),
           CompareSubStringPtr );
    return 1;
}
// qsort comparator for arrays of 'SUBSTR *'
// each argument is the address of one array element; entries with a
// higher frequency sort first (descending order)
// the result is built from two comparisons rather than a subtraction
// so that it cannot overflow, whatever the frequency values are
static int CompareSubStringPtrByFreq( const void * p1, const void * p2 )
{
    const SUBSTR * pss1 = *((const SUBSTR * const *) p1);
    const SUBSTR * pss2 = *((const SUBSTR * const *) p2);

    return (pss2->freq > pss1->freq) - (pss2->freq < pss1->freq);
}
// puts the pointer array of a sub string list into descending
// frequency order (most frequent entry first)
// an empty list, or one without a pointer array, is a failure
// note: 'Find_SubStringByStr' binary-searches with the strcmp
//       comparator, so it needs the list re-sorted with
//       'Sort_SubStringList' after this
// returns 1 on success, 0 on failure
static int Sort_SubStringListByFreq( SUBSTRLIST * p_ssl )
{
    if( !p_ssl->numsubstrs || !p_ssl->pp_substrs )
    {
        return 0;
    }

    qsort( p_ssl->pp_substrs,
           p_ssl->numsubstrs,
           sizeof(SUBSTR *),
           CompareSubStringPtrByFreq );
    return 1;
}
// looks up the entry whose text equals 'p_str'
// the pointer array is binary-searched with the strcmp comparator,
// so it must currently be in the order 'Sort_SubStringList' produces
// returns the matching entry, or NULL when there is none or the
// list is empty
static SUBSTR * Find_SubStringByStr( SUBSTRLIST * p_ssl, char * p_str )
{
    SUBSTR probe;
    SUBSTR * p_probe = &probe;
    SUBSTR ** pp_hit;

    if( !p_ssl->numsubstrs || !p_ssl->pp_substrs )
    {
        return NULL;
    }

    // only the text of the probe is ever looked at by the comparator
    probe.p_str = p_str;
    pp_hit = (SUBSTR **) bsearch( &p_probe,
                                  p_ssl->pp_substrs,
                                  p_ssl->numsubstrs,
                                  sizeof(SUBSTR *),
                                  CompareSubStringPtr );
    return pp_hit ? *pp_hit : NULL;
}
// given an open string file, creates a 'STRLIST' with its
// contents (the file should contain a list of strings, one on each
// line and in plain text)
//
// the file is read twice: once to count the lines, once to load them
// a line that is empty once its newline is stripped makes the load fail
//
// on failure:
//      - frees/empties the given list
//      - returns 0
// on success:
//      - returns 1
static int Create_StringList( STRLIST * p_sl, FILE * p_file )
{
    int i;
    int numlines;
    char buff[8192];
    char * p_curbuff;

    // destroy the current list
    Free_StringList( p_sl );

    // go to beginning of file
    if( fseek( p_file, 0L, SEEK_SET ) )
    {
        goto failbail;
    }

    // count our lines:
    numlines = 0;
    while( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) )
    {
        numlines++;
    }

    // go to beginning of file
    if( fseek( p_file, 0L, SEEK_SET ) )
    {
        goto failbail;
    }

    // the pointer array is zero-filled so that the failure path can
    // free a partially loaded list without touching garbage pointers
    if( numlines > 0 )
    {
        p_sl->pp_strs = (char **) calloc( (size_t) numlines, sizeof(char *) );
        if( p_sl->pp_strs == NULL )
        {
            goto failbail;
        }
    }
    p_sl->numstrs = numlines;

    // read each string from file
    for( i = 0; i < p_sl->numstrs; i++ )
    {
        if( !fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) )
        {
            goto failbail;
        }

        // strip newline
        p_curbuff = strtok( buff, "\n" );
        if( !p_curbuff )
        {
            goto failbail;
        }

        // allocate string
        p_sl->pp_strs[i]
            = (char *) malloc( sizeof(char) * (strlen( p_curbuff ) + 1) );
        if( p_sl->pp_strs[i] == NULL )
        {
            goto failbail;
        }

        // copy
        strcpy( p_sl->pp_strs[i], p_curbuff );
    }

    // success
    return 1;

failbail:
    Free_StringList( p_sl );
    // 'Free_StringList' leaves the count alone when there is no array
    p_sl->numstrs = 0;
    printf( "[Create_StringList] Error" );
    return 0;
}
// given an open sub string file, creates a 'SUBSTRLIST' with its
// contents (the file should contain a list of unique substrings,
// one per line; the list is sorted after loading)
//
// the file is read twice: once to count the lines, once to load them
// a line that is empty once its newline is stripped makes the load
// fail, and so does a file without any lines (an empty list cannot
// be sorted)
//
// on failure:
//      - frees/empties the given list
//      - returns 0
// on success:
//      - returns 1
static int Create_SubStringList( SUBSTRLIST * p_ssl, FILE * p_file )
{
    int i;
    int numlines;
    char buff[1024];
    char * p_curbuff;

    // go to beginning of file
    if( fseek( p_file, 0L, SEEK_SET ) )
    {
        goto failbail;
    }

    // count our lines:
    numlines = 0;
    while( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) != NULL )
    {
        numlines++;
    }

    // go to beginning of file
    if( fseek( p_file, 0L, SEEK_SET ) )
    {
        goto failbail;
    }

    // re-allocate our substring list:
    MakeNew_SubStringList( p_ssl, numlines );

    // read each substring
    for( i = 0; i < p_ssl->numsubstrs; i++ )
    {
        if( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) == NULL )
        {
            goto failbail;
        }

        // strip newline
        p_curbuff = strtok( buff, "\n" );
        if( !p_curbuff )
        {
            goto failbail;
        }

        // allocate and copy
        p_ssl->pp_substrs[i]->p_str
            = (char *) malloc( sizeof(char) * (strlen( p_curbuff ) + 1) );
        if( p_ssl->pp_substrs[i]->p_str == NULL )
        {
            goto failbail;
        }
        strcpy( p_ssl->pp_substrs[i]->p_str, p_curbuff );
    }

    // sort the substrings into alphabetical order
    if( Sort_SubStringList( p_ssl ) )
    {
        // success:
        return 1;
    }

failbail:
    Free_SubStringList( p_ssl );
    printf( "[Create_SubStringList] Error" );
    return 0;
}
// characters that separate the words (sub strings) of a string
static const char g_whitespace[] = " \t\n";

// Given an open file which contains plain text, builds a sub string list
// from every whitespace separated word of the file into the list given
// as 'p_ssl', then sorts it into alphabetical order
// (duplicates are *not* removed here, see 'RemoveDuplicates_SubStrings')
//
// the file is read twice: once to count the words, once to load them
// a file without any words is a failure (an empty list cannot be sorted)
//
// NOTE: destroys/frees the contents of 'p_ssl'
//       regardless of success or failure
// returns 1 on success, 0 on failure
static int BuildFromFile_SubStrings( SUBSTRLIST * p_ssl, FILE * p_file )
{
    int i;
    int numwords;
    char buff[1024];
    char * p_curbuff;

    // destroy the current list
    Free_SubStringList( p_ssl );

    // go to beginning of file
    if( fseek( p_file, 0L, SEEK_SET ) )
    {
        goto failbail;
    }

    // count our words:
    numwords = 0;
    while( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) != NULL )
    {
        p_curbuff = strtok( buff, g_whitespace );
        while( p_curbuff )
        {
            numwords++;
            p_curbuff = strtok( NULL, g_whitespace );
        }
    }

    // go to beginning of file
    if( fseek( p_file, 0L, SEEK_SET ) )
    {
        goto failbail;
    }

    // read each substring from words in file
    MakeNew_SubStringList( p_ssl, numwords );
    for( i = 0; i < p_ssl->numsubstrs; )
    {
        if( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_file ) == NULL )
        {
            goto failbail;
        }

        p_curbuff = strtok( buff, g_whitespace );
        // the bound is re-checked for every word so that a file which
        // grew since the counting pass cannot overrun the list
        while( p_curbuff && i < p_ssl->numsubstrs )
        {
            p_ssl->pp_substrs[i]->p_str
                = (char *) malloc( sizeof(char) * (strlen( p_curbuff ) + 1) );
            if( p_ssl->pp_substrs[i]->p_str == NULL )
            {
                goto failbail;
            }
            strcpy( p_ssl->pp_substrs[i]->p_str, p_curbuff );
            i++;
            p_curbuff = strtok( NULL, g_whitespace );
        }
    }

    // sort the substrings into alphabetical order
    if( Sort_SubStringList( p_ssl ) )
    {
        // success:
        return 1;
    }

failbail:
    Free_SubStringList( p_ssl );
    printf( "[BuildFromFile_SubStrings] Error" );
    return 0;
}
// given a sub string list and a string list
// refreshes the frequency of each sub string from the strings
// in the string list
//
// every whitespace separated word of every string must exist in the
// sub string list (which must be in alphabetical order for the
// lookup); a missing word, a NULL string or an allocation failure
// fails the whole update
//
// on failure:
//      - all frequencies are reset to 0
//      - returns 0
// on success:
//      - returns 1
static int UpdateFrequencies_SubStrings( SUBSTRLIST * p_ssl,
                                         const STRLIST * p_sl )
{
    int i;
    char * p_work = NULL;   // scratch copy of the string being tokenised
    char * p_grown;
    size_t worksize = 0;
    size_t needed;
    char * p_curbuff;
    SUBSTR * p_curss;

    // clear all current frequencies
    for( i = 0; i < p_ssl->numsubstrs; i++ )
    {
        p_ssl->p_substrsmem[i].freq = 0;
    }

    // parse every token in the string list
    // and update the frequency count of the
    // related sub string
    for( i = 0; i < p_sl->numstrs; i++ )
    {
        if( p_sl->pp_strs[i] == NULL )
        {
            goto failbail;
        }

        // copy working string (strtok modifies its input); the scratch
        // buffer grows to fit, since strings are not limited to any
        // fixed length here
        needed = strlen( p_sl->pp_strs[i] ) + 1;
        if( needed > worksize )
        {
            p_grown = (char *) realloc( p_work, needed );
            if( p_grown == NULL )
            {
                goto failbail;
            }
            p_work = p_grown;
            worksize = needed;
        }
        memcpy( p_work, p_sl->pp_strs[i], needed );

        for( p_curbuff = strtok( p_work, g_whitespace );
             p_curbuff;
             p_curbuff = strtok( NULL, g_whitespace ) )
        {
            // get related substring (*MUST* exist)
            p_curss = Find_SubStringByStr( p_ssl, p_curbuff );
            if( !p_curss )
            {
                goto failbail;
            }

            // update frequency
            p_curss->freq++;
        }
    }

    // success:
    free( p_work );
    return 1;

failbail:
    free( p_work );
    // clear all current frequencies (again)
    for( i = 0; i < p_ssl->numsubstrs; i++ )
    {
        p_ssl->p_substrsmem[i].freq = 0;
    }
    return 0;
}
// numbers the entries of a sub string list: each entry receives its
// current position in the pointer array as its index
// if a NULL pointer is found in the pointer array, every index is
// reset to -1 instead
// returns 1 on success, 0 on failure
static int UpdateIndices_SubStrings( SUBSTRLIST * p_ssl )
{
    int pos;

    for( pos = 0; pos < p_ssl->numsubstrs; pos++ )
    {
        SUBSTR * p_entry = p_ssl->pp_substrs[pos];

        if( p_entry == NULL )
        {
            // undo: no entry keeps a stale or partial numbering
            for( pos = 0; pos < p_ssl->numsubstrs; pos++ )
            {
                p_ssl->p_substrsmem[pos].idx = -1;
            }
            return 0;
        }
        p_entry->idx = pos;
    }
    return 1;
}
// merges the texts of 'p_ssl_src1' and 'p_ssl_src2' into a freshly
// allocated 'p_ssl_dst'
// - both sources are walked in pointer-array order and the smaller
//   text (strcmp) is taken first, so two alphabetically ordered
//   sources give an alphabetically ordered result; on a tie the
//   text from source 1 comes first
// - only the text pointers are moved; frequencies and indices of
//   the result are the fresh values from 'MakeNew_SubStringList'
// - both sources are emptied/freed afterwards
// NOTE: any previous contents of 'p_ssl_dst' are destroyed
static void Combine_SubStringLists( SUBSTRLIST * p_ssl_dst,
                                    SUBSTRLIST * p_ssl_src1,
                                    SUBSTRLIST * p_ssl_src2 )
{
    int pos1 = 0;
    int pos2 = 0;
    int out = 0;
    SUBSTR * p_from;

    MakeNew_SubStringList( p_ssl_dst,
                           p_ssl_src1->numsubstrs
                           + p_ssl_src2->numsubstrs );

    while( pos1 < p_ssl_src1->numsubstrs
           || pos2 < p_ssl_src2->numsubstrs )
    {
        if( pos1 >= p_ssl_src1->numsubstrs )
        {
            // source 1 exhausted
            p_from = p_ssl_src2->pp_substrs[pos2++];
        }
        else if( pos2 >= p_ssl_src2->numsubstrs )
        {
            // source 2 exhausted
            p_from = p_ssl_src1->pp_substrs[pos1++];
        }
        else if( strcmp( p_ssl_src1->pp_substrs[pos1]->p_str,
                         p_ssl_src2->pp_substrs[pos2]->p_str ) <= 0 )
        {
            p_from = p_ssl_src1->pp_substrs[pos1++];
        }
        else
        {
            p_from = p_ssl_src2->pp_substrs[pos2++];
        }

        // hand the text over; the source entry no longer owns it
        p_ssl_dst->pp_substrs[out++]->p_str = p_from->p_str;
        p_from->p_str = NULL;
    }

    // every text has been handed over, so only the (now empty)
    // arrays of the sources remain to be released
    p_ssl_src1->numsubstrs = 0;
    p_ssl_src2->numsubstrs = 0;
    Free_SubStringList( p_ssl_src1 );
    Free_SubStringList( p_ssl_src2 );
}
// drops repeated texts from a sub string list
// - only *adjacent* entries (in pointer-array order) are compared,
//   so equal texts must already sit next to each other
// - the first entry of each run of equal texts is kept, the rest
//   are freed
// - the surviving texts are then packed towards the front of the
//   pointer array, keeping their order, and the entry count shrinks
//   by the number of texts freed
// note: only the text pointers move; frequencies and indices stay
//       with their slots
static void RemoveDuplicates_SubStrings( SUBSTRLIST * p_ssl )
{
    const int total = p_ssl->numsubstrs;
    int run_start;
    int scan;
    int keep;
    int dropped = 0;

    // pass 1: free every repeat, leaving a NULL text in its slot
    run_start = 0;
    while( run_start < total )
    {
        const char * p_first = p_ssl->pp_substrs[run_start]->p_str;

        for( scan = run_start + 1;
             scan < total
             && strcmp( p_first, p_ssl->pp_substrs[scan]->p_str ) == 0;
             scan++ )
        {
            free( p_ssl->pp_substrs[scan]->p_str );
            p_ssl->pp_substrs[scan]->p_str = NULL;
            dropped++;
        }
        // 'scan' now sits on the first different text (or the end)
        run_start = scan;
    }

    // pass 2: slide the surviving texts down over the NULL slots
    keep = 0;
    for( scan = 0; scan < total; scan++ )
    {
        char * p_text = p_ssl->pp_substrs[scan]->p_str;

        if( p_text == NULL )
        {
            continue;
        }
        if( keep != scan )
        {
            p_ssl->pp_substrs[keep]->p_str = p_text;
            p_ssl->pp_substrs[scan]->p_str = NULL;
        }
        keep++;
    }

    // adjust number of strings in list
    p_ssl->numsubstrs -= dropped;
}
// transfers the texts of 'p_ssl_src' into a freshly allocated
// 'p_ssl_dst' of exactly the same length
// NOTES:
//  - 'p_ssl_src' ends up emptied/freed
//  - whatever 'p_ssl_dst' held before is destroyed
//  - only the text pointers are carried over; frequencies and
//    indices of the destination are the fresh values from
//    'MakeNew_SubStringList'
static void Move_SubStrings( SUBSTRLIST * p_ssl_dst,
                             SUBSTRLIST * p_ssl_src )
{
    int slot;

    // start from an empty destination of the right size
    Free_SubStringList( p_ssl_dst );
    MakeNew_SubStringList( p_ssl_dst, p_ssl_src->numsubstrs );

    slot = 0;
    while( slot < p_ssl_dst->numsubstrs )
    {
        SUBSTR * p_to = p_ssl_dst->pp_substrs[slot];
        SUBSTR * p_from = p_ssl_src->pp_substrs[slot];

        p_to->p_str = p_from->p_str;
        p_from->p_str = NULL;
        slot++;
    }

    // no text is left in the source, release its arrays
    p_ssl_src->numsubstrs = 0;
    Free_SubStringList( p_ssl_src );
}
// writes every string of the list to the file, one per line,
// after seeking to the start of the file
// NOTE(review): the file is not truncated here, and a stream opened
//               in append mode writes at its end whatever the seek
//               position is - confirm against how the file was opened
// on success:
//      - returns 1
// on failure:
//      - the file may be left partially written
//      - an error is reported on stdout
//      - returns 0
static int Write_Strings( STRLIST * p_sl,
                          FILE * p_file )
{
    int line;

    if( fseek( p_file, 0L, SEEK_SET ) )
    {
        goto failbail;
    }

    for( line = 0; line < p_sl->numstrs; line++ )
    {
        // the newline is only attempted once the text itself went out
        if( fputs( p_sl->pp_strs[line], p_file ) == EOF
            || fputs( "\n", p_file ) == EOF )
        {
            goto failbail;
        }
    }
    return 1;

failbail:
    printf( "[Write_Strings] Error, cannot write to file\n" );
    return 0;
}
// writes the text of every entry of the list to the file, one per
// line and in pointer-array order, after seeking to the start of
// the file
// NOTE(review): the file is not truncated here, so older and longer
//               contents would survive past the newly written part
// on success:
//      - returns 1
// on failure:
//      - the file may be left partially written
//      - an error is reported on stdout
//      - returns 0
static int Write_SubStrings( SUBSTRLIST * p_ssl,
                             FILE * p_file )
{
    int entry;

    if( fseek( p_file, 0L, SEEK_SET ) )
    {
        goto failbail;
    }

    for( entry = 0; entry < p_ssl->numsubstrs; entry++ )
    {
        // the newline is only attempted once the text itself went out
        if( fputs( p_ssl->pp_substrs[entry]->p_str, p_file ) == EOF
            || fputs( "\n", p_file ) == EOF )
        {
            goto failbail;
        }
    }
    return 1;

failbail:
    printf( "[Write_SubStrings] Error, cannot write to file\n" );
    return 0;
}
// writes the current string list out to the current string file
// and closes that file
// - the file is closed exactly once, whether or not the write
//   worked, and is no longer the current string file afterwards
// - the string list itself is left untouched
// - every failure is reported on stdout
// returns 1 on success, 0 on failure (no file open, write error or
// close error)
int WriteAndClose_StringFile()
{
    int ok;

    if( g_p_file_strs == NULL )
    {
        printf( "[WriteAndClose_StringFile] Error, never opened \n" );
        return 0;
    }

    ok = Write_Strings( &g_mainstrs, g_p_file_strs );
    if( !ok )
    {
        printf( "[WriteAndClose_StringFile] Error, cannot write to file\n" );
    }

    // close the file; the stream is gone after fclose even when the
    // call reports an error, so the pointer must never be used again
    if( fclose( g_p_file_strs ) != 0 )
    {
        printf( "[WriteAndClose_StringFile] Error closing file\n" );
        ok = 0;
    }
    g_p_file_strs = NULL;

    return ok;
}
// writes the current sub string list out to the current sub string
// file and closes that file
// - the file is closed exactly once, whether or not the write
//   worked, and is no longer the current sub string file afterwards
// - the sub string list itself is left untouched
// - every failure is reported on stdout
// returns 1 on success, 0 on failure (no file open, write error or
// close error)
int WriteAndClose_SubStringFile()
{
    int ok;

    if( g_p_file_substrs == NULL )
    {
        printf( "[WriteAndClose_SubStringFile] Error, never opened \n" );
        return 0;
    }

    ok = Write_SubStrings( &g_mainsubstrs, g_p_file_substrs );
    if( !ok )
    {
        printf( "[WriteAndClose_SubStringFile] Error, cannot write to file\n" );
    }

    // close the file; the stream is gone after fclose even when the
    // call reports an error, so the pointer must never be used again
    if( fclose( g_p_file_substrs ) != 0 )
    {
        printf( "[WriteAndClose_SubStringFile] Error closing file\n" );
        ok = 0;
    }
    g_p_file_substrs = NULL;

    return ok;
}
// closes the current open string file without writing anything
// - the string list itself is left untouched
// - afterwards there is no current string file, even when the
//   close reported an error (the stream cannot be used after
//   fclose either way)
// returns 1 on success, 0 on failure (no file open or close error)
int Close_StringFile()
{
    int closed;

    if( g_p_file_strs == NULL )
    {
        printf( "[Close_StringFile] Error, never opened \n" );
        return 0;
    }

    // close the file
    closed = ( fclose( g_p_file_strs ) == 0 );
    g_p_file_strs = NULL;
    if( !closed )
    {
        printf( "[Close_StringFile] Error closing file\n" );
        return 0;
    }

    // success
    return 1;
}
// closes the current open sub string file without writing anything
// - the sub string list itself is left untouched
// - afterwards there is no current sub string file, even when the
//   close reported an error (the stream cannot be used after
//   fclose either way)
// returns 1 on success, 0 on failure (no file open or close error)
int Close_SubStringFile()
{
    int closed;

    if( g_p_file_substrs == NULL )
    {
        printf( "[Close_SubStringFile] Error, never opened \n" );
        return 0;
    }

    // close the file
    closed = ( fclose( g_p_file_substrs ) == 0 );
    g_p_file_substrs = NULL;
    if( !closed )
    {
        printf( "[Close_SubStringFile] Error closing file\n" );
        return 0;
    }

    // success
    return 1;
}
// given a path, opens a string file
// (a simple line by line list of strings)
// reads in the contents as the string list and sets
// the file as the current open string file
//
// the file is opened for reading and appending ("a+t")
// when the contents cannot be loaded the file is closed again and
// there is no current string file afterwards
// NOTE(review): a string file that is already open is not closed
//               first; its handle is simply replaced
//
// returns 1 on success, 0 on failure
int Open_StringFile( const char * p_path )
{
    g_p_file_strs = fopen( p_path, "a+t" );
    if( g_p_file_strs == NULL )
    {
        printf( "[Open_StringFile] Error, cannot open file: %s.\n", p_path );
        return 0;
    }

    if( Create_StringList( &g_mainstrs, g_p_file_strs ) )
    {
        // success
        return 1;
    }

    // never leave a closed stream behind as the "current" file
    fclose( g_p_file_strs );
    g_p_file_strs = NULL;
    printf( "[Open_StringFile] Error" );
    return 0;
}
// given a path, opens an existing string file
// (a simple line by line list of strings)
// reads in the contents and appends them to the
// current string file, then reloads the current string list
// from that file
//
// the source file is closed again on every path; failures are
// reported on stdout
//
// returns 1 on success, 0 on failure
// p_startidx_o <- number of lines in the current string file before
//                 appending, i.e. index of the first appended string
// p_size_o <- number of lines appended
// (both outputs are only meaningful on success)
int AppendStringsFromFile( int * p_startidx_o,
                           int * p_size_o,
                           const char * p_path )
{
    char buff[8192];
    FILE * p_srcfile;
    int ok = 0;

    if( g_p_file_strs == NULL )
    {
        printf( "[AppendStringsFromFile] Error, String file never opened\n" );
        return 0;
    }

    p_srcfile = fopen( p_path, "rt" );
    if( p_srcfile == NULL )
    {
        printf( "[AppendStringsFromFile] Error, cannot open file: %s\n", p_path );
        return 0;
    }

    // go to beginning of source file
    if( fseek( p_srcfile, 0L, SEEK_SET ) )
    {
        goto done;
    }

    // go to beginning of dest file
    if( fseek( g_p_file_strs, 0L, SEEK_SET ) )
    {
        goto done;
    }

    // count our lines:
    (*p_startidx_o) = 0;
    while( fgets( buff, sizeof(buff)/sizeof(char) - 1, g_p_file_strs ) )
    {
        (*p_startidx_o)++;
    }

    // go to end of dest file
    if( fseek( g_p_file_strs, 0L, SEEK_END ) )
    {
        goto done;
    }

    // copy each line from
    // the source file to the dest file
    (*p_size_o) = 0;
    while( fgets( buff, sizeof(buff)/sizeof(char) - 1, p_srcfile ) )
    {
        if( fputs( buff, g_p_file_strs ) == EOF )
        {
            goto done;
        }
        (*p_size_o)++;
    }

    // flush all IO operations
    if( fflush( g_p_file_strs ) == EOF )
    {
        goto done;
    }

    // cause current string list to be reloaded
    if( !Create_StringList( &g_mainstrs, g_p_file_strs ) )
    {
        goto done;
    }
    ok = 1;

done:
    if( !ok )
    {
        printf( "[AppendStringsFromFile] Error\n" );
    }

    // close source file (exactly once, on every path)
    if( fclose( p_srcfile ) != 0 )
    {
        printf( "[AppendStringsFromFile] Error closing file: %s\n", p_path );
        ok = 0;
    }
    return ok;
}
// given a plain text file, extracts its words and folds any new
// ones into the current sub string list
//
// steps:
//  1. the words of the file are loaded (sorted) into 'g_tempsubstrs1'
//  2. that list and the current list are merged into 'g_tempsubstrs2'
//     (this empties both of them)
//  3. repeated words are dropped from the merged list
//  4. the merged list becomes the current list
//     (this empties 'g_tempsubstrs2')
//
// returns 1 on success, 0 on failure
int Add_SubStringsFromFile( const char * p_path )
{
    FILE * p_words;

    p_words = fopen( p_path, "rt" );
    if( p_words == NULL )
    {
        printf( "[Add_SubStringsFromFile] Error, cannot open file: %s.\n", p_path );
        return 0;
    }

    if( !BuildFromFile_SubStrings( &g_tempsubstrs1, p_words ) )
    {
        fclose( p_words );
        printf( "[Add_SubStringsFromFile] Error" );
        return 0;
    }

    Combine_SubStringLists( &g_tempsubstrs2,
                            &g_tempsubstrs1,
                            &g_mainsubstrs );
    RemoveDuplicates_SubStrings( &g_tempsubstrs2 );
    Move_SubStrings( &g_mainsubstrs, &g_tempsubstrs2 );

    fclose( p_words );
    return 1;
}
// given a path, opens an existing sub string file
// (should contain a list of unique substrings, one per line)
// reads in the contents as the current sub string list and sets
// the file as the current open sub string file
//
// the file is opened for reading and writing ("r+t")
// when the contents cannot be loaded the file is closed again and
// there is no current sub string file afterwards
// NOTE(review): a sub string file that is already open is not
//               closed first; its handle is simply replaced
//
// returns 1 on success, 0 on failure
int Open_SubStringFile( const char * p_path )
{
    g_p_file_substrs = fopen( p_path, "r+t" );
    if( g_p_file_substrs == NULL )
    {
        printf( "[OpenSubStringFile] Error, cannot open file: %s.\n", p_path );
        return 0;
    }

    if( Create_SubStringList( &g_mainsubstrs, g_p_file_substrs ) )
    {
        // success
        return 1;
    }

    // never leave a closed stream behind as the "current" file
    fclose( g_p_file_substrs );
    g_p_file_substrs = NULL;
    printf( "[OpenSubStringFile] Error" );
    return 0;
}
// creates (or truncates) the file at the given path and makes it
// the current open string file; the current string list is emptied
// on failure an error is reported on stdout and the string list is
// left alone
//
// returns 1 on success, 0 on failure
int Create_StringFile( const char * p_path )
{
    g_p_file_strs = fopen( p_path, "w+t" );
    if( g_p_file_strs == NULL )
    {
        printf( "[Create_StringFile] Error, cannot create file: %s.\n", p_path );
        return 0;
    }

    // a new file starts with an empty list
    Free_StringList( &g_mainstrs );
    return 1;
}
// creates (or truncates) the file at the given path and makes it
// the current open sub string file; the current sub string list is
// emptied
// on failure an error is reported on stdout and the sub string list
// is left alone
//
// returns 1 on success, 0 on failure
int Create_SubStringFile( const char * p_path )
{
    g_p_file_substrs = fopen( p_path, "w+t" );
    if( g_p_file_substrs == NULL )
    {
        printf( "[Create_SubStringFile] Error, cannot create file: %s.\n", p_path );
        return 0;
    }

    // a new file starts with an empty list
    Free_SubStringList( &g_mainsubstrs );
    return 1;
}
// copies the current local time, formatted by asctime (the text
// ends with a newline), into 'p_dest'
// the caller supplies a buffer large enough for that text
// returns 'p_dest'
static char * GetTimeStr( char * p_dest )
{
    time_t now;

    p_dest[0] = 0;
    time( &now );
    strcpy( p_dest, asctime( localtime( &now ) ) );
    return p_dest;
}
// writes the banner comment that opens the generated source file
//
// form of the banner:
//      "<SRC_NAME>"
//      auto-generated by: "make_kernel_string_pool.c"
//      for: UbixOS Project
//      date: <DATE>
//      purpose: - contains constant data for a string pool
//
// always returns 1
static int KSSC_WriteSourceHeader( FILE * p_file,
                                   const char * p_src_name )
{
    char timestr[1024];

    // the time text already ends with a newline of its own
    GetTimeStr( timestr );
    fprintf( p_file,
             "/*\n"
             " \"%s\"\n"
             "\n"
             " auto-generated by: \"make_kernel_string_pool.c\"\n"
             " for: UbixOS Project\n"
             " date: %s"
             "\n"
             " purpose: - contains constant data for a string pool\n"
             "*/\n"
             "\n",
             p_src_name,
             timestr );
    return 1;
}
// writes one "#include" line per entry of 'pp_includes', followed
// by a blank line
// each entry is written as given, so it has to carry its own
// quotes or angle brackets
// always returns 1
static int KSSC_WriteSourceIncludes( FILE * p_file,
                                     const char ** pp_includes,
                                     int numincludes )
{
    int which = 0;

    while( which < numincludes )
    {
        fprintf( p_file, "#include %s\n", pp_includes[which] );
        which++;
    }
    fprintf( p_file, "\n" );
    return 1;
}
// builds the identifier of the generated sub strings array,
// "<pool name>_substrs", into 'p_dst'
// the caller supplies a buffer large enough for the result
// returns 'p_dst'
static char * KSSC_GetSubStringID( char * p_dst,
                                   const char * p_pool_name )
{
    static const char suffix[] = "_substrs";

    sprintf( p_dst, "%s%s", p_pool_name, suffix );
    return p_dst;
}
// name of the byte type used for the generated arrays
#define KSSC_BYTETYPESTR ("BYTEg")

// writes one value of a generated byte array as decimal text
// - a line break is written first whenever 'bytevalidx' is a
//   multiple of 16, giving 16 values per output line
// - ", " follows the value when 'withcomma' is non-zero
// returns 'bytevalidx' + 1, the position of the next value
static int KSSC_WriteByteVal( FILE * p_file,
                              int bytevalidx,
                              int byteval,
                              int withcomma )
{
    const char * p_lead = ( (bytevalidx & 15) == 0 ) ? "\n" : "";
    const char * p_trail = withcomma ? ", " : "";

    fprintf( p_file, "%s%d%s", p_lead, byteval, p_trail );
    return bytevalidx + 1;
}
// write the sub strings array
//
// form of sub strings:
//      static <KSSC_BYTETYPESTR> <SUBSTRID>[] =
//      {
//      <BYTEVAL0>, <BYTEVAL1>, <BYTEVAL2>, ...
//      <BYTEVAL16>, <BYTEVAL17>, <BYTEVAL18>, ...
//      ...
//      };
//
// every sub string of the current sub string list is written, in
// pointer-array order, as one length byte followed by its characters
// - the length byte holds the character count minus one, which is
//   why a sub string must be 1 to 256 characters long
// - characters are written as values 0..255
//
// an empty list is rejected before anything is written; an invalid
// sub string stops the output part way through the array
//
// return size of memory included into source (number of bytes
// written into the array), or 0 on failure
static int KSSC_WriteSubStrings( FILE * p_file,
                                 const char * p_pool_name )
{
    char buff[4096];
    const char * p_text;
    int i, ii;
    int bytevalidx;
    int numchars;
    int memsize;
    int is_last_str;

    // an array without elements cannot be generated
    if( g_mainsubstrs.numsubstrs < 1 || g_mainsubstrs.pp_substrs == NULL )
    {
        printf( "[KSSC_WriteSubStrings], ERROR! no substrings to write\n" );
        return 0;
    }

    fprintf( p_file,
             "static %s %s[] =\n",
             KSSC_BYTETYPESTR,
             KSSC_GetSubStringID( buff, p_pool_name ) );
    fprintf( p_file, "{" );

    // write out sub strings to source file
    memsize = 0;
    bytevalidx = 0;
    for( i = 0; i < g_mainsubstrs.numsubstrs; i++ )
    {
        // get number of characters in sub string
        p_text = g_mainsubstrs.pp_substrs[i]->p_str;
        numchars = p_text ? (int) strlen( p_text ) : 0;
        if( numchars < 1 || numchars > 256 )
        {
            printf( "[KSSC_WriteSubStrings], ERROR! invalid substring: \"%s\"\n",
                    p_text ? p_text : "(null)" );
            return 0;
        }
        is_last_str = ( i == g_mainsubstrs.numsubstrs - 1 );

        // write number of characters in byte string
        bytevalidx
            = KSSC_WriteByteVal( p_file,
                                 bytevalidx,
                                 numchars - 1,
                                 1 );
        memsize++;

        // write each character of byte string; only the very last
        // value of the whole array goes without a trailing comma
        for( ii = 0; ii < numchars; ii++ )
        {
            bytevalidx
                = KSSC_WriteByteVal( p_file,
                                     bytevalidx,
                                     (int) (unsigned char) p_text[ii],
                                     !( is_last_str && ii == numchars - 1 ) );
            memsize++;
        }
    }

    // write closing braces etc.
    fprintf( p_file, "\n};\n\n" );

    // success
    return memsize;
}
// builds the identifier of the generated sub string offsets array,
// "<pool name>_substroffs", into 'p_dst'
// the caller supplies a buffer large enough for the result
// returns 'p_dst'
static char * KSSC_GetSubStringOffsetID( char * p_dst,
                                         const char * p_pool_name )
{
    static const char suffix[] = "_substroffs";

    sprintf( p_dst, "%s%s", p_pool_name, suffix );
    return p_dst;
}
// writes one value of a generated offset array as unsigned decimal
// text
// - a line break is written first whenever 'offsetidx' is a
//   multiple of 8, giving 8 values per output line
// - ", " follows the value when 'withcomma' is non-zero
// returns 'offsetidx' + 1, the position of the next value
static int KSSC_WriteOffsetVal( FILE * p_file,
                                int offsetidx,
                                unsigned int offset,
                                int withcomma )
{
    const char * p_lead = ( (offsetidx & 7) == 0 ) ? "\n" : "";
    const char * p_trail = withcomma ? ", " : "";

    fprintf( p_file, "%s%u%s", p_lead, offset, p_trail );
    return offsetidx + 1;
}
// 86'd to save memory at cost of speed
// (everything up to the matching '#endif // #if 0' is excluded from
// compilation and kept for reference only)
#if 0
// write the sub strings offset array
// one offset is written per sub string of the current sub string
// list; each offset advances by the length of the preceding sub
// string plus one
// return size of memory included into source
static int KSSC_WriteSubStringOffsets( FILE * p_file,
const char * p_pool_name )
{
char buff[4096];
unsigned int curoffset;
int i;
int numchars;
int offsetidx;
int memsize;
memsize = 0;
/* form of sub string offsets */
#if 0
static unsigned int <SUBSTROFFID>[] =
{
<OFFSET0>, <OFFSET1>, <OFFSET2>, ...
<OFFSET8>, <OFFSET9>, <OFFSET10>, ...
<OFFSET16>, <OFFSET17>, <OFFSET18>, ...
.
.
.
};
#endif
fprintf( p_file,
"static unsigned int %s[] =\n",
KSSC_GetSubStringOffsetID( buff, p_pool_name ) );
fprintf( p_file, "{" );
offsetidx = 0;
curoffset = 0;
// all offsets but the last are followed by a comma
for( i = 0; i < g_mainsubstrs.numsubstrs - 1; i++ )
{
// get number of characters in sub string
numchars = strlen( g_mainsubstrs.pp_substrs[i]->p_str );
// write the offset
offsetidx
= KSSC_WriteOffsetVal( p_file,
offsetidx,
curoffset,
1 );
memsize += sizeof(unsigned int);
// update the offset
curoffset += numchars + 1;
}
// write final offset
KSSC_WriteOffsetVal( p_file,
offsetidx,
curoffset,
0 );
memsize += sizeof(unsigned int);
// write closing braces etc.
fprintf( p_file, "\n};\n\n" );
// success
return memsize;
}
#endif // #if 0
// works out how a string is expressed as a kernel string, i.e. as a
// sequence of sub string indices, and optionally produces the bytes
//
// layout of a kernel string:
//      byte 0          : total number of bytes minus one
//      then, per word  : the index of the word's sub string as
//          1 byte      (index 0..63)       : index
//          2 bytes     (index 64..16383)   : 0x40 | high bits, low byte
//          3 bytes     (index above 16383) : 0x80 | high bits,
//                                            middle byte, low byte
//
// p_numwords_o     <- number of words in the string
// p_numwordbytes_o <- total number of bytes of the kernel string
// p_wordbytes_o    <- the bytes themselves; may be NULL to only
//                     measure, otherwise needs room for 256 bytes
// p_str            <- the string, words separated by whitespace
//
// fails when:
//      - the string has no words
//      - a word is missing from the current sub string list
//        (which must be in alphabetical order for the lookup)
//      - the kernel string would need more than 256 bytes; this is
//        checked before each word is written, so the output buffer
//        is never written past 256 bytes
//      - bytes are requested and an index is negative (never
//        assigned) or too large for the 3 byte form
//      - memory for the working copy cannot be allocated
// the output values are only meaningful on success
//
// returns 1 on success, 0 on failure
static int KSSC_GetStringProfile( int * p_numwords_o,
                                  int * p_numwordbytes_o,
                                  BYTEg * p_wordbytes_o,
                                  const char * p_str )
{
    char * p_work;
    char * p_curbuff;
    BYTEg * p_out;
    SUBSTR * p_curss;
    int idx;
    int idxbytes;
    int result = 0;

    (*p_numwords_o) = 0;
    (*p_numwordbytes_o) = 1;
    if( p_str == NULL )
    {
        return 0;
    }

    // copy working string (strtok modifies its input); the copy is
    // sized to fit, whatever the length of the string
    p_work = (char *) malloc( strlen( p_str ) + 1 );
    if( p_work == NULL )
    {
        return 0;
    }
    strcpy( p_work, p_str );

    p_curbuff = strtok( p_work, g_whitespace );
    if( p_curbuff == NULL )
    {
        // no words at all
        goto done;
    }

    while( p_curbuff )
    {
        // get related substring (*MUST* exist)
        p_curss = Find_SubStringByStr( &g_mainsubstrs, p_curbuff );
        if( !p_curss )
        {
            goto done;
        }

        // number of bytes needed to express the index
        idx = p_curss->idx;
        idxbytes = ( idx > 16383 ) ? 3 : ( ( idx > 63 ) ? 2 : 1 );
        if( (*p_numwordbytes_o) + idxbytes > 256 )
        {
            // too long for the one byte length prefix
            goto done;
        }

        // update bytes in KString (if needed)
        if( p_wordbytes_o )
        {
            // 6 bits remain in the first byte of the 3 byte form
            if( idx < 0 || idx > 0x3FFFFF )
            {
                goto done;
            }
            p_out = p_wordbytes_o + (*p_numwordbytes_o);
            if( idxbytes == 3 )
            {
                // larger escape sequence
                p_out[0] = (BYTEg) ((idx >> 16) | 0x80);
                p_out[1] = (BYTEg) ((idx >> 8) & 0xFF);
                p_out[2] = (BYTEg) (idx & 0xFF);
            }
            else if( idxbytes == 2 )
            {
                // escape sequence
                p_out[0] = (BYTEg) ((idx >> 8) | 0x40);
                p_out[1] = (BYTEg) (idx & 0xFF);
            }
            else
            {
                // normal expression
                p_out[0] = (BYTEg) idx;
            }
        }

        // update number of words and total bytes
        (*p_numwords_o)++;
        (*p_numwordbytes_o) += idxbytes;

        // next token
        p_curbuff = strtok( NULL, g_whitespace );
    }

    // update bytes in KString, number of bytes, (if needed)
    if( p_wordbytes_o )
    {
        p_wordbytes_o[0] = (BYTEg) ((*p_numwordbytes_o) - 1);
    }
    result = 1;

done:
    free( p_work );
    return result;
}
// builds the identifier of the generated kernel strings array,
// "<pool name>_kstrs", into 'p_dst'
// the caller supplies a buffer large enough for the result
// returns 'p_dst'
static char * KSSC_GetKStringID( char * p_dst,
                                 const char * p_pool_name )
{
    static const char suffix[] = "_kstrs";

    sprintf( p_dst, "%s%s", p_pool_name, suffix );
    return p_dst;
}
// write the kernel strings array
//
// form of kernel strings:
//      static <KSSC_BYTETYPESTR> <KSTRID>[] =
//      {
//      <BYTEVAL0>, <BYTEVAL1>, <BYTEVAL2>, ...
//      <BYTEVAL16>, <BYTEVAL17>, <BYTEVAL18>, ...
//      ...
//      };
//
// every string of the current string list is converted with
// 'KSSC_GetStringProfile' and its bytes are written one after the
// other
//
// an empty list is rejected before anything is written; a string
// that cannot be converted stops the output part way through the
// array
//
// returns size of memory included into source (number of bytes
// written into the array), or 0 on failure
static int KSSC_WriteStrings( FILE * p_file,
                              const char * p_pool_name )
{
    char buff[4096];
    BYTEg buff2[4096];
    const char * p_text;
    int i, ii;
    int bytevalidx;
    int numwords;
    int numwordbytes;
    int memsize;
    int is_last_str;

    // an array without elements cannot be generated
    if( g_mainstrs.numstrs < 1 || g_mainstrs.pp_strs == NULL )
    {
        printf( "[KSSC_WriteStrings], Error, no strings to write\n" );
        return 0;
    }

    fprintf( p_file,
             "static %s %s[] =\n",
             KSSC_BYTETYPESTR,
             KSSC_GetKStringID( buff, p_pool_name ) );
    fprintf( p_file, "{" );

    memsize = 0;
    bytevalidx = 0;
    for( i = 0; i < g_mainstrs.numstrs; i++ )
    {
        // get profile of string
        p_text = g_mainstrs.pp_strs[i];
        if( p_text == NULL
            || !KSSC_GetStringProfile( &numwords,
                                       &numwordbytes,
                                       buff2,
                                       p_text ) )
        {
            // fail
            printf( "[KSSC_WriteStrings], Error, invalid string? -> %s\n",
                    p_text ? p_text : "(null)" );
            return 0;
        }
        is_last_str = ( i == g_mainstrs.numstrs - 1 );

        // write out each byte in kstring; only the very last value
        // of the whole array goes without a trailing comma
        for( ii = 0; ii < numwordbytes; ii++ )
        {
            bytevalidx
                = KSSC_WriteByteVal( p_file,
                                     bytevalidx,
                                     (int) buff2[ii],
                                     !( is_last_str && ii == numwordbytes - 1 ) );
        }
        memsize += numwordbytes;
    }

    // write closing braces etc.
    fprintf( p_file, "\n};\n\n" );

    // success
    return memsize;
}
// writes the definition of the string pool structure, which ties
// the kernel strings array and the sub strings array together
//
// form of kernel string pool:
//      /* kernel string pool */
//      KSTR_POOL <POOLNAME> = { <KSTRID>, <SUBSTRID> };
//
// always returns 1
static int KSSC_WriteStringPool( FILE * p_file,
                                 const char * p_pool_name )
{
    char kstr_id[2048];
    char substr_id[2048];

    KSSC_GetKStringID( kstr_id, p_pool_name );
    KSSC_GetSubStringID( substr_id, p_pool_name );

    fprintf( p_file,
             "/* kernel string pool */\n"
             "KSTR_POOL %s = { %s, %s };\n",
             p_pool_name,
             kstr_id,
             substr_id );
    return 1;
}
// converts a filename to a define symbol (include guard name)
// - the name is prefixed with '_'
// - letters are converted to upper case
// - every character that cannot be part of an identifier ('.', '-',
//   ' ', ...) becomes '_', so that the result is always usable after
//   '#ifndef' / '#define'
// example: "kstrpool.h" -> "_KSTRPOOL_H"
// the caller supplies a buffer with room for the name plus two
// more characters
// returns 'p_dst'
static char * KSSC_GetHeaderDefineSymbol( char * p_dst,
                                          const char * p_hdr_name )
{
    char * p_curdst;
    unsigned char ch;

    sprintf( p_dst, "_%s", p_hdr_name );
    for( p_curdst = p_dst; (*p_curdst) != 0; p_curdst++ )
    {
        // the <ctype.h> functions need the character as an unsigned
        // char value; a negative plain char is not a valid argument
        ch = (unsigned char) (*p_curdst);
        if( isalnum( ch ) )
        {
            (*p_curdst) = (char) toupper( ch );
        }
        else if( ch != '_' )
        {
            (*p_curdst) = '_';
        }
    }
    return p_dst;
}
// Emits the opening part of the generated header file: a descriptive
// comment banner followed by the start of the include guard.
//
// Text written to 'p_file':
//
//   /*
//    "<HDR_NAME>"
//
//    auto-generated by: "make_kernel_string_pool.c"
//    for: UbixOS Project
//    date: <DATE>
//    purpose: - header file for a string pool
//   */
//
//   #ifndef <GUARD>
//   #define <GUARD>
//
// <DATE> is the string returned by 'GetTimeStr' and <GUARD> is the symbol
// produced by 'KSSC_GetHeaderDefineSymbol' for 'p_hdr_name'.
//
// Input:
//  - p_file     <- stream the header is written to
//  - p_hdr_name <- name of the header file
// Output:
//  - always returns 1 (results of the writes are not checked)
static int KSSC_WriteHeaderHeader( FILE * p_file,
                                   const char * p_hdr_name )
{
    char guardsym[1024];
    char timestr[1024];

    // comment banner
    fputs( "/*\n", p_file );
    fprintf( p_file, " \"%s\"\n", p_hdr_name );
    fputs( "\n"
           " auto-generated by: \"make_kernel_string_pool.c\"\n"
           " for: UbixOS Project\n",
           p_file );
    fprintf( p_file, " date: %s\n", GetTimeStr( timestr ) );
    fputs( " purpose: - header file for a string pool\n"
           "*/\n"
           "\n",
           p_file );

    // start of the include guard
    KSSC_GetHeaderDefineSymbol( guardsym, p_hdr_name );
    fprintf( p_file, "#ifndef %s\n#define %s\n\n", guardsym, guardsym );
    return 1;
}
// Emits the 'extern' declaration of the kernel string pool object, for use
// in the generated header file.
//
// Text written to 'p_file':
//
//   /* kernel string pool */
//   extern KSTR_POOL <POOLNAME>;
//
// Input:
//  - p_file      <- stream the declaration is written to
//  - p_pool_name <- identifier of the string pool
// Output:
//  - always returns 1 (the result of the write is not checked)
static int KSSC_WriteStringPoolDecl( FILE * p_file,
                                     const char * p_pool_name )
{
    fprintf( p_file,
             "/* kernel string pool */\n"
             "extern KSTR_POOL %s;\n",
             p_pool_name );
    return 1;
}
// Emits the closing part of the generated header file, i.e. the end of the
// include guard.
//
// Text written to 'p_file' (preceded by one empty line):
//
//   #endif /* <GUARD> */
//
// <GUARD> is the symbol produced by 'KSSC_GetHeaderDefineSymbol' for
// 'p_hdr_name'.
//
// Input:
//  - p_file     <- stream the footer is written to
//  - p_hdr_name <- name of the header file
// Output:
//  - always returns 1 (the result of the write is not checked)
static int KSSC_WriteHeaderFooter( FILE * p_file,
                                   const char * p_hdr_name )
{
    char guardsym[1024];

    KSSC_GetHeaderDefineSymbol( guardsym, p_hdr_name );
    fprintf( p_file, "\n#endif /* %s */\n", guardsym );
    return 1;
}
/*
Input:
- p_hdr_path <- full path to header file to write,
relative to current working directory
- p_hdr_name <- name of header file
- p_src_path <- full path to source file to write,
relative to current working directory
- p_src_name <- name of source file
- pp_includes <- list of files to include eg. <h1.h>, "h4.h", ...
- numincludes <- number of files in include list
- p_pool_name <- name of identifier to use for string pool
generated ( NOTE: sub-arrays of string pool
will use this name to base their names )
Output:
- generates both a header file and a source code file which
can be used to embed compressed strings into an object module
based on the currently open string list and sub string list
- prints the memory size of each generated array and the total
- returns 1 on success, 0 on failure
- both output files are always closed before returning, on success
as well as on every failure path ( partially written files are
left on disk )
Assumptions:
- a string list is currently open <see: 'Open_StringFile', etc.>
- a sub string list is currently open <see: 'Open_SubStringFile', etc.>
- the strings in the open string file are a superset of the sub strings
in the open sub string file
*/
int Create_KStringSourceCode( const char * p_hdr_path,
                              const char * p_hdr_name,
                              const char * p_src_path,
                              const char * p_src_name,
                              const char ** pp_includes,
                              int numincludes,
                              const char * p_pool_name )
{
    FILE * p_srccode = NULL;
    FILE * p_hdr = NULL;
    int memsize = 0;
    int totalmemsize = 0;
    // pessimistic default, only set to 1 once everything has been written
    int result = 0;

    // open source code file
    p_srccode = fopen( p_src_path, "w+t" );
    if( !p_srccode )
    {
        printf( "[Create_KStringSourceCode], Error opening source file -> %s\n", p_src_path );
        goto cleanup;
    }
    // open header file
    p_hdr = fopen( p_hdr_path, "w+t" );
    if( !p_hdr )
    {
        printf( "[Create_KStringSourceCode], Error opening header file -> %s\n", p_hdr_path );
        goto cleanup;
    }
    // write the file header for the source file
    if( !KSSC_WriteSourceHeader( p_srccode, p_src_name ) )
    {
        goto cleanup;
    }
    // write the include lines
    if( !KSSC_WriteSourceIncludes( p_srccode,
                                   pp_includes,
                                   numincludes ) )
    {
        goto cleanup;
    }
    // sort by frequency
    if( !UpdateFrequencies_SubStrings( &g_mainsubstrs, &g_mainstrs ) )
    {
        // fail
        printf( "[Create_KStringSourceCode], Error assigning substring frequencies.\n" );
        goto cleanup;
    }
    if( !Sort_SubStringListByFreq( &g_mainsubstrs ) )
    {
        // fail, but attempt to restore sorting to alphabetic
        Sort_SubStringList( &g_mainsubstrs );
        printf( "[Create_KStringSourceCode], Error sorting substrings by frequency.\n" );
        goto cleanup;
    }
    // assign indices to sub strings based on frequency
    if( !UpdateIndices_SubStrings( &g_mainsubstrs ) )
    {
        // fail, but attempt to restore sorting to alphabetic
        Sort_SubStringList( &g_mainsubstrs );
        printf( "[Create_KStringSourceCode], Error assigning indices to sub strings based on frequency.\n" );
        goto cleanup;
    }
    // write the sub strings array
    memsize = KSSC_WriteSubStrings( p_srccode, p_pool_name );
    if( !memsize )
    {
        // fail, but attempt to restore sorting to alphabetic so the list is
        // not left frequency-sorted (same policy as the failures above)
        Sort_SubStringList( &g_mainsubstrs );
        goto cleanup;
    }
    printf( "KSSC_WriteSubStrings->MemSize = %d\n", memsize );
    totalmemsize += memsize;
    // 86'd to save memory at cost of speed
#if 0
    // write the sub strings offset array
    memsize = KSSC_WriteSubStringOffsets( p_srccode, p_pool_name );
    if( !memsize )
    {
        // fail, but attempt to restore sorting to alphabetic
        Sort_SubStringList( &g_mainsubstrs );
        goto cleanup;
    }
    printf( "KSSC_WriteSubStringOffsets->MemSize = %d\n", memsize );
    totalmemsize += memsize;
#endif
    // restore sorting to alphabetic
    if( !Sort_SubStringList( &g_mainsubstrs ) )
    {
        // fail
        printf( "[Create_KStringSourceCode], Error restoring alphabetic ordering to sub strings.\n" );
        goto cleanup;
    }
    // write the kernel strings array
    memsize = KSSC_WriteStrings( p_srccode, p_pool_name );
    if( !memsize )
    {
        goto cleanup;
    }
    printf( "KSSC_WriteStrings->MemSize = %d\n", memsize );
    totalmemsize += memsize;
    // write the string pool structure itself
    if( !KSSC_WriteStringPool( p_srccode, p_pool_name ) )
    {
        goto cleanup;
    }
    // write the file header for the header file
    if( !KSSC_WriteHeaderHeader( p_hdr, p_hdr_name ) )
    {
        goto cleanup;
    }
    // write the string pool structure declaration
    if( !KSSC_WriteStringPoolDecl( p_hdr, p_pool_name ) )
    {
        goto cleanup;
    }
    // write the file footer for the header file
    if( !KSSC_WriteHeaderFooter( p_hdr, p_hdr_name ) )
    {
        goto cleanup;
    }
    // everything written; only the closing of the files can still fail
    result = 1;

cleanup:
    // Close whichever files were opened, each exactly once.  A failed
    // fclose() still disassociates the stream, so it must never be retried;
    // it does however mean buffered output may have been lost, which turns
    // an otherwise successful run into a failure.
    if( p_hdr != NULL && fclose( p_hdr ) != 0 )
    {
        printf( "[Create_KStringSourceCode], Error closing header file.\n" );
        result = 0;
    }
    if( p_srccode != NULL && fclose( p_srccode ) != 0 )
    {
        printf( "[Create_KStringSourceCode], Error closing source file.\n" );
        result = 0;
    }
    if( result )
    {
        printf( "Create_KStringSourceCode->MemSize = %d Bytes, %d KBytes\n", totalmemsize, ((totalmemsize + 1023)>>10) );
    }
    return result;
}
// Resets *everything*, frees all memory in use, closes all open files
void MakeKSP_ResetAll()
{
Free_StringList( &g_mainstrs );
Free_SubStringList( &g_mainsubstrs );
Free_SubStringList( &g_tempsubstrs1 );
Free_SubStringList( &g_tempsubstrs2 );
if( g_p_file_substrs )
{
fclose( g_p_file_substrs );
g_p_file_substrs = NULL;
}
if( g_p_file_strs )
{
fclose( g_p_file_strs );
g_p_file_strs = NULL;
}
}