Newer
Older
ubixos / src / tools / make_kernel_string_pool.c
@grayspace grayspace on 7 Jul 2002 15 KB *** empty log message ***
/*
   "make_kernel_string_pool.c"

   created by: grayspace aka J. Leveille
   for: UbixOS Project
   date: July 3, 2002

   purpose: - tool to create string pools for in kernel use
            - will generate the source for including a global
            set of ASCII (for now) strings into the kernel
            to avoid memory wastage for kernel output strings
            for long lists of things such as device descriptions

   NOTEs:
      - for now only ASCII is supported

   TODO:
      - expand to support unicode
      - use huffman encoding instead

   $Id$
*/

/*
   SubStringfile format:
      - each unique substring on its own line
*/

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

// a list of sub strings
typedef struct tagSUBSTRLIST
{
   // array of 'numsubstrs' pointers to NUL-terminated strings;
   // the array and every string in it are heap allocated and are
   // released by 'Free_SubStringList'
   char ** pp_substrs;
   // number of entries in 'pp_substrs'
   int numsubstrs;
}
SUBSTRLIST;

// current open sub string file
// (NULL while no sub string file is open)
FILE * g_p_file_substrs = NULL;

// main/current list of sub strings
// (starts out empty)
SUBSTRLIST g_mainsubstrs = { NULL, 0 };

// temporary lists of substrings
// ('Add_SubStringsFromFile' builds the new words into the first one
// and merges them with the main list into the second one)
SUBSTRLIST g_tempsubstrs1 = { NULL, 0 };
SUBSTRLIST g_tempsubstrs2 = { NULL, 0 };

// frees all memory owned by a sub string list and leaves it empty
//
// p_ssl: list to release; a NULL pointer is ignored
//
// on return 'pp_substrs' is NULL and 'numsubstrs' is 0, even when the
// pointer array was already NULL on entry (the count is never left
// stale)
static void Free_SubStringList( SUBSTRLIST * p_ssl )
{
   int i;

   if( p_ssl == NULL )
   {
      return;
   }

   if( p_ssl->pp_substrs != NULL )
   {
      for( i = 0; i < p_ssl->numsubstrs; i++ )
      {
         // free( NULL ) is a no-op, so slots whose string was
         // moved elsewhere (and set to NULL) are safe here
         free( p_ssl->pp_substrs[i] );
         p_ssl->pp_substrs[i] = NULL;
      }

      free( p_ssl->pp_substrs );
      p_ssl->pp_substrs = NULL;
   }

   // always reset the count so the list is consistently empty
   p_ssl->numsubstrs = 0;
}

// qsort-style comparator for an array of string pointers:
// each argument points at one 'char *' element of the array and the
// strings those elements point to are ordered with strcmp
static int CompareStringPtr( const void * p1, const void * p2 )
{
   const char * const * pp_lhs = (const char * const *) p1;
   const char * const * pp_rhs = (const char * const *) p2;

   return strcmp( *pp_lhs, *pp_rhs );
}

// puts the strings of a sub string list into strcmp order
// (only the pointers are rearranged, the strings stay where they are)
//
// returns 1 when the list was sorted,
// returns 0 when the list has no entries or no pointer array
static int Sort_SubStringList( SUBSTRLIST * p_ssl )
{
   // an empty list counts as a failure
   if( !p_ssl->numsubstrs || !p_ssl->pp_substrs )
   {
      return 0;
   }

   qsort( p_ssl->pp_substrs,
          p_ssl->numsubstrs,
          sizeof(char *),
          CompareStringPtr );

   return 1;
}

// given an open sub string file, creates a 'SUBSTRLIST' with its
// contents (the file should contain a list of unique substrings
// in alphabetical order, one per line)
//
// the previous contents of 'p_ssl' are freed; each line is stored
// WITHOUT its line terminator, because 'Write_SubStrings' appends a
// newline of its own and the words produced by
// 'BuildFromFile_SubStrings' carry none either
//
// on failure (seek/read error, out of memory, or a file without
// any lines):
//    - frees/empties the given list
//    - returns 0
// on success:
//    - returns 1
static int Create_SubStringList( SUBSTRLIST * p_ssl, FILE * p_file )
{
   int i;
   int numlines;
   size_t len;
   char buff[1025];

   // go to beginning of file
   if( fseek( p_file, 0L, SEEK_SET ) )
   {
      goto failbail;
   }

   // count our lines:
   numlines = 0;
   while( fgets( buff, (int) sizeof buff, p_file ) != NULL )
   {
      numlines++;
   }

   // go to beginning of file
   if( fseek( p_file, 0L, SEEK_SET ) )
   {
      goto failbail;
   }

   // drop the old contents and start from a known-empty state
   Free_SubStringList( p_ssl );
   p_ssl->pp_substrs = NULL;
   p_ssl->numsubstrs = 0;

   // a file without lines yields no list (the sort below would
   // reject an empty list anyway)
   if( numlines == 0 )
   {
      goto failbail;
   }

   // zero-filled so that the cleanup path never frees a garbage
   // pointer when reading stops half way through
   p_ssl->pp_substrs = calloc( (size_t) numlines, sizeof *p_ssl->pp_substrs );
   if( p_ssl->pp_substrs == NULL )
   {
      goto failbail;
   }
   p_ssl->numsubstrs = numlines;

   // read each substring
   for( i = 0; i < p_ssl->numsubstrs; i++ )
   {
      if( fgets( buff, (int) sizeof buff, p_file ) == NULL )
      {
         goto failbail;
      }

      // fgets keeps the line terminator: strip it
      len = strlen( buff );
      while( len > 0 && ( buff[len - 1] == '\n' || buff[len - 1] == '\r' ) )
      {
         len--;
         buff[len] = '\0';
      }

      p_ssl->pp_substrs[i] = malloc( len + 1 );
      if( p_ssl->pp_substrs[i] == NULL )
      {
         goto failbail;
      }
      memcpy( p_ssl->pp_substrs[i], buff, len + 1 );
   }

   // sort the substrings into alphabetical order
   if( Sort_SubStringList( p_ssl ) )
   {
      // success:
      return 1;
   }

failbail:
   Free_SubStringList( p_ssl );
   printf( "[Create_SubStringList] Error" );
   return 0;
}

// characters that separate words in a plain text input file
static const char g_whitespace[] = " \t\n";

// Given an open file which contains plain text, builds a sub string list
// out of every whitespace separated word in the file and sorts it into
// alphabetical order (duplicate words are NOT removed here)
// NOTE: destroys/frees the contents of 'p_ssl'
// regardless of success or failure
//
// on failure (seek/read error, out of memory, or a file without
// any words):
//    - leaves 'p_ssl' empty
//    - returns 0
// on success:
//    - returns 1
static int BuildFromFile_SubStrings( SUBSTRLIST * p_ssl, FILE * p_file )
{
   int i;
   int numwords;
   size_t len;
   char buff[1025];
   char * p_word;

   // destroy the current list and start from a known-empty state
   Free_SubStringList( p_ssl );
   p_ssl->pp_substrs = NULL;
   p_ssl->numsubstrs = 0;

   // go to beginning of file
   if( fseek( p_file, 0L, SEEK_SET ) )
   {
      goto failbail;
   }

   // count our words:
   numwords = 0;
   while( fgets( buff, (int) sizeof buff, p_file ) != NULL )
   {
      p_word = strtok( buff, g_whitespace );
      while( p_word != NULL )
      {
         numwords++;
         p_word = strtok( NULL, g_whitespace );
      }
   }

   // go to beginning of file
   if( fseek( p_file, 0L, SEEK_SET ) )
   {
      goto failbail;
   }

   // a file without words yields no list (the sort below would
   // reject an empty list anyway)
   if( numwords == 0 )
   {
      goto failbail;
   }

   // zero-filled so that the cleanup path never frees a garbage
   // pointer when reading stops half way through
   p_ssl->pp_substrs = calloc( (size_t) numwords, sizeof *p_ssl->pp_substrs );
   if( p_ssl->pp_substrs == NULL )
   {
      goto failbail;
   }
   p_ssl->numsubstrs = numwords;

   // read each substring from words in file
   i = 0;
   while( i < p_ssl->numsubstrs )
   {
      if( fgets( buff, (int) sizeof buff, p_file ) == NULL )
      {
         goto failbail;
      }

      // the bound on 'i' keeps us inside the array even if this pass
      // sees more words than the counting pass did
      p_word = strtok( buff, g_whitespace );
      while( p_word != NULL && i < p_ssl->numsubstrs )
      {
         len = strlen( p_word );
         p_ssl->pp_substrs[i] = malloc( len + 1 );
         if( p_ssl->pp_substrs[i] == NULL )
         {
            goto failbail;
         }
         memcpy( p_ssl->pp_substrs[i], p_word, len + 1 );
         i++;
         p_word = strtok( NULL, g_whitespace );
      }
   }

   // sort the substrings into alphabetical order
   if( Sort_SubStringList( p_ssl ) )
   {
      // success:
      return 1;
   }

failbail:
   Free_SubStringList( p_ssl );
   printf( "[BuildFromFile_SubStrings] Error" );
   return 0;
}

// creates a new 'SUBSTRLIST' into 'p_ssl_dst' by
// merging the sub-strings in 'p_ssl_src1' and 'p_ssl_src2'
// (both must already be in alphabetical order; on equal strings
// the one from 'p_ssl_src1' comes first)
// then it clears/frees both 'p_ssl_src1' and 'p_ssl_src2'
// so that they are empty; the strings themselves are moved,
// not copied
// NOTEs:
//    - will destroy any contents in 'p_ssl_dst' safely
//    - the three lists must be distinct objects
//    - this function cannot report failure, so running out of
//      memory prints an error and terminates the program instead
//      of dereferencing a NULL array
static void Combine_SubStringLists( SUBSTRLIST * p_ssl_dst,
                                    SUBSTRLIST * p_ssl_src1,
                                    SUBSTRLIST * p_ssl_src2    )
{
   int src_idx_1;
   int src_idx_2;
   int src_idx;
   int dst_idx;
   int total;
   char ** pp_merged;
   SUBSTRLIST * p_ssl_src;

   // free the destination list and start from a known-empty state
   Free_SubStringList( p_ssl_dst );
   p_ssl_dst->pp_substrs = NULL;
   p_ssl_dst->numsubstrs = 0;

   // number of strings the dest list will hold
   total = p_ssl_src1->numsubstrs + p_ssl_src2->numsubstrs;

   // allocate string pointers (nothing to allocate for two empty lists)
   pp_merged = NULL;
   if( total > 0 )
   {
      pp_merged = malloc( sizeof *pp_merged * (size_t) total );
      if( pp_merged == NULL )
      {
         printf( "[Combine_SubStringLists] Error, out of memory\n" );
         exit( EXIT_FAILURE );
      }
   }

   // combine lists, preserving alphabetical order
   src_idx_1 = 0;
   src_idx_2 = 0;
   dst_idx = 0;
   while(   src_idx_1 < p_ssl_src1->numsubstrs
            && src_idx_2 < p_ssl_src2->numsubstrs  )
   {
      if( strcmp( p_ssl_src1->pp_substrs[src_idx_1],
                  p_ssl_src2->pp_substrs[src_idx_2]   ) <= 0   )
      {
         // move string from source 1
         pp_merged[dst_idx] = p_ssl_src1->pp_substrs[src_idx_1];
         p_ssl_src1->pp_substrs[src_idx_1] = NULL;
         src_idx_1++;
      }
      else
      {
         // move string from source 2
         pp_merged[dst_idx] = p_ssl_src2->pp_substrs[src_idx_2];
         p_ssl_src2->pp_substrs[src_idx_2] = NULL;
         src_idx_2++;
      }
      dst_idx++;
   }

   // find which source still has strings
   p_ssl_src = p_ssl_src1;
   src_idx = src_idx_1;
   if( src_idx_2 < p_ssl_src2->numsubstrs )
   {
      p_ssl_src = p_ssl_src2;
      src_idx = src_idx_2;
   }

   // add remaining strings
   while( src_idx < p_ssl_src->numsubstrs )
   {
      pp_merged[dst_idx] = p_ssl_src->pp_substrs[src_idx];
      p_ssl_src->pp_substrs[src_idx] = NULL;
      dst_idx++;
      src_idx++;
   }

   // hand the merged array to the destination
   p_ssl_dst->pp_substrs = pp_merged;
   p_ssl_dst->numsubstrs = total;

   // empty source lists: every string has been moved out, so only
   // the pointer arrays are left to release
   p_ssl_src1->numsubstrs = 0;
   p_ssl_src2->numsubstrs = 0;
   Free_SubStringList( p_ssl_src1 );
   Free_SubStringList( p_ssl_src2 );
}

// removes duplicate sub strings in sub string list
//
// the list must be sorted so that equal strings are adjacent;
// the first string of every run of equal strings is kept, the
// others are freed
//
// on return the surviving strings are packed at the front of the
// array in their original order, every slot behind them is NULL and
// 'numsubstrs' is the number of survivors (the array itself is not
// resized)
//
// done in one pass over the array: each string is either dropped
// or slid down to the next free slot
static void RemoveDuplicates_SubStrings( SUBSTRLIST * p_ssl )
{
   int src_idx;
   int kept;
   char * p_cur;

   kept = 0;
   for( src_idx = 0; src_idx < p_ssl->numsubstrs; src_idx++ )
   {
      // take the string out of its slot; 'kept' never runs ahead of
      // 'src_idx', so the slot is either refilled below or stays NULL
      p_cur = p_ssl->pp_substrs[src_idx];
      p_ssl->pp_substrs[src_idx] = NULL;

      // tolerate empty slots
      if( p_cur == NULL )
      {
         continue;
      }

      // same as the last string we kept: duplicate, drop it
      if(   kept > 0
            && strcmp( p_ssl->pp_substrs[kept - 1], p_cur ) == 0  )
      {
         free( p_cur );
         continue;
      }

      // new string: keep it in the next free slot
      p_ssl->pp_substrs[kept] = p_cur;
      kept++;
   }

   // adjust number of strings in list
   p_ssl->numsubstrs = kept;
}

// moves one string list to another
// (also cleans memory a little: the pointer array is shrunk to the
// number of strings actually in the list)
// NOTES:
//    - this operation empties 'p_ssl_src'
//    - this operation frees and overwrites the contents of 'p_ssl_dst'
//    - moving a list onto itself does nothing
//    - cannot fail: ownership of the existing array is transferred
//      instead of allocating a new one, and if the array cannot be
//      shrunk it is simply kept at its current size
static void Move_SubStrings(  SUBSTRLIST * p_ssl_dst,
                              SUBSTRLIST * p_ssl_src  )
{
   char ** pp_moved;
   char ** pp_shrunk;
   int nummoved;

   // freeing the dest would destroy the source as well
   if( p_ssl_dst == p_ssl_src )
   {
      return;
   }

   // free/clear the dest substrings
   Free_SubStringList( p_ssl_dst );

   // take the array away from the source, leaving it empty
   pp_moved = p_ssl_src->pp_substrs;
   nummoved = p_ssl_src->numsubstrs;
   p_ssl_src->pp_substrs = NULL;
   p_ssl_src->numsubstrs = 0;

   if( pp_moved == NULL || nummoved <= 0 )
   {
      // nothing to keep: an empty list owns no array
      free( pp_moved );
      pp_moved = NULL;
      nummoved = 0;
   }
   else
   {
      // give back the unused tail of the array; on failure realloc
      // leaves the original block valid, so just keep using it
      pp_shrunk = realloc( pp_moved, sizeof *pp_moved * (size_t) nummoved );
      if( pp_shrunk != NULL )
      {
         pp_moved = pp_shrunk;
      }
   }

   // hand the strings to the dest
   p_ssl_dst->pp_substrs = pp_moved;
   p_ssl_dst->numsubstrs = nummoved;
}


// writes every string of the given list to the given file,
// one string per line, starting at the beginning of the file
// NOTE: the file is only overwritten from the start, nothing here
// truncates it
//
// on success:
//    - returns 1
// on failure (cannot seek or cannot write):
//    - the file may be partially overwritten
//    - prints an error
//    - returns 0
static int Write_SubStrings(  SUBSTRLIST * p_ssl,
                              FILE * p_file        )
{
   int idx = 0;

   // rewind; nothing is written when this fails
   int ok = ( fseek( p_file, 0L, SEEK_SET ) == 0 );

   // string first, then its newline; stop at the first failed write
   while( ok && idx < p_ssl->numsubstrs )
   {
      ok = ( fputs( p_ssl->pp_substrs[idx], p_file ) != EOF )
           && ( fputs( "\n", p_file ) != EOF );
      idx++;
   }

   if( !ok )
   {
      printf( "[Write_SubStrings] Error, cannot write to file\n" );
      return 0;
   }

   return 1;
}

// writes the current sub string list out to the current sub string
// file and closes that file
//
// the file is closed exactly once and 'g_p_file_substrs' is reset to
// NULL whether or not writing/closing worked (a stream must not be
// used again after fclose, even when fclose reports an error);
// when no file is open nothing is closed at all
// the current sub string list itself is left untouched
//
// returns 1 on success, 0 on failure (no file open, write error or
// close error; an error message is printed)
int WriteAndClose_SubStringFile( void )
{
   int wrote;
   int closed;

   if( g_p_file_substrs == NULL )
   {
      printf( "[WriteAndClose_SubStringFile] Error, never opened \n" );
      return 0;
   }

   wrote = Write_SubStrings( &g_mainsubstrs, g_p_file_substrs );
   if( !wrote )
   {
      printf( "[WriteAndClose_SubStringFile] Error, cannot write to file\n" );
   }

   // close the file; fclose also flushes, so a failure here means
   // the data may not have reached the file
   closed = ( fclose( g_p_file_substrs ) == 0 );
   g_p_file_substrs = NULL;
   if( !closed )
   {
      printf( "[WriteAndClose_SubStringFile] Error closing file\n" );
   }

   return ( wrote && closed ) ? 1 : 0;
}

// closes the current open sub string file without writing
// the current sub string list to it
//
// 'g_p_file_substrs' is reset to NULL even when fclose reports an
// error, because the stream must not be used (or closed) again
// after fclose has been called on it
// the current sub string list itself is left untouched
//
// returns 1 on success, 0 on failure (no file open or close error;
// an error message is printed)
int Close_SubStringFile( void )
{
   int closed;

   if( g_p_file_substrs == NULL )
   {
      printf( "[Close_SubStringFile] Error, never opened \n" );
      return 0;
   }

   // close the file
   closed = ( fclose( g_p_file_substrs ) == 0 );
   g_p_file_substrs = NULL;

   if( !closed )
   {
      printf( "[Close_SubStringFile] Error closing file\n" );
      return 0;
   }

   // success
   return 1;
}

// disabled: the three stubs below are excluded from the build by
// '#if 0' and have empty bodies
#if 0

void OpenStringFile( const char * p_path )
{
}

void CreateStringFile( const char * p_path )
{
}

void AddStringsFromFile( const char * p_path )
{
}

#endif

// given the path of a plain text file, extracts its words and merges
// them into the current sub string list ('g_mainsubstrs'), dropping
// any duplicates
//
// 'g_tempsubstrs1' and 'g_tempsubstrs2' are used as scratch lists
//
// returns 1 on success, 0 on failure (file cannot be opened or no
// word list could be built from it; an error message is printed)
int Add_SubStringsFromFile( const char * p_path )
{
   FILE * p_file;
   int built;

   p_file = fopen( p_path, "rt" );
   if( p_file == NULL )
   {
      printf( "[Add_SubStringsFromFile] Error, cannot open file: %s.\n", p_path );
      return 0;
   }

   // words of the file -> 'g_tempsubstrs1'
   built = BuildFromFile_SubStrings( &g_tempsubstrs1, p_file );
   if( !built )
   {
      fclose( p_file );
      printf( "[Add_SubStringsFromFile] Error" );
      return 0;
   }

   // new words + current list -> 'g_tempsubstrs2'
   // (empties/frees both 'g_tempsubstrs1' and 'g_mainsubstrs')
   Combine_SubStringLists( &g_tempsubstrs2, &g_tempsubstrs1, &g_mainsubstrs );

   // strip duplicates from the merged list
   RemoveDuplicates_SubStrings( &g_tempsubstrs2 );

   // merged list becomes the main list
   // (empties/frees 'g_tempsubstrs2')
   Move_SubStrings( &g_mainsubstrs, &g_tempsubstrs2 );

   fclose( p_file );
   return 1;
}

// given a path, opens an existing sub string file
// (should contain an alphabetically sorted list of unique substrings)
// reads in the contents as the current sub string list and sets
// the file as the current open sub string file
//
// when the contents cannot be loaded the file is closed again and
// 'g_p_file_substrs' is reset to NULL, so that no later call touches
// the closed stream
// NOTE: 'g_p_file_substrs' is overwritten without closing a file that
// may still be open from an earlier call
//
// returns 1 on success, 0 on failure (an error message is printed)
int Open_SubStringFile( const char * p_path )
{
   g_p_file_substrs = fopen( p_path, "r+t" );
   if( g_p_file_substrs == NULL )
   {
      printf( "[OpenSubStringFile] Error, cannot open file: %s.\n", p_path );
      return 0;
   }

   if( Create_SubStringList( &g_mainsubstrs, g_p_file_substrs ) )
   {
      // success
      return 1;
   }

   // loading failed: close the file and forget the stream
   fclose( g_p_file_substrs );
   g_p_file_substrs = NULL;
   printf( "[OpenSubStringFile] Error" );
   return 0;
}

// given a path, creates (or truncates, mode "w+t") a sub string file
// and makes it the current open sub string file; on success the
// current sub string list is emptied
//
// returns 1 on success, 0 on failure (file cannot be created;
// an error message is printed)
int Create_SubStringFile( const char * p_path )
{
   g_p_file_substrs = fopen( p_path, "w+t" );

   if( g_p_file_substrs == NULL )
   {
      printf( "[Create_SubStringFile] Error, cannot create file: %s.\n", p_path );
      return 0;
   }

   // a fresh file starts with an empty list
   Free_SubStringList( &g_mainsubstrs );
   return 1;
}

// Resets *everything*, frees all memory in use, closes all open files
void MakeKSP_ResetAll()
{
   Free_SubStringList( &g_mainsubstrs );
   Free_SubStringList( &g_tempsubstrs1 );
   Free_SubStringList( &g_tempsubstrs2 );

   if( g_p_file_substrs )
   {
      fclose( g_p_file_substrs );
      g_p_file_substrs = NULL;
   }
}