//********************************************************************************
//* BubbleSortParse.cpp                                                          *
//* Author     : Mahlon R. Smith                                                 *
//*              Copyright (c) 2025 Mahlon R. Smith, The Software Samurai        *
//*                  GNU GPL copyright notice located in FileMangler.hpp         *
//* Date       : 19-Feb-2025                                                     *
//* Version    : (see appVersion in BubbleSort.hpp)                              *
//*                                                                              *
//* Description:                                                                 *
//* Support methods for the BubbleSort application.                              *
//*                                                                              *
//*                                                                              *
//********************************************************************************

//*****************
//* Include Files *
//*****************
#include "BubbleSort.hpp"
#include <filesystem>            // filesystem (C++17)
#include <fstream>               // file I/O
#include <sys/stat.h>            // stat64 definitions

//***************
//* Definitions *
//***************
//* Buffer capacities, in bytes, for GNU/Linux path and filename   *
//* strings. The values correspond to the PATH_MAX and NAME_MAX    *
//* definitions in <linux/limits.h>.                               *
constexpr int32_t MAX_PATH  = 4096 ;   //* capacity of a buffer holding a filespec
constexpr int32_t MAX_FNAME = 256 ;    //* capacity of a buffer holding a filename

//* C++-style alias for the C-style 'struct stat64' type,  *
//* so declarations do not need the 'struct' keyword.      *
using FileStats = struct stat64 ;

//**************
//* Local data *
//**************

//********************
//* Local prototypes *
//********************


//********************************************************************************
//* GetCommlineArgs:                                                             *
//* Parse command-line arguments specified by user and set caller's flags        *
//* accordingly.                                                                 *
//*                                                                              *
//* Input  : argc    : number of arguments                                       *
//*          argv    : text of each argument (argv[0] is the application name)   *
//*          argenv  : array of environment variables (currently unused)         *
//*                                                                              *
//* Data members updated (members of the class, not parameters):                 *
//*          low2high: receives sort direction                                   *
//*                    'true'  : sort low-to-high (default)                      *
//*                    'false' : sort high-to-low                                *
//*          stype   : receives source data type                                 *
//*                    member of enum srcType (default: tpSptr)                  *
//*          casesen : receives case-sensitivity option                          *
//*                    'true'  : case-sensitive sort (default)                   *
//*                    'false' : case-insensitive sort                           *
//*          version : request for application version                           *
//*                    if specified, overrides all other flags                   *
//*                    'true'  : report application version                      *
//*                    'false' : no version report (default)                     *
//*                                                                              *
//* Returns: 'true'  if valid arguments                                          *
//*          'false' if a) invalid argument(s),                                  *
//*                     b) no arguments                                          *
//*                     c) request for Help or Version                           *
//********************************************************************************
//* Notes:                                                                       *
//* -- Note the fancy parsing that allows either a single dash or a double dash  *
//*    for each switch token. Ex: '-dir=a' vs. '--dir=a'                         *
//*    will be recognized as valid. (This would not be acceptable for most       *
//*    applications, but for this demo, it's fun.)                               *
//*                                                                              *
//* -- The "--shuffle=n" argument is available for development and debugging.    *
//*    This option reorders the source text data BEFORE the sort is executed.    *
//*    The intent of this option is to exercise each possible comparison in      *
//*    concert with both the type=t and type=T options.                          *
//*    The option takes a numeric argument which is a seed value for the shuffle.*
//*    Range: 1 <= value < srcRECORDS                                            *
//*    (This option is not listed in the command-line help.)                     *
//*                                                                              *
//* -- The "--compcoll" argument is available for development and debugging.     *
//*    This option uses the gString-class "compcoll" function to perform the     *
//*    string comparisons.                                                       *
//*    The C-library 'wcscoll' function seems to have some bugs. It is not       *
//*    clear what the returned value actually means.                             *
//*    (This option is not listed in the command-line help.)                     *
//*                                                                              *
//* -- The "--sand" argument is available for development and debugging.         *
//*    This option invokes the application sandbox for ad-hoc testing.           *
//*    (This option is not listed in the command-line help.)                     *
//*                                                                              *
//********************************************************************************

bool BubbSort::GetCommlineArgs ( int argc, char* argv[], char* argenv[] )
{
   #define DEBUG_GCLA (0)           // for debugging only
   #if DEBUG_GCLA != 0
   gString gsdbg ;
   #endif   // DEBUG_GCLA

   gString gs ;                     // text analysis
   int  indx,                       // for parsing command-line arguments
        shuf = ZERO ;               // shuffle seed value
   char argChar, argCharL ;         // for parsing command-line arguments
   bool status = false ;            // return value

   if ( argc > 1 )
   {
      for ( short i = 1 ; i < argc ; ++i )
      {
         //* Isolate the character following the ']' *
         gs = argv[i] ;
         if ( (indx = gs.after( L'=' )) > ZERO )
         {
            argChar  = gs.gstr()[indx] ;     // as received
            argCharL = tolower( argChar ) ;  // lower-case
         }

         #if DEBUG_GCLA != 0
         gsdbg.compose( "arg %hd) %S", &i, gs.gstr() ) ;
         wcout << gsdbg << endl ;
         #endif   // DEBUG_GCLA

         if ( (gs.gstr()[0] == L'-') &&
              (((indx = gs.find( L"-version" )) == 1) || (indx == ZERO)) )
         {
            this->version = true ;
            status = false ;
            break ;
         }

         //* Sort Direction *
         else if ( (gs.gstr()[0] == L'-') &&
                   (((indx = gs.find( L"-dir=" )) == 1) || (indx == ZERO)) )
         {
            if ( argCharL == L'a' )
            { this->low2high = true ; status = true ; }
            else if ( argCharL == L'd' )
            { this->low2high = false ; status = true ; }
            else
            { status = false ; break ; }
         }

         //* Source Data Type *
         else if ( (gs.gstr()[0] == L'-') &&
                   (((indx = gs.find( L"-type=" )) == 1) || (indx == ZERO)) )
         {
            status = true ;               // hope for the best
            switch ( argChar )
            {
               case L't':  this->stype = tpSptr ;  break ;
               case L'T':  this->stype = tpStxt ;  break ;
               case L'a':  this->stype = tpAptr ;  break ;
               case L'A':  this->stype = tpAtxt ;  break ;
               case L'r':  this->stype = tpRptr ;  break ;
               case L'R':  this->stype = tpRrec ;  break ;
               case L'n':
               case L'N':  this->stype = tpInt ;   break ;
               default:    status = false ;        break ;
            } ;
            if ( ! status )
               break ;
         }

         //* Case Sensitivity *
         else if ( (gs.gstr()[0] == L'-') &&
                   (((indx = gs.find( L"-case=" )) == 1) || (indx == ZERO)) )
         {
            if ( argCharL == L'y' )
            { this->casesen = true ; status = true ; }
            else if ( argCharL == L'n' )
            { this->casesen = false ; status = true ; }
            else
            { status = false ; break ; }
         }

         //* Source Filename *
         else if ( (gs.gstr()[0] == L'-') &&
                   (((indx = gs.find( L"-src=" )) == 1) || (indx == ZERO)) )
         {
            indx = gs.after( L'=' ) ;
            gs.shiftChars( -(indx) ) ;    // discard the switch substring
            if ( (gs.gschars() > 1) && (this->VerifySourceFile ( gs )) )
            {
               this->stype = tpFile ;
               status = true  ;
            }
            else
            { status = false ; break ; }
         }

         //* Shuffle the Source (debugging only) - Applies only to text source. *
         else if ( (gs.gstr()[0] == L'-') &&
                   (((indx = gs.find( L"-shuffle=" )) == 1) || (indx == ZERO)) )
         {
            if ( (this->stype == tpSptr) || (this->stype == tpStxt) ||
                 (this->stype == tpAptr) || (this->stype == tpAtxt) || 
                 (this->stype == tpColl) )
            {
               indx = gs.find( L'=' ) ;
               gs.shiftChars( -(indx) ) ;
               if ( (gs.gscanf( "=%d", &shuf )) != 1 )
               { shuf = ZERO ; status = false ; break ; }
            }
         }

         //* Sort via 'compcoll' (debugging only) *
         else if ( (gs.gstr()[0] == L'-') &&
                   (((indx = gs.find( L"-compcoll" )) == 1) || (indx = ZERO)) )
         {
            this->stype = tpColl ;
            status = true ;
         }

         //* Sandbox (debugging only) *
         else if ( (gs.gstr()[0] == L'-') &&
                   (((indx = gs.find( L"-sand" )) == 1) || (indx = ZERO)) )
         {
            this->sandbox () ;
            this->version = true ;
            status = false ;
            break ;
         }

         else if ( (gs.find( L"-h" )) == ZERO )       // short-form help
         { status = false ; break ; }
         else  // unknown switch, display help
         { status = false ; break ; }
      }

      //* If data shuffle was specified *
      if ( shuf != ZERO )
         this->shuffle ( shuf ) ;
   }

   return status ;

   #undef DEBUG_GCLA
}  //* End GetCommlineArgs() *

//********************************************************************************
//* VerifySourceFile:                                                            *
//* File must be:                                                                *
//*  1) a regular file                                                           *
//*  2) read-accessible by owner                                                 *
//*     Note: It is assumed that user==owner, if not there could be access error.*
//*  3) file size > ZERO (file must contain data).                               *
//*  4) A plain-text file is provisionally assumed. i.e. records are terminated  *
//*     by newlines (or cr/lf pairs). A non-zero record count will verify.       *
//*  5) Maximum record length is 1Kbyte. If a longer record is encountered,      *
//*     record parsing may fail.                                                 *
//*  6) The validation is therefore not robust, but remember:                    *
//*     "It's Only a Northern Song", Lennon and McCartney.                       *
//*                                                                              *
//* Input  : fname : filename of source file                                     *
//*                                                                              *
//* Returns: 'true'  if file found and contains valid source data, else 'false'  *
//********************************************************************************
//* Programmer's Note: We considered using the C++17 std::filesystem construct   *
//* for file stats; however, after studying it we found it to be outrageously    *
//* inefficient and inexcusably opaque. Bad C++; this is why Boost should        *
//* generally be used only within a classroom setting. Using Boost, the student  *
//* learns essentially nothing, echoing the bad old days of Visual Basic.        *
//********************************************************************************

bool BubbSort::VerifySourceFile ( const gString& fname )
{
   char fullPath[MAX_PATH] ;              // full path/filename
   FileStats   fstats ;                   // file "stat" data
   gString gs ;                           // text formatting
   int  srcBytes,                         // size of source file in bytes
        recCount = ZERO ;                 // number of source records
   bool done = false,                     // loop control
        status = false ;                  // return value

   //* Convert the provided filename string to a full filespec. *
   //* Symbolic links _are_ followed.                           *
   if ( (realpath ( fname.ustr(), fullPath )) != NULL )
   {
      //* If source file exists.                                   *
      //* The integer result of stat64() is tested directly and is *
      //* not stored in 'status': a failure code is non-zero and   *
      //* would otherwise be reported to the caller as 'true'.     *
      if ( (stat64 ( fullPath, &fstats )) == ZERO )
      {
         //* File must be:                            *
         //*  1) a regular file (this is a macro)     *
         //*  2) read-accessible by owner (bit mask)  *
         //*  3) file size > ZERO                     *
         //*  plain-text file provisionally assumed   *
         if ( (S_ISREG(fstats.st_mode)) && bool((fstats.st_mode & S_IRUSR)) && 
              ((srcBytes = fstats.st_size) > ZERO) )
         {
            char ibuff[gsALLOCMIN] ;      // input buffer
            int trgindx = ZERO ;          // index of next free element of extData

            //* Allocate storage space.                                  *
            //* Each record is stored as its text, a newline and a null  *
            //* terminator, so a record can occupy one element more than *
            //* its source line has bytes, or two more when the last     *
            //* line has no newline. Twice the source size plus two      *
            //* covers the worst case (a file of one-byte lines).        *
            //* (Space for pointers is allocated below.)                 *
            //* NOTE(review): 'srcBytes' is an int, so this computation  *
            //* would overflow for a source file approaching 1 GiB.      *
            if ( this->extData != NULL )  // prevent memory leaks
               delete [] this->extData ;
            this->extSize = (srcBytes * 2) + 2 ;
            this->extData = new wchar_t[this->extSize] ;

            ifstream ifs ( fullPath, ifstream::in ) ;
            if ( ifs.is_open() )
            {
               while ( ! done )
               {
                  ifs.getline ( ibuff, gsALLOCMIN, NEWLINE ) ;
                  if ( (ifs.good()) || (ifs.gcount() > ZERO) )
                  {
                     //* Remove any carriage returns (Windoze-style CR/LF *
                     //* sequence). This is done in the input buffer      *
                     //* itself because the record is composed from that  *
                     //* buffer.                                          *
                     int keep = ZERO ;
                     for ( int scan = ZERO ; ibuff[scan] != '\0' ; ++scan )
                     {
                        if ( ibuff[scan] != '\r' )
                           ibuff[keep++] = ibuff[scan] ;
                     }
                     ibuff[keep] = '\0' ;

                     gs.compose( "%s\n", ibuff ) ; // restore the newline

                     //* Never write beyond the allocated storage *
                     if ( (trgindx + gs.gschars()) > this->extSize )
                        done = true ;
                     else
                     {
                        gs.copy( &this->extData[trgindx], gsALLOCMIN ) ;
                        trgindx += gs.gschars() ;
                        ++recCount ;
                     }
                  }
                  else
                     done = true ;
               }
               ifs.close() ;              // close source file
            }
            if ( recCount > ZERO )        // if successful record capture
            {
               //* Save the source file path and record count.*
               gs = fullPath ;
               gs.copy( this->srcFile, gsALLOCMIN ) ;
               this->records = recCount ;

               //* Allocate storage space. Pointer space is two(2)    *
               //* larger than record count to prevent buffer overrun.*
               if ( this->extDPtr != NULL )  // prevent memory leaks
                  delete [] this->extDPtr ;
               this->extDPtr = new wchar_t*[recCount + 2] ;

               //* Initialize the record pointers.                    *
               //* Each stored record ends with a newline followed by *
               //* a null terminator; the next record begins on the   *
               //* element after that terminator.                     *
               int pi = ZERO ;            // pointer index
               this->extDPtr[pi++] = &this->extData[ZERO] ; // pointer to first record
               for ( int ri = ZERO ; (ri < this->extSize) && (pi < this->records) ; ++ri )
               {
                  if ( this->extData[ri] == NEWLINE )
                  {
                     ++ri ;               // reference the null terminator
                     this->extDPtr[pi++] = &this->extData[ri + 1] ;
                  }
               }
               status = true ;
            }
            else
            {
               //* No records captured: release the data storage and  *
               //* clear the pointer so that it cannot be released a   *
               //* second time.                                        *
               if ( this->extData != NULL )
               {
                  delete [] this->extData ;
                  this->extData = NULL ;
                  this->extSize = ZERO ;
                  this->records = ZERO ;
               }
            }
         }
      }
   }
   return status ;

}  //* End VerifySourceFile() *

//********************************************************************************
//* sandbox: Debugging function: ad-hoc tests                                    *
//*                                                                              *
//*                                                                              *
//* Input  : none                                                                *
//*                                                                              *
//* Returns: nothing                                                             *
//********************************************************************************

void BubbSort::sandbox ( void )
{
   gString gs( "Hello World!" ) ;

   wcout << gs << endl ;

}  //* End sandbox() *

//********************************************************************************
//* Version: Display application version number and copyright notice.            *
//*                                                                              *
//* Input  : none                                                                *
//*                                                                              *
//* Returns: nothing                                                             *
//********************************************************************************

void BubbSort::Version ( void )
{
   gString gsOut ;
   const char* gsversion = gsOut.Get_gString_Version() ;

   //* Hold the locale name in a local object for as long as its text  *
   //* is needed. Taking c_str() directly from the value returned by   *
   //* name() leaves a dangling pointer if name() returns its string   *
   //* by value (as std::locale::name() does).                         *
   //* NOTE(review): the type of 'ioLocale' is declared elsewhere;     *
   //* it is presumably a pointer to std::locale -- confirm.           *
   const auto localeName = this->ioLocale->name() ;

   //* Report the gString-class version and the name of the locale *
   gsOut.compose( "gString-class version:%s   locale:%s\n", 
                  gsversion, localeName.c_str() ) ;

   //* Pad with dashes to form a separator line, then *
   //* append the free-software notice.               *
   gsOut.padCols( (((gsOut.gscols()) * 2) + 25), L'-' ) ;
   gsOut.append( L"\n%s", freeSoftware ) ;
   wcout << gsOut << endl ;

}  //* End Version() *

//********************************************************************************
//* Help: Display application help.                                              *
//*                                                                              *
//* Input  : none                                                                *
//*                                                                              *
//* Returns: nothing                                                             *
//********************************************************************************

void BubbSort::Help ( void )
{
   //* Command-line help text, written to 'wcout' as a single block. *
   //* The usage line shows a single '=' between a switch and its    *
   //* option, which is the form the command-line parser accepts     *
   //* (the option is the character following the first '=').        *
   gString gsOut( 
      "Demonstration of a bidirectional bubble sort algorithm.\n"
      "By default, sample data are used to perform the sort; however, external \n"
      "source data may be specified. Sorted data are written to 'stdout'.  \n"
      "----------------------------------------------------------------------------\n"
      "Usage: bubsort --dir=[a | d] [OPTIONAL ARGS]\n"
      " '--dir' : sort direction: 'a':ascending, 'd':descending\n"
      " '--type': type of source data (optional)\n"
      "           't':text pointers (default), 'T':text data direct\n"
      "           'a':alt. text pointers,      'A':alt. text data direct\n"
      "           'r':complex record pointers, 'R':complex records direct\n"
      "           'n':numeric (integer) data\n"
      " '--case': specify case sensitivity for sorting (optional)\n"
      "           'y' yes, case-sensitive sort (default)\n"
      "           'n' no,  case-insensitive sort\n"
      " '--src' : source-data filename (optional)\n"
      "           a plain-text file containing the data to be sorted\n"
      " '--help': (or '-h') display command-line help\n"
      " '--version': display version number and copyright message\n"
      ) ;
   wcout << gsOut << endl ;

}  //* End Help() *

//********************************************************************************
//* x:                                                                           *
//*                                                                              *
//*                                                                              *
//* Input  :                                                                     *
//*                                                                              *
//* Returns:                                                                     *
//********************************************************************************

