//********************************************************************************
//* File       : idpp.hpp                                                        *
//* Author     : Mahlon R. Smith                                                 *
//*              Copyright (c) 2014-2025 Mahlon R. Smith, The Software Samurai   *
//*                 GNU GPL copyright notice below                               *
//* Date       : 02-Aug-2025                                                     *
//* Version    : (see AppVersion string)                                         *
//*                                                                              *
//* Description: Definitions and data for Infodoc Post-processor (idpp),         *
//* an HTML post-processing utility for use with HTML documents generated from   *
//* Texinfo source.                                                              *
//*                                                                              *
//*                                                                              *
//********************************************************************************
//* Copyright Notice:                                                            *
//* This program is free software: you can redistribute it and/or modify it      *
//* under the terms of the GNU General Public License as published by the Free   *
//* Software Foundation, either version 3 of the License, or (at your option)    *
//* any later version.                                                           *
//*                                                                              *
//* This program is distributed in the hope that it will be useful, but          *
//* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY   *
//* or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License     *
//* for more details.                                                            *
//*                                                                              *
//* You should have received a copy of the GNU General Public License along      *
//* with this program.  If not, see <http://www.gnu.org/licenses/>.              *
//*                                                                              *
//*         Full text of the GPL License may be found in the TexInfo             *
//*         documentation for this program under 'Copyright Notice'.             *
//********************************************************************************

//****************
//* Header Files *
//****************
#include <iostream>           //* Standard I/O definitions
#include <sstream>            //* Definitions for iostringstream classes
#include <iomanip>            //* Output formatting specifications
#include <cstring>            //* String manipulation from C
#include <unistd.h>           //* UNIX system interface
#include <cctype>             //* Character testing
#include <cmath>              //* Math library
#include <cstdlib>            //* Misc. functionality
#include <ctime>              //* Access to system time
#include <thread>             //* std::thread definition (and POSIX thread defs)
#include <condition_variable> //* Additional info for threads
#include <chrono>             //* Timers for temporarily putting threads to sleep
#include <mutex>              //* For access locks on critical data
#include <locale>             //* Locale handling for character encoding, etc. 
#include <langinfo.h>         //* Support for world languages identification/conversion

#include "idpp_file.hpp"      //* File I/O and other system functions
#include "gString.hpp"        //* gString class definition


using namespace std ;         //* Scope qualifier: std names are used unqualified throughout (e.g. 'ifstream' below)


//***************
//* Definitions *
//***************
#define ZERO         (0)
#define SPACE        (' ')
#define DASH         ('-')
#define PERIOD       ('.')
#define NEWLINE      ('\n')
#define NULLCHAR     ('\0')

//* Count the number of invalid user responses and  *
//* Report the results after processing is complete.*
#define DEBUG_USER_INPUT (0)     // for debugging only

//* Compensate for texi2any bug which inappropriately *
//* inserts an HTML &para; entity into heading tags.  *
//* This will go away when texi2any bug is resolved.  *
#define PARA_FIX (1)

//*****************
//* Constant Data *
//*****************
//* Status codes returned by most methods of the application. *
//* The values mirror the ncurses library's status codes.     *
constexpr short OK  = 0 ;        // success
constexpr short ERR = -1 ;       // failure


//*****************************************
//* Holds captured command-line arguments *
//*****************************************
//* Carries the parameters received by the program entry point, plus *
//* the few option flags that must be known before the application   *
//* object processes the remaining arguments.                        *
class commArgs
{
   public:

   //* Capture the caller's argument count, argument list and  *
   //* environment list, with all option flags cleared.        *
   commArgs ( int argc, char** argv, char** argenv )
      : argCount(argc), argList(argv), envList(argenv),
        helpOption(ZERO), preFlag(false), verFlag(false)
   { }

   //* Clear the option flags. The captured argument and *
   //* environment data are not modified.                *
   void reset ( void )
   {
      this->verFlag    = false ;
      this->preFlag    = false ;
      this->helpOption = ZERO ;
   }

   short    argCount ;     // number of command-line arguments
   char**   argList ;      // list of argument strings
   char**   envList ;      // pointer to terminal environment
   short    helpOption ;   // ZERO if no help request, else command-line help
                           // (overrides all except verFlag)
   bool     preFlag ;      // 'true' if pre-scan is to be performed
   bool     verFlag ;      // 'true' if application version request (overrides all others)
} ;

//* FIFO push-back buffer for source input stream *

//* One slot of the push-back queue: a fixed-size, null-terminated *
//* wide-character string. A slot whose first character is         *
//* NULLCHAR is treated as empty.                                  *
class fifoBuff
{
   public:
   wchar_t b[gsALLOCDFLT] ;
} ;

//* Fixed-capacity, first-in/first-out queue of text strings.      *
//* Occupied slots are always contiguous beginning at slot zero:   *
//* push() fills the first empty slot, and pull() removes slot     *
//* zero and shifts the remaining slots toward the front.          *
class fifo
{
   public:
   ~fifo ( void )             // destructor
   {
      //* NOTE(review): release of the slot array was disabled by the  *
      //* original author and is intentionally left disabled, so the   *
      //* allocation is not returned when the object is destroyed.     *
      //* The class has compiler-generated (shallow) copy operations,  *
      //* so re-enabling the delete is only safe if no copies of a     *
      //* 'fifo' object are ever made -- confirm before enabling.      *
      #if 0    // This causes a runtime error when application exits.
      if ( this->fbuff != NULL )
      {
         delete [] this->fbuff ;
         this->fbuff = NULL ;
      }
      #endif   // runtime error
   }

   //* Initialization constructor.                                 *
   //* c : number of slots to allocate, range 1 through 64.        *
   //*     An out-of-range value yields a single slot.             *
   fifo ( short c )           // initialization constructor
   {
      this->fbCount = ((c >= 1) && (c <= 64)) ? c : 1 ;
      this->fbuff = new fifoBuff[this->fbCount] ;
      this->reset() ;
   }

   //* Default constructor: a queue with exactly one slot.         *
   fifo ( void )              // default constructor
   {
      //* The slot count must be set before reset() is called;     *
      //* otherwise reset() would iterate over an indeterminate    *
      //* number of slots.                                         *
      this->fbCount = 1 ;
      this->fbuff = new fifoBuff[this->fbCount] ;
      this->reset() ;
   }

   //* Discard all queued data by marking every slot as empty.     *
   void reset ( void )
   {
      for ( short i = ZERO ; i < this->fbCount ; ++i )
      {
         this->fbuff[i].b[ZERO] = NULLCHAR ;
      }
   }

   //* Append a copy of the caller's text to the end of the queue. *
   //* Returns: 'true'  if the data were stored                    *
   //*          'false' if all slots are occupied (data not saved) *
   bool push ( const gString& gs )
   {
      bool status = false ;
      for ( short i = ZERO ; i < this->fbCount ; ++i )
      {
         if ( this->fbuff[i].b[ZERO] == NULLCHAR )    // first empty slot
         {
            gs.copy( this->fbuff[i].b, gsALLOCDFLT ) ;
            status = true ;
            break ;
         }
      }
      return status ;
   }

   //* Remove the oldest entry from the queue.                     *
   //* gs : receives the text of the oldest entry                  *
   //*      (unchanged if the queue is empty)                      *
   //* Returns: 'true'  if an entry was returned                   *
   //*          'false' if the queue is empty                      *
   bool pull ( gString& gs )
   {
      if ( this->fbuff[ZERO].b[ZERO] == NULLCHAR )    // queue is empty
         return false ;

      gs = this->fbuff[ZERO].b ;    // copy data to caller's buffer

      //* Shift the remaining entries toward the front of the queue. *
      const short last = short(this->fbCount - 1) ;
      gString gstmp ;
      for ( short trg = ZERO ; trg < last ; ++trg )
      {
         gstmp = this->fbuff[trg + 1].b ;
         gstmp.copy( this->fbuff[trg].b, gsALLOCDFLT ) ;
      }

      //* The last slot is vacant after the shift. For a one-slot    *
      //* queue this is also the slot just returned; without this    *
      //* the same entry would be returned again on the next call.   *
      this->fbuff[last].b[ZERO] = NULLCHAR ;
      return true ;
   }

   private:
   fifoBuff* fbuff ;          // dynamically-allocated array of 'fbCount' slots
   short fbCount ;            // number of slots in the 'fbuff' array
} ;

//***********************************************
//* Constant Data:                              *
//***********************************************

//* Application version number and copyright year range, as shown *
//* in the application title. Update both for every release!      *
constexpr const wchar_t* AppVersion = L"0.0.16" ;
constexpr const wchar_t* AppYears   = L"2014-2025" ;

//* Wide-character constants for characters tested during parsing *
constexpr wchar_t CR       = L'\r' ;   // carriage-return character (0x000D)
constexpr wchar_t STAR     = L'*' ;    // asterisk
constexpr wchar_t SLASH    = L'/' ;    // forward slash
constexpr wchar_t BSLASH   = L'\\' ;   // back slash
constexpr wchar_t SGLQUOTE = L'\'' ;   // single quote
constexpr wchar_t DBLQUOTE = L'"' ;    // double quotation mark
constexpr wchar_t HASH     = L'#' ;    // hash mark, pound/number sign

//* Application title: format template and its fixed text components *
constexpr const wchar_t* titleTemplate = L"%S(idpp) v:%S Copyright(c) %S %S\n" ;
//const wchar_t AppTitleTemplate[] = L"\n%S v:%S (c)%S %S\n%S" ;
constexpr const wchar_t* AppTitle1 = L"Infodoc Post-processor" ;   // application name
constexpr const wchar_t* AppTitle2 = L"The Software Samurai" ;     // copyright holder
//const wchar_t* const AppTitle3 = // (this string == minTERMCOLS)
//L"------------------------------------------------------------------------------" ;

//* Capacity limits for the application's fixed-size arrays *
constexpr short sfMAX         = 24 ;   // max number of source files to process
constexpr short emMAX         = 24 ;   // max number of error messages
constexpr short CSS_VER_LEN   = 32 ;   // length of CSS definition file version string
constexpr short TARG_TEXT_LEN = 64 ;   // size of utText[] member - Deprecated

//* Session configuration options: selects how a given category *
//* of formatting is to be handled during processing.           *
enum Cfg : short
{
   cfgAuto = 0,   // automatically apply formatting
   cfgNone = 1,   // do not apply formatting
   cfgSpec = 2,   // ask user to specify formatting option
} ;

//* Tokens to identify the various block constructs.                 *
//* Each block family has a standard, a small and a large variant,   *
//* in that order.                                                   *
//* Programmer's Note: Keep synchronized with the "blockName" array. *
enum blkType : short
{
   stdI = 0, smaI, lrgI,         // "indentedblock": standard, small, large
   stdQ,     smaQ, lrgQ,         // "quotation" block: standard, small, large
   stdF,     smaF, lrgF,         // "format" block: standard, small, large
   stdD,     smaD, lrgD,         // "display" block: standard, small, large
   stdE,     smaE, lrgE,         // "example" block: standard, small, large
   stdL,     smaL, lrgL,         // "lisp" block: standard, small, large
   stdV,     smaV, lrgV,         // "verbatim" block: standard, small, large
   btNone                        // unknown block type
} ;

//********************************
//* Application class definition *
//********************************
//* The public interface consists of the constructor, which receives the     *
//* captured command-line arguments, the destructor, and ProcStatus() which  *
//* reports the overall result. All other methods and data are private.      *
//* Method definitions are not part of this header.                          *
class Idpp
{
   public:
   virtual ~Idpp () ;               // destructor
   Idpp ( commArgs& ca ) ;          // constructor
   short ProcStatus ( void )        // returns 'OK' if all files processed successfully
   { return this->procStatus ; }    // (otherwise 'ERR' -- see 'procStatus' member)


   private:

   //*** Methods for start-up, memory management ***
   //***-----------------------------------------***
   //* Interpret user's command options and gather specified source-filenames. *
   bool  GetCommandLineArgs ( commArgs& ca ) ;
   bool  gclaGetFilename ( const gString& cmd, gString& fname ) ;
   //* Pre-scan the source and if already processed *
   //* ask user whether to continue.                *
   bool  Prescan ( short srcIndex, bool prompt = false ) ;
   //* Format application title, version, date, author *
   void ComposeTitle ( gString& gs, bool tailNL = false, bool headNL = false ) ;
   //* Display the application's title, version and copyright info.            *
   void  DisplayAppVersion ( void ) ;
   //* Display command-line options *
   void  DisplayCommandLineHelp ( bool helpless ) ;
   //* Process specified HTML documents.                                       *
   short ProcessSource ( void ) ;
   //* Write text to the display                                               *
   //* (overloads for gString, narrow-string and wide-string text)             *
   void  textOut ( const gString& tOut, bool newln = true ) ;
   void  textOut ( const char*    tOut, bool newln = true ) ;
   void  textOut ( const wchar_t* tOut, bool newln = true ) ;
   //* Get user response to interactive-mode prompts.                          *
   void  userResponse ( gString& gsIn ) ;
   //* If user provides an invalid response to the prompt, re-prompt.          *
   void  invalidResponse ( void ) ;
   //* Sleep (pause) execution for the specified time period                   *
   void nsleep ( uint16_t tenths, uint16_t millisec = ZERO, time_t nanosec = ZERO ) const ;
   //* Debugging support: Decrement the 'skip' counter.                        *
   void  skipCounter ( void ) ;

   //*** Methods to read and modify files        ***
   //***-----------------------------------------***
   //* Convert raw HTML to CSS-styled HTML.                                    *
   short ppfProcessSrcHTML ( const gString& src, const gString& trg ) ;
   //* Handlers for individual document sections. NOTE(review): the names      *
   //* suggest table of contents, index, tables, <head> section and meta-data; *
   //* confirm against the method definitions.                                 *
   short ppfProcTOC ( const gString& gsBegin ) ;
   short ppfProcINDEX ( const gString& gsBegin ) ;
   short ppfProcTABLE ( const gString& gsBegin ) ;
   short ppfProcHEAD ( void ) ;
   short ppfProcMETA ( const gString& gsmeta ) ;

   //* Itemized-list group (presumably <ul> lists; see 'ulLists' member).      *
   bool  ppfItemizedList ( const gString& gsln, short& status, short wi, bool bookit = false ) ;
   bool  ppfTestItemizedList ( const gString& gssrc, short wi ) ;
   short ppfProcItemizedList ( gString& gst ) ;
   bool  ppfUListToken ( wchar_t& uSize ) ;

   //* Enumerated-list group (presumably <ol> lists; see 'olLists' member).    *
   bool  ppfEnumeratedList ( const gString& gsln, short& status, short wi, bool bookit = false ) ;
   bool  ppfTestEnumeratedList ( const gString& gssrc, short wi ) ;
   short ppfProcEnumeratedList ( gString& gst ) ;
   bool  ppfOListToken ( wchar_t& eType, short& eStart, wchar_t& eSize, 
                         wchar_t& eDir, short& etIndx ) ;
   bool  ppfOListResponse ( const gString& gsIn, wchar_t& eType, short& eStart,
                            wchar_t& fSize, wchar_t& eDir, short& etIndx ) ;

   //* Block-construct groups. Each group declares an entry point, a "Test"    *
   //* method which reports the block type through 'bType', and a "Proc"       *
   //* method which receives that block type (see enum blkType).               *
   bool  ppfFormattedBlock ( const gString& gsln, short& status, bool bookit = false ) ;
   bool  ppfTestFormattedBlock ( blkType& bType, const gString& gsln ) ;
   short ppfProcFormattedBlock ( blkType bType, const gString& gsb ) ;

   bool  ppfIndentedBlock ( const gString& gsln, short& status, bool bookit = false ) ;
   bool  ppfTestIndentedBlock ( blkType& bType, const gString& gsln ) ;
   short ppfProcIndentedBlock ( blkType bType, const gString& gsbq ) ;

   bool  ppfQuotationBlock ( const gString& gsln, short& status, bool bookit = false ) ;
   bool  ppfTestQuotationBlock ( blkType& bType, const gString& gsln ) ;
   short ppfProcQuotationBlock ( blkType bType, const gString& gsbq ) ;

   bool  ppfVerbatimBlock ( const gString& gsln, short& status, bool bookit = false ) ;
   bool  ppfTestVerbatimBlock ( blkType& bType, const gString& gsln ) ;
   short ppfProcVerbatimBlock ( blkType bType, const gString& gsb ) ;

   //* Miscellaneous processing and line-level source/target access.           *
   void  ppfPFB_List ( gString& gsLine ) ;
   short ppfProcInnerBlock ( blkType bType, const gString& gsb ) ;
   wchar_t ppfBlockPrompt ( const wchar_t* blkName, const wchar_t* firstLine ) ;
   short ppfProcGNU ( bool gpl ) ;
   bool  ppfTestComment ( const gString& gsln ) ;
   bool  ppfProcComment ( gString& gsc ) ;
   bool  ppfInsertCustomData ( const gString& fileSpec ) ;
   short ppfReadSrcLine ( gString& gs, short& windex ) ;
   void  ppfUnReadSrcLine ( const gString& gs ) ;
   short ppfReadLine ( ifstream& ifs, gString& gs ) ;
   short ppfWriteLine ( const gString& gsOut ) ;

   //* Test whether the specified file (or directory) exists.                  *
   bool  ppfTargetExists ( const gString& fPath, bool isDir = false ) ;
   //* Test whether the specified file is a valid HTML document.               *
   bool  ppfTargetIsHTML ( const gString& fPath ) ;
   //* Test whether the specified CSS definition file is valid.                *
   bool  ppfTargetIsCSS ( const gString& fPath, gString& gsVer ) ;
   //* Scan the target directory for HTML source documents.                    *
   bool  ppfScan4Src ( void ) ;

   //* Get the user's current working directory.                               *
   short ppfGetCWD ( gString& dPath ) ;
   //* Concatenate path and filename strings to create path/filename spec.     *
   short ppfCatPathFilename ( gString& pgs, const gString& wPath, const char* uFile ) ;
   short ppfCatPathFilename ( gString& pgs, const gString& wPath, const wchar_t* wFile ) ;
   //* Change the current working directory to specified target path.          *
   short ppfCdTarget ( const char* dirPath ) ;
   //* Delete a file.                                                          *
   short ppfDeleteFile ( const gString& trgPath ) ;
   //* Rename a file.                                                          *
   short ppfRenameFile ( const gString& srcPath, const gString& trgPath ) ;
   //* Decode the full path/filename specification for target.                 *
   short ppfRealpath ( gString& realPath, const gString& rawPath ) ;
   //* Extract the source filename from the path/filename string.              *
   void  ppfExtractFilename ( gString& gsName, const gString& fPath ) ;
   //* Extract the filename extension (if any) from the filename.              *
   void  ppfExtractFileExtension ( gString& gsExt, const char* fName ) ;

   //*** Data members                            ***
   //***-----------------------------------------***
   gString  cwDir ;        //* Current-working-directory on start-up
   gString  cssFile ;      //* Path/filename for CSS definitions
   gString  respFile ;     //* Path/filename for interactive response file
   gString  userMeta ;     //* Path/filename for User Text to be inserted into document head section
   wchar_t  cssVersion[CSS_VER_LEN] ;  //* CSS definition file version
   wchar_t  srcFiles[sfMAX][gsALLOCDFLT] ; //* List of source filenames
   short    sfCount ;      //* Number of source files to process
   wchar_t  ErrorMsg[emMAX][gsALLOCDFLT] ; //* Formatted error message strings
   short    emCount ;      //* Number of error messages in queue
   uint16_t slCount ;      //* Line number of source file being processed (in case of error)
   uint16_t tlCount ;      //* Line number of target file being written (verbose diagnostics)
   fifo*    pushBack ;     //* Data pushed back into the source input stream
   short    procStatus ;   //* OK if all files processed successfully, else ERR
   ifstream ifs ;          //* Access to HTML source file
   ofstream ofs ;          //* Access to HTML target file
   ifstream rifs ;         //* Access to interactive-mode response file
   bool     iMode ;        // 'true' if Interactive Mode, 'false' if automatic processing
   bool     allFiles ;     // 'true' if processing all source files in target directory
   bool     multiPass ;    // 'true' if source file has been previously processed
   bool     abort ;        // 'true' if user (or response file) indicates abort
   Cfg      ulLists ;      // formatting options for itemized (un-ordered <ul>) lists
   Cfg      olLists ;      // formatting options for enumerated (ordered <ol>) lists
   Cfg      tabBorder ;    // formatting options for table (<table>) borders
   Cfg      blkFont ;      // formatting options for pre-formatted blocks
   Cfg      cartFormat ;   // formatting options for cartouche constructs
   bool     liSpec ;       // 'true' if special-case list processing enabled
   bool     tocMod ;       // 'true' if converting Table of Contents to bullet list
   bool     tocDel ;       // 'true' if removing Table of Contents from document
   bool     verbose ;      // 'true' if 'verbose output' (applies only to text mode)
   bool     css_mod ;      // 'true' if user wants to adjust the CSS definition file (CURRENTLY UNREFERENCED)
   bool     no_mods ;      // 'true' if scan only, no modifications to be performed
   bool     no_body ;      // 'true' if <body> tag is not to be processed
   bool     no_bloc ;      // 'true' if unnecessary leading blank lines not to be removed
   bool     no_auth ;      // 'true' if @author sub-command's output not to be adjusted
   bool     no_meta ;      // 'true' if retaining meta-data in <head>
   bool     no_link ;      // 'true' if retaining links in <head>
   bool     no_cont ;      // 'true' if container class is not to be inserted
   #if PARA_FIX != 0       // member variables
   bool     no_para ;      // 'true' if strip '&para;' entities from HTML headings: <h.>
   short    paraDel ;      // number of "&para;" entities removed
   #endif   // PARA_FIX

   // DEPRECATED MEMBERS - MAY BE REMOVED IN A FUTURE RELEASE.
   wchar_t  utPath[gsALLOCDFLT] ;    //* path to insert in "up target" link - Deprecated
   wchar_t  utText[TARG_TEXT_LEN] ; //* display text to insert for "up target" link - Deprecated
   bool     ulAssign ;     // 'true' if automatic assignment of bullet class to <ul> lists (IGNORED)
   bool     no_utrg ;      // 'true' if retaining original header "Up" target
   bool     upTarg ;       // 'true' if user has specified an alternate "up" target (upTargPath)
                           // NOTE(review): no 'upTargPath' member is declared here; 'utPath' may be meant -- confirm
   bool     no_html5 ;     // 'true' if obsolete HTML3/4 tags not to be updated

   //* For debugging only:                                                    *
   //* a) scan_beg/scan_end: display the source lines as they are read        *
   //* b) rsteps: pause (10ths) after each token read from the response file  *
   //* c) skip: process the first 'n' items automatically, then revert        *
   //*    to interactive mode.                                                *
   //* d) scan: set if scan range is active                                   *
   //* e) book: bookend the blocks (report begin and end)                     *
   uint16_t scan_beg, scan_end ;    // scan range
   short    rsteps ;                // inter-token pause (1/10 second)
   short    skip ;                  // auto-token countdown
   bool     scan ;                  // scan range specified
   bool     book ;                  // book-ending active

} ;

