/*---------------------------------------------------------------------------------
 * General-purpose scanner.
 *
 * Filename:    skanx.c 
 * Author:      Randall Gellens.
 * Version:     1.2
 * Last edited: 2 November 1998
 *---------------------------------------------------------------------------------
 */

#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include "skanx.h"



/*--------------------------------------------------------------------------------
 *                v a l u e    a r r a y s
 *--------------------------------------------------------------------------------
*/

/*
 * Printable token-type names, packed into one array as NUL-terminated
 * pieces.  The '.' characters are padding: with them in place the name
 * text starts at byte offsets 5, 10, 15, 20 and 25 ("End", "ID", "Num",
 * "Str", "Spec"), while offsets 0 and 30 hold only "....".
 * The two ruler lines below give the byte offsets (tens digit, then units);
 * the carets mark the source columns at which offsets 0, 5, 10, ... fall.
 * NOTE(review): the code that indexes this table is not in this file;
 * presumably it uses token_type * 5 as the offset -- confirm before editing.
 */
/*                               0     0     1     1     2     2     3     3 */
/*                               0123 4567 8901 234567 89012 345678 90123 45 */
const char szTokenTypeNames[] = "....\0End\0.ID\0..Num\0.Str\0.Spec\0....\0";
/*                               ^     ^     ^     ^     ^     ^     ^     ^ */




/*******************************************************************************/

/*
 * Put a strParseLine structure into its initial, empty state.
 *
 * Parameters:
 *    psPL:      structure to initialize.
 *    cComm:     comment character, or 0 to disable this feature.
 *    cEsc:      escape character for strings, or 0 to disable this feature.
 *    flags:     bit mask of parse flags; stored as given.
 *
 * Notes:
 *    No line is attached by this call: both token pointers are set to NULL,
 *    the token type is tkEnd, and every history slot is cleared.
 *
 */
void init_strParseLine ( strParseLine *psPL, 
                        char cComm, 
                        char cEsc, 
                        int flags )
    {
    short iSlot = MAX_PARSER_BACKUP;


    /* caller-supplied configuration */
    psPL->flags      = flags;
    psPL->cEscape    = cEsc;
    psPL->cComment   = cComm;

    /* there is no current token yet */
    psPL->pTokenE    = NULL;
    psPL->pTokenS    = NULL;
    psPL->lTokenVal  = 0;
    psPL->iTokenLen  = 0;
    psPL->TokenType  = tkEnd;

    /* empty history; the index is parked one past the last slot so that
       the first scanned token wraps round into slot 0 */
    psPL->iHistX     = MAX_PARSER_BACKUP;
    while ( iSlot-- > 0 )
        psPL->history[iSlot] = NULL;
    } /* end init_strParseLine */


/******************************************************************************/

/*
 * Attach a freshly read line to a strParseLine structure and reset the
 * per-line scanning state.
 *
 * Parameters:
 *    psPL:     pointer to strParseLine structure.
 *    pszLine:  null-terminated line to scan.
 *
 * Returns:
 *    -1 if pszLine is NULL (psPL is left untouched), 0 otherwise.
 *
 * Notes:
 *    The comment character, escape character and flags held in psPL are
 *    not modified.
 *
 */

int setup_new_line ( strParseLine *psPL, char *pszLine )
    {
    short iSlot = MAX_PARSER_BACKUP;
    char *pFirst;


    if ( pszLine == NULL )
        return ( -1 );

    /* scanning starts at the first non-blank character of the line */
    pFirst = skip_ws(pszLine);
    psPL->pTokenS    = pFirst;
    psPL->pTokenE    = pFirst;

    /* no token has been scanned on this line yet */
    psPL->lTokenVal  = 0;
    psPL->iTokenLen  = 0;
    psPL->TokenType  = tkEnd;

    /* forget every token remembered from the previous line */
    psPL->iHistX     = MAX_PARSER_BACKUP;
    while ( iSlot-- > 0 )
        psPL->history[iSlot] = NULL;

    return ( 0 );
    } /* end setup_new_line */


/******************************************************************************/

/*
 * Scan the next token on the current line.
 *
 * Parameters:
 *    psPL:   pointer to strParseLine structure.
 *    flags:  bit flags that modify scanning.  This function tests
 *            gtkfNoNum, gtkfLeadDigOK, gtkfDashOK, gtkfNoQuotes and
 *            gtkfBrkWS, and passes the whole mask on to check_id.
 *
 * Returns:
 *     type of token found (tkEnd at end of line or at the comment
 *     character).
 *
 * Notes:
 *    On tkEnd only the start pointer, type, length and value are updated;
 *    the end pointer and the history table are left as they were.
 *    For every other token the token start is appended to the circular
 *    history table used by backup_token.
 *    lTokenVal is the numeric value for tkNum, the first character after
 *    the opening quote for tkStr, and the first character of the token
 *    for tkID and tkSpec.
 *
 */
token_type get_next_token ( strParseLine *psPL, unsigned short flags )
    {
    int     fSawNonDigit = 0;
    char   *pStart;


    /* the new token begins at the first non-blank after the previous one */

    pStart        = skip_ws(psPL->pTokenE);
    psPL->pTokenS = pStart;

    /* nothing left on the line, or the rest of it is a comment */

    if ( *pStart == 0 || *pStart == psPL->cComment )
        {
        psPL->lTokenVal = 0;
        psPL->iTokenLen = 0;
        psPL->TokenType = tkEnd;
        return ( tkEnd );
        }

    /* classify the token by its first character(s); the order of the
       tests below is significant */

    if ( ( CHAR_IS_LETT(*pStart) ) ||
         ( CHAR_IS_DIGT(*pStart) && (flags & gtkfNoNum) ) )
        { /* leading letter, or leading digit when the caller wants digits
             treated as identifier characters */
        psPL->pTokenE   = check_id(pStart, &fSawNonDigit, flags);
        psPL->TokenType = tkID;
        psPL->lTokenVal = *pStart;
        }

    else if ( CHAR_IS_DIGT(*pStart) )
        { /* leading digit, and digits are numbers */
        psPL->pTokenE   = check_digits(pStart, &fSawNonDigit);

        if ( fSawNonDigit && (flags & gtkfLeadDigOK) )
            { /* letters follow the digits and that is allowed in an ID */
            psPL->TokenType = tkID;
            psPL->lTokenVal = *pStart;
            }
        else
            { /* a number; any letter that follows stops the token */
            if ( fSawNonDigit )
                psPL->pTokenE = skip_digits(pStart);

            psPL->TokenType = tkNum;
            psPL->lTokenVal = atoi(pStart);
            }
        }

    else if ( CHAR_IS_SIGN(*pStart)  &&
              (flags & gtkfDashOK)   &&
              (flags & gtkfNoNum)      )
        { /* leading sign while digits are IDs and dashes are allowed */
        psPL->pTokenE   = check_id(pStart, &fSawNonDigit, flags);
        psPL->TokenType = tkID;
        psPL->lTokenVal = *pStart;
        }

    else if ( CHAR_IS_SIGN(pStart[0]) &&
              CHAR_IS_DIGT(pStart[1]) )
        { /* sign immediately followed by a digit */
        psPL->pTokenE   = check_digits(pStart + 1, &fSawNonDigit);

        if ( fSawNonDigit )
            { /* not a clean number: hand back the sign on its own */
            psPL->pTokenE   = pStart + 1;
            psPL->TokenType = tkSpec;
            psPL->lTokenVal = *pStart;
            }
        else
            { /* signed number */
            psPL->TokenType = tkNum;
            psPL->lTokenVal = atoi(pStart);
            }
        }

    else if ( *pStart == '\"' && !(flags & gtkfNoQuotes) )
        { /* quoted string */
        psPL->pTokenE   = skip_quote(pStart, psPL->cEscape);
        psPL->TokenType = tkStr;
        psPL->lTokenVal = pStart[1];
        }

    else
        { /* anything else is a one-character special token */
        psPL->pTokenE   = pStart + 1;
        psPL->TokenType = tkSpec;
        psPL->lTokenVal = *pStart;
        }

    /* A white-space-only scan keeps the type and value decided above and
       merely moves the end pointer; doing it here avoids repeating the
       classification code or testing gtkfBrkWS in every branch. */

    if ( flags & gtkfBrkWS )
        psPL->pTokenE = skip_token_ws(pStart);

    /* remember where this token started, for backup_token */
    psPL->iHistX = ( psPL->iHistX + 1 ) % MAX_PARSER_BACKUP;
    psPL->history[psPL->iHistX] = pStart;

    /* the length follows from the two pointers */
    psPL->iTokenLen = psPL->pTokenE - pStart;
    return ( psPL->TokenType );
    } /* end get_next_token */


/******************************************************************************/

/*
 * Back-up the scanner, so the next call to get_next_token will return a
 * previously-scanned token.
 *
 * Parameters:
 *    psPL:    pointer to strParseLine structure.
 *    iDepth:  How far back to go.  0 means rescan current token, 1 means
 *                prior token, etc.
 *
 * Returns:
 *     0 on success.
 *    -1 if iDepth is negative, if iDepth is MAX_PARSER_BACKUP or more, or
 *       if no token has been recorded at the requested depth.  psPL is
 *       not modified in these cases.
 *
 * Notes:
 *    Multiple calls to backup_token without calling get_next_token at least
 *    iDepth times in between, may yield undesired results.
 *
 *    On success the token type is reset to tkEnd and the length and value
 *    to 0 until get_next_token is called again.
 *
 */
int backup_token ( strParseLine *psPL, short iDepth )
    {
    short iX;


    /* a negative depth would index forward, past the end of the table */
    if ( iDepth < 0 || iDepth >= MAX_PARSER_BACKUP )
        return ( -1 );

    iX = psPL->iHistX - iDepth;

    if ( iDepth == 0 )
        {
        /* rescan the current token; the history table is not consulted */
        psPL->pTokenE = psPL->pTokenS;
        }
    else
        {
        /* the history is circular: wrap a negative index round to the top
           of the table (iX is greater than -MAX_PARSER_BACKUP here, so a
           single addition brings it back into range) */
        if ( iX < 0 )
            iX = iX + MAX_PARSER_BACKUP;

        /* an empty slot means fewer than iDepth tokens are available;
           refuse rather than install a NULL scan position */
        if ( psPL->history[iX] == NULL )
            return ( -1 );

        psPL->pTokenS = psPL->pTokenE = psPL->history[iX];
        }

    psPL->TokenType = tkEnd;
    psPL->iTokenLen = psPL->lTokenVal = 0;
    psPL->iHistX    = iX;
    return ( 0 );   
    } /* end backup_token */


/*******************************************************************************/

/*
 * Compare the current token with a string.
 *
 * Parameters:
 *    psz:  null-terminated string to compare against.
 *    psPL: pointer to strParseLine structure holding the current token.
 *
 * Returns:
 *     non-zero (TRUE) if the token has exactly the length of psz and
 *     mem_cmp reports the two as equal over that length; 0 (FALSE)
 *     otherwise.
 *
 */
int check_token ( const char *psz, const strParseLine *psPL )
    {
    int iWanted = (int) strlen(psz);


    /* different lengths can never match, so skip the comparison */
    if ( iWanted != psPL->iTokenLen )
        return ( 0 );

    return ( mem_cmp(psz, psPL->pTokenS, psPL->iTokenLen) == 0 );
    } /* end check_token */


/*******************************************************************************/

/*
 * Test whether the current token begins with a given string.
 *
 * Parameters:
 *    psz:  null-terminated prefix to look for.
 *    psPL: pointer to strParseLine structure holding the current token.
 *
 * Returns:
 *     non-zero (TRUE) if the token is at least as long as psz and
 *     mem_cmp reports its leading characters equal to psz; 0 (FALSE)
 *     otherwise.
 *
 */
int check_token_init ( const char *psz, const strParseLine *psPL )
    {
    size_t cbPrefix = strlen(psz);


    /* a token shorter than the prefix cannot start with it */
    if ( psPL->iTokenLen < (int) cbPrefix )
        return ( 0 );

    return ( mem_cmp(psz, psPL->pTokenS, cbPrefix) == 0 );
    } /* end check_token_init */


/*******************************************************************************/

/*
 * Tell whether a character counts as white space for the scanner.
 *
 * Parameters:
 *    c:  character to test.
 *
 * Returns:
 *     1 for space, line feed, horizontal tab and carriage return;
 *     0 for every other character (including vertical tab, form feed
 *     and the null terminator).
 *
 */
int char_is_ws ( char c )
    {
    switch ( c )
        {
        case ' ':           /* space */
        case '\n':          /* line feed */
        case '\t':          /* horizontal tab */
        case '\r':          /* carriage return */
            return ( 1 );

        default:
            return ( 0 );
        }
    } /* end char_is_ws */

/*******************************************************************************/

/*
 * Advance a pointer over a run of white space.
 *
 * Parameters:
 *    p:  pointer into a null-terminated string.
 *
 * Returns:
 *     pointer to the first character for which char_is_ws is false
 *     (possibly the null terminator); p itself when it is not on
 *     white space.
 *
 */
char *skip_ws ( char *p )
    {
    for ( ; char_is_ws(*p); p++ )
        ;   /* nothing to do but step */

    return ( p );
    } /* end skip_ws */


/*******************************************************************************/

/*
 * Skip a pointer past end of white space delimited token.
 *
 * Parameters:
 *    p:  pointer to the first character of the token.
 *
 * Returns:
 *     pointer to the first white space character or null terminator
 *     found after *p; p unchanged if it already points at the null
 *     terminator.
 *
 * Notes:
 *    The character at p is consumed without being examined (other than
 *    for the terminator), so a token is always at least one character
 *    long even if p points at white space.
 *
 */
char *skip_token_ws ( char *p )
    {
    /* never step over the terminator of an empty string */
    if ( *p == 0 )
        return ( p );

    do
        {
        p++;
        }
    while ( *p != 0 && !char_is_ws(*p) );
        
    return ( p );
    } /* end skip_token_ws */

/*******************************************************************************/

/*
 * Advance a pointer over a run of letters.
 *
 * Parameters:
 *    p:  pointer into a null-terminated string.
 *
 * Returns:
 *     pointer to the first character that CHAR_IS_LETT rejects;
 *     p itself when it is not on such a character.
 *
 */
char *skip_letters ( char *p )
    {
    for ( ; CHAR_IS_LETT(*p); p++ )
        ;   /* step over each letter */

    return ( p );
    } /* end skip_letters */



/*******************************************************************************/

/*
 * Advance a pointer over a run of digits.
 *
 * Parameters:
 *    p:  pointer into a null-terminated string.
 *
 * Returns:
 *     pointer to the first character that CHAR_IS_DIGT rejects;
 *     p itself when it is not on such a character.
 *
 */
char *skip_digits ( char *p )
    {
    for ( ; CHAR_IS_DIGT(*p); p++ )
        ;   /* step over each digit */

    return ( p );
    } /* end skip_digits */


/*******************************************************************************/

/*
 * Advance a pointer over a run of alphanumeric characters.
 *
 * Parameters:
 *    p:  pointer into a null-terminated string.
 *
 * Returns:
 *     pointer to the first character that CHAR_IS_ALPH rejects;
 *     p itself when it is not on such a character.
 *
 */
char *skip_alpha ( char *p )
    {
    for ( ; CHAR_IS_ALPH(*p); p++ )
        ;   /* step over each alphanumeric character */

    return ( p );
    } /* end skip_alpha */

/*******************************************************************************/

/*
 * Advance a pointer over an alphanumeric run, noting whether the run
 * contains any letter.
 *
 * Parameters:
 *    p:       pointer into a null-terminated string.
 *    fNonDig: set to 1 when at least one character of the run satisfies
 *             CHAR_IS_LETT.  It is never cleared here, so the caller
 *             must initialize it.
 *
 * Returns:
 *     pointer to the first character that CHAR_IS_ALPH rejects;
 *     p itself when it is not on such a character.
 *
 */

char *check_digits ( char *p, int *fNonDig )
    {
    for ( ; CHAR_IS_ALPH(*p); p++ )
        {
        /* a letter anywhere in the run means this is not a pure number */
        if ( CHAR_IS_LETT(*p) )
            *fNonDig = 1;
        }

    return ( p );
    } /* end check_digits */



/*******************************************************************************/

/*
 * Skip a pointer past end of quoted string.  Escaped quote characters are
 * not treated as quote characters for string delimitation.
 *
 * Parameters:
 *    p:    pointer (points at first quote mark on entry).
 *    esc:  the escape character (e.g., '\'), or 0 for none.
 *
 * Returns:
 *     pointer just past the closing quote, or pointer to the null
 *     terminator if the string is not closed.
 *
 * Notes:
 *    The escape character and the character that follows it are skipped
 *    as a pair, so an escaped escape character directly before the closing
 *    quote does not hide that quote, and any number of escaped quotes
 *    may appear back to back.
 *
 *    When the escape character is itself the quote mark, only a doubled
 *    quote is treated as an escape pair; a single quote closes the string.
 *
 *    An escape character that is the last character before the null
 *    terminator is skipped on its own.
 *
 */
char *skip_quote ( char *p, char esc )
    {
    p++; /* step over the opening quote */

    while ( *p != 0 )
        {
        /* Look for an escape pair before looking for the closing quote,
           so that the second character of a pair is never examined on
           its own. */
        if ( esc != 0 && *p == esc && *(p+1) != 0 &&
             ( esc != '\"' || *(p+1) == '\"' ) )
            {
            p += 2;
            continue;
            }

        if ( *p == '\"' )
            break;

        p++;
        }

    if ( *p == '\"' )
        p++; /* skip past closing quote */

    return ( p );
    } /* end skip_quote */




/*******************************************************************************/

/*
 * Read the value of the current numeric token, applying an optional
 * size suffix that follows it directly.
 *
 * Parameters:
 *    psPL: pointer to strParseLine structure.
 *
 * Returns:
 *    the token value multiplied by 1024 (suffix k/K), 1048576 (m/M) or
 *    1073741824 (g/G), or the plain token value when no suffix follows;
 *    0xFFFF when the current token is not a number.
 *
 * Notes:
 *    On entry, the current token must be the number (type tkNum).
 *    On exit, if a suffix was found the token's end pointer and length
 *    have been extended to cover it.  This function does not itself fetch
 *    the following token.
 *
 *    The error value 0xFFFF cannot be told apart from a genuine 65535,
 *    and the multiplication is not checked for overflow.
 *
 */
long get_integer ( strParseLine *psPL )
    {
    long lScale;


    if ( psPL->TokenType != tkNum )
        return ( 0xFFFFL );

    /* the suffix, if any, is the character directly after the digits */
    switch ( *(psPL->pTokenE) )
        {
        case 'k':
        case 'K':
            lScale = 1024L;
            break;

        case 'm':
        case 'M':
            lScale = 1024L * 1024L;
            break;

        case 'g':
        case 'G':
            lScale = 1024L * 1024L * 1024L;
            break;

        default:
            /* no suffix: the token is left exactly as it was */
            return ( psPL->lTokenVal );
        }

    /* make the suffix part of the token */
    psPL->pTokenE++;
    psPL->iTokenLen++;

    return ( (long) psPL->lTokenVal * lScale );
    } /* end get_integer */


/*******************************************************************************/

/*
 * Advance a pointer over an identifier-like token, noting whether it
 * contains anything other than digits.
 *
 * Parameters:
 *    p:       pointer into a null-terminated string.
 *    fNonDig: set to 1 when at least one accepted character fails
 *             CHAR_IS_DIGT.  It is never cleared here, so the caller
 *             must initialize it.
 *    flags:   bit flags; gtkfDashOK additionally accepts '-' and
 *             gtkfUnderOK additionally accepts '_'.
 *
 * Returns:
 *     pointer to the first character that is not accepted;
 *     p itself when the first character is not accepted.
 *
 */
char *check_id ( char *p, int *fNonDig, unsigned short flags )
    {
    for ( ;; )
        {
        int fAccepted = ( CHAR_IS_ALPH(*p)                    ) ||
                        ( (flags & gtkfDashOK)  && (*p == '-') ) ||
                        ( (flags & gtkfUnderOK) && (*p == '_') );

        if ( !fAccepted )
            break;

        /* letters, dashes and underscores all count as non-digits */
        if ( !(CHAR_IS_DIGT(*p)) )
            *fNonDig = 1;

        p++;
        }

    return ( p );
    } /* end check_id */

/*******************************************************************************/

/*
 * Copy a string from the token stream to a newly-created buffer.
 *
 * Parameters:
 *    psPL:      pointer to strParseLine structure.
 *    fEmptyOK:  TRUE if an empty string is permitted.
 *
 * Returns:
 *    pointer to newly-created null-terminated buffer, or NULL if the
 *    token is not a string, if it is empty and fEmptyOK is FALSE, or if
 *    memory could not be allocated.
 *
 * Notes:
 *    On entry, current token is string or prior to string.
 *    On exit, current token is the string.
 *
 *    Escape characters are converted (if psPL->cEscape is set): a, b, f,
 *    r, t and v give the usual control characters, n gives the two
 *    characters CR LF, and any other escaped character stands for itself.
 *
 *    The first and last characters of the token are taken to be the
 *    quotes and are not copied.  A token consisting of a lone quote
 *    yields an empty string when fEmptyOK is TRUE.
 *
 *    Caller must free the buffer.
 *
 */
char *get_string ( strParseLine *psPL, int fEmptyOK )
    {
    char *buf, *p, *q;
    int   i;
    /* Numeric values are used for the CR LF pair written for an escaped
       'n'.  These macros are not #undef'd, so they stay visible to any
       later code in this translation unit. */
	#define CR 13
	#define LF 10


    if ( psPL->TokenType != tkStr )
        {
        if ( get_next_token(psPL, gtNormal) != tkStr )
            return ( NULL );
        }


    if ( psPL->iTokenLen < 3 && !fEmptyOK )
        return ( NULL );

    /* number of characters between the quotes; a lone, unterminated quote
       has a token length of 1, which must not become a negative count or
       a zero-byte allocation */

    i = psPL->iTokenLen - 2;
    if ( i < 0 )
        i = 0;

    /* allocate a buffer big enough to hold the string, not including
       the opening or closing quote characters, but allow for the
       terminating null.  No escape sequence expands to more characters
       than it occupies, so this is always enough. */

    buf = malloc ( (size_t) i + 1 );
    if ( !buf )
        return ( NULL );

    /* copy characters from the token pointer into the string, skipping
       the opening and closing quote characters, and counting any
       escaped characters as the character itself (that is, skip the
       escape character) or as the target value of the escape sequence */

    p = psPL->pTokenS + 1;
    q = buf;
    while ( i > 0 )
        {
        if ( psPL->cEscape != 0 && *p == psPL->cEscape )
            {
            /* step over the escape character itself */
            i--;
            p++;

            switch ( *p )
                {
                case 'a':  *q++ = '\a'; break;            /* Bell (alert)    */
                case 'b':  *q++ = '\b'; break;            /* Backspace       */
                case 'f':  *q++ = '\f'; break;            /* Formfeed        */
                case 'n':  *q++ = CR;                     /* New line        */
                           *q++ = LF;   break; 
                case 'r':  *q++ = '\r'; break;            /* Carriage return */
                case 't':  *q++ = '\t'; break;            /* Horizontal tab  */
                case 'v':  *q++ = '\v'; break;            /* Vertical tab    */
                default:   *q++ = *p;   break;            /* (itself)        */
                }
            }
        else
            *q++ = *p;

        i--;
        p++;
        }

    *q = 0;

    return ( buf );
    } /* end get_string */


/*******************************************************************************/

/*
 * Translates escape sequences into characters, inline.
 *
 * Parameters:
 *    pStr:     pointer to string to translate.
 *    iLen:     length of string, not counting any null termination.
 *    cEsc:     escape character (e.g., '\'), or 0 to translate nothing.
 *
 * Returns:
 *    pStr is returned.
 *
 * Notes:
 *    The character after the escape character selects the translation
 *    (letters are accepted in either case):
 *       r            carriage return
 *       n            the two characters CR LF
 *       t, v, a      horizontal tab, vertical tab, bell
 *       0-7          octal value, one to three octal digits
 *       x            hex value, one or two hex digits; with no hex digit
 *                    following, the x stands for itself
 *       other        the character itself (this is how the escape
 *                    character is written literally)
 *    An escape character with nothing after it is dropped.
 *
 *    At most iLen characters are examined, and translation stops early at
 *    a null character in the input.  The result is never longer than the
 *    input; whenever it is shorter a terminating null is written after it.
 *    When nothing was shortened the buffer beyond the translated
 *    characters is not written to.
 *
 */
char *translate_esc ( char *pStr, long iLen, char cEsc )
    {
    /* numeric values, so the CR LF pair does not depend on how the
       compiler maps '\n' */
    enum { kCarriageReturn = 13, kLineFeed = 10 };

    char *pSrc = pStr;      /* next character to read                  */
    char *pDst = pStr;      /* next position to write; never past pSrc */
    char *pEnd;             /* one past the last character to examine  */


    if ( pStr == NULL || iLen <= 0 || cEsc == 0 )
        return pStr;

    pEnd = pStr + iLen;

    while ( pSrc < pEnd && *pSrc != 0 )
        {
        char cSel;
        int  iVal;
        int  nDigits;

        if ( *pSrc != cEsc )
            { /* ordinary character: copy it down */
            *pDst++ = *pSrc++;
            continue;
            }

        if ( pSrc + 1 >= pEnd || pSrc[1] == 0 )
            { /* escape character with nothing after it: drop it */
            pSrc++;
            break;
            }

        /* consume the escape character and the selector that follows */
        cSel  = pSrc[1];
        pSrc += 2;

        switch ( tolower((unsigned char) cSel) )
            {
            case 'r':                   /* Carriage return */
                *pDst++ = '\r';
                break;

            case 'n':                   /* newline, i.e., CRLF */
                /* two characters read, two written: pDst stays behind pSrc */
                *pDst++ = (char) kCarriageReturn;
                *pDst++ = (char) kLineFeed;
                break;

            case 't':                   /* Horizontal tab */
                *pDst++ = '\t';
                break;

            case 'v':                   /* Vertical tab */
                *pDst++ = '\v';
                break;

            case 'a':                   /* Bell (alert) */
                *pDst++ = '\a';
                break;

            case '0':                   /* octal value */
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                iVal    = cSel - '0';
                nDigits = 1;
                while ( nDigits < 3 && pSrc < pEnd &&
                        *pSrc >= '0' && *pSrc <= '7' )
                    {
                    iVal = iVal * 8 + ( *pSrc - '0' );
                    pSrc++;
                    nDigits++;
                    }
                *pDst++ = (char) iVal;
                break;

            case 'x':                   /* hex value */
                iVal    = 0;
                nDigits = 0;
                while ( nDigits < 2 && pSrc < pEnd &&
                        isxdigit((unsigned char) *pSrc) )
                    {
                    int c = tolower((unsigned char) *pSrc);

                    iVal = iVal * 16 +
                           ( isdigit(c) ? c - '0' : c - 'a' + 10 );
                    pSrc++;
                    nDigits++;
                    }
                /* no digits at all: the selector stands for itself */
                *pDst++ = ( nDigits > 0 ) ? (char) iVal : cSel;
                break;

            default:                    /* (itself) */
                *pDst++ = cSel;
                break;
            }
        }

    /* If anything was removed the old terminator is now too far along, so
       put a new one directly after the translated text.  pDst is before
       pSrc here, which keeps the write inside the characters examined. */
    if ( pDst != pSrc )
        *pDst = 0;

    return pStr;
    } /* end translate_esc */