2011-12-19 19:24:09 +00:00

900 lines
24 KiB
C

/***************************************************************
* ptkn.c
* Tokenization Package
*
* Copyright (C) 2008-2009 Gregory Nutt. All rights reserved.
* Author: Gregory Nutt <spudmonkey@racsa.co.cr>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name NuttX nor the names of its contributors may be
* used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************/
/***************************************************************
* Included Files
***************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "keywords.h"
#include "pasdefs.h"
#include "ptdefs.h"
#include "pedefs.h"
#include "pas.h"
#include "ptkn.h"
#include "ptbl.h"
#include "perr.h"
/***************************************************************
* Private Function Prototypes
***************************************************************/
static void getCharacter (void);
static void skipLine (void);
static bool getLine (void);
static void identifier (void);
static void string (void);
static void unsignedNumber (void);
static void unsignedRealNumber (void);
static void unsignedExponent (void);
static void unsignedHexadecimal (void);
static void unsignedBinary (void);
/***************************************************************
* Private Variables
***************************************************************/
static char *strStack; /* String Stack */
static uint16_t inChar; /* last gotten character */
/***************************************************************
* Public Variables
***************************************************************/
char *tkn_strt; /* Start of token in string stack */
char *stringSP; /* Top of string stack */
/***************************************************************
* Public Functions
***************************************************************/
/* One-time tokenizer start-up.
 *
 * Allocates the string stack (stringStackSize bytes), points both
 * tkn_strt and stringSP at its base, and then primes the input side
 * through rePrimeTokenizer().
 *
 * stringStackSize - number of bytes to allocate for the string stack.
 *
 * Returns 0 unconditionally; the status returned by rePrimeTokenizer()
 * is not passed on to the caller.  An allocation failure is reported
 * with fatal(eNOMEMORY).
 */
int16_t primeTokenizer(unsigned long stringStackSize)
{
  TRACE(lstFile,"[primeTokenizer]");

  /* Create the string stack */

  strStack = malloc(stringStackSize);
  if (strStack == NULL)
    {
      fatal(eNOMEMORY);
    }

  /* The stack is empty: the top-of-stack and the start-of-token
   * pointers both sit at the base of the allocation.
   */

  stringSP = strStack;
  tkn_strt = strStack;

  /* Prepare to read from the outermost input file */

  (void)rePrimeTokenizer();
  return 0;
}
/***************************************************************/
/* Prime the tokenizer for the file currently referenced by FP.
 *
 * Reads the first line of FP->stream into FP->buffer, sets the line
 * counter to 1 and loads the first character into inChar.
 *
 * Returns 0 on success, or 1 if no line could be read (fgets()
 * returned NULL); in that case the line counter and inChar are left
 * untouched.
 */
int16_t rePrimeTokenizer(void)
{
  TRACE(lstFile,"[rePrimeTokenizer]");

  /* Rewind the character pointer to the start of the line buffer and
   * try to fill the buffer with the first line of input.
   */

  FP->cp = FP->buffer;
  if (fgets(FP->cp, LINE_SIZE, FP->stream) == NULL)
    {
      /* Nothing could be read from the stream */

      return 1;
    }

  /* We are now positioned on line number one */

  FP->line = 1;

  /* Load the first character of the file into inChar */

  getCharacter();
  return 0;
}
/***************************************************************/
/* Peek at the character that getCharacter() would deliver next.
 *
 * This is similar to getCharacter(), except that the character pointer
 * is not advanced past the character that is returned, so the next
 * call to getCharacter() will fetch the same character again.
 *
 * skipWhiteSpace - when true, white space in front of the returned
 *                  character is consumed (FP->cp is advanced past it),
 *                  continuing onto following lines if necessary.
 *
 * Side effects: inChar is overwritten with the peeked character, and
 * new lines are read with getLine() whenever the current line is
 * exhausted.
 *
 * Returns the peeked character, or '?' when getLine() reports that
 * there is no more input.
 */
char getNextCharacter(bool skipWhiteSpace)
{
  for (;;)
    {
      /* Look at the next character in the line buffer.  The value is
       * taken through unsigned char so that bytes with the high bit
       * set are not sign-extended into inChar; the <ctype.h>
       * classifiers are only defined for unsigned char values and EOF.
       */

      inChar = (unsigned char)*(FP->cp);

      if (!inChar)
        {
          /* All characters on this line have been used.  Read the
           * next line of data.
           */

          if (getLine())
            {
              /* We are out of data.  Just return a bogus value. */

              inChar = '?';
              break;
            }

          /* A new line (possibly empty) is available; look again */

          continue;
        }

      if (!skipWhiteSpace)
        {
          break;
        }

      /* Consume white space up to the next non-space character or the
       * end of the line (isspace() is false for the terminating NUL).
       */

      while (isspace(inChar))
        {
          (FP->cp)++;
          inChar = (unsigned char)*(FP->cp);
        }

      if (inChar)
        {
          break;
        }

      /* The rest of the line was blank: go around again, which will
       * fetch the next line.
       */
    }

  return (char)inChar;
} /* end getNextCharacter */
/***************************************************************/
/* Scan the next token from the input.
 *
 * On entry inChar holds the first character that has not yet been
 * consumed.  On return the global 'token' identifies what was found
 * and inChar holds the first character following the token.  The
 * helpers called from here may additionally set tknInt, tknReal,
 * tknSubType and tknPtr, and may leave text on the string stack
 * starting at tkn_strt.
 *
 * Comments ({ .. }, (* .. *), and the non-standard C and C++ forms)
 * are skipped; the scan then restarts by calling getToken() again.
 *
 * NOTE(review): nothing in this function detects end of input.
 * skipLine() substitutes '?' for inChar when getLine() reports EOF, so
 * an unterminated comment may never see its terminator -- confirm how
 * callers are expected to stop.
 */
void getToken(void)
{
  /* Skip over leading white space */

  while (isspace(inChar))
    {
      getCharacter();
    }

  /* Point to the beginning of the next token */

  tkn_strt = stringSP;

  /* Process Identifier, Symbol, or Reserved Word */

  if ((isalpha(inChar)) || (inChar == '_'))
    {
      identifier();
    }

  /* Process Numeric */

  else if (isdigit(inChar))
    {
      unsignedNumber();
    }

  /* Process string or character constant */

  else if (inChar == SQUOTE)
    {
      string();
    }

  /* Process ':' or assignment */

  else if (inChar == ':')
    {
      getCharacter();
      if (inChar == '=')
        {
          token = tASSIGN;
          getCharacter();
        }
      else
        {
          token = ':';
        }
    }

  /* Process '.' or subrange or real-number */

  else if (inChar == '.')
    {
      /* Get the character after the '.' */

      getCharacter();

      /* ".." indicates a subrange */

      if (inChar == '.')
        {
          token = tSUBRANGE;
          getCharacter();
        }

      /* '.' digit is a real number */

      else if (isdigit(inChar))
        {
          unsignedRealNumber();
        }

      /* Otherwise, it is just a '.' */

      else
        {
          token = '.';
        }
    }

  /* Process '<' or '<=' or '<>' or '<<' */

  else if (inChar == '<')
    {
      getCharacter();
      if (inChar == '>')
        {
          token = tNE;
          getCharacter();
        }
      else if (inChar == '=')
        {
          token = tLE;
          getCharacter();
        }
      else if (inChar == '<')
        {
          token = tSHL;
          getCharacter();
        }
      else
        {
          token = tLT;
        }
    }

  /* Process '>' or '>=' or '><' or '>>' */

  else if (inChar == '>')
    {
      getCharacter();
      if (inChar == '<')
        {
          token = tNE;
          getCharacter();
        }
      else if (inChar == '=')
        {
          token = tGE;
          getCharacter();
        }
      else if (inChar == '>')
        {
          token = tSHR;
          getCharacter();
        }
      else
        {
          token = tGT;
        }
    }

  /* Get Comment -- form { .. } */

  else if (inChar == '{')
    {
      /* Consume characters up to the closing brace */

      do
        {
          getCharacter();
        }
      while (inChar != '}');

      getCharacter();                 /* skip over end of comment */
      getToken();                     /* get the next real token */
    }

  /* Get comment -- form (* .. *) */

  else if (inChar == '(')
    {
      getCharacter();                 /* skip over the '(' */
      if (inChar != '*')
        {
          /* Not a comment.  Return '(' leaving the unprocessed
           * character in inChar.
           */

          token = '(';
        }
      else
        {
          /* It is a comment.  Remember the previous character so
           * that the two-character terminator can be recognized.
           */

          uint16_t lastChar = ' ';
          for (;;)
            {
              getCharacter();
              if ((lastChar == '*') && (inChar == ')'))
                {
                  break;
                }

              lastChar = inChar;
            }

          getCharacter();             /* skip over the comment end char */
          getToken();                 /* and get the next real token */
        }
    }

  /* NONSTANDARD: All C/C++-style comments */

  else if (inChar == '/')
    {
      getCharacter();                 /* skip over the '/' */
      if (inChar == '/')
        {
          /* C++ style comment: discard the rest of the line */

          skipLine();
          getToken();
        }
      else if (inChar != '*')
        {
          /* Not a comment.  Return '/' leaving the unprocessed
           * character in inChar.
           */

          token = '/';
        }
      else
        {
          /* C style comment: scan for the closing star-slash pair */

          uint16_t lastChar = ' ';
          for (;;)
            {
              getCharacter();
              if ((lastChar == '*') && (inChar == '/'))
                {
                  break;
                }

              lastChar = inChar;
            }

          getCharacter();             /* skip over the comment end char */
          getToken();                 /* and get the next real token */
        }
    }

  /* Check for $XXXX (hexadecimal).  unsignedHexadecimal() expects to
   * be entered with inChar equal to '$'.
   */

  else if (inChar == '$')
    {
      unsignedHexadecimal();
    }

  /* Check for %BBBB (binary).  unsignedBinary() expects to be entered
   * with inChar equal to '%'.
   */

  else if (inChar == '%')
    {
      unsignedBinary();
    }

  /* If inChar is an ASCII character then return token = character */

  else if (isascii(inChar))
    {
      token = inChar;
      getCharacter();
    }

  /* Otherwise, discard the character and try again */

  else
    {
      getCharacter();
      getToken();
    }

  DEBUG(lstFile,"[%02x]", token);
} /* End getToken */
/***************************************************************
* Private Functions
***************************************************************/
/* Scan an identifier and classify it.
 *
 * On entry inChar holds the first character of the identifier.  The
 * characters are folded to upper case and pushed onto the string
 * stack at tkn_strt as a NUL-terminated string.  The result is one of:
 *
 *  - a reserved word: token and tknSubType come from the reserved
 *    word table and the text is popped from the string stack;
 *  - a known symbol: token is the symbol's sKind, tknPtr refers to the
 *    symbol, and the text is popped from the string stack;
 *  - anything else: token is tIDENT and the text stays on the stack.
 */
static void identifier(void)
{
  const RTYPE *reserved;             /* Matching reserved word, if any */

  tknSubType = txNONE;

  /* Collect letters, digits and underscores, upper-casing as we go */

  do
    {
      *stringSP++ = toupper(inChar);
      getCharacter();
    }
  while ((isalnum(inChar)) || (inChar == '_'));

  *stringSP++ = '\0';                /* make ASCIIZ string */

  /* Reserved words take precedence over symbols */

  reserved = findReservedWord(tkn_strt);
  if (reserved)
    {
      token      = reserved->rtype;
      tknSubType = reserved->subtype;
      stringSP   = tkn_strt;         /* pop token from stack */
      return;
    }

  /* Not reserved.  Is it in the symbol table? */

  tknPtr = findSymbol(tkn_strt);
  if (!tknPtr)
    {
      /* No, so it is a plain identifier; its text stays on the stack */

      token = tIDENT;
      return;
    }

  token    = tknPtr->sKind;          /* get type from symbol table */
  stringSP = tkn_strt;               /* pop token from stack */

  /* The value fields are only meaningful for constants, but copying
   * one of them unconditionally is simpler than testing whether the
   * symbol really is a constant.
   */

  if (token == tREAL_CONST)
    {
      tknReal = tknPtr->sParm.c.val.f;
    }
  else
    {
      tknInt = tknPtr->sParm.c.val.i;
    }
} /* End identifier */
/***************************************************************/
/* Process string */
static void string(void)
{
register int16_t count = 0; /* # chars in string */
token = tSTRING_CONST; /* indicate string constant type */
getCharacter(); /* skip over 1st single quote */
while (inChar != SQUOTE) /* loop until next single quote */
{
if (inChar == '\n') /* check for EOL in string */
{
error(eNOSQUOTE); /* ERROR, terminate string */
break;
} /* end if */
else
{
*stringSP++ = inChar; /* concatenate character */
count++; /* bump count of chars */
} /* end else */
getCharacter(); /* get the next character */
} /* end while */
*stringSP++ = '\0'; /* terminate ASCIIZ string */
getCharacter(); /* skip over last single quote */
if (count == 1) /* Check for char constant */
{
token = tCHAR_CONST; /* indicate char constant type */
tknInt = *tkn_strt; /* (integer) value = single char */
stringSP = tkn_strt; /* "pop" from string stack */
} /* end if */
} /* end string */
/***************************************************************/
/* Consume one character of input into inChar.
 *
 * The character at FP->cp is loaded and the pointer is advanced.  If
 * that character is the NUL that ends the line buffer, skipLine() is
 * called, which leaves inChar holding either the first character of
 * the next line or '?' when no more input is available.
 */
static void getCharacter(void)
{
  /* Fetch through unsigned char so that a byte with the high bit set
   * is not sign-extended into the 16-bit inChar.  The <ctype.h>
   * classifiers applied to inChar throughout this file are only
   * defined for values representable as unsigned char (or EOF).
   */

  inChar = (unsigned char)*(FP->cp)++;
  if (!inChar)
    {
      /* We have used all of the characters on this line.  Move on to
       * the next line of data.
       */

      skipLine();
    }
}
/***************************************************************/
/* Abandon whatever remains of the current line and load inChar from
 * the line that follows.  When getLine() reports that the input is
 * exhausted, inChar is set to the placeholder '?' instead.
 */
static void skipLine(void)
{
  if (!getLine())
    {
      /* A new line is available: fetch its first character */

      getCharacter();
      return;
    }

  /* Out of data.  Hand back a bogus value. */

  inChar = '?';
}
/***************************************************************/
/* Read the next line of the active file into FP->buffer.
 *
 * FP->cp is reset to the start of the buffer before reading.  When a
 * line is read, the line number is incremented and the line is echoed
 * to the list file.  When the read fails inside an included file
 * (includeIndex > 0), that file is closed and the first byte of the
 * buffer then referenced by FP is set to NUL.
 *
 * Returns true only when the read fails and no included file is being
 * processed, i.e. there is no more input at all; false otherwise.
 */
static bool getLine(void)
{
  /* Start again from the beginning of the line buffer */

  FP->cp = FP->buffer;

  if (fgets(FP->cp, LINE_SIZE, FP->stream))
    {
      /* Got a line.  Count it and echo it to the list file. */

      (FP->line)++;
      fprintf(lstFile, "%d:%04ld %s", FP->include, FP->line, FP->buffer);
      return false;
    }

  /* Nothing was read from this file.  Are we inside an included file? */

  if (includeIndex > 0)
    {
      /* Yes.  Close it. */

      closeNestedFile();

      /* Mark the input line as holding no data.  NOTE: FP now refers
       * to the previous file, one level of nesting lower.
       */

      FP->buffer[0] = '\0';
      return false;
    }

  /* No.  We are completely out of data. */

  return true;
} /* end getLine */
/***************************************************************/
/* Scan a number that begins with a decimal digit.
 *
 * This logic (along with unsignedRealNumber and unsignedExponent)
 * handles:
 *
 * FORM: integer-number = decimal-integer | hexadecimal-integer |
 *                        binary-integer
 * FORM: decimal-integer = digit-sequence
 * FORM: real-number =
 *         digit-sequence '.' [digit-sequence] [ exponent scale-factor ] |
 *         '.' digit-sequence [ exponent scale-factor ] |
 *         digit-sequence exponent scale-factor
 * FORM: exponent = 'e' | 'E'
 *
 * When called, inChar is equal to the leading digit of a
 * digit-sequence.  NOTE that the real-number form beginning with '.'
 * does not use this logic.
 *
 * On return token is tINT_CONST with the value in tknInt, or
 * tREAL_CONST (set by the real-number helpers) with the value in
 * tknReal.  The digits are popped from the string stack.
 */
static void unsignedNumber(void)
{
  /* Assume an integer type (might be real) */

  token = tINT_CONST;

  /* Push all digits until a non-digit is found */

  do
    {
      *stringSP++ = inChar;
      getCharacter();
    }
  while (isdigit(inChar));

  /* A digit-sequence followed by 'e' (or 'E') continues as a real
   * number.
   */

  if ((inChar == 'e') || (inChar == 'E'))
    {
      unsignedExponent();
    }

  /* If the digit-sequence is not followed by '.', or is followed by
   * ".." (a subrange), then this is an integer.  getNextCharacter()
   * only peeks: the character after the '.' is not consumed.
   */

  else if ((inChar != '.') || (getNextCharacter(false) == '.'))
    {
      /* Terminate the digit string and convert it.  strtol() is used
       * rather than sscanf("%ld") so that the conversion does not
       * depend on tknInt having exactly the type 'long', and so that
       * an out-of-range literal saturates instead of being undefined.
       */

      *stringSP++ = '\0';
      tknInt = strtol(tkn_strt, NULL, 10);

      /* Remove the integer string from the string stack */

      stringSP = tkn_strt;
    }
  else
    {
      /* It is a real value.  getNextCharacter() left the character
       * after the decimal point unconsumed, so really fetch it now and
       * then process the rest of the real number.
       */

      getCharacter();
      unsignedRealNumber();
    }
}
/***************************************************************/
/* Scan the fractional part of a real constant.
 *
 * Together with unsignedNumber and unsignedExponent this handles:
 *
 * FORM: real-number =
 *         digit-sequence '.' [digit-sequence] [ exponent scale-factor ] |
 *         '.' digit-sequence [ exponent scale-factor ] |
 *         digit-sequence exponent scale-factor
 * FORM: exponent = 'e' | 'E'
 *
 * Entry conditions:
 *  - inChar is the character AFTER the '.'
 *  - any leading digit-sequence is already on the string stack
 *  - the '.' itself has not been pushed yet
 *
 * On return token is tREAL_CONST, tknReal holds the converted value
 * and the number text has been popped from the string stack.
 */
static void unsignedRealNumber(void)
{
  token = tREAL_CONST;

  /* Push the decimal point that the caller already consumed */

  *stringSP++ = '.';

  /* Push the optional digit-sequence following the decimal point */

  for (; isdigit(inChar); getCharacter())
    {
      *stringSP++ = inChar;
    }

  if ((inChar != 'e') && (inChar != 'E'))
    {
      /* No exponent follows: terminate the text and convert it to
       * binary here.
       */

      *stringSP++ = '\0';
      (void) sscanf(tkn_strt, "%lf", &tknReal);
    }
  else
    {
      /* An exponent follows: unsignedExponent() finishes the token
       * and performs the conversion.
       */

      unsignedExponent();
    }

  /* Remove the number string from the string stack */

  stringSP = tkn_strt;
}
/***************************************************************/
/* Scan the exponent part of a real constant.
 *
 * Together with unsignedNumber and unsignedRealNumber this handles:
 *
 * FORM: real-number =
 *         digit-sequence '.' [digit-sequence] [ exponent scale-factor ] |
 *         '.' digit-sequence [ exponent scale-factor ] |
 *         digit-sequence exponent scale-factor
 * FORM: exponent = 'e' | 'E'
 * FORM: scale-factor = [ sign ] digit-sequence
 *
 * Entry conditions:
 *  - inChar holds the exponent character ('e' or 'E')
 *  - any leading digits and decimal point are already on the string
 *    stack
 *  - the exponent character itself has not been pushed yet
 *
 * On return token is tREAL_CONST and the number text has been popped
 * from the string stack.  tknReal holds the converted value, or 0.0
 * after error(eEXPONENT) when no digit follows the exponent and sign.
 */
static void unsignedExponent(void)
{
  token = tREAL_CONST;

  /* Push the exponent character and move on to what follows it */

  *stringSP++ = inChar;
  getCharacter();

  /* The sign is optional.  If there is none, push an explicit '+' */

  if ((inChar != '-') && (inChar != '+'))
    {
      *stringSP++ = '+';
    }
  else
    {
      *stringSP++ = inChar;
      getCharacter();
    }

  if (isdigit(inChar))
    {
      /* Push the required digit-sequence */

      while (isdigit(inChar))
        {
          *stringSP++ = inChar;
          getCharacter();
        }

      /* Terminate the text and convert it to binary */

      *stringSP++ = '\0';
      (void) sscanf(tkn_strt, "%lf", &tknReal);
    }
  else
    {
      /* A digit sequence must follow the exponent and optional sign */

      error(eEXPONENT);
      tknReal = 0.0;
    }

  /* Remove the number string from the string stack */

  stringSP = tkn_strt;
}
/***************************************************************/
/* Scan a hexadecimal integer constant.
 *
 * FORM: integer-number = decimal-integer | hexadecimal-integer |
 *                        binary-integer
 * FORM: hexadecimal-integer = '$' hex-digit-sequence
 * FORM: hex-digit-sequence = hex-digit { hex-digit }
 * FORM: hex-digit = digit | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
 *
 * On entry, inChar is '$'.  Upper-case 'A'..'F' are accepted as well.
 *
 * On return token is tINT_CONST, tknInt holds the value (0 when no
 * hex digit follows the '$'), inChar is the first character that is
 * not a hex digit, and the digits have been popped from the string
 * stack.
 */
static void unsignedHexadecimal(void)
{
  /* This is another representation for an integer */

  token = tINT_CONST;

  /* Push each hex digit, folded to upper case */

  for (;;)
    {
      /* Get the next character (the first pass steps over the '$') */

      getCharacter();

      /* Anything other than 0-9, a-f or A-F ends the hex value */

      if (!isxdigit(inChar))
        {
          break;
        }

      *stringSP++ = toupper(inChar);
    }

  /* Terminate the hex string and convert it.  strtoul() is used
   * rather than sscanf("%lx") so that the conversion does not depend
   * on the exact type of tknInt, and so that an empty digit sequence
   * gives 0 instead of leaving tknInt unchanged.
   */

  *stringSP++ = '\0';
  tknInt = strtoul(tkn_strt, NULL, 16);

  /* Remove the hex string from the string stack */

  stringSP = tkn_strt;
}
/***************************************************************/
/* Scan a binary integer constant.
 *
 * FORM: integer-number = decimal-integer | hexadecimal-integer |
 *                        binary-integer
 * FORM: binary-integer = '%' binary-digit-sequence
 * FORM: binary-digit-sequence = binary-digit { binary-digit }
 * FORM: binary-digit = '0' | '1'
 *
 * On entry, inChar is '%'.  On return token is tINT_CONST, tknInt
 * holds the accumulated value and inChar is the first character that
 * is neither '0' nor '1'.  Nothing is pushed onto the string stack;
 * the value is built up directly, one bit per digit.
 */
static void unsignedBinary(void)
{
  uint32_t bits = 0;                 /* value accumulated so far */

  /* This is another representation for an integer */

  token = tINT_CONST;

  /* Step over the '%' and then shift in one bit per binary digit */

  getCharacter();
  while ((inChar == '0') || (inChar == '1'))
    {
      bits <<= 1;
      if (inChar == '1')
        {
          bits |= 1;
        }

      getCharacter();
    }

  tknInt = (int32_t)bits;
}
/***************************************************************/