1 /* $XConsortium: token.c,v 1.3 94/02/04 17:07:17 gildea Exp $ */
2 /* Copyright International Business Machines,Corp. 1991
5 * License to use, copy, modify, and distribute this software
6 * and its documentation for any purpose and without fee is
7 * hereby granted, provided that the above copyright notice
8 * appear in all copies and that both that copyright notice and
9 * this permission notice appear in supporting documentation,
10 * and that the name of IBM not be used in advertising or
11 * publicity pertaining to distribution of the software without
12 * specific, written prior permission.
14 * IBM PROVIDES THIS SOFTWARE "AS IS", WITHOUT ANY WARRANTIES
15 * OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT
16 * LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT OF
18 * THIRD PARTY RIGHTS. THE ENTIRE RISK AS TO THE QUALITY AND
19 * PERFORMANCE OF THE SOFTWARE, INCLUDING ANY DUTY TO SUPPORT
20 * OR MAINTAIN, BELONGS TO THE LICENSEE. SHOULD ANY PORTION OF
21 * THE SOFTWARE PROVE DEFECTIVE, THE LICENSEE (NOT IBM) ASSUMES
22 * THE ENTIRE COST OF ALL SERVICING, REPAIR AND CORRECTION. IN
23 * NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR
24 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
25 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
26 * CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
27 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
30 /* Authors: Sig Nin & Carol Thompson IBM Almaden Research Laboratory */
39 * -------------------------------------------------------------------
41 * -------------------------------------------------------------------
44 /* These variables are set by the caller */
/* Bounds of the buffer that receives the token text.
 * save_ch() stores a character only while tokenCharP < tokenMaxP, so
 * tokenMaxP is an exclusive upper bound (one past the last usable byte).
 * NOTE(review): scan_token() assigns both pointers itself before it
 * starts scanning -- confirm whether any external caller still sets them.
 */
45 char *tokenStartP; /* Pointer to token buffer in VM */
46 char *tokenMaxP; /* Pointer to last byte in buffer + 1 */
48 /* These variables are set by TOKEN */
/* Results of a scan.
 *   tokenLength  -- computed as tokenCharP - tokenStartP when the scan ends.
 *   tokenTooLong -- set to TRUE by save_ch() when the buffer is full, and
 *                   by scan_token() when the buffer is smaller than
 *                   MAX_NAME_LEN; reset to FALSE at the start of a scan.
 *   tokenType    -- one of the TOKEN_* codes, assigned by the action
 *                   routine that completes the token.
 *   tokenValue   -- the .integer or .real member is filled in by the
 *                   number-completing routines.
 */
49 int tokenLength; /* Characters in token */
50 boolean tokenTooLong; /* Token too long for buffer */
51 int tokenType; /* Type of token identified */
52 psvalue tokenValue; /* Token value */
55 * -------------------------------------------------------------------
57 * -------------------------------------------------------------------
/* Scanner-private state.
 *   inputFileP -- stream read by next_ch() and pushed back to by back_ch();
 *                 scan_token() loads it from inputP->data.fileP.
 *   tokenCharP -- write cursor into the token buffer; reset to tokenStartP
 *                 at the start of each scan and advanced by save_unsafe_ch().
 */
60 static FILE *inputFileP; /* Current input file */
64 static char *tokenCharP; /* Pointer to next character in token */
67 * -------------------------------------------------------------------
68 * Private routines for manipulating numbers
69 * -------------------------------------------------------------------
75 : (-64 <= (e) && (e) <= 63\
81 static double Exp10T[128] = {
82 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57,
83 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49,
84 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
85 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33,
86 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25,
87 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
88 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,
89 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
90 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
91 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
92 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23,
93 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
94 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
95 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47,
96 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55,
97 1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63
100 static double P10(exponent)
107 value = (exponent & 1 ? power : 1.0);
108 exponent = -(++exponent >> 1); /* portable C for -(exponent/2) */
112 value = (exponent & 1 ? power : 1.0);
113 exponent = exponent >> 1;
116 while(exponent > 0) {
127 * -------------------------------------------------------------------
128 * Private routines and macros for manipulating the input
129 * -------------------------------------------------------------------
132 /* Get next character from the input --
/* next_ch() -- expression macro: reads one character from inputFileP
 * with getc() and yields its result (the character as an int, or EOF).
 */
135 #define next_ch() (getc(inputFileP))
137 /* Push a character back into the input --
139 * Ungetc of EOF will fail, but that's ok: the next getc will
142 * NOTE: These macros are presently written to return the character
143 * pushed, or EOF if none was pushed. However, they are not
144 * required to return anything in particular, and callers should
145 * not rely on the returned value.
/* back_ch(ch) -- expression macro: hands ch back to inputFileP through
 * ungetc() so that the next next_ch() sees it again.  The value of the
 * expression is whatever ungetc() returns; callers should ignore it.
 */
147 #define back_ch(ch) (ungetc(ch, inputFileP))
149 /* Push a character back into the input if it was not white space.
150 * If it is a carriage return (\r) then check next char for
151 * linefeed and consume them both, otherwise put next char back.
154 #define back_ch_not_white(ch) \
158 ? (((ch = next_ch()) == '\n')\
168 * -------------------------------------------------------------------
169 * Private routines and macros for manipulating the token buffer
170 * -------------------------------------------------------------------
173 /* Add a character to the token
174 * ---- use ONLY when you KNOW that this character will
175 * be stored within the token buffer.
/* save_unsafe_ch(ch) -- expression macro: stores ch at *tokenCharP and
 * post-increments tokenCharP.  No comparison against tokenMaxP is made
 * here; save_ch() is the bounds-checked form.
 */
177 #define save_unsafe_ch(ch) (*tokenCharP++ = ch)
179 /* Add a character to the token, if not too long to fit */
180 #define save_ch(ch) \
181 ((tokenCharP < tokenMaxP)\
182 ? save_unsafe_ch(ch)\
183 : (tokenTooLong = TRUE)\
/* save_ch_no_inc(ch) -- expression macro: if tokenCharP is still below
 * tokenMaxP, stores ch at *tokenCharP WITHOUT advancing tokenCharP, so
 * the stored byte is not counted in the token length.  Nothing is
 * stored when the buffer is full (tokenTooLong is not touched).
 * The expression's value is the result of the && -- it is also zero
 * when ch itself is 0, so it is not a reliable "stored" indicator.
 */
186 #define save_ch_no_inc(ch) \
187 ((tokenCharP < tokenMaxP) && (*tokenCharP = ch))
190 * -------------------------------------------------------------------
194 * -- take int ch as a parameter
195 * -- return int ch if no token was recognized, DONE otherwise
196 * -- leave the next character in the input, if returning DONE
197 * -------------------------------------------------------------------
202 /* Get the next input character */
203 static int next_char(ch)
209 /* Add character to token */
210 static int add_char(ch)
218 /* -------------------------------------------------------------------
219 * Skip white space and comments
222 /* Skip white space */
223 static int skip_space(ch)
228 } while(isWHITE_SPACE(ch));
233 static int skip_comment(ch)
238 } while(isCOMMENT(ch));
242 /* -------------------------------------------------------------------
243 * Collect value elements for a number
246 /* decimal integer or real number mantissa */
251 /* real number exponent */
261 static int add_sign(ch)
269 static int add_1st_digits(ch)
273 return(add_digits(ch));
276 static int add_digits(ch)
279 long value, p_value, scale;
282 /* On entry, expect m_sign to be set to '+' or '-';
283 * ch is a decimal digit.
284 * Expect at most one character saved at this point,
285 * a sign. This routine will save up to 10 more
286 * characters without checking the buffer boundary.
293 while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
294 value = (value << 3) + (value << 1) + (ch - '0');
299 /* Quick exit for small integers --
300 * |x| <= 10*((MAX_INTEGER/10)-1)+9
301 * |x| <= 2,147,483,639 for 32 bit integers
303 if (isNUMBER_ENDER(ch)) {
304 back_ch_not_white(ch);
305 tokenValue.integer = (m_sign == '-' ? -value : value);
306 tokenType = TOKEN_INTEGER;
310 /* Handle additional digits. Beyond the boundary case,
311 * 10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
312 * just count the digits: the number is too large to
313 * represent as an integer and will be returned as a real.
314 * The mantissa of a real holds fewer bits than an integer.
317 value = (m_sign == '-' ? -value : value);
320 if (isDECIMAL_DIGIT(ch)) {
322 /* Handle the boundary case */
323 if (p_value == (MAX_INTEGER/10)) {
326 /* Must handle positive and negative values separately */
327 /* for 2's complement arithmetic */
329 if (digit <= MAX_INTEGER%10)
330 value = (value << 3) + (value << 1) + digit;
332 ++scale; /* Too big, just count it */
335 /* Use positive % operands for portability */
336 if (digit <= -(MIN_INTEGER+10)%10)
337 value = (value << 3) + (value << 1) - digit;
339 ++scale; /* Too big, just count it */
343 ++scale; /* Not boundary case, just count digit */
348 /* Continue scanning digits, but can't store them */
349 while(isDECIMAL_DIGIT(ch)) {
356 /* Continue from here scanning radix integer or real */
360 /* Initialize for possible real */
368 static int add_1st_decpt(ch)
372 return(add_decpt(ch));
375 static int add_decpt(ch)
378 /* On entry, expect m_sign to be set to '+' or '-' */
385 static int add_fraction(ch)
391 /* On entry, expect m_value and m_scale to be initialized,
392 * and m_sign to be set to '+' or '-'. Expect m_value and m_sign
393 * to be consistent (this is not checked).
398 /* Scan leading zeroes */
406 /* Scan first significant digit */
407 if (isDECIMAL_DIGIT(ch)) {
410 value = (m_sign == '-' ? -value : value);
415 /* no significant digits -- number is zero */
418 /* value != 0 || value == 0 && !isDECIMAL_DIGIT(ch) */
420 /* Scan additional significant digits */
421 if (isDECIMAL_DIGIT(ch)) {
423 while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
425 value = (value << 3) + (value << 1) + (ch - '0');
429 /* Check boundary case */
430 if (isDECIMAL_DIGIT(ch) && value == (MAX_INTEGER/10)) {
432 if (digit <= MAX_INTEGER%10) {
434 value = (value << 3) + (value << 1) + digit;
442 while(isDECIMAL_DIGIT(ch) && value > -(-(MIN_INTEGER+10)/10+1)) {
443 /* Use positive / operands for portability */
445 value = (value << 3) + (value << 1) - (ch - '0');
449 /* Check boundary case */
450 if (isDECIMAL_DIGIT(ch)
451 && value == -(-(MIN_INTEGER+10)/10+1)) {
453 if (digit <= -(MIN_INTEGER+10)%10) {
454 /* Use positive % operands for portability */
456 value = (value << 3) + (value << 1) - digit;
463 /* Additional digits can be discarded */
464 while(isDECIMAL_DIGIT(ch)) {
474 /* Initialize for possible real */
482 static int add_e_sign(ch)
490 static int add_exponent(ch)
497 /* On entry, expect e_sign to be set to '+' or '-' */
503 while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
504 value = (value << 3) + (value << 1) + (ch - '0');
510 value = (e_sign == '-' ? -value : value);
512 /* Handle additional digits. Beyond the boundary case,
513 * 10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
514 * just count the digits: the number is too large to
515 * represent as an integer.
517 if (isDECIMAL_DIGIT(ch)) {
519 /* Examine boundary case */
520 if (p_value == (MAX_INTEGER/10)) {
523 /* Must handle positive and negative values separately */
524 /* for 2's complement arithmetic */
526 if (digit <= MAX_INTEGER%10)
527 value = (value << 3) + (value << 1) + digit;
529 ++scale; /* Too big, just count it */
532 /* Use positive % operands for portability */
533 if (digit <= -(MIN_INTEGER+10)%10)
534 value = (value << 3) + (value << 1) - digit;
536 ++scale; /* Too big, just count it */
540 ++scale; /* Not boundary case, just count digit */
545 /* Continue scanning digits, but can't store any more */
546 while(isDECIMAL_DIGIT(ch)) {
560 static int add_radix(ch)
563 if (2 <= m_value && m_value <= 36 && m_scale == 0) {
569 /* Radix invalid, complete a name token */
570 return(AAH_NAME(ch));
574 static int add_r_digits(ch)
581 /* NOTE: The syntax of a radix number allows only for
582 * values of zero or more. The value will be stored as
583 * a 32 bit integer, which PostScript then interprets
584 * as signed. This means, for example, that the numbers:
591 * are all interpreted as -1. This routine implements this
592 * idea explicitly: it accumulates the number's value
593 * as unsigned, then casts it to signed when done.
596 /* Expect r_base to be initialized */
601 /* Scan leading zeroes */
607 /* Handle first non-zero digit */
608 if ((digit=digit_value[ch]) < radix) {
613 /* Add digits until boundary case reached */
614 while((digit=digit_value[ch]) < radix
615 && value < (MAX_ULONG / radix)) {
616 value = value * radix + digit;
621 /* Scan remaining digits */
622 if ((digit=digit_value[ch]) < radix) {
624 /* Examine boundary case ---
625 * radix*(MAX_ULONG/radix) <= number <= MAX_ULONG
627 if (value == (MAX_ULONG/radix) && digit <= MAX_ULONG%radix)
628 value = value * radix + digit;
632 /* Continue scanning digits, but can't store them */
635 while(digit_value[ch] < radix) {
644 r_value = (long) value; /* result is signed */
650 /* -------------------------------------------------------------------
651 * Complete a number; set token type and done flag.
652 * Put current input character back, if it is not white space.
655 /* Done: Radix Number */
656 static int RADIX_NUMBER(ch)
659 back_ch_not_white(ch);
661 tokenValue.integer = r_value;
662 tokenType = TOKEN_INTEGER;
665 tokenType = TOKEN_NAME;
671 static int INTEGER(ch)
674 back_ch_not_white(ch);
676 tokenValue.integer = m_value;
677 tokenType = TOKEN_INTEGER;
680 tokenValue.real = (double)(m_value) * Exp10(m_scale);
681 tokenType = TOKEN_REAL;
692 back_ch_not_white(ch);
694 /* NOTE: ignore e_scale, since e_value alone will cause
695 * exponent overflow if e_scale > 0.
698 /* HAZARD: exponent overflow of intermediate result
699 * (e.g., in 370 floating point); this should not be a problem
700 * with IEEE floating point. Reduce exponent overflow hazard by
701 * combining m_scale and e_value first, if they have different signs,
702 * or multiplying m_value and one of the other factors, if both
703 * m_scale and e_value are negative.
705 if ((m_scale >= 0 && e_value <= 0)
706 || (m_scale <= 0 && e_value >= 0)) {
707 tokenValue.real = (double)(m_value) * Exp10(m_scale + e_value);
710 temp = (double)(m_value) * Exp10(m_scale);
711 tokenValue.real = temp * Exp10(e_value);
714 tokenType = TOKEN_REAL;
719 /* -------------------------------------------------------------------
720 * Assemble a hex string; set token type and done flag.
723 /* Done: Hex String */
724 static int HEX_STRING(ch)
731 /* Process odd digit */
733 if (!isHEX_DIGIT(ch)) {
735 /* Skip white space */
736 while(isWHITE_SPACE(ch))
739 /* Check for terminator */
740 if (!isHEX_DIGIT(ch)) {
744 value = digit_value[ch] << 4;
746 /* Process even digit */
748 if (!isHEX_DIGIT(ch)) {
750 /* Skip white space */
751 while(isWHITE_SPACE(ch))
754 /* Check for terminator */
755 if (!isHEX_DIGIT(ch)) {
760 save_ch(value + digit_value[ch]);
763 /* Classify result, based on why loop ended */
765 tokenType = TOKEN_HEX_STRING;
767 /* save the invalid character for error reporting */
769 tokenType = TOKEN_INVALID;
775 /* -------------------------------------------------------------------
776 * Assemble a string; set token type and done flag
779 /* Save a backslash-coded character in a string --
781 * Store the proper character for special cases
782 * "\b", "\f", "\n", "\r", and "\t".
784 * Decode and store octal-coded character, up to
785 * three octal digits, "\o", "\oo", and "\ooo".
787 * The sequence "\<newline>" is a line continuation,
788 * so consume both without storing anything.
790 * The sequence "\<EOF>" is an error; exit without
791 * storing anything and let the caller handle it.
793 * For other characters, including the sequences
794 * "\\", "\(", and "\)", simply store the second
797 static void save_digraph(ch)
804 case 'b': /* backspace */
808 case 'f': /* formfeed */
812 case 'n': /* newline */
816 case 'r': /* carriage return */
820 case 't': /* horizontal tab */
824 case '\n': /* line continuation -- consume it */
827 case '\r': /* carriage return -- consume it */
828 ch = next_ch(); /* look at next character, is it \n? */
829 if (ch == '\n') return;
830 back_ch(ch); /* if not a line feed, then return it */
833 case EOF: /* end of file -- forget it */
837 /* scan up to three octal digits to get value */
838 if (isOCTAL_DIGIT(ch)) {
839 value = digit_value[ch];
841 if (isOCTAL_DIGIT(ch)) {
842 value = (value << 3) + digit_value[ch];
844 if (isOCTAL_DIGIT(ch))
845 value = (value << 3) + digit_value[ch];
855 /* Found a character to save */
860 static int STRING(ch)
865 tokenType = TOKEN_STRING;
870 while(!isSTRING_SPECIAL(ch)) {
883 if (--nest_level > 0)
888 save_digraph(next_ch());
892 /* All carriage returns (\r) are turned into linefeeds (\n)*/
893 ch = next_ch(); /* get the next one, is it \n? */
894 if (ch != '\n') { /* if not, then put it back. */
897 save_ch('\n'); /* in either case, save a linefeed */
902 tokenType = TOKEN_INVALID; /* Unterminated string */
907 } while(nest_level > 0);
909 /* If there's room, add a 0-byte termination without increasing string
910 length. This fixes certain dependencies on 0-terminated strings */
917 /* -------------------------------------------------------------------
918 * Assemble a name; set token type and done flag.
919 * Put current input character back, if it is not white space.
923 * (Safe version used to complete name tokens that
924 * start out looking like something else).
927 static int AAH_NAME(ch)
935 back_ch_not_white(ch);
936 tokenType = TOKEN_NAME;
975 back_ch_not_white(ch);
976 tokenType = TOKEN_NAME;
980 /* Done: Literal Name */
981 static int LITERAL_NAME(ch)
1013 back_ch_not_white(ch);
1014 tokenType = TOKEN_LITERAL_NAME;
1018 /* Done: immediate Name */
1019 static int IMMED_NAME(ch)
1052 back_ch_not_white(ch);
1053 tokenType = TOKEN_IMMED_NAME;
1057 /* Done: Name found while looking for something else */
1058 static int OOPS_NAME(ch)
1061 back_ch_not_white(ch);
1062 tokenType = TOKEN_NAME;
1067 /* -------------------------------------------------------------------
1068 * Complete a miscellaneous token; set token type and done flag.
1071 /* Done: Unmatched Right Angle-Bracket */
1072 static int RIGHT_ANGLE(ch)
1075 tokenType = TOKEN_RIGHT_ANGLE;
1079 /* Done: Unmatched Right Parenthesis */
1080 static int RIGHT_PAREN(ch)
1083 tokenType = TOKEN_RIGHT_PAREN;
1087 /* Done: Left Brace */
1088 static int LEFT_BRACE(ch)
1091 tokenType = TOKEN_LEFT_BRACE;
1095 /* Done: Right Brace */
1096 static int RIGHT_BRACE(ch)
1099 tokenType = TOKEN_RIGHT_BRACE;
1103 /* Done: Left Bracket */
1104 static int LEFT_BRACKET(ch)
1108 tokenType = TOKEN_LEFT_BRACKET;
1112 /* Done: Right Bracket */
1113 static int RIGHT_BRACKET(ch)
1117 tokenType = TOKEN_RIGHT_BRACKET;
1122 static int BREAK_SIGNAL(ch)
1125 tokenType = TOKEN_BREAK;
1129 /* Done: No Token Found */
1130 static int NO_TOKEN(ch)
1133 tokenType = TOKEN_EOF;
1139 * -------------------------------------------------------------------
1140 * scan_token -- scan one token from the input. It uses a simple
1141 * finite state machine to recognize token classes.
1143 * The input is from a file.
1147 * inputP -> input PostScript object, a file.
1148 * tokenStartP -> buffer in VM for accumulating the token.
1149 * tokenMaxP -> one past the last character in the token buffer
1153 * tokenLength = number of characters in the token
1154 * tokenTooLong = TRUE if the token did not fit in the buffer
1155 * tokenType = code for the type of token parsed.
1156 * tokenValue = converted value of a numeric token.
1159 * -------------------------------------------------------------------
1161 void scan_token(inputP)
1165 unsigned char *stateP = s0;
1166 unsigned char entry;
1169 /* Define input source */
1170 inputFileP = inputP->data.fileP;
1171 if (inputFileP == NULL) {
1172 tokenType = TOKEN_EOF;
1176 /* Ensure enough space for most cases
1177 * (so we don't have to keep checking)
1178 * The length needs to cover the maximum number
1179 * of save_unsafe_ch() calls that might be executed.
1180 * That number is 11 (a sign and 10 decimal digits, e.g.,
1181 * when scanning -2147483648), but use MAX_NAME_LEN
1182 * in case someone changes that without checking.
1184 tokenStartP = vm_next_byte();
1185 tokenMaxP = tokenStartP + MIN(vm_free_bytes(), MAX_STRING_LEN);
1187 if ((tokenMaxP-tokenStartP) < (MAX_NAME_LEN)) {
1189 tokenTooLong = TRUE;
1190 tokenType = TOKEN_NONE;
1191 tokenValue.integer = 0;
1196 tokenCharP = tokenStartP;
1197 tokenTooLong = FALSE;
1199 /* Scan one token */
1203 stateP = classActionTable[entry].nextStateP;
1204 actionP = classActionTable[entry].actionRoutineP;
1205 ch = (*actionP)(ch);
1206 } while(ch != DONE);
1209 /* Return results */
1210 tokenLength = tokenCharP - tokenStartP;