]> git.sesse.net Git - pgn-extract/blob - lex.c
Push through a computer/human flag to the binary output.
[pgn-extract] / lex.c
1 /*
2  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
3  *  Copyright (C) 1994-2014 David Barnes
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 1, or (at your option)
7  *  any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, write to the Free Software
16  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17  *
18  *  David Barnes may be contacted as D.J.Barnes@kent.ac.uk
19  *  http://www.cs.kent.ac.uk/people/staff/djb/
20  *
21  */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <limits.h>
27 #include <ctype.h>
28 #if defined(__BORLANDC__) || defined(_MSC_VER)
29 #include <io.h>
30 #ifndef R_OK
31 #define R_OK 0
32 #endif
33 #else
34 #include <unistd.h>
35 #endif
36 #include "bool.h"
37 #include "mymalloc.h"
38 #include "defs.h"
39 #include "typedef.h"
40 #include "tokens.h"
41 #include "taglist.h"
42 #include "lex.h"
43 #include "moves.h"
44 #include "lists.h"
45 #include "decode.h"
46 #include "lines.h"
47 #include "grammar.h"
48 #include "apply.h"
49 #include "output.h"
50
51         /* Prototypes for the functions in this file. */
52 static void save_string(const char *result);
53 /* When a move is saved, what is known of its source and destination coordinates
54  * should also be saved.
55  */
56 static void save_move(const unsigned char *move);
57 static void save_q_castle(void);
58 static void save_k_castle(void);
59 static void terminate_input(void);
60 static Boolean extract_yytext(const unsigned char *symbol_start,
61                               const unsigned char *linep);
62 static Boolean open_input(const char *infile);
63 static Boolean open_input_file(int file_number);
64
65 static unsigned long line_number = 0;
66 /* Keep track of the Recursive Annotation Variation level. */
67 static unsigned RAV_level = 0;
68 /* Keep track of the last move found. */
69 static unsigned char last_move[MAX_MOVE_LEN+1];
70 /* How many games we have extracted from this file. */
71 static unsigned games_in_file = 0;
72
73 /* Provide an input file pointer.
74  * This is intialised in init_lex_tables.
75  */
76 static FILE *yyin = NULL;
77
78 /* Define space for holding matched tokens. */
79 #define MAX_YYTEXT 100
80 static unsigned char yytext[MAX_YYTEXT+1];
81 YYSTYPE yylval;
82
83 #define MAX_CHAR 256
84 #define ALPHA_DIST ('a'-'A')
85 /* Table of symbol classifications. */
86 static TokenType ChTab[MAX_CHAR];
87 /* A boolean array as to whether a character is allowed in a move or not. */
88 static short MoveChars[MAX_CHAR];
89
90 /* Define a table to hold the list of tag strings and the corresponding
91  * TagName index. This is initialised in init_list_of_known_tags().
92  */
93 static const char **TagList;
94 static unsigned tag_list_length = 0;
95
96         /* Initialise the TagList. This should be stored in alphabetical order,
97          * by virtue of the order in which the _TAG values are defined.
98          */
99 static void
100 init_list_of_known_tags(void)
101 {
102     unsigned i;
103     tag_list_length = ORIGINAL_NUMBER_OF_TAGS;
104     TagList = (const char **) MallocOrDie(tag_list_length*sizeof(*TagList));
105     /* Be paranoid and put a string in every entry. */
106     for(i = 0; i < tag_list_length; i++){
107         TagList[i] = "";
108     }
109     TagList[ANNOTATOR_TAG] = "Annotator";
110     TagList[BLACK_TAG] = "Black";
111     TagList[BLACK_ELO_TAG] = "BlackElo";
112     TagList[BLACK_NA_TAG] = "BlackNA";
113     TagList[BLACK_TITLE_TAG] = "BlackTitle";
114     TagList[BLACK_TYPE_TAG] = "BlackType";
115     TagList[BLACK_USCF_TAG] = "BlackUSCF";
116     TagList[BOARD_TAG] = "Board";
117     TagList[DATE_TAG] = "Date";
118     TagList[ECO_TAG] = "ECO";
119     TagList[PSEUDO_ELO_TAG] = "Elo";
120     TagList[EVENT_TAG] = "Event";
121     TagList[EVENT_DATE_TAG] = "EventDate";
122     TagList[EVENT_SPONSOR_TAG] = "EventSponsor";
123     TagList[FEN_TAG] = "FEN";
124     TagList[PSEUDO_FEN_PATTERN_TAG] = "FENPattern";
125     TagList[HASHCODE_TAG] = "HashCode";
126     TagList[LONG_ECO_TAG] = "LongECO";
127     TagList[MODE_TAG] = "Mode";
128     TagList[NIC_TAG] = "NIC";
129     TagList[OPENING_TAG] = "Opening";
130     TagList[PSEUDO_PLAYER_TAG] = "Player";
131     TagList[PLY_COUNT_TAG] = "PlyCount";
132     TagList[RESULT_TAG] = "Result";
133     TagList[ROUND_TAG] = "Round";
134     TagList[SECTION_TAG] = "Section";
135     TagList[SETUP_TAG] = "SetUp";
136     TagList[SITE_TAG] = "Site";
137     TagList[STAGE_TAG] = "Stage";
138     TagList[SUB_VARIATION_TAG] = "SubVariation";
139     TagList[TERMINATION_TAG] = "Termination";
140     TagList[TIME_TAG] = "Time";
141     TagList[TIME_CONTROL_TAG] = "TimeControl";
142     TagList[TOTAL_PLY_COUNT_TAG] = "TotalPlyCount";
143     TagList[UTC_DATE_TAG] = "UTCDate";
144     TagList[UTC_TIME_TAG] = "UTCTime";
145     TagList[VARIATION_TAG] = "Variation";
146     TagList[WHITE_TAG] = "White";
147     TagList[WHITE_ELO_TAG] = "WhiteElo";
148     TagList[WHITE_NA_TAG] = "WhiteNA";
149     TagList[WHITE_TITLE_TAG] = "WhiteTitle";
150     TagList[WHITE_TYPE_TAG] = "WhiteType";
151     TagList[WHITE_USCF_TAG] = "WhiteUSCF";
152 }
153
154     /* Extend TagList to accomodate a new tag string.
155      * Return the current value of tag_list_length as its
156      * index, having incremented its value.
157      */
158 static TagName
159 make_new_tag(const char *tag)
160 {   unsigned tag_index = tag_list_length;
161     tag_list_length++;
162     TagList = (const char **) ReallocOrDie((void *)TagList,
163                     tag_list_length*sizeof(*TagList));
164     TagList[tag_index] = copy_string(tag);
165     /* Ensure that the game header's tags array can accommodate
166      * the new tag.
167      */
168     increase_game_header_tags_length(tag_list_length);
169     return tag_index;
170 }
171
172 const char *
173 tag_header_string(TagName tag)
174 {
175     if(tag < tag_list_length){
176         return TagList[tag];
177     }
178     else{
179         fprintf(GlobalState.logfile,"Internal error in tag_header_string(%d)\n",
180                                 tag);
181         return NULL;
182     }
183 }
184
185         /* Initialise ChTab[], the classification of the initial characters
186          * of symbols.
187          * Initialise MoveChars, the classification of secondary characters
188          * of moves.
189          */
190 void
191 init_lex_tables(void)
192 {   int i;
193
194     /* Assume standard input will be used, until we know otherwise. */
195     yyin = stdin;
196     init_list_of_known_tags();
197     /* Initialise ChTab[]. */
198     for(i = 0; i < MAX_CHAR; i++){
199         ChTab[i] = ERROR_TOKEN;
200     }
201     ChTab[' '] = WHITESPACE;
202     ChTab['\t'] = WHITESPACE;
203     /* Take account of DOS line-ends. */
204     ChTab['\r'] = WHITESPACE;
205     ChTab['['] = TAG_START;
206     ChTab[']'] = TAG_END;
207     ChTab['"'] = DOUBLE_QUOTE;
208     ChTab['{'] = COMMENT_START;
209     ChTab['}'] = COMMENT_END;
210     ChTab['$'] = NAG;
211     ChTab['!'] = ANNOTATE;
212     ChTab['?'] = ANNOTATE;
213     ChTab['+'] = CHECK_SYMBOL;
214     ChTab['#'] = CHECK_SYMBOL;
215     ChTab['.'] = DOT;
216     ChTab['('] = RAV_START;
217     ChTab[')'] = RAV_END;
218     ChTab['%'] = PERCENT;
219     ChTab['\\'] = ESCAPE;
220     ChTab['\0'] = EOS;
221     ChTab['*'] = STAR;
222     ChTab['-'] = DASH;
223
224     /* Operators allowed only in the tag file. */
225     ChTab['<'] = OPERATOR;
226     ChTab['>'] = OPERATOR;
227     ChTab['='] = OPERATOR;        /* Overloaded in MoveChars. */
228
229     for(i = '0'; i <= '9'; i++){
230         ChTab[i] = DIGIT;
231     }
232     for(i = 'A'; i <= 'Z'; i++){
233         ChTab[i] = ALPHA;
234         ChTab[i+ALPHA_DIST] = ALPHA;
235     }
236     ChTab['_'] = ALPHA;
237
238     /* Classify the Russian piece letters as ALPHA. */
239     ChTab[RUSSIAN_KNIGHT_OR_KING] = ALPHA; /* King and Knight. */
240     ChTab[RUSSIAN_KING_SECOND_LETTER] = ALPHA; /* King (second character). */
241     ChTab[RUSSIAN_QUEEN] = ALPHA; /* Queen. */
242     ChTab[RUSSIAN_ROOK] = ALPHA; /* Rook. */
243     ChTab[RUSSIAN_BISHOP] = ALPHA; /* Bishop. */
244
245     /* Initialise MoveChars[]. */
246     for(i = 0; i < MAX_CHAR; i++){
247         MoveChars[i] = 0;
248     }
249     /* Files. */
250     for(i = 'a'; i <= 'h'; i++){
251         MoveChars[i] = 1;
252     }
253     /* Ranks. */
254     for(i = '1'; i <= '8'; i++){
255         MoveChars[i] = 1;
256     }
257     /* Upper-case pieces. */
258     MoveChars['K'] = 1;
259     MoveChars['Q'] = 1;
260     MoveChars['R'] = 1;
261     MoveChars['N'] = 1;
262     MoveChars['B'] = 1;
263     /* Lower-case pieces. */
264     MoveChars['k'] = 1;
265     MoveChars['q'] = 1;
266     MoveChars['r'] = 1;
267     MoveChars['n'] = 1;
268     MoveChars['b'] = 1;
269     /* Other u-c Dutch/German characters. */
270     MoveChars['D'] = 1; /* Queen. */
271     MoveChars['T'] = 1; /* Rook. */
272     MoveChars['S'] = 1; /* Knight. */
273     MoveChars['P'] = 1; /* Knight. */
274     MoveChars['L'] = 1; /* Bishop. */
275     /* Russian characters. */
276     MoveChars[RUSSIAN_KNIGHT_OR_KING] = 1; /* King and Knight. */
277     MoveChars[RUSSIAN_KING_SECOND_LETTER] = 1; /* King (second character). */
278     MoveChars[RUSSIAN_QUEEN] = 1; /* Queen. */
279     MoveChars[RUSSIAN_ROOK] = 1; /* Rook. */
280     MoveChars[RUSSIAN_BISHOP] = 1; /* Bishop. */
281
282     /* Capture and square separators. */
283     MoveChars['x'] = 1;
284     MoveChars['X'] = 1;
285     MoveChars[':'] = 1;
286     MoveChars['-'] = 1;
287     /* Promotion character. */
288     MoveChars['='] = 1;
289     /* Castling. */
290     MoveChars['O'] = 1;
291     MoveChars['o'] = 1;
292     MoveChars['0'] = 1;
293     /* Allow a trailing p for ep. */
294     MoveChars['p'] = 1;
295 }
296
297         /* Starting from linep in line, gather up the string until
298          * the closing quote.  Skip over the closing quote.
299          */
300 LinePair
301 gather_string(char *line, unsigned char *linep)
302 {   LinePair resulting_line;
303     char ch;
304     unsigned len = 0;
305     char *str;
306
307     do{ ch = *linep++;
308         len++;
309         if(ch == '\\'){
310             /* Escape the next character. */
311             len++;
312             ch = *linep++;
313             if(ch != '\0'){
314               len++;
315               ch = *linep++;
316             }
317         }
318     } while((ch != '"') && (ch != '\0'));
319     /* The last one doesn't belong in the string. */
320     len--;
321     /* Allocate space for the result. */
322     str = MallocOrDie(len+1);
323     strncpy(str,(const char *) (linep-len-1),len);
324     str[len] = '\0';
325     /* Store it in yylval. */
326     yylval.token_string = str;
327
328     /* Make sure that the string was properly terminated, by
329      * looking at the last character examined.
330      */
331     if(ch == '\0'){
332         /* Too far. */
333         if(!GlobalState.skipping_current_game) {
334             fprintf(GlobalState.logfile,"Missing closing quote in %s\n",line);
335         }
336         if(len > 1){
337             /* Move back to the null. */
338             linep--;
339             str[len-1] = '\0';
340         }
341     }
342     else{
343         /* We have already skipped over the closing quote. */
344     }
345     resulting_line.line = line;
346     resulting_line.linep = linep;
347     resulting_line.token = STRING;
348     return resulting_line;
349 }
350
351         /*
352          * Is ch of the given character class?
353          * External access to ChTab.
354          */
355 Boolean
356 is_character_class(unsigned char ch, TokenType character_class)
357 {
358     return ChTab[ch] == character_class;
359 }
360
361         /* Starting from linep in line, gather up a comment until
362          * the END_COMMENT.  Skip over the END_COMMENT.
363          */
364 static LinePair
365 gather_comment(char *line, unsigned char *linep)
366 {   LinePair resulting_line;
367     char ch;
368     unsigned len = 0;
369     /* The string list in which the current comment will be gathered. */
370     StringList *current_comment = NULL;
371     /* The pointer to be returned. */
372     CommentList *comment;
373
374     do{
375         /* Restart a new segment. */
376         len = 0;
377         do{ ch = *linep++;
378             len++;
379         } while((ch != '}') && (ch != '\0'));
380         /* The last one doesn't belong in the comment. */
381         len--;
382         if(GlobalState.keep_comments){
383             char *comment_str;
384
385             /* Allocate space for the result. */
386             comment_str = (char *)MallocOrDie(len+1);
387             strncpy(comment_str,(const char *) (linep-len-1) ,len);
388             comment_str[len] = '\0';
389             current_comment = save_string_list_item(current_comment,comment_str);
390         }
391         if(ch == '\0'){
392             line = next_input_line(yyin);
393             linep = (unsigned char *) line;
394         }
395     } while((ch != '}') && (line != NULL));
396
397     /* Set up the structure to be returned. */
398     comment = MallocOrDie(sizeof(*comment));
399     comment->Comment = current_comment;
400     comment->next = NULL;
401     yylval.comment = comment;
402
403     resulting_line.line = line;
404     resulting_line.linep = linep;
405     resulting_line.token = COMMENT;
406     return resulting_line;
407 }
408
409         /* Remember that 0 can start 0-1 and 0-0.
410          * Remember that 1 can start 1-0 and 1/2.
411          */
412 static LinePair
413 gather_possible_numeric(char *line, unsigned char *linep, char initial_digit)
414 {   LinePair resulting_line;
415     TokenType token = MOVE_NUMBER;
416     /* Keep a record of where this token started. */
417     const unsigned char *symbol_start = linep-1;
418
419     if(initial_digit == '0'){
420         /* Could be castling or a result. */
421         if(strncmp((const char *) linep,"-1",2) == 0){
422             token = TERMINATING_RESULT;
423             save_string("0-1");
424             linep += 2;
425         }
426         else if(strncmp((const char *) linep,"-0-0",4) == 0){
427             token = MOVE;
428             save_q_castle();
429             linep += 4;
430         }
431         else if(strncmp((const char *) linep,"-0",2) == 0){
432             token = MOVE;
433             save_k_castle();
434             linep += 2;
435         }
436         else{
437             /* MOVE_NUMBER */
438         }
439     }
440     else if(initial_digit == '1'){
441         if(strncmp((const char *) linep,"-0",2) == 0){
442             token = TERMINATING_RESULT;
443             save_string("1-0");
444             linep += 2;
445         }
446         else if(strncmp((const char *) linep,"/2",2) == 0){
447             token = TERMINATING_RESULT;
448             linep += 2;
449             /* Check for the full form. */
450             if(strncmp((const char *) linep,"-1/2",4) == 0){
451                 token = TERMINATING_RESULT;
452                 linep += 4;
453             }
454             /* Make sure that the full form of the draw result
455              * is saved. 
456              */
457             save_string("1/2-1/2");
458         }
459         else{
460             /* MOVE_NUMBER */
461         }
462     }
463     else{
464         /* MOVE_NUMBER */
465     }
466     if(token == MOVE_NUMBER){
467         /* Gather the remaining digits. */
468         while(isdigit((unsigned) *linep)){
469             linep++;
470         }
471     }
472     if(token == MOVE_NUMBER){
473         /* Fill out the fields of yylval. */
474         if(extract_yytext(symbol_start,linep)){
475                 yylval.move_number = 0;
476                 (void) sscanf((const char *)yytext,"%u",&yylval.move_number);
477                 /* Skip any trailing dots. */
478                 while(*linep == '.'){
479                     linep++;
480                 }
481             }
482         else{
483             token = NO_TOKEN;
484         }
485     }
486     else{
487         /* TERMINATING_RESULT and MOVE have already been dealt with. */
488     }
489     resulting_line.line = line;
490     resulting_line.linep = linep;
491     resulting_line.token = token;
492     return resulting_line;
493 }
494
495     /* Look up tag_string in TagList[] and return its _TAG
496      * value or -1 if it isn't there.
497      * Although the strings are sorted initially, further
498      * tags identified in the source files will be appended
499      * without further sorting. So we cannot use a binary
500      * search on the list.
501      */
502 static int
503 identify_tag(const char *tag_string)
504 {   unsigned tag_index;
505
506     for(tag_index = 0; tag_index < tag_list_length; tag_index++){
507         if(strcmp(tag_string,TagList[tag_index]) == 0){
508             return tag_index;
509         }
510     }
511     /* Not found. */
512     return -1;
513 }
514
515         /* Starting from linep in line, gather up the tag name.
516          * Skip over any preceding white space.
517          */
518 LinePair
519 gather_tag(char *line, unsigned char *linep)
520 {   LinePair resulting_line;
521     char ch;
522     unsigned len = 0;
523
524     do{
525         /* Check for end of line while skipping white space. */
526         if(*linep == '\0'){
527             line = next_input_line(yyin);
528             linep = (unsigned char *) line;
529         }
530         if(line != NULL) {
531             while(ChTab[(unsigned)*linep] == WHITESPACE){
532                 linep++;
533             }
534         }
535     }
536     while((line != NULL) && (ChTab[(unsigned)*linep] == '\0'));
537
538     if(line != NULL){
539         ch = *linep++;
540         while(isalpha((unsigned) ch) || isdigit((unsigned) ch) || (ch == '_')){
541             len++;
542             ch = *linep++;
543         }
544         /* The last one wasn't part of the tag. */
545         linep--;
546         if(len > 0){
547             int tag_item;
548             char *tag_string;
549
550             /* Allocate space for the result. */
551             tag_string = MallocOrDie(len+1);
552             strncpy((char *)tag_string,(const char *)(linep-len),len);
553             tag_string[len] = '\0';
554             tag_item = identify_tag(tag_string);
555             if(tag_item < 0){
556                 tag_item = make_new_tag(tag_string);
557             }
558             if(tag_item >= 0 && ((unsigned) tag_item) < tag_list_length){
559                 yylval.tag_index = tag_item;
560                 resulting_line.token = TAG;
561                 (void) free((void *)tag_string);
562             }
563             else{
564                 fprintf(GlobalState.logfile,
565                     "Internal error: invalid tag index %d in gather_tag.\n",
566                     tag_item);
567                 exit(1);
568             }
569         }
570         else{
571             resulting_line.token = NO_TOKEN;
572         }
573     }
574     else{
575         resulting_line.token = NO_TOKEN;
576     }
577     resulting_line.line = line;
578     resulting_line.linep = linep;
579     return resulting_line;
580 }
581
582 static Boolean
583 extract_yytext(const unsigned char *symbol_start,const unsigned char *linep)
584 {   /* Whether the string fitted. */
585     Boolean Ok = TRUE;
586     long len = linep-symbol_start;
587
588     if(len < MAX_YYTEXT){
589         strncpy((char *) yytext,(const char *) symbol_start,len);
590         yytext[len] = '\0';
591     }
592     else{
593         strncpy((char *) yytext,(const char *) symbol_start,MAX_YYTEXT);
594         yytext[MAX_YYTEXT] = '\0';
595         if(!GlobalState.skipping_current_game)
596             fprintf(GlobalState.logfile,"Symbol %s exceeds length of %u.\n",
597                         yytext, MAX_YYTEXT);
598         Ok = FALSE;
599     }
600     return Ok;
601 }
602
603         /* Identify the next symbol.
604          * Don't take any action on EOF -- leave that to next_token.
605          */
606 static TokenType
607 get_next_symbol(void)
608 {   static char *line = NULL;
609     static unsigned char *linep = NULL;
610     /* The token to be returned. */
611     TokenType token;
612     LinePair resulting_line;
613
614     do{
615         /* Remember where in line the current symbol starts. */
616         const unsigned char *symbol_start;
617
618         /* Clear any remaining symbol. */
619         *yytext = '\0';
620         if(line == NULL){
621             line = next_input_line(yyin);
622             linep = (unsigned char *) line;
623             if(line != NULL){
624                 token = NO_TOKEN;
625             }
626             else{
627                 token = EOF_TOKEN;
628             }
629         }
630         else{
631             int next_char = *linep & 0x0ff;
632
633             /* Remember where we start. */
634             symbol_start = linep;
635             linep++;
636             token = ChTab[next_char];
637
638             switch(token){
639                 case WHITESPACE:
640                     while(ChTab[(unsigned)*linep] == WHITESPACE)
641                         linep++;
642                     token = NO_TOKEN;
643                     break;
644                 case TAG_START:
645                     resulting_line = gather_tag(line,linep);
646                     /* Pick up where we are now. */
647                     line = resulting_line.line;
648                     linep = resulting_line.linep;
649                     token = resulting_line.token;
650                     break;
651                 case TAG_END:
652                     token = NO_TOKEN;
653                     break;
654                 case DOUBLE_QUOTE:
655                     resulting_line = gather_string(line,linep);
656                     /* Pick up where we are now. */
657                     line = resulting_line.line;
658                     linep = resulting_line.linep;
659                     token = resulting_line.token;
660                     break;
661                 case COMMENT_START:
662                     resulting_line = gather_comment(line,linep);
663                     /* Pick up where we are now. */
664                     line = resulting_line.line;
665                     linep = resulting_line.linep;
666                     token = resulting_line.token;
667                     break;
668                 case COMMENT_END:
669                     if(!GlobalState.skipping_current_game) {
670                         fprintf(GlobalState.logfile,"Unmatched comment end.\n");
671                     }
672                     token = NO_TOKEN;
673                     break;
674                 case NAG:
675                     while(isdigit((unsigned) *linep)){
676                         linep++;
677                     }
678                     if(extract_yytext(symbol_start,linep)){
679                         save_string((const char *) yytext);
680                     }
681                     else{
682                         token = NO_TOKEN;
683                     }
684                     break;
685                 case ANNOTATE:
686                     /* Don't return anything in case of error. */
687                     token = NO_TOKEN;
688                     while(ChTab[(unsigned)*linep] == ANNOTATE){
689                         linep++;
690                     }
691                     if(extract_yytext(symbol_start,linep)){
692                         switch(yytext[0]){
693                             case '!':
694                                 switch(yytext[1]){
695                                     case '!':
696                                         save_string("$3");
697                                         break;
698                                     case '?':
699                                         save_string("$5");
700                                         break;
701                                     default:
702                                         save_string("$1");
703                                         break;
704                                 }
705                                 token = NAG;
706                                 break;
707                             case '?':
708                                 switch(yytext[1]){
709                                     case '!':
710                                         save_string("$6");
711                                         break;
712                                     case '?':
713                                         save_string("$4");
714                                         break;
715                                     default:
716                                         save_string("$2");
717                                         break;
718                                 }
719                                 token = NAG;
720                                 break;
721                         }
722                     }
723                     break;
724                 case CHECK_SYMBOL:
725                     /* Allow ++ */
726                     while(ChTab[(unsigned)*linep] == CHECK_SYMBOL){
727                         linep++;
728                     }
729                     break;
730                 case DOT:
731                     while(ChTab[(unsigned)*linep] == DOT)
732                         linep++;
733                     token = NO_TOKEN;
734                     break;
735                 case PERCENT:
736                     /* Trash the rest of the line. */
737                     line = next_input_line(yyin);
738                     linep = (unsigned char *) line;
739                     token = NO_TOKEN;
740                     break;
741                 case ESCAPE:
742                     /* @@@ What to do about this? */
743                     if(*linep != '\0'){
744                         linep++;
745                     }
746                     token = NO_TOKEN;
747                     break;
748                 case ALPHA:
749                     /* Not all ALPHAs are move characters. */
750                     if(MoveChars[next_char]){
751                         /* Scan through the possible move characters. */
752                         while(MoveChars[*linep & 0x0ff]){
753                             linep++;
754                         }
755                         if(extract_yytext(symbol_start,linep)){
756                             /* Only classify it as a move if it
757                              * seems to be a complete move.
758                              */
759                             if(move_seems_valid(yytext)){
760                                 save_move(yytext);
761                                 token = MOVE;
762                             }
763                             else{
764                                 if(!GlobalState.skipping_current_game){
765                                     print_error_context(GlobalState.logfile);
766                                     fprintf(GlobalState.logfile,
767                                                 "Unknown move text %s.\n",yytext);
768                                 }
769                                 token = NO_TOKEN;
770                             }
771                         }
772                         else{
773                             token = NO_TOKEN;
774                         }
775                     }
776                     else{
777                         if(!GlobalState.skipping_current_game){
778                             print_error_context(GlobalState.logfile);
779                             fprintf(GlobalState.logfile,
780                                     "Unknown character %c (Hex: %x).\n",
781                                     next_char,next_char);
782                         }
783                         /* Skip any sequence of them. */
784                         while(ChTab[(unsigned)*linep] == ERROR_TOKEN)
785                             linep++;
786                     }
787                     break;
788                 case DIGIT:
789                     /* Remember that 0 can start 0-1 and 0-0.
790                      * Remember that 1 can start 1-0 and 1/2.
791                      */
792                     resulting_line = gather_possible_numeric(
793                                         line,linep,next_char);
794                     /* Pick up where we are now. */
795                     line = resulting_line.line;
796                     linep = resulting_line.linep;
797                     token = resulting_line.token;
798                     break;
799                 case EOF_TOKEN:
800                     break;
801                 case RAV_START:
802                     RAV_level++;
803                     break;
804                 case RAV_END:
805                     if(RAV_level > 0){
806                         RAV_level--;
807                     }
808                     else{
809                         if(!GlobalState.skipping_current_game){
810                             print_error_context(GlobalState.logfile);
811                             fprintf(GlobalState.logfile,"Too many ')' found.\n");
812                         }
813                         token = NO_TOKEN;
814                     }
815                     break;
816                 case STAR:
817                     save_string("*");
818                     token = TERMINATING_RESULT;
819                     break;
820                 case DASH:
821                     if(ChTab[(unsigned) *linep] == DASH) {
822                         linep++;
823                         save_move((const unsigned char *) NULL_MOVE_STRING);
824                         token = MOVE;
825                     }
826                     else {
827                         fprintf(GlobalState.logfile,"Single '-' not allowed.\n");
828                         print_error_context(GlobalState.logfile);
829                         token = NO_TOKEN;
830                     }
831                     break;
832                 case EOS:
833                     /* End of the string. */
834                     line = next_input_line(yyin);
835                     linep = (unsigned char *) line;
836                     token = NO_TOKEN;
837                     break;
838                 case ERROR_TOKEN:
839                     if(!GlobalState.skipping_current_game){
840                         print_error_context(GlobalState.logfile);
841                         fprintf(GlobalState.logfile,
842                                 "Unknown character %c (Hex: %x).\n",
843                                 next_char,next_char);
844                     }
845                     /* Skip any sequence of them. */
846                     while(ChTab[(unsigned)*linep] == ERROR_TOKEN)
847                         linep++;
848                     break;
849                 case OPERATOR:
850                     print_error_context(GlobalState.logfile);
851                     fprintf(GlobalState.logfile,
852                         "Operator in illegal context: %c.\n",*symbol_start);
853                     /* Skip any sequence of them. */
854                     while(ChTab[(unsigned)*linep] == OPERATOR)
855                         linep++;
856                     token = NO_TOKEN;
857                     break;
858                 default:
859                     if(!GlobalState.skipping_current_game){
860                         print_error_context(GlobalState.logfile);
861                         fprintf(GlobalState.logfile,
862                             "Internal error: Missing case for %d on char %x.\n",
863                             token,next_char);
864                     }
865                     token = NO_TOKEN;
866                     break;
867             }
868         }
869     } while(token == NO_TOKEN);
870     return token;
871 }
872
873 TokenType
874 next_token(void)
875 {   TokenType token = get_next_symbol();
876
877         /* Don't call yywrap if parsing the ECO file. */
878     while((token == EOF_TOKEN) && !GlobalState.parsing_ECO_file &&
879                         !yywrap()){
880         token = get_next_symbol();
881     }
882     return token;
883 }
884
885         /* Return TRUE if token is one to skip when looking for
886          * the start or end of a game.
887          */
888 static Boolean
889 skip_token(TokenType token)
890 {
891     switch(token){
892         case TERMINATING_RESULT:
893         case TAG:
894         case MOVE:
895         case EOF_TOKEN:
896             return FALSE;
897         default:
898             return TRUE;
899     }
900 }
901
902         /* Skip tokens until the next game looks like it is
903          * about to start. This is signalled by
904          * a tag section a terminating result from the
905          * previous game, or a move.
906          */
907 TokenType
908 skip_to_next_game(TokenType token)
909 {   
910     if(skip_token(token)){
911         GlobalState.skipping_current_game = TRUE;
912         do{
913             if(token == COMMENT){
914                 /* Free the space. */
915                 if((yylval.comment != NULL) &&
916                                 (yylval.comment->Comment != NULL)){
917                     free_string_list(yylval.comment->Comment);
918                     free((void *)yylval.comment);
919                     yylval.comment = NULL;
920                 }
921             }
922             token = next_token();
923         } while(skip_token(token));
924         GlobalState.skipping_current_game = FALSE;
925     }
926     return token;
927 }
928
/* Record queen-side castling using its standard text form. */
static void
save_q_castle(void)
{
    static const unsigned char queenside_castle[] = "O-O-O";

    save_move(queenside_castle);
}
935
/* Record king-side castling using its standard text form. */
static void
save_k_castle(void)
{
    static const unsigned char kingside_castle[] = "O-O";

    save_move(kingside_castle);
}
942
943         /* Make a copy of the matched text of the move. */
944 static void
945 save_move(const unsigned char *move)
946 {
947     /* Decode the move into its components. */
948     yylval.move_details = decode_move(move);
949     /* Remember the last move. */
950     strcpy((char *) last_move,(const char *) move);
951 }
952
953 void
954 restart_lex_for_new_game(void)
955 {
956     *last_move = '\0';
957     RAV_level = 0;
958 }
959
960         /* Make it possible to read multiple input files.
961          * These are held in list_of_files. The list
962          * is built up from the program's arguments.
963          */
964 static int current_file_num = 0;
965     /* Keep track of the list of PGN files.  These will either be the
966      * remaining arguments once flags have been dealt with, or
967      * those read from -c and -f arguments.
968      */
969 static FILE_LIST list_of_files = {
970         (const char **) NULL,
971         (SourceFileType *) NULL,
972         0, 0
973 };
974
975     /* Return the index number of the current input file in list_of_files. */
976 unsigned
977 current_file_number(void)
978 {
979     return current_file_num;
980 }
981
982
983         /* Read a list of lines from fp. These are the names of files
984          * to be added to the existing list_of_files.
985          * list_of_files.list must have a (char *)NULL on the end.
986          */
987 void
988 add_filename_list_from_file(FILE *fp,SourceFileType file_type)
989 {
990     if((list_of_files.files == NULL) || (list_of_files.max_files == 0)){
991          /* Allocate an initial number of pointers for the lines.
992           * This must always include an extra one for terminating NULL.
993           */
994          list_of_files.files = (const char **) MallocOrDie((INIT_LIST_SPACE+1)*
995                                                 sizeof(const char *));
996          list_of_files.file_type = (SourceFileType *) MallocOrDie((INIT_LIST_SPACE+1)*
997                                                 sizeof(SourceFileType));
998          list_of_files.max_files = INIT_LIST_SPACE;
999          list_of_files.num_files = 0;
1000     }
1001     if(list_of_files.files != NULL){
1002         /* Find the first line. */
1003         char *line = read_line(fp);
1004
1005         while(line != NULL){
1006              if(non_blank_line(line)){
1007                  add_filename_to_source_list(line,file_type);
1008              }
1009              else{
1010                  (void) free((void *)line);
1011              }
1012              line = read_line(fp);
1013         }
1014      }
1015 }
1016
1017 void
1018 add_filename_to_source_list(const char *filename,SourceFileType file_type)
1019 {    /* Where to put it. */
1020      unsigned location = list_of_files.num_files;
1021
1022      if(access(filename,R_OK) != 0){
1023          fprintf(GlobalState.logfile,"Unable to find %s\n",filename);
1024          exit(1);
1025      }
1026      else{
1027          /* Ok. */
1028      }
1029      /* See if there is room. */
1030      if(list_of_files.num_files == list_of_files.max_files){
1031          /* There isn't, so increase the amount of available space,
1032           * ensuring that there is always an extra slot for the terminating
1033           * NULL.
1034           */
1035         if((list_of_files.files == NULL) || (list_of_files.max_files == 0)){
1036              /* Allocate an initial number of pointers for the lines.
1037               * This must always include an extra one for terminating NULL.
1038               */
1039              list_of_files.files = (const char **) MallocOrDie((INIT_LIST_SPACE+1)*
1040                                                     sizeof(const char *));
1041              list_of_files.file_type = (SourceFileType *)
1042                                 MallocOrDie((INIT_LIST_SPACE+1)*
1043                                                     sizeof(SourceFileType));
1044              list_of_files.max_files = INIT_LIST_SPACE;
1045              list_of_files.num_files = 0;
1046         }
1047         else{
1048              list_of_files.files = (const char **)realloc((void *)list_of_files.files,
1049                     (list_of_files.max_files+MORE_LIST_SPACE+1)*
1050                                                     sizeof(const char *));
1051              list_of_files.file_type = (SourceFileType *)
1052                     realloc((void *)list_of_files.file_type,
1053                     (list_of_files.max_files+MORE_LIST_SPACE+1)*
1054                                                     sizeof(SourceFileType));
1055              list_of_files.max_files += MORE_LIST_SPACE;
1056             if((list_of_files.files == NULL) && (list_of_files.file_type == NULL)){
1057                 perror("");
1058                 abort();
1059             }
1060         }
1061      }
1062      /* We know that there is space. Ensure that CHECKFILEs are all
1063       * stored before NORMALFILEs.
1064       */
1065      if(file_type == CHECKFILE){
1066
1067          for(location = 0; (location < list_of_files.num_files) &&
1068                         (list_of_files.file_type[location] == CHECKFILE); location++){
1069             /* Do nothing. */
1070          }
1071          if(location < list_of_files.num_files){
1072             /* Put the new one here.
1073              * Move the rest down.
1074              */
1075             unsigned j;
1076
1077             for(j = list_of_files.num_files; j > location; j--){
1078                 list_of_files.files[j] = list_of_files.files[j-1];
1079                 list_of_files.file_type[j] = list_of_files.file_type[j-1];
1080             }
1081         }
1082     }
1083     list_of_files.files[location] = copy_string(filename);
1084     list_of_files.file_type[location] = file_type;
1085     list_of_files.num_files++;
1086     /* Keep the list properly terminated. */
1087     list_of_files.files[list_of_files.num_files] = (char *) NULL;
1088 }
1089
1090         /* Use infile as the input source. */
1091 static Boolean
1092 open_input(const char *infile)
1093 {   
1094     yyin = fopen(infile,"r");
1095     if(yyin != NULL){
1096         GlobalState.current_input_file = infile;
1097         if(GlobalState.verbose){
1098             fprintf(GlobalState.logfile,"Processing %s\n",
1099                                 GlobalState.current_input_file);
1100         }
1101     }
1102     return yyin != NULL;
1103 }
1104
1105 Boolean
1106 seek_to_begin(void)
1107 {
1108     if(GlobalState.start_position <= 0) {
1109         return TRUE;
1110     }
1111     if(fseek(yyin, GlobalState.start_position, SEEK_SET) != 0) {
1112         fprintf(GlobalState.logfile,"Cannot seek to position %ld in %s\n",
1113                 GlobalState.start_position,
1114                 GlobalState.current_input_file);
1115         return FALSE;
1116     }
1117     return TRUE;
1118 }
1119
1120 Boolean
1121 at_end_of_input(void)
1122 {
1123     long pos;
1124     if(GlobalState.end_position >= LONG_MAX) {
1125         return FALSE;
1126     }
1127     pos = ftell(yyin); 
1128     if(pos == -1) {
1129         fprintf(GlobalState.logfile,"Cannot find position in %s\n",
1130                 GlobalState.current_input_file); 
1131         return TRUE;
1132     }
1133     return pos >= GlobalState.end_position;
1134 }
1135
1136         /* Simple interface to open_input for the ECO file. */
1137 Boolean
1138 open_eco_file(const char *eco_file)
1139 {
1140     return open_input(eco_file);
1141 }
1142
1143         /* Open the input file whose number is the argument. */
1144 static Boolean
1145 open_input_file(int file_number)
1146 {
1147     /* Depending on the type of file, ensure that the
1148      * current_file_type is set correctly.
1149      */
1150     if(open_input(list_of_files.files[file_number])){
1151         GlobalState.current_file_type = list_of_files.file_type[file_number];
1152         GlobalState.current_file_number = file_number + GlobalState.start_file_number;
1153         return TRUE;
1154     }
1155     else{
1156         return FALSE;
1157     }
1158 }
1159
1160         /* Open the first input file. */
1161 Boolean
1162 open_first_file(void)
1163 {   Boolean ok = TRUE;
1164
1165     if(list_of_files.num_files == 0){
1166         /* Use standard input. */
1167         yyin = stdin;
1168         GlobalState.current_input_file = "stdin";
1169         /* @@@ Should this be set?
1170         GlobalState.current_file_type = NORMALFILE;
1171          */
1172         if(GlobalState.verbose){
1173             fprintf(GlobalState.logfile,"Processing %s\n",
1174                                 GlobalState.current_input_file);
1175         }
1176     }
1177     else if(open_input_file(0)){
1178     }
1179     else{
1180         fprintf(GlobalState.logfile,
1181                         "Unable to open the PGN file: %s\n",input_file_name(0));
1182         ok = FALSE;
1183     }
1184     return ok;
1185 }
1186
1187         /* Return the name of the file corresponding to the given
1188          * file number.
1189          */
1190 const char *
1191 input_file_name(unsigned file_number)
1192 {
1193     if(file_number >= list_of_files.num_files) {
1194         return NULL;
1195     }
1196     else {
1197         return list_of_files.files[file_number];
1198     }
1199 }
1200
1201
1202         /* Give some error information. */
1203 void
1204 print_error_context(FILE *fp)
1205 {
1206     if(GlobalState.current_input_file != NULL){
1207         fprintf(fp,"File %s: ",GlobalState.current_input_file);
1208     }
1209     fprintf(fp,"Line number: %lu\n",line_number);
1210 }
1211
1212
1213         /* Make the given str accessible. */
1214 static void
1215 save_string(const char *str)
1216 {   const size_t len = strlen(str);
1217     char *token;
1218
1219     token = MallocOrDie(len+1);
1220     strcpy(token,str);
1221     yylval.token_string = token;
1222 }
1223
1224         /* Return the next line of input from fp. */
1225 char *
1226 next_input_line(FILE *fp)
1227 {   /* Retain each line in turn, so as to be able to free it. */
1228     static char *line = NULL;
1229
1230     if(line != NULL){
1231         (void) free((void *)line);
1232     }
1233
1234     line = read_line(fp);
1235
1236     if(line != NULL){
1237         line_number++;
1238     }
1239     return line;
1240 }
1241
1242         /* Handle the end of a file. */
1243 int
1244 yywrap(void)
1245 {   int time_to_exit;
1246
1247     /* Beware of this being called in inappropriate circumstances. */
1248     if(list_of_files.files == NULL){
1249         /* There are no files. */
1250         time_to_exit = 1;
1251     }
1252     else if(input_file_name(current_file_num) == NULL){
1253         /* There was no last file! */
1254         time_to_exit = 1;
1255     }
1256     else{
1257         /* Close the input files.  */
1258         terminate_input();
1259         /* See if there is another. */
1260         current_file_num++;
1261         if(input_file_name(current_file_num) == NULL){
1262             /* We have processed the last file. */
1263             time_to_exit = 1;
1264         }
1265         else if(!open_input_file(current_file_num)){
1266             fprintf(GlobalState.logfile,"Unable to open the PGN file: %s\n",
1267                                         input_file_name(current_file_num));
1268             time_to_exit = 1;
1269         }
1270         else{
1271             /* Ok, we opened it. */
1272             time_to_exit = 0;
1273             /* Set everything up for a new file. */
1274             /* Depending on the type of file, ensure that the
1275              * current_file_type is set correctly.
1276              */
1277             GlobalState.current_file_type =
1278                 list_of_files.file_type[current_file_num];
1279             restart_lex_for_new_game();
1280             games_in_file = 0;
1281             reset_line_number();
1282         }
1283     }
1284     return time_to_exit;
1285 }
1286
1287
1288     /* Reset the file's line number. */
1289 void
1290 reset_line_number(void)
1291 {
1292     line_number = 0;
1293 }
1294
1295 static void
1296 terminate_input(void)
1297 {
1298     if((yyin != stdin) && (yyin != NULL)){
1299         (void) fclose(yyin);
1300         yyin = NULL;
1301     }
1302 }
1303
1304     /* Return the position in the current file. Returns -1 if it is unseekable. */
1305 long get_position(void)
1306 {
1307     return ftell(yyin);
1308 }