]> git.sesse.net Git - pgn-extract/blob - lex.c
0c8d26f192148e54c1ea5ac32f54587dc29dbfbc
[pgn-extract] / lex.c
1 /*
2  *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
3  *  Copyright (C) 1994-2014 David Barnes
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 1, or (at your option)
7  *  any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, write to the Free Software
16  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17  *
18  *  David Barnes may be contacted as D.J.Barnes@kent.ac.uk
19  *  http://www.cs.kent.ac.uk/people/staff/djb/
20  *
21  */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #if defined(__BORLANDC__) || defined(_MSC_VER)
28 #include <io.h>
29 #ifndef R_OK
30 #define R_OK 0
31 #endif
32 #else
33 #include <unistd.h>
34 #endif
35 #include "bool.h"
36 #include "mymalloc.h"
37 #include "defs.h"
38 #include "typedef.h"
39 #include "tokens.h"
40 #include "taglist.h"
41 #include "lex.h"
42 #include "moves.h"
43 #include "lists.h"
44 #include "decode.h"
45 #include "lines.h"
46 #include "grammar.h"
47 #include "apply.h"
48 #include "output.h"
49
50         /* Prototypes for the functions in this file. */
51 static void save_string(const char *result);
52 /* When a move is saved, what is known of its source and destination coordinates
53  * should also be saved.
54  */
55 static void save_move(const unsigned char *move);
56 static void save_q_castle(void);
57 static void save_k_castle(void);
58 static void terminate_input(void);
59 static Boolean extract_yytext(const unsigned char *symbol_start,
60                               const unsigned char *linep);
61 static Boolean open_input(const char *infile);
62 static Boolean open_input_file(int file_number);
63
64 static unsigned long line_number = 0;
65 /* Keep track of the Recursive Annotation Variation level. */
66 static unsigned RAV_level = 0;
67 /* Keep track of the last move found. */
68 static unsigned char last_move[MAX_MOVE_LEN+1];
69 /* How many games we have extracted from this file. */
70 static unsigned games_in_file = 0;
71
72 /* Provide an input file pointer.
73  * This is initialised in init_lex_tables.
74  */
75 static FILE *yyin = NULL;
76
77 /* Define space for holding matched tokens. */
78 #define MAX_YYTEXT 100
79 static unsigned char yytext[MAX_YYTEXT+1];
80 YYSTYPE yylval;
81
82 #define MAX_CHAR 256
83 #define ALPHA_DIST ('a'-'A')
84 /* Table of symbol classifications. */
85 static TokenType ChTab[MAX_CHAR];
86 /* A boolean array as to whether a character is allowed in a move or not. */
87 static short MoveChars[MAX_CHAR];
88
89 /* Define a table to hold the list of tag strings and the corresponding
90  * TagName index. This is initialised in init_list_of_known_tags().
91  */
92 static const char **TagList;
93 static unsigned tag_list_length = 0;
94
95         /* Initialise the TagList. This should be stored in alphabetical order,
96          * by virtue of the order in which the _TAG values are defined.
97          */
98 static void
99 init_list_of_known_tags(void)
100 {
101     unsigned i;
102     tag_list_length = ORIGINAL_NUMBER_OF_TAGS;
103     TagList = (const char **) MallocOrDie(tag_list_length*sizeof(*TagList));
104     /* Be paranoid and put a string in every entry. */
105     for(i = 0; i < tag_list_length; i++){
106         TagList[i] = "";
107     }
108     TagList[ANNOTATOR_TAG] = "Annotator";
109     TagList[BLACK_TAG] = "Black";
110     TagList[BLACK_ELO_TAG] = "BlackElo";
111     TagList[BLACK_NA_TAG] = "BlackNA";
112     TagList[BLACK_TITLE_TAG] = "BlackTitle";
113     TagList[BLACK_TYPE_TAG] = "BlackType";
114     TagList[BLACK_USCF_TAG] = "BlackUSCF";
115     TagList[BOARD_TAG] = "Board";
116     TagList[DATE_TAG] = "Date";
117     TagList[ECO_TAG] = "ECO";
118     TagList[PSEUDO_ELO_TAG] = "Elo";
119     TagList[EVENT_TAG] = "Event";
120     TagList[EVENT_DATE_TAG] = "EventDate";
121     TagList[EVENT_SPONSOR_TAG] = "EventSponsor";
122     TagList[FEN_TAG] = "FEN";
123     TagList[PSEUDO_FEN_PATTERN_TAG] = "FENPattern";
124     TagList[HASHCODE_TAG] = "HashCode";
125     TagList[LONG_ECO_TAG] = "LongECO";
126     TagList[MODE_TAG] = "Mode";
127     TagList[NIC_TAG] = "NIC";
128     TagList[OPENING_TAG] = "Opening";
129     TagList[PSEUDO_PLAYER_TAG] = "Player";
130     TagList[PLY_COUNT_TAG] = "PlyCount";
131     TagList[RESULT_TAG] = "Result";
132     TagList[ROUND_TAG] = "Round";
133     TagList[SECTION_TAG] = "Section";
134     TagList[SETUP_TAG] = "SetUp";
135     TagList[SITE_TAG] = "Site";
136     TagList[STAGE_TAG] = "Stage";
137     TagList[SUB_VARIATION_TAG] = "SubVariation";
138     TagList[TERMINATION_TAG] = "Termination";
139     TagList[TIME_TAG] = "Time";
140     TagList[TIME_CONTROL_TAG] = "TimeControl";
141     TagList[TOTAL_PLY_COUNT_TAG] = "TotalPlyCount";
142     TagList[UTC_DATE_TAG] = "UTCDate";
143     TagList[UTC_TIME_TAG] = "UTCTime";
144     TagList[VARIATION_TAG] = "Variation";
145     TagList[WHITE_TAG] = "White";
146     TagList[WHITE_ELO_TAG] = "WhiteElo";
147     TagList[WHITE_NA_TAG] = "WhiteNA";
148     TagList[WHITE_TITLE_TAG] = "WhiteTitle";
149     TagList[WHITE_TYPE_TAG] = "WhiteType";
150     TagList[WHITE_USCF_TAG] = "WhiteUSCF";
151 }
152
153     /* Extend TagList to accomodate a new tag string.
154      * Return the current value of tag_list_length as its
155      * index, having incremented its value.
156      */
157 static TagName
158 make_new_tag(const char *tag)
159 {   unsigned tag_index = tag_list_length;
160     tag_list_length++;
161     TagList = (const char **) ReallocOrDie((void *)TagList,
162                     tag_list_length*sizeof(*TagList));
163     TagList[tag_index] = copy_string(tag);
164     /* Ensure that the game header's tags array can accommodate
165      * the new tag.
166      */
167     increase_game_header_tags_length(tag_list_length);
168     return tag_index;
169 }
170
171 const char *
172 tag_header_string(TagName tag)
173 {
174     if(tag < tag_list_length){
175         return TagList[tag];
176     }
177     else{
178         fprintf(GlobalState.logfile,"Internal error in tag_header_string(%d)\n",
179                                 tag);
180         return NULL;
181     }
182 }
183
184         /* Initialise ChTab[], the classification of the initial characters
185          * of symbols.
186          * Initialise MoveChars, the classification of secondary characters
187          * of moves.
188          */
189 void
190 init_lex_tables(void)
191 {   int i;
192
193     /* Assume standard input will be used, until we know otherwise. */
194     yyin = stdin;
195     init_list_of_known_tags();
196     /* Initialise ChTab[]. */
197     for(i = 0; i < MAX_CHAR; i++){
198         ChTab[i] = ERROR_TOKEN;
199     }
200     ChTab[' '] = WHITESPACE;
201     ChTab['\t'] = WHITESPACE;
202     /* Take account of DOS line-ends. */
203     ChTab['\r'] = WHITESPACE;
204     ChTab['['] = TAG_START;
205     ChTab[']'] = TAG_END;
206     ChTab['"'] = DOUBLE_QUOTE;
207     ChTab['{'] = COMMENT_START;
208     ChTab['}'] = COMMENT_END;
209     ChTab['$'] = NAG;
210     ChTab['!'] = ANNOTATE;
211     ChTab['?'] = ANNOTATE;
212     ChTab['+'] = CHECK_SYMBOL;
213     ChTab['#'] = CHECK_SYMBOL;
214     ChTab['.'] = DOT;
215     ChTab['('] = RAV_START;
216     ChTab[')'] = RAV_END;
217     ChTab['%'] = PERCENT;
218     ChTab['\\'] = ESCAPE;
219     ChTab['\0'] = EOS;
220     ChTab['*'] = STAR;
221     ChTab['-'] = DASH;
222
223     /* Operators allowed only in the tag file. */
224     ChTab['<'] = OPERATOR;
225     ChTab['>'] = OPERATOR;
226     ChTab['='] = OPERATOR;        /* Overloaded in MoveChars. */
227
228     for(i = '0'; i <= '9'; i++){
229         ChTab[i] = DIGIT;
230     }
231     for(i = 'A'; i <= 'Z'; i++){
232         ChTab[i] = ALPHA;
233         ChTab[i+ALPHA_DIST] = ALPHA;
234     }
235     ChTab['_'] = ALPHA;
236
237     /* Classify the Russian piece letters as ALPHA. */
238     ChTab[RUSSIAN_KNIGHT_OR_KING] = ALPHA; /* King and Knight. */
239     ChTab[RUSSIAN_KING_SECOND_LETTER] = ALPHA; /* King (second character). */
240     ChTab[RUSSIAN_QUEEN] = ALPHA; /* Queen. */
241     ChTab[RUSSIAN_ROOK] = ALPHA; /* Rook. */
242     ChTab[RUSSIAN_BISHOP] = ALPHA; /* Bishop. */
243
244     /* Initialise MoveChars[]. */
245     for(i = 0; i < MAX_CHAR; i++){
246         MoveChars[i] = 0;
247     }
248     /* Files. */
249     for(i = 'a'; i <= 'h'; i++){
250         MoveChars[i] = 1;
251     }
252     /* Ranks. */
253     for(i = '1'; i <= '8'; i++){
254         MoveChars[i] = 1;
255     }
256     /* Upper-case pieces. */
257     MoveChars['K'] = 1;
258     MoveChars['Q'] = 1;
259     MoveChars['R'] = 1;
260     MoveChars['N'] = 1;
261     MoveChars['B'] = 1;
262     /* Lower-case pieces. */
263     MoveChars['k'] = 1;
264     MoveChars['q'] = 1;
265     MoveChars['r'] = 1;
266     MoveChars['n'] = 1;
267     MoveChars['b'] = 1;
268     /* Other u-c Dutch/German characters. */
269     MoveChars['D'] = 1; /* Queen. */
270     MoveChars['T'] = 1; /* Rook. */
271     MoveChars['S'] = 1; /* Knight. */
272     MoveChars['P'] = 1; /* Knight. */
273     MoveChars['L'] = 1; /* Bishop. */
274     /* Russian characters. */
275     MoveChars[RUSSIAN_KNIGHT_OR_KING] = 1; /* King and Knight. */
276     MoveChars[RUSSIAN_KING_SECOND_LETTER] = 1; /* King (second character). */
277     MoveChars[RUSSIAN_QUEEN] = 1; /* Queen. */
278     MoveChars[RUSSIAN_ROOK] = 1; /* Rook. */
279     MoveChars[RUSSIAN_BISHOP] = 1; /* Bishop. */
280
281     /* Capture and square separators. */
282     MoveChars['x'] = 1;
283     MoveChars['X'] = 1;
284     MoveChars[':'] = 1;
285     MoveChars['-'] = 1;
286     /* Promotion character. */
287     MoveChars['='] = 1;
288     /* Castling. */
289     MoveChars['O'] = 1;
290     MoveChars['o'] = 1;
291     MoveChars['0'] = 1;
292     /* Allow a trailing p for ep. */
293     MoveChars['p'] = 1;
294 }
295
296         /* Starting from linep in line, gather up the string until
297          * the closing quote.  Skip over the closing quote.
298          */
299 LinePair
300 gather_string(char *line, unsigned char *linep)
301 {   LinePair resulting_line;
302     char ch;
303     unsigned len = 0;
304     char *str;
305
306     do{ ch = *linep++;
307         len++;
308         if(ch == '\\'){
309             /* Escape the next character. */
310             len++;
311             ch = *linep++;
312             if(ch != '\0'){
313               len++;
314               ch = *linep++;
315             }
316         }
317     } while((ch != '"') && (ch != '\0'));
318     /* The last one doesn't belong in the string. */
319     len--;
320     /* Allocate space for the result. */
321     str = MallocOrDie(len+1);
322     strncpy(str,(const char *) (linep-len-1),len);
323     str[len] = '\0';
324     /* Store it in yylval. */
325     yylval.token_string = str;
326
327     /* Make sure that the string was properly terminated, by
328      * looking at the last character examined.
329      */
330     if(ch == '\0'){
331         /* Too far. */
332         if(!GlobalState.skipping_current_game) {
333             fprintf(GlobalState.logfile,"Missing closing quote in %s\n",line);
334         }
335         if(len > 1){
336             /* Move back to the null. */
337             linep--;
338             str[len-1] = '\0';
339         }
340     }
341     else{
342         /* We have already skipped over the closing quote. */
343     }
344     resulting_line.line = line;
345     resulting_line.linep = linep;
346     resulting_line.token = STRING;
347     return resulting_line;
348 }
349
350         /*
351          * Is ch of the given character class?
352          * External access to ChTab.
353          */
354 Boolean
355 is_character_class(unsigned char ch, TokenType character_class)
356 {
357     return ChTab[ch] == character_class;
358 }
359
360         /* Starting from linep in line, gather up a comment until
361          * the END_COMMENT.  Skip over the END_COMMENT.
362          */
363 static LinePair
364 gather_comment(char *line, unsigned char *linep)
365 {   LinePair resulting_line;
366     char ch;
367     unsigned len = 0;
368     /* The string list in which the current comment will be gathered. */
369     StringList *current_comment = NULL;
370     /* The pointer to be returned. */
371     CommentList *comment;
372
373     do{
374         /* Restart a new segment. */
375         len = 0;
376         do{ ch = *linep++;
377             len++;
378         } while((ch != '}') && (ch != '\0'));
379         /* The last one doesn't belong in the comment. */
380         len--;
381         if(GlobalState.keep_comments){
382             char *comment_str;
383
384             /* Allocate space for the result. */
385             comment_str = (char *)MallocOrDie(len+1);
386             strncpy(comment_str,(const char *) (linep-len-1) ,len);
387             comment_str[len] = '\0';
388             current_comment = save_string_list_item(current_comment,comment_str);
389         }
390         if(ch == '\0'){
391             line = next_input_line(yyin);
392             linep = (unsigned char *) line;
393         }
394     } while((ch != '}') && (line != NULL));
395
396     /* Set up the structure to be returned. */
397     comment = MallocOrDie(sizeof(*comment));
398     comment->Comment = current_comment;
399     comment->next = NULL;
400     yylval.comment = comment;
401
402     resulting_line.line = line;
403     resulting_line.linep = linep;
404     resulting_line.token = COMMENT;
405     return resulting_line;
406 }
407
408         /* Remember that 0 can start 0-1 and 0-0.
409          * Remember that 1 can start 1-0 and 1/2.
410          */
411 static LinePair
412 gather_possible_numeric(char *line, unsigned char *linep, char initial_digit)
413 {   LinePair resulting_line;
414     TokenType token = MOVE_NUMBER;
415     /* Keep a record of where this token started. */
416     const unsigned char *symbol_start = linep-1;
417
418     if(initial_digit == '0'){
419         /* Could be castling or a result. */
420         if(strncmp((const char *) linep,"-1",2) == 0){
421             token = TERMINATING_RESULT;
422             save_string("0-1");
423             linep += 2;
424         }
425         else if(strncmp((const char *) linep,"-0-0",4) == 0){
426             token = MOVE;
427             save_q_castle();
428             linep += 4;
429         }
430         else if(strncmp((const char *) linep,"-0",2) == 0){
431             token = MOVE;
432             save_k_castle();
433             linep += 2;
434         }
435         else{
436             /* MOVE_NUMBER */
437         }
438     }
439     else if(initial_digit == '1'){
440         if(strncmp((const char *) linep,"-0",2) == 0){
441             token = TERMINATING_RESULT;
442             save_string("1-0");
443             linep += 2;
444         }
445         else if(strncmp((const char *) linep,"/2",2) == 0){
446             token = TERMINATING_RESULT;
447             linep += 2;
448             /* Check for the full form. */
449             if(strncmp((const char *) linep,"-1/2",4) == 0){
450                 token = TERMINATING_RESULT;
451                 linep += 4;
452             }
453             /* Make sure that the full form of the draw result
454              * is saved. 
455              */
456             save_string("1/2-1/2");
457         }
458         else{
459             /* MOVE_NUMBER */
460         }
461     }
462     else{
463         /* MOVE_NUMBER */
464     }
465     if(token == MOVE_NUMBER){
466         /* Gather the remaining digits. */
467         while(isdigit((unsigned) *linep)){
468             linep++;
469         }
470     }
471     if(token == MOVE_NUMBER){
472         /* Fill out the fields of yylval. */
473         if(extract_yytext(symbol_start,linep)){
474                 yylval.move_number = 0;
475                 (void) sscanf((const char *)yytext,"%u",&yylval.move_number);
476                 /* Skip any trailing dots. */
477                 while(*linep == '.'){
478                     linep++;
479                 }
480             }
481         else{
482             token = NO_TOKEN;
483         }
484     }
485     else{
486         /* TERMINATING_RESULT and MOVE have already been dealt with. */
487     }
488     resulting_line.line = line;
489     resulting_line.linep = linep;
490     resulting_line.token = token;
491     return resulting_line;
492 }
493
494     /* Look up tag_string in TagList[] and return its _TAG
495      * value or -1 if it isn't there.
496      * Although the strings are sorted initially, further
497      * tags identified in the source files will be appended
498      * without further sorting. So we cannot use a binary
499      * search on the list.
500      */
501 static int
502 identify_tag(const char *tag_string)
503 {   unsigned tag_index;
504
505     for(tag_index = 0; tag_index < tag_list_length; tag_index++){
506         if(strcmp(tag_string,TagList[tag_index]) == 0){
507             return tag_index;
508         }
509     }
510     /* Not found. */
511     return -1;
512 }
513
514         /* Starting from linep in line, gather up the tag name.
515          * Skip over any preceding white space.
516          */
517 LinePair
518 gather_tag(char *line, unsigned char *linep)
519 {   LinePair resulting_line;
520     char ch;
521     unsigned len = 0;
522
523     do{
524         /* Check for end of line while skipping white space. */
525         if(*linep == '\0'){
526             line = next_input_line(yyin);
527             linep = (unsigned char *) line;
528         }
529         if(line != NULL) {
530             while(ChTab[(unsigned)*linep] == WHITESPACE){
531                 linep++;
532             }
533         }
534     }
535     while((line != NULL) && (ChTab[(unsigned)*linep] == '\0'));
536
537     if(line != NULL){
538         ch = *linep++;
539         while(isalpha((unsigned) ch) || isdigit((unsigned) ch) || (ch == '_')){
540             len++;
541             ch = *linep++;
542         }
543         /* The last one wasn't part of the tag. */
544         linep--;
545         if(len > 0){
546             int tag_item;
547             char *tag_string;
548
549             /* Allocate space for the result. */
550             tag_string = MallocOrDie(len+1);
551             strncpy((char *)tag_string,(const char *)(linep-len),len);
552             tag_string[len] = '\0';
553             tag_item = identify_tag(tag_string);
554             if(tag_item < 0){
555                 tag_item = make_new_tag(tag_string);
556             }
557             if(tag_item >= 0 && ((unsigned) tag_item) < tag_list_length){
558                 yylval.tag_index = tag_item;
559                 resulting_line.token = TAG;
560                 (void) free((void *)tag_string);
561             }
562             else{
563                 fprintf(GlobalState.logfile,
564                     "Internal error: invalid tag index %d in gather_tag.\n",
565                     tag_item);
566                 exit(1);
567             }
568         }
569         else{
570             resulting_line.token = NO_TOKEN;
571         }
572     }
573     else{
574         resulting_line.token = NO_TOKEN;
575     }
576     resulting_line.line = line;
577     resulting_line.linep = linep;
578     return resulting_line;
579 }
580
581 static Boolean
582 extract_yytext(const unsigned char *symbol_start,const unsigned char *linep)
583 {   /* Whether the string fitted. */
584     Boolean Ok = TRUE;
585     long len = linep-symbol_start;
586
587     if(len < MAX_YYTEXT){
588         strncpy((char *) yytext,(const char *) symbol_start,len);
589         yytext[len] = '\0';
590     }
591     else{
592         strncpy((char *) yytext,(const char *) symbol_start,MAX_YYTEXT);
593         yytext[MAX_YYTEXT] = '\0';
594         if(!GlobalState.skipping_current_game)
595             fprintf(GlobalState.logfile,"Symbol %s exceeds length of %u.\n",
596                         yytext, MAX_YYTEXT);
597         Ok = FALSE;
598     }
599     return Ok;
600 }
601
602         /* Identify the next symbol.
603          * Don't take any action on EOF -- leave that to next_token.
604          */
605 static TokenType
606 get_next_symbol(void)
607 {   static char *line = NULL;
608     static unsigned char *linep = NULL;
609     /* The token to be returned. */
610     TokenType token;
611     LinePair resulting_line;
612
613     do{
614         /* Remember where in line the current symbol starts. */
615         const unsigned char *symbol_start;
616
617         /* Clear any remaining symbol. */
618         *yytext = '\0';
619         if(line == NULL){
620             line = next_input_line(yyin);
621             linep = (unsigned char *) line;
622             if(line != NULL){
623                 token = NO_TOKEN;
624             }
625             else{
626                 token = EOF_TOKEN;
627             }
628         }
629         else{
630             int next_char = *linep & 0x0ff;
631
632             /* Remember where we start. */
633             symbol_start = linep;
634             linep++;
635             token = ChTab[next_char];
636
637             switch(token){
638                 case WHITESPACE:
639                     while(ChTab[(unsigned)*linep] == WHITESPACE)
640                         linep++;
641                     token = NO_TOKEN;
642                     break;
643                 case TAG_START:
644                     resulting_line = gather_tag(line,linep);
645                     /* Pick up where we are now. */
646                     line = resulting_line.line;
647                     linep = resulting_line.linep;
648                     token = resulting_line.token;
649                     break;
650                 case TAG_END:
651                     token = NO_TOKEN;
652                     break;
653                 case DOUBLE_QUOTE:
654                     resulting_line = gather_string(line,linep);
655                     /* Pick up where we are now. */
656                     line = resulting_line.line;
657                     linep = resulting_line.linep;
658                     token = resulting_line.token;
659                     break;
660                 case COMMENT_START:
661                     resulting_line = gather_comment(line,linep);
662                     /* Pick up where we are now. */
663                     line = resulting_line.line;
664                     linep = resulting_line.linep;
665                     token = resulting_line.token;
666                     break;
667                 case COMMENT_END:
668                     if(!GlobalState.skipping_current_game) {
669                         fprintf(GlobalState.logfile,"Unmatched comment end.\n");
670                     }
671                     token = NO_TOKEN;
672                     break;
673                 case NAG:
674                     while(isdigit((unsigned) *linep)){
675                         linep++;
676                     }
677                     if(extract_yytext(symbol_start,linep)){
678                         save_string((const char *) yytext);
679                     }
680                     else{
681                         token = NO_TOKEN;
682                     }
683                     break;
684                 case ANNOTATE:
685                     /* Don't return anything in case of error. */
686                     token = NO_TOKEN;
687                     while(ChTab[(unsigned)*linep] == ANNOTATE){
688                         linep++;
689                     }
690                     if(extract_yytext(symbol_start,linep)){
691                         switch(yytext[0]){
692                             case '!':
693                                 switch(yytext[1]){
694                                     case '!':
695                                         save_string("$3");
696                                         break;
697                                     case '?':
698                                         save_string("$5");
699                                         break;
700                                     default:
701                                         save_string("$1");
702                                         break;
703                                 }
704                                 token = NAG;
705                                 break;
706                             case '?':
707                                 switch(yytext[1]){
708                                     case '!':
709                                         save_string("$6");
710                                         break;
711                                     case '?':
712                                         save_string("$4");
713                                         break;
714                                     default:
715                                         save_string("$2");
716                                         break;
717                                 }
718                                 token = NAG;
719                                 break;
720                         }
721                     }
722                     break;
723                 case CHECK_SYMBOL:
724                     /* Allow ++ */
725                     while(ChTab[(unsigned)*linep] == CHECK_SYMBOL){
726                         linep++;
727                     }
728                     break;
729                 case DOT:
730                     while(ChTab[(unsigned)*linep] == DOT)
731                         linep++;
732                     token = NO_TOKEN;
733                     break;
734                 case PERCENT:
735                     /* Trash the rest of the line. */
736                     line = next_input_line(yyin);
737                     linep = (unsigned char *) line;
738                     token = NO_TOKEN;
739                     break;
740                 case ESCAPE:
741                     /* @@@ What to do about this? */
742                     if(*linep != '\0'){
743                         linep++;
744                     }
745                     token = NO_TOKEN;
746                     break;
747                 case ALPHA:
748                     /* Not all ALPHAs are move characters. */
749                     if(MoveChars[next_char]){
750                         /* Scan through the possible move characters. */
751                         while(MoveChars[*linep & 0x0ff]){
752                             linep++;
753                         }
754                         if(extract_yytext(symbol_start,linep)){
755                             /* Only classify it as a move if it
756                              * seems to be a complete move.
757                              */
758                             if(move_seems_valid(yytext)){
759                                 save_move(yytext);
760                                 token = MOVE;
761                             }
762                             else{
763                                 if(!GlobalState.skipping_current_game){
764                                     print_error_context(GlobalState.logfile);
765                                     fprintf(GlobalState.logfile,
766                                                 "Unknown move text %s.\n",yytext);
767                                 }
768                                 token = NO_TOKEN;
769                             }
770                         }
771                         else{
772                             token = NO_TOKEN;
773                         }
774                     }
775                     else{
776                         if(!GlobalState.skipping_current_game){
777                             print_error_context(GlobalState.logfile);
778                             fprintf(GlobalState.logfile,
779                                     "Unknown character %c (Hex: %x).\n",
780                                     next_char,next_char);
781                         }
782                         /* Skip any sequence of them. */
783                         while(ChTab[(unsigned)*linep] == ERROR_TOKEN)
784                             linep++;
785                     }
786                     break;
787                 case DIGIT:
788                     /* Remember that 0 can start 0-1 and 0-0.
789                      * Remember that 1 can start 1-0 and 1/2.
790                      */
791                     resulting_line = gather_possible_numeric(
792                                         line,linep,next_char);
793                     /* Pick up where we are now. */
794                     line = resulting_line.line;
795                     linep = resulting_line.linep;
796                     token = resulting_line.token;
797                     break;
798                 case EOF_TOKEN:
799                     break;
800                 case RAV_START:
801                     RAV_level++;
802                     break;
803                 case RAV_END:
804                     if(RAV_level > 0){
805                         RAV_level--;
806                     }
807                     else{
808                         if(!GlobalState.skipping_current_game){
809                             print_error_context(GlobalState.logfile);
810                             fprintf(GlobalState.logfile,"Too many ')' found.\n");
811                         }
812                         token = NO_TOKEN;
813                     }
814                     break;
815                 case STAR:
816                     save_string("*");
817                     token = TERMINATING_RESULT;
818                     break;
819                 case DASH:
820                     if(ChTab[(unsigned) *linep] == DASH) {
821                         linep++;
822                         save_move((const unsigned char *) NULL_MOVE_STRING);
823                         token = MOVE;
824                     }
825                     else {
826                         fprintf(GlobalState.logfile,"Single '-' not allowed.\n");
827                         print_error_context(GlobalState.logfile);
828                         token = NO_TOKEN;
829                     }
830                     break;
831                 case EOS:
832                     /* End of the string. */
833                     line = next_input_line(yyin);
834                     linep = (unsigned char *) line;
835                     token = NO_TOKEN;
836                     break;
837                 case ERROR_TOKEN:
838                     if(!GlobalState.skipping_current_game){
839                         print_error_context(GlobalState.logfile);
840                         fprintf(GlobalState.logfile,
841                                 "Unknown character %c (Hex: %x).\n",
842                                 next_char,next_char);
843                     }
844                     /* Skip any sequence of them. */
845                     while(ChTab[(unsigned)*linep] == ERROR_TOKEN)
846                         linep++;
847                     break;
848                 case OPERATOR:
849                     print_error_context(GlobalState.logfile);
850                     fprintf(GlobalState.logfile,
851                         "Operator in illegal context: %c.\n",*symbol_start);
852                     /* Skip any sequence of them. */
853                     while(ChTab[(unsigned)*linep] == OPERATOR)
854                         linep++;
855                     token = NO_TOKEN;
856                     break;
857                 default:
858                     if(!GlobalState.skipping_current_game){
859                         print_error_context(GlobalState.logfile);
860                         fprintf(GlobalState.logfile,
861                             "Internal error: Missing case for %d on char %x.\n",
862                             token,next_char);
863                     }
864                     token = NO_TOKEN;
865                     break;
866             }
867         }
868     } while(token == NO_TOKEN);
869     return token;
870 }
871
872 TokenType
873 next_token(void)
874 {   TokenType token = get_next_symbol();
875
876         /* Don't call yywrap if parsing the ECO file. */
877     while((token == EOF_TOKEN) && !GlobalState.parsing_ECO_file &&
878                         !yywrap()){
879         token = get_next_symbol();
880     }
881     return token;
882 }
883
884         /* Return TRUE if token is one to skip when looking for
885          * the start or end of a game.
886          */
887 static Boolean
888 skip_token(TokenType token)
889 {
890     switch(token){
891         case TERMINATING_RESULT:
892         case TAG:
893         case MOVE:
894         case EOF_TOKEN:
895             return FALSE;
896         default:
897             return TRUE;
898     }
899 }
900
901         /* Skip tokens until the next game looks like it is
902          * about to start. This is signalled by
903          * a tag section a terminating result from the
904          * previous game, or a move.
905          */
906 TokenType
907 skip_to_next_game(TokenType token)
908 {   
909     if(skip_token(token)){
910         GlobalState.skipping_current_game = TRUE;
911         do{
912             if(token == COMMENT){
913                 /* Free the space. */
914                 if((yylval.comment != NULL) &&
915                                 (yylval.comment->Comment != NULL)){
916                     free_string_list(yylval.comment->Comment);
917                     free((void *)yylval.comment);
918                     yylval.comment = NULL;
919                 }
920             }
921             token = next_token();
922         } while(skip_token(token));
923         GlobalState.skipping_current_game = FALSE;
924     }
925     return token;
926 }
927
928         /* Save castling moves in a standard way. */
929 static void
930 save_q_castle(void)
931 {
932     save_move((const unsigned char *) "O-O-O");
933 }
934
935         /* Save castling moves in a standard way. */
936 static void
937 save_k_castle(void)
938 {
939     save_move((const unsigned char *) "O-O");
940 }
941
942         /* Make a copy of the matched text of the move. */
943 static void
944 save_move(const unsigned char *move)
945 {
946     /* Decode the move into its components. */
947     yylval.move_details = decode_move(move);
948     /* Remember the last move. */
949     strcpy((char *) last_move,(const char *) move);
950 }
951
952 void
953 restart_lex_for_new_game(void)
954 {
955     *last_move = '\0';
956     RAV_level = 0;
957 }
958
959         /* Make it possible to read multiple input files.
960          * These are held in list_of_files. The list
961          * is built up from the program's arguments.
962          */
963 static int current_file_num = 0;
964     /* Keep track of the list of PGN files.  These will either be the
965      * remaining arguments once flags have been dealt with, or
966      * those read from -c and -f arguments.
967      */
968 static FILE_LIST list_of_files = {
969         (const char **) NULL,
970         (SourceFileType *) NULL,
971         0, 0
972 };
973
974     /* Return the index number of the current input file in list_of_files. */
975 unsigned
976 current_file_number(void)
977 {
978     return current_file_num;
979 }
980
981
982         /* Read a list of lines from fp. These are the names of files
983          * to be added to the existing list_of_files.
984          * list_of_files.list must have a (char *)NULL on the end.
985          */
986 void
987 add_filename_list_from_file(FILE *fp,SourceFileType file_type)
988 {
989     if((list_of_files.files == NULL) || (list_of_files.max_files == 0)){
990          /* Allocate an initial number of pointers for the lines.
991           * This must always include an extra one for terminating NULL.
992           */
993          list_of_files.files = (const char **) MallocOrDie((INIT_LIST_SPACE+1)*
994                                                 sizeof(const char *));
995          list_of_files.file_type = (SourceFileType *) MallocOrDie((INIT_LIST_SPACE+1)*
996                                                 sizeof(SourceFileType));
997          list_of_files.max_files = INIT_LIST_SPACE;
998          list_of_files.num_files = 0;
999     }
1000     if(list_of_files.files != NULL){
1001         /* Find the first line. */
1002         char *line = read_line(fp);
1003
1004         while(line != NULL){
1005              if(non_blank_line(line)){
1006                  add_filename_to_source_list(line,file_type);
1007              }
1008              else{
1009                  (void) free((void *)line);
1010              }
1011              line = read_line(fp);
1012         }
1013      }
1014 }
1015
1016 void
1017 add_filename_to_source_list(const char *filename,SourceFileType file_type)
1018 {    /* Where to put it. */
1019      unsigned location = list_of_files.num_files;
1020
1021      if(access(filename,R_OK) != 0){
1022          fprintf(GlobalState.logfile,"Unable to find %s\n",filename);
1023          exit(1);
1024      }
1025      else{
1026          /* Ok. */
1027      }
1028      /* See if there is room. */
1029      if(list_of_files.num_files == list_of_files.max_files){
1030          /* There isn't, so increase the amount of available space,
1031           * ensuring that there is always an extra slot for the terminating
1032           * NULL.
1033           */
1034         if((list_of_files.files == NULL) || (list_of_files.max_files == 0)){
1035              /* Allocate an initial number of pointers for the lines.
1036               * This must always include an extra one for terminating NULL.
1037               */
1038              list_of_files.files = (const char **) MallocOrDie((INIT_LIST_SPACE+1)*
1039                                                     sizeof(const char *));
1040              list_of_files.file_type = (SourceFileType *)
1041                                 MallocOrDie((INIT_LIST_SPACE+1)*
1042                                                     sizeof(SourceFileType));
1043              list_of_files.max_files = INIT_LIST_SPACE;
1044              list_of_files.num_files = 0;
1045         }
1046         else{
1047              list_of_files.files = (const char **)realloc((void *)list_of_files.files,
1048                     (list_of_files.max_files+MORE_LIST_SPACE+1)*
1049                                                     sizeof(const char *));
1050              list_of_files.file_type = (SourceFileType *)
1051                     realloc((void *)list_of_files.file_type,
1052                     (list_of_files.max_files+MORE_LIST_SPACE+1)*
1053                                                     sizeof(SourceFileType));
1054              list_of_files.max_files += MORE_LIST_SPACE;
1055             if((list_of_files.files == NULL) && (list_of_files.file_type == NULL)){
1056                 perror("");
1057                 abort();
1058             }
1059         }
1060      }
1061      /* We know that there is space. Ensure that CHECKFILEs are all
1062       * stored before NORMALFILEs.
1063       */
1064      if(file_type == CHECKFILE){
1065
1066          for(location = 0; (location < list_of_files.num_files) &&
1067                         (list_of_files.file_type[location] == CHECKFILE); location++){
1068             /* Do nothing. */
1069          }
1070          if(location < list_of_files.num_files){
1071             /* Put the new one here.
1072              * Move the rest down.
1073              */
1074             unsigned j;
1075
1076             for(j = list_of_files.num_files; j > location; j--){
1077                 list_of_files.files[j] = list_of_files.files[j-1];
1078                 list_of_files.file_type[j] = list_of_files.file_type[j-1];
1079             }
1080         }
1081     }
1082     list_of_files.files[location] = copy_string(filename);
1083     list_of_files.file_type[location] = file_type;
1084     list_of_files.num_files++;
1085     /* Keep the list properly terminated. */
1086     list_of_files.files[list_of_files.num_files] = (char *) NULL;
1087 }
1088
1089         /* Use infile as the input source. */
1090 static Boolean
1091 open_input(const char *infile)
1092 {   
1093     yyin = fopen(infile,"r");
1094     if(yyin != NULL){
1095         GlobalState.current_input_file = infile;
1096         if(GlobalState.verbose){
1097             fprintf(GlobalState.logfile,"Processing %s\n",
1098                                 GlobalState.current_input_file);
1099         }
1100     }
1101     return yyin != NULL;
1102 }
1103
1104         /* Simple interface to open_input for the ECO file. */
1105 Boolean
1106 open_eco_file(const char *eco_file)
1107 {
1108     return open_input(eco_file);
1109 }
1110
1111         /* Open the input file whose number is the argument. */
1112 static Boolean
1113 open_input_file(int file_number)
1114 {
1115     /* Depending on the type of file, ensure that the
1116      * current_file_type is set correctly.
1117      */
1118     if(open_input(list_of_files.files[file_number])){
1119         GlobalState.current_file_type = list_of_files.file_type[file_number];
1120         return TRUE;
1121     }
1122     else{
1123         return FALSE;
1124     }
1125 }
1126
1127         /* Open the first input file. */
1128 Boolean
1129 open_first_file(void)
1130 {   Boolean ok = TRUE;
1131
1132     if(list_of_files.num_files == 0){
1133         /* Use standard input. */
1134         yyin = stdin;
1135         GlobalState.current_input_file = "stdin";
1136         /* @@@ Should this be set?
1137         GlobalState.current_file_type = NORMALFILE;
1138          */
1139         if(GlobalState.verbose){
1140             fprintf(GlobalState.logfile,"Processing %s\n",
1141                                 GlobalState.current_input_file);
1142         }
1143     }
1144     else if(open_input_file(0)){
1145     }
1146     else{
1147         fprintf(GlobalState.logfile,
1148                         "Unable to open the PGN file: %s\n",input_file_name(0));
1149         ok = FALSE;
1150     }
1151     return ok;
1152 }
1153
1154         /* Return the name of the file corresponding to the given
1155          * file number.
1156          */
1157 const char *
1158 input_file_name(unsigned file_number)
1159 {
1160     if(file_number >= list_of_files.num_files) {
1161         return NULL;
1162     }
1163     else {
1164         return list_of_files.files[file_number];
1165     }
1166 }
1167
1168
1169         /* Give some error information. */
1170 void
1171 print_error_context(FILE *fp)
1172 {
1173     if(GlobalState.current_input_file != NULL){
1174         fprintf(fp,"File %s: ",GlobalState.current_input_file);
1175     }
1176     fprintf(fp,"Line number: %lu\n",line_number);
1177 }
1178
1179
1180         /* Make the given str accessible. */
1181 static void
1182 save_string(const char *str)
1183 {   const size_t len = strlen(str);
1184     char *token;
1185
1186     token = MallocOrDie(len+1);
1187     strcpy(token,str);
1188     yylval.token_string = token;
1189 }
1190
1191         /* Return the next line of input from fp. */
1192 char *
1193 next_input_line(FILE *fp)
1194 {   /* Retain each line in turn, so as to be able to free it. */
1195     static char *line = NULL;
1196
1197     if(line != NULL){
1198         (void) free((void *)line);
1199     }
1200
1201     line = read_line(fp);
1202
1203     if(line != NULL){
1204         line_number++;
1205     }
1206     return line;
1207 }
1208
1209         /* Handle the end of a file. */
1210 int
1211 yywrap(void)
1212 {   int time_to_exit;
1213
1214     /* Beware of this being called in inappropriate circumstances. */
1215     if(list_of_files.files == NULL){
1216         /* There are no files. */
1217         time_to_exit = 1;
1218     }
1219     else if(input_file_name(current_file_num) == NULL){
1220         /* There was no last file! */
1221         time_to_exit = 1;
1222     }
1223     else{
1224         /* Close the input files.  */
1225         terminate_input();
1226         /* See if there is another. */
1227         current_file_num++;
1228         if(input_file_name(current_file_num) == NULL){
1229             /* We have processed the last file. */
1230             time_to_exit = 1;
1231         }
1232         else if(!open_input_file(current_file_num)){
1233             fprintf(GlobalState.logfile,"Unable to open the PGN file: %s\n",
1234                                         input_file_name(current_file_num));
1235             time_to_exit = 1;
1236         }
1237         else{
1238             /* Ok, we opened it. */
1239             time_to_exit = 0;
1240             /* Set everything up for a new file. */
1241             /* Depending on the type of file, ensure that the
1242              * current_file_type is set correctly.
1243              */
1244             GlobalState.current_file_type =
1245                 list_of_files.file_type[current_file_num];
1246             restart_lex_for_new_game();
1247             games_in_file = 0;
1248             reset_line_number();
1249         }
1250     }
1251     return time_to_exit;
1252 }
1253
1254
1255     /* Reset the file's line number. */
1256 void
1257 reset_line_number(void)
1258 {
1259     line_number = 0;
1260 }
1261
1262 static void
1263 terminate_input(void)
1264 {
1265     if((yyin != stdin) && (yyin != NULL)){
1266         (void) fclose(yyin);
1267         yyin = NULL;
1268     }
1269 }
1270
1271     /* Return the position in the current file. Returns -1 if it is unseekable. */
1272 long get_position(void)
1273 {
1274     return ftell(yyin);
1275 }