/*
 *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
 *  Copyright (C) 1994-2014 David Barnes
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 1, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  David Barnes may be contacted as D.J.Barnes@kent.ac.uk
 *  http://www.cs.kent.ac.uk/people/staff/djb/
 */
43 static TokenType current_symbol = NO_TOKEN;
45 /* Keep track of which RAV level we are at.
46 * This is used to check whether a TERMINATING_RESULT is the final one
47 * and whether NULL_MOVEs are allowed.
49 static unsigned RAV_level = 0;
51 /* Retain details of the header of a game.
52 * This comprises the Tags and any comment prefixing the
58 unsigned header_tags_length;
59 CommentList *prefix_comment;
62 static void ParseOptGameList(SourceFileType file_type);
63 static Boolean ParseGame(Move **returned_move_list);
64 Boolean ParseOptTagList(void);
65 Boolean ParseTag(void);
66 static Move *ParseMoveList(void);
67 static Move *ParseMoveAndVariants(void);
68 static Move *ParseMove(void);
69 static Move *ParseMoveUnit(void);
70 static CommentList *ParseOptCommentList(void);
71 Boolean ParseOptMoveNumber(void);
72 static StringList *ParseOptNAGList(void);
73 static Variation *ParseOptVariantList(void);
74 static Variation *ParseVariant(void);
75 static char *ParseResult(void);
77 static void setup_for_new_game(void);
79 static void check_result(char **Tags,const char *terminating_result);
80 static void free_comment_list(CommentList *comment_list);
81 static void DealWithEcoLine(Move *move_list);
82 static void DealWithGame(Move *move_list);
83 static Boolean finished_processing(void);
85 /* Initialise the game header structure to contain
86 * space for the default number of tags.
87 * The space will have to be increased if new tags are
88 * identified in the program source.
91 init_game_header(void)
94 GameHeader.header_tags_length = ORIGINAL_NUMBER_OF_TAGS;
95 GameHeader.Tags = (char **) MallocOrDie(GameHeader.header_tags_length*
96 sizeof(*GameHeader.Tags));
97 for(i = 0; i < GameHeader.header_tags_length; i++){
98 GameHeader.Tags[i] = (char *) NULL;
103 increase_game_header_tags_length(unsigned new_length)
106 if(new_length <= GameHeader.header_tags_length){
107 fprintf(GlobalState.logfile,
108 "Internal error: inappropriate length %d ",new_length);
109 fprintf(GlobalState.logfile,
110 " passed to increase_game_header_tags().\n");
113 GameHeader.Tags = (char **) ReallocOrDie((void *) GameHeader.Tags,
114 new_length*sizeof(*GameHeader.Tags));
115 for(i = GameHeader.header_tags_length; i < new_length; i++){
116 GameHeader.Tags[i] = NULL;
118 GameHeader.header_tags_length = new_length;
121 /* Try to open the given file. Error and exit on failure. */
123 must_open_file(const char *filename,const char *mode)
126 fp = fopen(filename,mode);
128 fprintf(GlobalState.logfile,"Unable to open the file: \"%s\"\n",
135 /* Print out on outfp the current details and
136 * terminate with a newline.
139 report_details(FILE *outfp)
141 if(GameHeader.Tags[WHITE_TAG] != NULL){
142 fprintf(outfp,"%s - ",GameHeader.Tags[WHITE_TAG]);
144 if(GameHeader.Tags[BLACK_TAG] != NULL){
145 fprintf(outfp,"%s ",GameHeader.Tags[BLACK_TAG]);
148 if(GameHeader.Tags[EVENT_TAG] != NULL){
149 fprintf(outfp,"%s ",GameHeader.Tags[EVENT_TAG]);
151 if(GameHeader.Tags[SITE_TAG] != NULL){
152 fprintf(outfp,"%s ",GameHeader.Tags[SITE_TAG]);
155 if(GameHeader.Tags[DATE_TAG] != NULL){
156 fprintf(outfp,"%s ",GameHeader.Tags[DATE_TAG]);
162 /* Check that terminating_result is consistent with
163 * Tags[RESULT_TAG]. If the latter is missing, fill it
164 * in from terminating_result.
167 check_result(char **Tags,const char *terminating_result)
168 { char *result_tag = Tags[RESULT_TAG];
170 if(terminating_result != NULL){
171 if((result_tag == NULL) || (*result_tag == '\0') ||
172 (strcmp(result_tag,"?") == 0)){
173 /* Use a copy of terminating result. */
174 result_tag = copy_string(terminating_result);
175 Tags[RESULT_TAG] = result_tag;
177 else if((result_tag != NULL) &&
178 (strcmp(terminating_result,"*") != 0) &&
179 (strcmp(result_tag,terminating_result) != 0)){
180 print_error_context(GlobalState.logfile);
181 fprintf(GlobalState.logfile,
182 "Inconsistent result strings in the following game.\n");
183 report_details(GlobalState.logfile);
191 /* Select which file to write to based upon the game state.
192 * This will depend upon:
193 * Whether the number of games per file is limited.
194 * Whether ECO_level > DONT_DIVIDE.
198 select_output_file(StateInfo *GameState,const char *eco)
200 if(GameState->games_per_file > 0){
201 if((GameState->num_games_matched % GameState->games_per_file) == 0){
202 /* Time to open the next one. */
205 if(GameState->outputfile != NULL){
206 (void) fclose(GameState->outputfile);
208 sprintf(filename,"%u%s",
209 GameState->next_file_number,
210 output_file_suffix(GameState->output_format));
211 GameState->outputfile = must_open_file(filename,"w");
212 GameState->next_file_number++;
215 else if(GameState->ECO_level > DONT_DIVIDE){
216 /* Open a file of the appropriate name. */
217 if(GameState->outputfile != NULL){
218 /* @@@ In practice, this might need refinement.
219 * Repeated opening and closing may prove inefficient.
221 (void) fclose(GameState->outputfile);
222 GameState->outputfile = open_eco_output_file(
223 GameState->ECO_level,
229 return GameState->outputfile;
233 * Conditions for finishing processing, other than all the input
234 * having been processed.
236 static Boolean finished_processing(void)
238 return (GlobalState.matching_game_number > 0 &&
239 GlobalState.num_games_matched == GlobalState.matching_game_number);
243 ParseOptGameList(SourceFileType file_type)
244 { Move *move_list = NULL;
246 while(ParseGame(&move_list) && !finished_processing()){
247 if(file_type == NORMALFILE){
248 DealWithGame(move_list);
250 else if(file_type == CHECKFILE){
251 DealWithGame(move_list);
253 else if(file_type == ECOFILE){
254 if(move_list != NULL){
255 DealWithEcoLine(move_list);
258 fprintf(GlobalState.logfile,"ECO line with zero moves.\n");
259 report_details(GlobalState.logfile);
265 free_move_list(move_list);
268 setup_for_new_game();
272 /* Parse a game and return a pointer to any valid list of moves
273 * in returned_move_list.
276 ParseGame(Move **returned_move_list)
277 { /* Boolean something_found = FALSE; */
278 CommentList *prefix_comment;
279 Move *move_list = NULL;
281 /* There shouldn't be a hanging comment before the result,
282 * but there sometimes is.
284 CommentList *hanging_comment;
286 /* Assume that we won't return anything. */
287 *returned_move_list = NULL;
288 /* Skip over any junk between games. */
289 current_symbol = skip_to_next_game(current_symbol);
290 prefix_comment = ParseOptCommentList();
291 if(prefix_comment != NULL){
292 /* Free this here, as it is hard to
293 * know whether it belongs to the game or the file.
294 * It is better to put game comments after the tags.
296 /* something_found = TRUE; */
297 free_comment_list(prefix_comment);
298 prefix_comment = NULL;
300 if(ParseOptTagList()){
301 /* something_found = TRUE; */
303 /* @@@ Beware of comments and/or tags without moves. */
304 move_list = ParseMoveList();
306 /* @@@ Look for a comment with no move text before the result. */
307 hanging_comment = ParseOptCommentList();
308 /* Append this to the final move, if there is one. */
310 /* Look for a result, even if there were no moves. */
311 result = ParseResult();
312 if(move_list != NULL){
313 /* Find the last move. */
314 Move *last_move = move_list;
316 while(last_move->next != NULL){
317 last_move = last_move->next;
319 if(hanging_comment != NULL) {
320 append_comments_to_move(last_move,hanging_comment);
323 /* Append it to the last move. */
324 last_move->terminating_result = result;
325 check_result(GameHeader.Tags,result);
326 *returned_move_list = move_list;
329 fprintf(GlobalState.logfile,"Missing result.\n");
330 report_details(GlobalState.logfile);
332 /* something_found = TRUE; */
335 /* @@@ Nothing to attach the comment to. */
336 (void) free((void *) hanging_comment);
337 hanging_comment = NULL;
339 * Workaround for games with zero moves.
340 * Check the result for consistency with the tags, but then
341 * there is no move to attach it to.
342 * When outputting a game, the missing result in this case
343 * will have to be supplied from the tags.
345 check_result(GameHeader.Tags,result);
347 (void) free((void *)result);
349 *returned_move_list = NULL;
351 return current_symbol != EOF_TOKEN;
355 ParseOptTagList(void)
356 { Boolean something_found = FALSE;
357 CommentList *prefix_comment;
360 something_found = TRUE;
363 /* Perform any consistency checks. */
364 if((GameHeader.Tags[SETUP_TAG] != NULL) &&
365 (strcmp(GameHeader.Tags[SETUP_TAG],"1") == 0)){
366 /* There must be a FEN_TAG to go with it. */
367 if(GameHeader.Tags[FEN_TAG] == NULL){
368 fprintf(GlobalState.logfile,
369 "Missing %s Tag to accompany %s Tag.\n",
370 tag_header_string(FEN_TAG),
371 tag_header_string(SETUP_TAG));
372 print_error_context(GlobalState.logfile);
376 prefix_comment = ParseOptCommentList();
377 if(prefix_comment != NULL){
378 GameHeader.prefix_comment = prefix_comment;
379 something_found = TRUE;
381 return something_found;
386 { Boolean TagFound = TRUE;
388 if(current_symbol == TAG){
389 TagName tag_index = yylval.tag_index;
391 current_symbol = next_token();
392 if(current_symbol == STRING){
393 char *tag_string = yylval.token_string;
395 if(tag_index < GameHeader.header_tags_length){
396 GameHeader.Tags[tag_index] = tag_string;
399 print_error_context(GlobalState.logfile);
400 fprintf(GlobalState.logfile,
401 "Internal error: Illegal tag index %d for %s\n",
402 tag_index,tag_string);
405 current_symbol = next_token();
408 print_error_context(GlobalState.logfile);
409 fprintf(GlobalState.logfile,"Missing tag string.\n");
412 else if(current_symbol == STRING){
413 print_error_context(GlobalState.logfile);
414 fprintf(GlobalState.logfile,"Missing tag for %s.\n",yylval.token_string);
415 (void) free((void *)yylval.token_string);
416 current_symbol = next_token();
427 { Move *head = NULL, *tail = NULL;
429 head = ParseMoveAndVariants();
434 while((next_move = ParseMoveAndVariants()) != NULL){
435 tail->next = next_move;
443 ParseMoveAndVariants(void)
444 { Move *move_details;
446 move_details = ParseMove();
447 if(move_details != NULL){
448 CommentList *comment;
450 move_details->Variants = ParseOptVariantList();
451 comment = ParseOptCommentList();
453 append_comments_to_move(move_details,comment);
462 { Move *move_details = NULL;
464 if(ParseOptMoveNumber()){
466 /* @@@ Watch out for finding just the number. */
467 move_details = ParseMoveUnit();
468 if(move_details != NULL){
469 CommentList *comment;
471 move_details->Nags = ParseOptNAGList();
472 comment = ParseOptCommentList();
474 append_comments_to_move(move_details,comment);
482 { Move *move_details = NULL;
484 if(current_symbol == MOVE){
485 move_details = yylval.move_details;
487 if(move_details->class == NULL_MOVE && RAV_level == 0) {
488 print_error_context(GlobalState.logfile);
489 fprintf(GlobalState.logfile, "Null moves (--) only allowed in variations.\n");
492 current_symbol = next_token();
493 if(current_symbol == CHECK_SYMBOL){
494 strcat((char *) move_details->move,"+");
495 current_symbol = next_token();
496 /* Sometimes + is followed by #, so cover this case. */
497 if(current_symbol == CHECK_SYMBOL){
498 current_symbol = next_token();
501 move_details->Comment = ParseOptCommentList();
507 ParseOptCommentList(void)
508 { CommentList *head = NULL, *tail = NULL;
510 while(current_symbol == COMMENT){
512 head = tail = yylval.comment;
515 tail->next = yylval.comment;
518 current_symbol = next_token();
524 ParseOptMoveNumber(void)
525 { Boolean something_found = FALSE;
527 if(current_symbol == MOVE_NUMBER){
528 current_symbol = next_token();
529 something_found = TRUE;
531 return something_found;
535 ParseOptNAGList(void)
536 { StringList *nags = NULL;
538 while(current_symbol == NAG){
539 if(GlobalState.keep_NAGs){
540 nags = save_string_list_item(nags,yylval.token_string);
543 (void) free((void *)yylval.token_string);
545 current_symbol = next_token();
551 ParseOptVariantList(void)
552 { Variation *head = NULL, *tail = NULL,
555 while((variation = ParseVariant()) != NULL){
557 head = tail = variation;
560 tail->next = variation;
569 { Variation *variation = NULL;
571 if(current_symbol == RAV_START){
572 CommentList *prefix_comment;
573 CommentList *suffix_comment;
578 variation = MallocOrDie(sizeof(Variation));
580 current_symbol = next_token();
581 prefix_comment = ParseOptCommentList();
582 if(prefix_comment != NULL){
584 moves = ParseMoveList();
586 print_error_context(GlobalState.logfile);
587 fprintf(GlobalState.logfile,"Missing move list in variation.\n");
589 result = ParseResult();
590 if((result != NULL) && (moves != NULL)){
591 /* Find the last move, to which to append the terminating
594 Move *last_move = moves;
595 CommentList *trailing_comment;
597 while(last_move->next != NULL){
598 last_move = last_move->next;
600 last_move->terminating_result = result;
601 /* Accept a comment after the result, but it will
602 * be printed out preceding the result.
604 trailing_comment = ParseOptCommentList();
605 if(trailing_comment != NULL){
606 append_comments_to_move(last_move,trailing_comment);
612 if(current_symbol == RAV_END){
614 current_symbol = next_token();
617 fprintf(GlobalState.logfile,"Missing ')' to close variation.\n");
619 suffix_comment = ParseOptCommentList();
620 if(suffix_comment != NULL){
622 variation->prefix_comment = prefix_comment;
623 variation->suffix_comment = suffix_comment;
624 variation->moves = moves;
625 variation->next = NULL;
632 { char *result = NULL;
634 if(current_symbol == TERMINATING_RESULT){
635 result = yylval.token_string;
637 /* In the interests of skipping any intervening material
638 * between games, set the lookahead to a dummy token.
640 current_symbol = NO_TOKEN;
643 current_symbol = next_token();
650 setup_for_new_game(void)
652 restart_lex_for_new_game();
656 /* Discard any data held in the GameHeader.Tags structure. */
661 for(tag = 0; tag < GameHeader.header_tags_length; tag++){
662 if(GameHeader.Tags[tag] != NULL){
663 free(GameHeader.Tags[tag]);
664 GameHeader.Tags[tag] = NULL;
669 /* Discard data from a gathered game. */
671 free_string_list(StringList *list)
677 if(next->str != NULL){
678 (void) free((void *)next->str);
680 (void) free((void *)next);
685 free_comment_list(CommentList *comment_list)
687 while(comment_list != NULL){
688 CommentList *this_comment = comment_list;
690 if(comment_list->Comment != NULL){
691 free_string_list(comment_list->Comment);
693 comment_list = comment_list->next;
694 (void) free((void *)this_comment);
699 free_variation(Variation *variation)
702 while(variation != NULL){
704 variation = variation->next;
705 if(next->prefix_comment != NULL){
706 free_comment_list(next->prefix_comment);
708 if(next->suffix_comment != NULL){
709 free_comment_list(next->suffix_comment);
711 if(next->moves != NULL){
712 (void) free_move_list((void *)next->moves);
714 (void) free((void *)next);
719 free_move_list(Move *move_list)
722 while(move_list != NULL){
724 move_list = move_list->next;
725 if(next->Nags != NULL){
726 free_string_list(next->Nags);
728 if(next->Comment != NULL){
729 free_comment_list(next->Comment);
731 if(next->Variants != NULL){
732 free_variation(next->Variants);
734 if(next->epd != NULL){
735 (void) free((void *)next->epd);
737 if(next->terminating_result != NULL){
738 (void) free((void *)next->terminating_result);
740 (void) free((void *)next);
744 /* Add str onto the tail of list and
745 * return the head of the resulting list.
748 save_string_list_item(StringList *list,const char *str)
751 StringList *new_item;
753 new_item = (StringList *)MallocOrDie(sizeof(*new_item));
755 new_item->next = NULL;
760 StringList *tail = list;
762 while(tail->next != NULL){
765 tail->next = new_item;
771 /* Append any comments in Comment onto the end of
772 * any associated with move.
775 append_comments_to_move(Move *move,CommentList *Comment)
778 /* Add in to the end of any already existing. */
779 if(move->Comment == NULL){
780 move->Comment = Comment;
783 /* Add in the final comment to
784 * the end of any existing for this move.
786 CommentList *tail = move->Comment;
788 while(tail->next != NULL){
791 tail->next = Comment;
797 DealWithGame(Move *move_list)
799 /* Record whether the game has been printed or not.
800 * This is used for the case of the -n flag which catches
801 * all non-printed games.
803 Boolean game_output = FALSE;
804 /* We need a dummy argument for apply_move_list. */
807 /* Update the count of how many games handled. */
808 GlobalState.num_games_processed++;
810 /* Fill in the information currently known. */
811 current_game.tags = GameHeader.Tags;
812 current_game.tags_length = GameHeader.header_tags_length;
813 current_game.prefix_comment = GameHeader.prefix_comment;
814 current_game.moves = move_list;
815 current_game.moves_checked = FALSE;
816 current_game.moves_ok = FALSE;
817 current_game.error_ply = 0;
819 /* Determine whether or not this game is wanted, on the
820 * basis of the various selection criteria available.
824 * apply_move_list checks out the moves.
825 * If it returns TRUE as a match, it will also fill in the
826 * current_game.final_hash_value and
827 * current_game.cumulative_hash_value
828 * fields of current_game so that these can be used in the
829 * previous_occurance function.
831 * If there are any tag criteria, it will be easy to quickly
832 * eliminate most games without going through the length
833 * process of game matching.
835 * If ECO adding is done, the order of checking may cause
836 * a conflict here since it won't be possible to reject a game
837 * based on its ECO code unless it already has one.
838 * Therefore, Check for the ECO tag only after everything else has
841 if(CheckTagDetailsNotECO(current_game.tags,current_game.tags_length) &&
842 apply_move_list(¤t_game,&plycount) &&
843 check_move_bounds(plycount) &&
844 check_textual_variations(current_game) &&
845 check_for_ending(current_game.moves) &&
846 check_for_only_checkmate(current_game.moves) &&
847 CheckECOTag(current_game.tags)){
848 /* If there is no original filename then the game is not a
851 const char *original_filename = previous_occurance(current_game, plycount);
853 if((original_filename == NULL) && GlobalState.suppress_originals){
854 /* Don't output first occurrences. */
856 else if((original_filename == NULL) || !GlobalState.suppress_duplicates){
857 GlobalState.num_games_matched++;
858 if(GlobalState.check_only) {
859 // We are only checking.
860 if(GlobalState.verbose){
861 /* Report progress on logfile. */
862 report_details(GlobalState.logfile);
865 else if(GlobalState.current_file_type == CHECKFILE){
866 /* We are only checking, so don't count this as a matched game. */
867 GlobalState.num_games_matched--;
869 else if(GlobalState.matching_game_number > 0 &&
870 GlobalState.num_games_matched != GlobalState.matching_game_number) {
871 /* This is not the right matching game to be output. */
874 /* This game is to be kept and output. */
875 FILE *outputfile = select_output_file(&GlobalState,
876 current_game.tags[ECO_TAG]);
878 /* See if we wish to separate out duplicates. */
879 if((original_filename != NULL) &&
880 (GlobalState.duplicate_file != NULL)){
881 static const char *last_input_file = NULL;
883 outputfile = GlobalState.duplicate_file;
884 if((last_input_file != GlobalState.current_input_file) &&
885 (GlobalState.current_input_file != NULL)){
886 /* Record which file this and succeeding
887 * duplicates come from.
889 print_str(outputfile,"{ From: ");
890 print_str(outputfile,
891 GlobalState.current_input_file);
892 print_str(outputfile," }");
893 terminate_line(outputfile);
894 last_input_file = GlobalState.current_input_file;
896 print_str(outputfile,"{ First found in: ");
897 print_str(outputfile,original_filename);
898 print_str(outputfile," }");
899 terminate_line(outputfile);
901 /* Now output what we have. */
902 output_game(current_game,outputfile);
904 if(GlobalState.verbose){
905 /* Report progress on logfile. */
906 report_details(GlobalState.logfile);
911 if(!game_output && (GlobalState.non_matching_file != NULL) &&
912 GlobalState.current_file_type != CHECKFILE){
913 /* The user wants to keep everything else. */
914 if(!current_game.moves_checked){
915 /* Make sure that the move text is in a reasonable state. */
916 (void) apply_move_list(¤t_game,&plycount);
918 if(current_game.moves_ok || GlobalState.keep_broken_games){
919 output_game(current_game,GlobalState.non_matching_file);
923 /* Game is finished with, so free everything. */
924 if(GameHeader.prefix_comment != NULL){
925 free_comment_list(GameHeader.prefix_comment);
927 /* Ensure that the GameHeader's prefix comment is NULL for
930 GameHeader.prefix_comment = NULL;
933 free_move_list(current_game.moves);
934 if((GlobalState.num_games_processed % 10) == 0){
935 fprintf(stderr,"Games: %lu\r",GlobalState.num_games_processed);
940 DealWithEcoLine(Move *move_list)
942 /* We need to know the length of a game to store with the
943 * hash information as a sanity check.
945 unsigned number_of_half_moves;
947 /* Fill in the information currently known. */
948 current_game.tags = GameHeader.Tags;
949 current_game.tags_length = GameHeader.header_tags_length;
950 current_game.prefix_comment = GameHeader.prefix_comment;
951 current_game.moves = move_list;
952 current_game.moves_checked = FALSE;
953 current_game.moves_ok = FALSE;
954 current_game.error_ply = 0;
956 /* apply_eco_move_list checks out the moves.
957 * It will also fill in the
958 * current_game.final_hash_value and
959 * current_game.cumulative_hash_value
960 * fields of current_game.
962 if(apply_eco_move_list(¤t_game,&number_of_half_moves)){
963 if(current_game.moves_ok){
964 /* Store the ECO code in the appropriate hash location. */
965 save_eco_details(current_game,number_of_half_moves);
969 /* Game is finished with, so free everything. */
970 if(GameHeader.prefix_comment != NULL){
971 free_comment_list(GameHeader.prefix_comment);
973 /* Ensure that the GameHeader's prefix comment is NULL for
976 GameHeader.prefix_comment = NULL;
979 free_move_list(current_game.moves);
982 /* If file_type == ECOFILE we are dealing with a file of ECO
983 * input rather than a normal game file.
986 yyparse(SourceFileType file_type)
988 setup_for_new_game();
989 current_symbol = skip_to_next_game(NO_TOKEN);
990 ParseOptGameList(file_type);
991 if(current_symbol == EOF_TOKEN){
995 else if(finished_processing()) {
996 /* Ok -- done all we need to. */
1000 fprintf(GlobalState.logfile,"End of input reached before end of file.\n");