2 * Program: pgn-extract: a Portable Game Notation (PGN) extractor.
3 * Copyright (C) 1994-2014 David Barnes
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 1, or (at your option)
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 * David Barnes may be contacted as D.J.Barnes@kent.ac.uk
19 * http://www.cs.kent.ac.uk/people/staff/djb/
// Single-token lookahead shared by the Parse* routines below; starts as NO_TOKEN.
43 static TokenType current_symbol = NO_TOKEN;
45 /* Keep track of which RAV level we are at.
46 * This is used to check whether a TERMINATING_RESULT is the final one
47 * and whether NULL_MOVEs are allowed.
// Compared against 0 in the null-move check performed when a MOVE token is consumed.
49 static unsigned RAV_level = 0;
51 /* Retain details of the header of a game.
52 * This comprises the Tags and any comment prefixing the
// NOTE(review): the opening of the GameHeader declaration and its Tags member
// are not visible in this extract; only the two members below survive.
// header_tags_length is used below as the number of slots in GameHeader.Tags.
58 unsigned header_tags_length;
59 CommentList *prefix_comment;
// Forward declarations for the recursive-descent parsing routines.
62 static void ParseOptGameList(SourceFileType file_type);
63 static Boolean ParseGame(Move **returned_move_list);
64 Boolean ParseOptTagList(void);
65 Boolean ParseTag(void);
66 static Move *ParseMoveList(void);
67 static Move *ParseMoveAndVariants(void);
68 static Move *ParseMove(void);
69 static Move *ParseMoveUnit(void);
70 static CommentList *ParseOptCommentList(void);
71 Boolean ParseOptMoveNumber(void);
72 static StringList *ParseOptNAGList(void);
73 static Variation *ParseOptVariantList(void);
74 static Variation *ParseVariant(void);
75 static char *ParseResult(void);
// Forward declarations for per-game bookkeeping and clean-up helpers.
77 static void setup_for_new_game(void);
79 static void check_result(char **Tags,const char *terminating_result);
80 static void free_comment_list(CommentList *comment_list);
81 static void DealWithEcoLine(Move *move_list);
82 static void DealWithGame(Move *move_list);
83 static Boolean finished_processing(void);
85 /* Initialise the game header structure to contain
86 * space for the default number of tags.
87 * The space will have to be increased if new tags are
88 * identified in the program source.
91 init_game_header(void)
94 GameHeader.header_tags_length = ORIGINAL_NUMBER_OF_TAGS;
95 GameHeader.Tags = (char **) MallocOrDie(GameHeader.header_tags_length*
96 sizeof(*GameHeader.Tags));
97 for(i = 0; i < GameHeader.header_tags_length; i++){
98 GameHeader.Tags[i] = (char *) NULL;
103 increase_game_header_tags_length(unsigned new_length)
106 if(new_length <= GameHeader.header_tags_length){
107 fprintf(GlobalState.logfile,
108 "Internal error: inappropriate length %d ",new_length);
109 fprintf(GlobalState.logfile,
110 " passed to increase_game_header_tags().\n");
113 GameHeader.Tags = (char **) ReallocOrDie((void *) GameHeader.Tags,
114 new_length*sizeof(*GameHeader.Tags));
115 for(i = GameHeader.header_tags_length; i < new_length; i++){
116 GameHeader.Tags[i] = NULL;
118 GameHeader.header_tags_length = new_length;
121 /* Try to open the given file. Error and exit on failure. */
123 must_open_file(const char *filename,const char *mode)
126 fp = fopen(filename,mode);
128 fprintf(GlobalState.logfile,"Unable to open the file: \"%s\"\n",
135 /* Print out on outfp the current details and
136 * terminate with a newline.
139 report_details(FILE *outfp)
141 if(GameHeader.Tags[WHITE_TAG] != NULL){
142 fprintf(outfp,"%s - ",GameHeader.Tags[WHITE_TAG]);
144 if(GameHeader.Tags[BLACK_TAG] != NULL){
145 fprintf(outfp,"%s ",GameHeader.Tags[BLACK_TAG]);
148 if(GameHeader.Tags[EVENT_TAG] != NULL){
149 fprintf(outfp,"%s ",GameHeader.Tags[EVENT_TAG]);
151 if(GameHeader.Tags[SITE_TAG] != NULL){
152 fprintf(outfp,"%s ",GameHeader.Tags[SITE_TAG]);
155 if(GameHeader.Tags[DATE_TAG] != NULL){
156 fprintf(outfp,"%s ",GameHeader.Tags[DATE_TAG]);
162 /* Check that terminating_result is consistent with
163 * Tags[RESULT_TAG]. If the latter is missing, fill it
164 * in from terminating_result.
167 check_result(char **Tags,const char *terminating_result)
168 { char *result_tag = Tags[RESULT_TAG];
170 if(terminating_result != NULL){
171 if((result_tag == NULL) || (*result_tag == '\0') ||
172 (strcmp(result_tag,"?") == 0)){
173 /* Use a copy of terminating result. */
174 result_tag = copy_string(terminating_result);
175 Tags[RESULT_TAG] = result_tag;
177 else if((result_tag != NULL) &&
178 (strcmp(terminating_result,"*") != 0) &&
179 (strcmp(result_tag,terminating_result) != 0)){
180 print_error_context(GlobalState.logfile);
181 fprintf(GlobalState.logfile,
182 "Inconsistent result strings in the following game.\n");
183 report_details(GlobalState.logfile);
191 /* Select which file to write to based upon the game state.
192 * This will depend upon:
193 * Whether the number of games per file is limited.
194 * Whether ECO_level > DONT_DIVIDE.
198 select_output_file(StateInfo *GameState,const char *eco)
200 if(GameState->games_per_file > 0){
201 if((GameState->num_games_matched % GameState->games_per_file) == 0){
202 /* Time to open the next one. */
205 if(GameState->outputfile != NULL){
206 (void) fclose(GameState->outputfile);
208 sprintf(filename,"%u%s",
209 GameState->next_file_number,
210 output_file_suffix(GameState->output_format));
211 GameState->outputfile = must_open_file(filename,"w");
212 GameState->next_file_number++;
215 else if(GameState->ECO_level > DONT_DIVIDE){
216 /* Open a file of the appropriate name. */
217 if(GameState->outputfile != NULL){
218 /* @@@ In practice, this might need refinement.
219 * Repeated opening and closing may prove inefficient.
221 (void) fclose(GameState->outputfile);
222 GameState->outputfile = open_eco_output_file(
223 GameState->ECO_level,
229 return GameState->outputfile;
233 * Conditions for finishing processing, other than all the input
234 * having been processed.
236 static Boolean finished_processing(void)
238 return (GlobalState.matching_game_number > 0 &&
239 GlobalState.num_games_matched == GlobalState.matching_game_number);
// Parse games one after another until ParseGame() returns false or
// finished_processing() becomes true, dispatching each move list by file_type.
// NOTE(review): several lines of this function are missing from this extract
// (closing braces, at least one else branch, and the body of the final if);
// the code below is left exactly as found.
243 ParseOptGameList(SourceFileType file_type)
244 { Move *move_list = NULL;
246 while(ParseGame(&move_list) && !finished_processing()){
// Normal files and check files are both handed to DealWithGame().
247 if(file_type == NORMALFILE){
248 DealWithGame(move_list);
250 else if(file_type == CHECKFILE){
251 DealWithGame(move_list);
// An ECO file contributes opening lines; a line without moves is only reported.
253 else if(file_type == ECOFILE){
254 if(move_list != NULL){
255 DealWithEcoLine(move_list);
258 fprintf(GlobalState.logfile,"ECO line with zero moves.\n");
259 report_details(GlobalState.logfile);
// NOTE(review): DealWithGame() and DealWithEcoLine() free the move list
// themselves, so this call presumably belongs to a separate (lost) branch -- confirm.
265 free_move_list(move_list);
268 setup_for_new_game();
270 if(file_type == ECOFILE && GlobalState.dump_eco) {
276 /* Parse a game and return a pointer to any valid list of moves
277 * in returned_move_list.
280 ParseGame(Move **returned_move_list)
281 { /* Boolean something_found = FALSE; */
282 CommentList *prefix_comment;
283 Move *move_list = NULL;
285 /* There shouldn't be a hanging comment before the result,
286 * but there sometimes is.
288 CommentList *hanging_comment;
290 /* Assume that we won't return anything. */
291 *returned_move_list = NULL;
292 /* Skip over any junk between games. */
293 current_symbol = skip_to_next_game(current_symbol);
294 prefix_comment = ParseOptCommentList();
295 if(prefix_comment != NULL){
296 /* Free this here, as it is hard to
297 * know whether it belongs to the game or the file.
298 * It is better to put game comments after the tags.
300 /* something_found = TRUE; */
301 free_comment_list(prefix_comment);
302 prefix_comment = NULL;
304 if(ParseOptTagList()){
305 /* something_found = TRUE; */
307 /* @@@ Beware of comments and/or tags without moves. */
308 move_list = ParseMoveList();
310 /* @@@ Look for a comment with no move text before the result. */
311 hanging_comment = ParseOptCommentList();
312 /* Append this to the final move, if there is one. */
314 /* Look for a result, even if there were no moves. */
315 result = ParseResult();
316 if(move_list != NULL){
317 /* Find the last move. */
318 Move *last_move = move_list;
320 while(last_move->next != NULL){
321 last_move = last_move->next;
323 if(hanging_comment != NULL) {
324 append_comments_to_move(last_move,hanging_comment);
327 /* Append it to the last move. */
328 last_move->terminating_result = result;
329 check_result(GameHeader.Tags,result);
330 *returned_move_list = move_list;
333 fprintf(GlobalState.logfile,"Missing result.\n");
334 report_details(GlobalState.logfile);
336 /* something_found = TRUE; */
339 /* @@@ Nothing to attach the comment to. */
340 (void) free((void *) hanging_comment);
341 hanging_comment = NULL;
343 * Workaround for games with zero moves.
344 * Check the result for consistency with the tags, but then
345 * there is no move to attach it to.
346 * When outputting a game, the missing result in this case
347 * will have to be supplied from the tags.
349 check_result(GameHeader.Tags,result);
351 (void) free((void *)result);
353 *returned_move_list = NULL;
355 return current_symbol != EOF_TOKEN;
359 ParseOptTagList(void)
360 { Boolean something_found = FALSE;
361 CommentList *prefix_comment;
364 something_found = TRUE;
367 /* Perform any consistency checks. */
368 if((GameHeader.Tags[SETUP_TAG] != NULL) &&
369 (strcmp(GameHeader.Tags[SETUP_TAG],"1") == 0)){
370 /* There must be a FEN_TAG to go with it. */
371 if(GameHeader.Tags[FEN_TAG] == NULL){
372 fprintf(GlobalState.logfile,
373 "Missing %s Tag to accompany %s Tag.\n",
374 tag_header_string(FEN_TAG),
375 tag_header_string(SETUP_TAG));
376 print_error_context(GlobalState.logfile);
380 prefix_comment = ParseOptCommentList();
381 if(prefix_comment != NULL){
382 GameHeader.prefix_comment = prefix_comment;
383 something_found = TRUE;
385 return something_found;
390 { Boolean TagFound = TRUE;
392 if(current_symbol == TAG){
393 TagName tag_index = yylval.tag_index;
395 current_symbol = next_token();
396 if(current_symbol == STRING){
397 char *tag_string = yylval.token_string;
399 if(tag_index < GameHeader.header_tags_length){
400 GameHeader.Tags[tag_index] = tag_string;
403 print_error_context(GlobalState.logfile);
404 fprintf(GlobalState.logfile,
405 "Internal error: Illegal tag index %d for %s\n",
406 tag_index,tag_string);
409 current_symbol = next_token();
412 print_error_context(GlobalState.logfile);
413 fprintf(GlobalState.logfile,"Missing tag string.\n");
416 else if(current_symbol == STRING){
417 print_error_context(GlobalState.logfile);
418 fprintf(GlobalState.logfile,"Missing tag for %s.\n",yylval.token_string);
419 (void) free((void *)yylval.token_string);
420 current_symbol = next_token();
431 { Move *head = NULL, *tail = NULL;
433 head = ParseMoveAndVariants();
438 while((next_move = ParseMoveAndVariants()) != NULL){
439 tail->next = next_move;
447 ParseMoveAndVariants(void)
448 { Move *move_details;
450 move_details = ParseMove();
451 if(move_details != NULL){
452 CommentList *comment;
454 move_details->Variants = ParseOptVariantList();
455 comment = ParseOptCommentList();
457 append_comments_to_move(move_details,comment);
466 { Move *move_details = NULL;
468 if(ParseOptMoveNumber()){
470 /* @@@ Watch out for finding just the number. */
471 move_details = ParseMoveUnit();
472 if(move_details != NULL){
473 CommentList *comment;
475 move_details->Nags = ParseOptNAGList();
476 comment = ParseOptCommentList();
478 append_comments_to_move(move_details,comment);
486 { Move *move_details = NULL;
488 if(current_symbol == MOVE){
489 move_details = yylval.move_details;
491 if(move_details->class == NULL_MOVE && RAV_level == 0) {
492 print_error_context(GlobalState.logfile);
493 fprintf(GlobalState.logfile, "Null moves (--) only allowed in variations.\n");
496 current_symbol = next_token();
497 if(current_symbol == CHECK_SYMBOL){
498 strcat((char *) move_details->move,"+");
499 current_symbol = next_token();
500 /* Sometimes + is followed by #, so cover this case. */
501 if(current_symbol == CHECK_SYMBOL){
502 current_symbol = next_token();
505 move_details->Comment = ParseOptCommentList();
511 ParseOptCommentList(void)
512 { CommentList *head = NULL, *tail = NULL;
514 while(current_symbol == COMMENT){
516 head = tail = yylval.comment;
519 tail->next = yylval.comment;
522 current_symbol = next_token();
528 ParseOptMoveNumber(void)
529 { Boolean something_found = FALSE;
531 if(current_symbol == MOVE_NUMBER){
532 current_symbol = next_token();
533 something_found = TRUE;
535 return something_found;
539 ParseOptNAGList(void)
540 { StringList *nags = NULL;
542 while(current_symbol == NAG){
543 if(GlobalState.keep_NAGs){
544 nags = save_string_list_item(nags,yylval.token_string);
547 (void) free((void *)yylval.token_string);
549 current_symbol = next_token();
555 ParseOptVariantList(void)
556 { Variation *head = NULL, *tail = NULL,
559 while((variation = ParseVariant()) != NULL){
561 head = tail = variation;
564 tail->next = variation;
// Parse one recursive annotation variation, introduced by a RAV_START token,
// into a newly allocated Variation (prefix comment, moves, suffix comment).
// NOTE(review): the function header, the declarations of result and moves,
// and a number of structural lines are missing from this extract; the code
// below is left exactly as found. Any updates to RAV_level are presumably
// among the lost lines -- confirm.
573 { Variation *variation = NULL;
575 if(current_symbol == RAV_START){
576 CommentList *prefix_comment;
577 CommentList *suffix_comment;
582 variation = MallocOrDie(sizeof(Variation));
584 current_symbol = next_token();
585 prefix_comment = ParseOptCommentList();
586 if(prefix_comment != NULL){
588 moves = ParseMoveList();
590 print_error_context(GlobalState.logfile);
591 fprintf(GlobalState.logfile,"Missing move list in variation.\n");
593 result = ParseResult();
// NOTE(review): when a result is parsed but moves is NULL, nothing visible
// here stores or frees result -- check for a leak.
594 if((result != NULL) && (moves != NULL)){
595 /* Find the last move, to which to append the terminating
598 Move *last_move = moves;
599 CommentList *trailing_comment;
601 while(last_move->next != NULL){
602 last_move = last_move->next;
604 last_move->terminating_result = result;
605 /* Accept a comment after the result, but it will
606 * be printed out preceding the result.
608 trailing_comment = ParseOptCommentList();
609 if(trailing_comment != NULL){
610 append_comments_to_move(last_move,trailing_comment);
// The variation should be closed by a RAV_END token; its absence is reported.
616 if(current_symbol == RAV_END){
618 current_symbol = next_token();
621 fprintf(GlobalState.logfile,"Missing ')' to close variation.\n");
623 suffix_comment = ParseOptCommentList();
624 if(suffix_comment != NULL){
// Hand everything gathered over to the new Variation node.
626 variation->prefix_comment = prefix_comment;
627 variation->suffix_comment = suffix_comment;
628 variation->moves = moves;
629 variation->next = NULL;
636 { char *result = NULL;
638 if(current_symbol == TERMINATING_RESULT){
639 result = yylval.token_string;
641 /* In the interests of skipping any intervening material
642 * between games, set the lookahead to a dummy token.
644 current_symbol = NO_TOKEN;
647 current_symbol = next_token();
// Prepare for parsing the next game by resetting the lexer.
// NOTE(review): the rest of this function is not visible in this extract.
654 setup_for_new_game(void)
656 restart_lex_for_new_game();
// NOTE(review): the header of this function and the declaration of tag are
// missing from this extract, so its name and signature cannot be confirmed here.
660 /* Discard any data held in the GameHeader.Tags structure. */
// Every slot is freed and reset to NULL, so the table can be reused.
665 for(tag = 0; tag < GameHeader.header_tags_length; tag++){
666 if(GameHeader.Tags[tag] != NULL){
667 free(GameHeader.Tags[tag]);
668 GameHeader.Tags[tag] = NULL;
673 /* Discard data from a gathered game. */
675 free_string_list(StringList *list)
681 if(next->str != NULL){
682 (void) free((void *)next->str);
684 (void) free((void *)next);
689 free_comment_list(CommentList *comment_list)
691 while(comment_list != NULL){
692 CommentList *this_comment = comment_list;
694 if(comment_list->Comment != NULL){
695 free_string_list(comment_list->Comment);
697 comment_list = comment_list->next;
698 (void) free((void *)this_comment);
703 free_variation(Variation *variation)
706 while(variation != NULL){
708 variation = variation->next;
709 if(next->prefix_comment != NULL){
710 free_comment_list(next->prefix_comment);
712 if(next->suffix_comment != NULL){
713 free_comment_list(next->suffix_comment);
715 if(next->moves != NULL){
716 (void) free_move_list((void *)next->moves);
718 (void) free((void *)next);
723 free_move_list(Move *move_list)
726 while(move_list != NULL){
728 move_list = move_list->next;
729 if(next->Nags != NULL){
730 free_string_list(next->Nags);
732 if(next->Comment != NULL){
733 free_comment_list(next->Comment);
735 if(next->Variants != NULL){
736 free_variation(next->Variants);
738 if(next->epd != NULL){
739 (void) free((void *)next->epd);
741 if(next->bpfen != NULL){
742 (void) free((void *)next->bpfen);
744 if(next->terminating_result != NULL){
745 (void) free((void *)next->terminating_result);
747 (void) free((void *)next);
751 /* Add str onto the tail of list and
752 * return the head of the resulting list.
// NOTE(review): several lines are missing from this extract, including the
// statement that stores str in the new node; the code is left as found.
// Elsewhere in this file the caller frees its string only when it does not
// call this function, so the list presumably takes ownership of str -- confirm.
755 save_string_list_item(StringList *list,const char *str)
758 StringList *new_item;
760 new_item = (StringList *)MallocOrDie(sizeof(*new_item));
762 new_item->next = NULL;
// Walk to the last node of an existing list and link the new item after it.
767 StringList *tail = list;
769 while(tail->next != NULL){
772 tail->next = new_item;
778 /* Append any comments in Comment onto the end of
779 * any associated with move.
782 append_comments_to_move(Move *move,CommentList *Comment)
785 /* Add in to the end of any already existing. */
786 if(move->Comment == NULL){
787 move->Comment = Comment;
790 /* Add in the final comment to
791 * the end of any existing for this move.
793 CommentList *tail = move->Comment;
795 while(tail->next != NULL){
798 tail->next = Comment;
// Decide whether a fully parsed game is wanted and, if so, output it; then
// release the game's prefix comment and move list.
// NOTE(review): a number of lines are missing from this extract, including the
// declarations of current_game and plycount, closing braces and else lines;
// the code below is left exactly as found.
// NOTE(review): "¤t_game" (in the two apply_move_list calls) looks like a
// mis-decoded "&current_game" -- restore before compiling.
804 DealWithGame(Move *move_list)
806 /* Record whether the game has been printed or not.
807 * This is used for the case of the -n flag which catches
808 * all non-printed games.
810 Boolean game_output = FALSE;
811 /* We need a dummy argument for apply_move_list. */
814 /* Update the count of how many games handled. */
815 GlobalState.num_games_processed++;
817 /* Fill in the information currently known. */
818 current_game.tags = GameHeader.Tags;
819 current_game.tags_length = GameHeader.header_tags_length;
820 current_game.prefix_comment = GameHeader.prefix_comment;
821 current_game.moves = move_list;
822 current_game.moves_checked = FALSE;
823 current_game.moves_ok = FALSE;
824 current_game.error_ply = 0;
826 /* Determine whether or not this game is wanted, on the
827 * basis of the various selection criteria available.
831 * apply_move_list checks out the moves.
832 * If it returns TRUE as a match, it will also fill in the
833 * current_game.final_hash_value and
834 * current_game.cumulative_hash_value
835 * fields of current_game so that these can be used in the
836 * previous_occurance function.
838 * If there are any tag criteria, it will be easy to quickly
839 * eliminate most games without going through the length
840 * process of game matching.
842 * If ECO adding is done, the order of checking may cause
843 * a conflict here since it won't be possible to reject a game
844 * based on its ECO code unless it already has one.
845 * Therefore, Check for the ECO tag only after everything else has
// The checks are chained with &&, so a later one runs only if the earlier ones pass.
848 if(CheckTagDetailsNotECO(current_game.tags,current_game.tags_length) &&
849 apply_move_list(¤t_game,&plycount) &&
850 check_move_bounds(plycount) &&
851 check_textual_variations(current_game) &&
852 check_for_ending(current_game.moves) &&
853 check_for_only_checkmate(current_game.moves) &&
854 CheckECOTag(current_game.tags)){
855 /* If there is no original filename then the game is not a
858 const char *original_filename = previous_occurance(current_game, plycount);
860 if((original_filename == NULL) && GlobalState.suppress_originals){
861 /* Don't output first occurrences. */
863 else if((original_filename == NULL) || !GlobalState.suppress_duplicates){
864 GlobalState.num_games_matched++;
865 if(GlobalState.check_only) {
866 // We are only checking.
867 if(GlobalState.verbose){
868 /* Report progress on logfile. */
869 report_details(GlobalState.logfile);
872 else if(GlobalState.current_file_type == CHECKFILE){
873 /* We are only checking, so don't count this as a matched game. */
874 GlobalState.num_games_matched--;
876 else if(GlobalState.matching_game_number > 0 &&
877 GlobalState.num_games_matched != GlobalState.matching_game_number) {
878 /* This is not the right matching game to be output. */
881 /* This game is to be kept and output. */
882 FILE *outputfile = select_output_file(&GlobalState,
883 current_game.tags[ECO_TAG]);
885 /* See if we wish to separate out duplicates. */
886 if((original_filename != NULL) &&
887 (GlobalState.duplicate_file != NULL)){
// Remembered across calls so the "From:" note is written once per input file.
888 static const char *last_input_file = NULL;
890 outputfile = GlobalState.duplicate_file;
891 if((last_input_file != GlobalState.current_input_file) &&
892 (GlobalState.current_input_file != NULL)){
893 /* Record which file this and succeeding
894 * duplicates come from.
896 print_str(outputfile,"{ From: ");
897 print_str(outputfile,
898 GlobalState.current_input_file);
899 print_str(outputfile," }");
900 terminate_line(outputfile);
901 last_input_file = GlobalState.current_input_file;
903 print_str(outputfile,"{ First found in: ");
904 print_str(outputfile,original_filename);
905 print_str(outputfile," }");
906 terminate_line(outputfile);
908 /* Now output what we have. */
909 output_game(current_game,outputfile);
// NOTE(review): no assignment to game_output survives in this extract;
// presumably it is set after the game is output -- confirm.
911 if(GlobalState.verbose){
912 /* Report progress on logfile. */
913 report_details(GlobalState.logfile);
918 if(!game_output && (GlobalState.non_matching_file != NULL) &&
919 GlobalState.current_file_type != CHECKFILE){
920 /* The user wants to keep everything else. */
921 if(!current_game.moves_checked){
922 /* Make sure that the move text is in a reasonable state. */
923 (void) apply_move_list(¤t_game,&plycount);
925 if(current_game.moves_ok || GlobalState.keep_broken_games){
926 output_game(current_game,GlobalState.non_matching_file);
930 /* Game is finished with, so free everything. */
931 if(GameHeader.prefix_comment != NULL){
932 free_comment_list(GameHeader.prefix_comment);
934 /* Ensure that the GameHeader's prefix comment is NULL for
937 GameHeader.prefix_comment = NULL;
940 free_move_list(current_game.moves);
// Progress counter on stderr, refreshed every tenth game.
941 if((GlobalState.num_games_processed % 10) == 0){
942 fprintf(stderr,"Games: %lu\r",GlobalState.num_games_processed);
// Process one line of an ECO file: check its moves and, if they are valid,
// store its ECO details; then release the prefix comment and move list.
// NOTE(review): the declaration of current_game and some structural lines are
// missing from this extract; the code below is left exactly as found.
// NOTE(review): "¤t_game" (in the apply_eco_move_list call) looks like a
// mis-decoded "&current_game" -- restore before compiling.
947 DealWithEcoLine(Move *move_list)
949 /* We need to know the length of a game to store with the
950 * hash information as a sanity check.
952 unsigned number_of_half_moves;
954 /* Fill in the information currently known. */
955 current_game.tags = GameHeader.Tags;
956 current_game.tags_length = GameHeader.header_tags_length;
957 current_game.prefix_comment = GameHeader.prefix_comment;
958 current_game.moves = move_list;
959 current_game.moves_checked = FALSE;
960 current_game.moves_ok = FALSE;
961 current_game.error_ply = 0;
963 /* apply_eco_move_list checks out the moves.
964 * It will also fill in the
965 * current_game.final_hash_value and
966 * current_game.cumulative_hash_value
967 * fields of current_game.
969 if(apply_eco_move_list(¤t_game,&number_of_half_moves)){
970 if(current_game.moves_ok){
971 /* Store the ECO code in the appropriate hash location. */
972 save_eco_details(current_game,number_of_half_moves);
976 /* Game is finished with, so free everything. */
977 if(GameHeader.prefix_comment != NULL){
978 free_comment_list(GameHeader.prefix_comment);
980 /* Ensure that the GameHeader's prefix comment is NULL for
983 GameHeader.prefix_comment = NULL;
986 free_move_list(current_game.moves);
989 /* If file_type == ECOFILE we are dealing with a file of ECO
990 * input rather than a normal game file.
// Entry point of the parser: resets per-game state, primes the lookahead
// token, and parses every game in the current input.
// NOTE(review): the return type and the statements inside each branch below
// are missing from this extract; the code is left exactly as found.
993 yyparse(SourceFileType file_type)
995 setup_for_new_game();
996 current_symbol = skip_to_next_game(NO_TOKEN);
997 ParseOptGameList(file_type);
// Parsing ends normally at end of input or once finished_processing() is true;
// anything else is reported on the logfile.
998 if(current_symbol == EOF_TOKEN){
1002 else if(finished_processing()) {
1003 /* Ok -- done all we need to. */
1007 fprintf(GlobalState.logfile,"End of input reached before end of file.\n");