/*
 *  Program: pgn-extract: a Portable Game Notation (PGN) extractor.
 *  Copyright (C) 1994-2014 David Barnes
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 1, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  David Barnes may be contacted as D.J.Barnes@kent.ac.uk
 *  http://www.cs.kent.ac.uk/people/staff/djb/
 */
43 static TokenType current_symbol = NO_TOKEN;
45 /* Keep track of which RAV level we are at.
46 * This is used to check whether a TERMINATING_RESULT is the final one
47 * and whether NULL_MOVEs are allowed.
49 static unsigned RAV_level = 0;
51 /* At what file position the current game started. */
52 static long game_start_position = -1;
54 /* Retain details of the header of a game.
55 * This comprises the Tags and any comment prefixing the
61 unsigned header_tags_length;
62 CommentList *prefix_comment;
65 static void ParseOptGameList(SourceFileType file_type);
66 static Boolean ParseGame(Move **returned_move_list);
67 Boolean ParseOptTagList(void);
68 Boolean ParseTag(void);
69 static Move *ParseMoveList(void);
70 static Move *ParseMoveAndVariants(void);
71 static Move *ParseMove(void);
72 static Move *ParseMoveUnit(void);
73 static CommentList *ParseOptCommentList(void);
74 Boolean ParseOptMoveNumber(void);
75 static StringList *ParseOptNAGList(void);
76 static Variation *ParseOptVariantList(void);
77 static Variation *ParseVariant(void);
78 static char *ParseResult(void);
80 static void setup_for_new_game(void);
82 static void check_result(char **Tags,const char *terminating_result);
83 static void free_comment_list(CommentList *comment_list);
84 static void DealWithEcoLine(Move *move_list);
85 static void DealWithGame(Move *move_list);
86 static Boolean finished_processing(void);
88 /* Initialise the game header structure to contain
89 * space for the default number of tags.
90 * The space will have to be increased if new tags are
91 * identified in the program source.
94 init_game_header(void)
97 GameHeader.header_tags_length = ORIGINAL_NUMBER_OF_TAGS;
98 GameHeader.Tags = (char **) MallocOrDie(GameHeader.header_tags_length*
99 sizeof(*GameHeader.Tags));
100 for(i = 0; i < GameHeader.header_tags_length; i++){
101 GameHeader.Tags[i] = (char *) NULL;
106 increase_game_header_tags_length(unsigned new_length)
109 if(new_length <= GameHeader.header_tags_length){
110 fprintf(GlobalState.logfile,
111 "Internal error: inappropriate length %d ",new_length);
112 fprintf(GlobalState.logfile,
113 " passed to increase_game_header_tags().\n");
116 GameHeader.Tags = (char **) ReallocOrDie((void *) GameHeader.Tags,
117 new_length*sizeof(*GameHeader.Tags));
118 for(i = GameHeader.header_tags_length; i < new_length; i++){
119 GameHeader.Tags[i] = NULL;
121 GameHeader.header_tags_length = new_length;
124 /* Try to open the given file. Error and exit on failure. */
126 must_open_file(const char *filename,const char *mode)
129 fp = fopen(filename,mode);
131 fprintf(GlobalState.logfile,"Unable to open the file: \"%s\"\n",
138 /* Print out on outfp the current details and
139 * terminate with a newline.
142 report_details(FILE *outfp)
144 if(GameHeader.Tags[WHITE_TAG] != NULL){
145 fprintf(outfp,"%s - ",GameHeader.Tags[WHITE_TAG]);
147 if(GameHeader.Tags[BLACK_TAG] != NULL){
148 fprintf(outfp,"%s ",GameHeader.Tags[BLACK_TAG]);
151 if(GameHeader.Tags[EVENT_TAG] != NULL){
152 fprintf(outfp,"%s ",GameHeader.Tags[EVENT_TAG]);
154 if(GameHeader.Tags[SITE_TAG] != NULL){
155 fprintf(outfp,"%s ",GameHeader.Tags[SITE_TAG]);
158 if(GameHeader.Tags[DATE_TAG] != NULL){
159 fprintf(outfp,"%s ",GameHeader.Tags[DATE_TAG]);
165 /* Check that terminating_result is consistent with
166 * Tags[RESULT_TAG]. If the latter is missing, fill it
167 * in from terminating_result.
170 check_result(char **Tags,const char *terminating_result)
171 { char *result_tag = Tags[RESULT_TAG];
173 if(terminating_result != NULL){
174 if((result_tag == NULL) || (*result_tag == '\0') ||
175 (strcmp(result_tag,"?") == 0)){
176 /* Use a copy of terminating result. */
177 result_tag = copy_string(terminating_result);
178 Tags[RESULT_TAG] = result_tag;
180 else if((result_tag != NULL) &&
181 (strcmp(terminating_result,"*") != 0) &&
182 (strcmp(result_tag,terminating_result) != 0)){
183 print_error_context(GlobalState.logfile);
184 fprintf(GlobalState.logfile,
185 "Inconsistent result strings in the following game.\n");
186 report_details(GlobalState.logfile);
194 /* Select which file to write to based upon the game state.
195 * This will depend upon:
196 * Whether the number of games per file is limited.
197 * Whether ECO_level > DONT_DIVIDE.
201 select_output_file(StateInfo *GameState,const char *eco)
203 if(GameState->games_per_file > 0){
204 if((GameState->num_games_matched % GameState->games_per_file) == 0){
205 /* Time to open the next one. */
208 if(GameState->outputfile != NULL){
209 (void) fclose(GameState->outputfile);
211 sprintf(filename,"%u%s",
212 GameState->next_file_number,
213 output_file_suffix(GameState->output_format));
214 GameState->outputfile = must_open_file(filename,"w");
215 GameState->next_file_number++;
218 else if(GameState->ECO_level > DONT_DIVIDE){
219 /* Open a file of the appropriate name. */
220 if(GameState->outputfile != NULL){
221 /* @@@ In practice, this might need refinement.
222 * Repeated opening and closing may prove inefficient.
224 (void) fclose(GameState->outputfile);
225 GameState->outputfile = open_eco_output_file(
226 GameState->ECO_level,
232 return GameState->outputfile;
236 * Conditions for finishing processing, other than all the input
237 * having been processed.
239 static Boolean finished_processing(void)
241 return (GlobalState.matching_game_number > 0 &&
242 GlobalState.num_games_matched == GlobalState.matching_game_number);
246 ParseOptGameList(SourceFileType file_type)
247 { Move *move_list = NULL;
249 while(ParseGame(&move_list) && !finished_processing()){
250 if(file_type == NORMALFILE){
251 DealWithGame(move_list);
253 else if(file_type == CHECKFILE){
254 DealWithGame(move_list);
256 else if(file_type == ECOFILE){
257 if(move_list != NULL){
258 DealWithEcoLine(move_list);
261 fprintf(GlobalState.logfile,"ECO line with zero moves.\n");
262 report_details(GlobalState.logfile);
268 free_move_list(move_list);
271 setup_for_new_game();
273 if(file_type == ECOFILE && GlobalState.dump_eco) {
279 /* Parse a game and return a pointer to any valid list of moves
280 * in returned_move_list.
283 ParseGame(Move **returned_move_list)
284 { /* Boolean something_found = FALSE; */
285 CommentList *prefix_comment;
286 Move *move_list = NULL;
288 /* There shouldn't be a hanging comment before the result,
289 * but there sometimes is.
291 CommentList *hanging_comment;
293 /* Assume that we won't return anything. */
294 *returned_move_list = NULL;
295 /* Skip over any junk between games. */
296 current_symbol = skip_to_next_game(current_symbol);
297 prefix_comment = ParseOptCommentList();
298 if(prefix_comment != NULL){
299 /* Free this here, as it is hard to
300 * know whether it belongs to the game or the file.
301 * It is better to put game comments after the tags.
303 /* something_found = TRUE; */
304 free_comment_list(prefix_comment);
305 prefix_comment = NULL;
307 if(ParseOptTagList()){
308 /* something_found = TRUE; */
310 /* @@@ Beware of comments and/or tags without moves. */
311 move_list = ParseMoveList();
313 /* @@@ Look for a comment with no move text before the result. */
314 hanging_comment = ParseOptCommentList();
315 /* Append this to the final move, if there is one. */
317 /* Look for a result, even if there were no moves. */
318 result = ParseResult();
319 if(move_list != NULL){
320 /* Find the last move. */
321 Move *last_move = move_list;
323 while(last_move->next != NULL){
324 last_move = last_move->next;
326 if(hanging_comment != NULL) {
327 append_comments_to_move(last_move,hanging_comment);
330 /* Append it to the last move. */
331 last_move->terminating_result = result;
332 check_result(GameHeader.Tags,result);
333 *returned_move_list = move_list;
336 fprintf(GlobalState.logfile,"Missing result.\n");
337 report_details(GlobalState.logfile);
339 /* something_found = TRUE; */
342 /* @@@ Nothing to attach the comment to. */
343 (void) free((void *) hanging_comment);
344 hanging_comment = NULL;
346 * Workaround for games with zero moves.
347 * Check the result for consistency with the tags, but then
348 * there is no move to attach it to.
349 * When outputting a game, the missing result in this case
350 * will have to be supplied from the tags.
352 check_result(GameHeader.Tags,result);
354 (void) free((void *)result);
356 *returned_move_list = NULL;
358 return current_symbol != EOF_TOKEN;
362 ParseOptTagList(void)
363 { Boolean something_found = FALSE;
364 CommentList *prefix_comment;
367 something_found = TRUE;
370 /* Perform any consistency checks. */
371 if((GameHeader.Tags[SETUP_TAG] != NULL) &&
372 (strcmp(GameHeader.Tags[SETUP_TAG],"1") == 0)){
373 /* There must be a FEN_TAG to go with it. */
374 if(GameHeader.Tags[FEN_TAG] == NULL){
375 fprintf(GlobalState.logfile,
376 "Missing %s Tag to accompany %s Tag.\n",
377 tag_header_string(FEN_TAG),
378 tag_header_string(SETUP_TAG));
379 print_error_context(GlobalState.logfile);
383 prefix_comment = ParseOptCommentList();
384 if(prefix_comment != NULL){
385 GameHeader.prefix_comment = prefix_comment;
386 something_found = TRUE;
388 return something_found;
393 { Boolean TagFound = TRUE;
395 if(current_symbol == TAG){
396 TagName tag_index = yylval.tag_index;
398 current_symbol = next_token();
399 if(current_symbol == STRING){
400 char *tag_string = yylval.token_string;
402 if(tag_index < GameHeader.header_tags_length){
403 GameHeader.Tags[tag_index] = tag_string;
406 print_error_context(GlobalState.logfile);
407 fprintf(GlobalState.logfile,
408 "Internal error: Illegal tag index %d for %s\n",
409 tag_index,tag_string);
412 current_symbol = next_token();
415 print_error_context(GlobalState.logfile);
416 fprintf(GlobalState.logfile,"Missing tag string.\n");
419 else if(current_symbol == STRING){
420 print_error_context(GlobalState.logfile);
421 fprintf(GlobalState.logfile,"Missing tag for %s.\n",yylval.token_string);
422 (void) free((void *)yylval.token_string);
423 current_symbol = next_token();
434 { Move *head = NULL, *tail = NULL;
436 head = ParseMoveAndVariants();
441 while((next_move = ParseMoveAndVariants()) != NULL){
442 tail->next = next_move;
450 ParseMoveAndVariants(void)
451 { Move *move_details;
453 move_details = ParseMove();
454 if(move_details != NULL){
455 CommentList *comment;
457 move_details->Variants = ParseOptVariantList();
458 comment = ParseOptCommentList();
460 append_comments_to_move(move_details,comment);
469 { Move *move_details = NULL;
471 if(ParseOptMoveNumber()){
473 /* @@@ Watch out for finding just the number. */
474 move_details = ParseMoveUnit();
475 if(move_details != NULL){
476 CommentList *comment;
478 move_details->Nags = ParseOptNAGList();
479 comment = ParseOptCommentList();
481 append_comments_to_move(move_details,comment);
489 { Move *move_details = NULL;
491 if(current_symbol == MOVE){
492 move_details = yylval.move_details;
494 if(move_details->class == NULL_MOVE && RAV_level == 0) {
495 print_error_context(GlobalState.logfile);
496 fprintf(GlobalState.logfile, "Null moves (--) only allowed in variations.\n");
499 current_symbol = next_token();
500 if(current_symbol == CHECK_SYMBOL){
501 strcat((char *) move_details->move,"+");
502 current_symbol = next_token();
503 /* Sometimes + is followed by #, so cover this case. */
504 if(current_symbol == CHECK_SYMBOL){
505 current_symbol = next_token();
508 move_details->Comment = ParseOptCommentList();
514 ParseOptCommentList(void)
515 { CommentList *head = NULL, *tail = NULL;
517 while(current_symbol == COMMENT){
519 head = tail = yylval.comment;
522 tail->next = yylval.comment;
525 current_symbol = next_token();
531 ParseOptMoveNumber(void)
532 { Boolean something_found = FALSE;
534 if(current_symbol == MOVE_NUMBER){
535 current_symbol = next_token();
536 something_found = TRUE;
538 return something_found;
542 ParseOptNAGList(void)
543 { StringList *nags = NULL;
545 while(current_symbol == NAG){
546 if(GlobalState.keep_NAGs){
547 nags = save_string_list_item(nags,yylval.token_string);
550 (void) free((void *)yylval.token_string);
552 current_symbol = next_token();
558 ParseOptVariantList(void)
559 { Variation *head = NULL, *tail = NULL,
562 while((variation = ParseVariant()) != NULL){
564 head = tail = variation;
567 tail->next = variation;
576 { Variation *variation = NULL;
578 if(current_symbol == RAV_START){
579 CommentList *prefix_comment;
580 CommentList *suffix_comment;
585 variation = MallocOrDie(sizeof(Variation));
587 current_symbol = next_token();
588 prefix_comment = ParseOptCommentList();
589 if(prefix_comment != NULL){
591 moves = ParseMoveList();
593 print_error_context(GlobalState.logfile);
594 fprintf(GlobalState.logfile,"Missing move list in variation.\n");
596 result = ParseResult();
597 if((result != NULL) && (moves != NULL)){
598 /* Find the last move, to which to append the terminating
601 Move *last_move = moves;
602 CommentList *trailing_comment;
604 while(last_move->next != NULL){
605 last_move = last_move->next;
607 last_move->terminating_result = result;
608 /* Accept a comment after the result, but it will
609 * be printed out preceding the result.
611 trailing_comment = ParseOptCommentList();
612 if(trailing_comment != NULL){
613 append_comments_to_move(last_move,trailing_comment);
619 if(current_symbol == RAV_END){
621 current_symbol = next_token();
624 fprintf(GlobalState.logfile,"Missing ')' to close variation.\n");
626 suffix_comment = ParseOptCommentList();
627 if(suffix_comment != NULL){
629 variation->prefix_comment = prefix_comment;
630 variation->suffix_comment = suffix_comment;
631 variation->moves = moves;
632 variation->next = NULL;
639 { char *result = NULL;
641 if(current_symbol == TERMINATING_RESULT){
642 result = yylval.token_string;
644 /* In the interests of skipping any intervening material
645 * between games, set the lookahead to a dummy token.
647 current_symbol = NO_TOKEN;
650 current_symbol = next_token();
657 setup_for_new_game(void)
659 restart_lex_for_new_game();
661 game_start_position = get_position();
664 /* Discard any data held in the GameHeader.Tags structure. */
669 for(tag = 0; tag < GameHeader.header_tags_length; tag++){
670 if(GameHeader.Tags[tag] != NULL){
671 free(GameHeader.Tags[tag]);
672 GameHeader.Tags[tag] = NULL;
677 /* Discard data from a gathered game. */
679 free_string_list(StringList *list)
685 if(next->str != NULL){
686 (void) free((void *)next->str);
688 (void) free((void *)next);
693 free_comment_list(CommentList *comment_list)
695 while(comment_list != NULL){
696 CommentList *this_comment = comment_list;
698 if(comment_list->Comment != NULL){
699 free_string_list(comment_list->Comment);
701 comment_list = comment_list->next;
702 (void) free((void *)this_comment);
707 free_variation(Variation *variation)
710 while(variation != NULL){
712 variation = variation->next;
713 if(next->prefix_comment != NULL){
714 free_comment_list(next->prefix_comment);
716 if(next->suffix_comment != NULL){
717 free_comment_list(next->suffix_comment);
719 if(next->moves != NULL){
720 (void) free_move_list((void *)next->moves);
722 (void) free((void *)next);
727 free_move_list(Move *move_list)
730 while(move_list != NULL){
732 move_list = move_list->next;
733 if(next->Nags != NULL){
734 free_string_list(next->Nags);
736 if(next->Comment != NULL){
737 free_comment_list(next->Comment);
739 if(next->Variants != NULL){
740 free_variation(next->Variants);
742 if(next->epd != NULL){
743 (void) free((void *)next->epd);
745 if(next->bpfen != NULL){
746 (void) free((void *)next->bpfen);
748 if(next->terminating_result != NULL){
749 (void) free((void *)next->terminating_result);
751 (void) free((void *)next);
755 /* Add str onto the tail of list and
756 * return the head of the resulting list.
759 save_string_list_item(StringList *list,const char *str)
762 StringList *new_item;
764 new_item = (StringList *)MallocOrDie(sizeof(*new_item));
766 new_item->next = NULL;
771 StringList *tail = list;
773 while(tail->next != NULL){
776 tail->next = new_item;
782 /* Append any comments in Comment onto the end of
783 * any associated with move.
786 append_comments_to_move(Move *move,CommentList *Comment)
789 /* Add in to the end of any already existing. */
790 if(move->Comment == NULL){
791 move->Comment = Comment;
794 /* Add in the final comment to
795 * the end of any existing for this move.
797 CommentList *tail = move->Comment;
799 while(tail->next != NULL){
802 tail->next = Comment;
808 DealWithGame(Move *move_list)
810 /* Record whether the game has been printed or not.
811 * This is used for the case of the -n flag which catches
812 * all non-printed games.
814 Boolean game_output = FALSE;
815 /* We need a dummy argument for apply_move_list. */
818 /* Update the count of how many games handled. */
819 GlobalState.num_games_processed++;
821 /* Fill in the information currently known. */
822 current_game.tags = GameHeader.Tags;
823 current_game.tags_length = GameHeader.header_tags_length;
824 current_game.prefix_comment = GameHeader.prefix_comment;
825 current_game.moves = move_list;
826 current_game.moves_checked = FALSE;
827 current_game.moves_ok = FALSE;
828 current_game.error_ply = 0;
829 current_game.start_position = game_start_position;
831 /* Determine whether or not this game is wanted, on the
832 * basis of the various selection criteria available.
836 * apply_move_list checks out the moves.
837 * If it returns TRUE as a match, it will also fill in the
838 * current_game.final_hash_value and
839 * current_game.cumulative_hash_value
840 * fields of current_game so that these can be used in the
841 * previous_occurance function.
843 * If there are any tag criteria, it will be easy to quickly
844 * eliminate most games without going through the length
845 * process of game matching.
847 * If ECO adding is done, the order of checking may cause
848 * a conflict here since it won't be possible to reject a game
849 * based on its ECO code unless it already has one.
850 * Therefore, Check for the ECO tag only after everything else has
853 if(CheckTagDetailsNotECO(current_game.tags,current_game.tags_length) &&
854 apply_move_list(¤t_game,&plycount) &&
855 check_move_bounds(plycount) &&
856 check_textual_variations(current_game) &&
857 check_for_ending(current_game.moves) &&
858 check_for_only_checkmate(current_game.moves) &&
859 CheckECOTag(current_game.tags)){
860 /* If there is no original filename then the game is not a
863 const char *original_filename = previous_occurance(current_game, plycount);
865 if((original_filename == NULL) && GlobalState.suppress_originals){
866 /* Don't output first occurrences. */
868 else if((original_filename == NULL) || !GlobalState.suppress_duplicates){
869 GlobalState.num_games_matched++;
870 if(GlobalState.check_only) {
871 // We are only checking.
872 if(GlobalState.verbose){
873 /* Report progress on logfile. */
874 report_details(GlobalState.logfile);
877 else if(GlobalState.current_file_type == CHECKFILE){
878 /* We are only checking, so don't count this as a matched game. */
879 GlobalState.num_games_matched--;
881 else if(GlobalState.matching_game_number > 0 &&
882 GlobalState.num_games_matched != GlobalState.matching_game_number) {
883 /* This is not the right matching game to be output. */
886 /* This game is to be kept and output. */
887 FILE *outputfile = select_output_file(&GlobalState,
888 current_game.tags[ECO_TAG]);
890 /* See if we wish to separate out duplicates. */
891 if((original_filename != NULL) &&
892 (GlobalState.duplicate_file != NULL)){
893 static const char *last_input_file = NULL;
895 outputfile = GlobalState.duplicate_file;
896 if((last_input_file != GlobalState.current_input_file) &&
897 (GlobalState.current_input_file != NULL)){
898 /* Record which file this and succeeding
899 * duplicates come from.
901 print_str(outputfile,"{ From: ");
902 print_str(outputfile,
903 GlobalState.current_input_file);
904 print_str(outputfile," }");
905 terminate_line(outputfile);
906 last_input_file = GlobalState.current_input_file;
908 print_str(outputfile,"{ First found in: ");
909 print_str(outputfile,original_filename);
910 print_str(outputfile," }");
911 terminate_line(outputfile);
913 /* Now output what we have. */
914 output_game(current_game,outputfile);
916 if(GlobalState.verbose){
917 /* Report progress on logfile. */
918 report_details(GlobalState.logfile);
923 if(!game_output && (GlobalState.non_matching_file != NULL) &&
924 GlobalState.current_file_type != CHECKFILE){
925 /* The user wants to keep everything else. */
926 if(!current_game.moves_checked){
927 /* Make sure that the move text is in a reasonable state. */
928 (void) apply_move_list(¤t_game,&plycount);
930 if(current_game.moves_ok || GlobalState.keep_broken_games){
931 output_game(current_game,GlobalState.non_matching_file);
935 /* Game is finished with, so free everything. */
936 if(GameHeader.prefix_comment != NULL){
937 free_comment_list(GameHeader.prefix_comment);
939 /* Ensure that the GameHeader's prefix comment is NULL for
942 GameHeader.prefix_comment = NULL;
945 free_move_list(current_game.moves);
946 if((GlobalState.num_games_processed % 10) == 0){
947 fprintf(stderr,"Games: %lu\r",GlobalState.num_games_processed);
952 DealWithEcoLine(Move *move_list)
954 /* We need to know the length of a game to store with the
955 * hash information as a sanity check.
957 unsigned number_of_half_moves;
959 /* Fill in the information currently known. */
960 current_game.tags = GameHeader.Tags;
961 current_game.tags_length = GameHeader.header_tags_length;
962 current_game.prefix_comment = GameHeader.prefix_comment;
963 current_game.moves = move_list;
964 current_game.moves_checked = FALSE;
965 current_game.moves_ok = FALSE;
966 current_game.error_ply = 0;
968 /* apply_eco_move_list checks out the moves.
969 * It will also fill in the
970 * current_game.final_hash_value and
971 * current_game.cumulative_hash_value
972 * fields of current_game.
974 if(apply_eco_move_list(¤t_game,&number_of_half_moves)){
975 if(current_game.moves_ok){
976 /* Store the ECO code in the appropriate hash location. */
977 save_eco_details(current_game,number_of_half_moves);
981 /* Game is finished with, so free everything. */
982 if(GameHeader.prefix_comment != NULL){
983 free_comment_list(GameHeader.prefix_comment);
985 /* Ensure that the GameHeader's prefix comment is NULL for
988 GameHeader.prefix_comment = NULL;
991 free_move_list(current_game.moves);
994 /* If file_type == ECOFILE we are dealing with a file of ECO
995 * input rather than a normal game file.
998 yyparse(SourceFileType file_type)
1000 setup_for_new_game();
1001 current_symbol = skip_to_next_game(NO_TOKEN);
1002 ParseOptGameList(file_type);
1003 if(current_symbol == EOF_TOKEN){
1007 else if(finished_processing()) {
1008 /* Ok -- done all we need to. */
1012 fprintf(GlobalState.logfile,"End of input reached before end of file.\n");