2 * Program: pgn-extract: a Portable Game Notation (PGN) extractor.
3 * Copyright (C) 1994-2012 David Barnes
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 1, or (at your option)
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 * David Barnes may be contacted as D.J.Barnes@kent.ac.uk
19 * http://www.cs.kent.ac.uk/people/staff/djb/
43 /* The maximum length of an output line. This is conservatively
44 * slightly smaller than the PGN export standard of 80.
46 #define MAX_LINE_LENGTH 75
48 /* Define a file name relative to the current directory representing
49 * a file of ECO classifications.
51 #ifndef DEFAULT_ECO_FILE
52 #define DEFAULT_ECO_FILE "eco.pgn"
55 /* This structure holds details of the program state
56 * available to all parts of the program.
57 * This goes against the grain of good structured programming
58 * principles, but most of these fields are set from the program's
59 * arguments and are read-only thereafter. If I had done this in
60 * C++ there would have been a cleaner interface!
/* Default values for the program-wide state.
 * The initialisers are positional, so their order must match the order
 * in which the fields are declared in StateInfo (declared elsewhere).
 * The trailing comment on each line names the corresponding field and,
 * where there is one, the command-line flag associated with it.
 * The FILE * fields all start as NULL here; outputfile and logfile are
 * assigned stdout and stderr at run time by init_default_global_state().
 */
62 StateInfo GlobalState = {
63 FALSE, /* skipping_current_game */
64 FALSE, /* check_only (-r) */
65 TRUE, /* verbose (-s) */
66 TRUE, /* keep_NAGs (-N) */
67 TRUE, /* keep_comments (-C) */
68 TRUE, /* keep_variations (-V) */
69 ALL_TAGS, /* tag_output_form (-7, --notags) */
70 TRUE, /* match_permutations (-v) */
71 FALSE, /* positional_variations (-x) */
72 FALSE, /* use_soundex (-S) */
73 FALSE, /* suppress_duplicates (-D) */
74 FALSE, /* suppress_originals (-U) */
75 FALSE, /* fuzzy_match_duplicates (--fuzzy) */
76 0, /* fuzzy_match_depth (--fuzzy) */
77 FALSE, /* check_tags */
78 FALSE, /* add_ECO (-e) */
79 FALSE, /* parsing_ECO_file (-e) */
80 DONT_DIVIDE, /* ECO_level (-E) */
81 SAN, /* output_format (-W) */
82 MAX_LINE_LENGTH, /* max_line_length (-w) */
83 FALSE, /* use_virtual_hash_table (-Z) */
84 FALSE, /* check_move_bounds (-b) */
85 FALSE, /* match_only_checkmate (-M) */
86 FALSE, /* match_only_stalemate (--stalemate) */
87 TRUE, /* keep_move_numbers (--nomovenumbers) */
88 TRUE, /* keep_results (--noresults) */
89 TRUE, /* keep_checks (--nochecks) */
90 FALSE, /* output_evaluation (--evaluation) */
91 FALSE, /* keep_broken_games (--keepbroken) */
92 0, /* depth_of_positional_search */
93 0, /* num_games_processed */
94 0, /* num_games_matched */
95 0, /* games_per_file (-#) */
96 1, /* next_file_number */
97 0, /* lower_move_bound */
98 10000, /* upper_move_bound */
99 -1, /* output_ply_limit (--plylimit) */
100 0, /* matching_game_number */
101 FALSE, /* output_FEN_string */
102 FALSE, /* add_FEN_comments (--fencomments) */
103 FALSE, /* add_position_match_comments (--markmatches) */
104 FALSE, /* output_total_plycount (--totalplycount) */
105 FALSE, /* add_hashcode_tag (--addhashcode) */
106 FALSE, /* dump-eco (--dumpeco) */
107 "MATCH", /* position_match_comment (--markpositionmatches) */
/* NOTE(review): add_position_match_comments above cites --markmatches
 * while this entry cites --markpositionmatches; confirm which flag
 * spelling is the current one.
 */
108 (char *)NULL, /* current_input_file */
109 NORMALFILE, /* current_file_type */
110 -1, /* current_file_number */
111 0, /* start_file_number */
112 0, /* start_position */
113 LONG_MAX, /* end_position */
114 0, /* computer_flag */
115 DEFAULT_ECO_FILE, /* eco_file (-e) */
116 (FILE *)NULL, /* outputfile (-o, -a). Default is stdout */
117 (char *)NULL, /* output_filename (-o, -a) */
118 (FILE *)NULL, /* logfile (-l). Default is stderr */
119 (FILE *)NULL, /* duplicate_file (-d) */
120 (FILE *)NULL, /* non_matching_file (-n) */
123 /* Prepare the output file handles in GlobalState.
 * GlobalState.outputfile is pointed at stdout and GlobalState.logfile at
 * stderr, replacing the NULL values given in the static initialiser.
 * The output line length is then set to MAX_LINE_LENGTH via
 * set_output_line_length().
 * Takes no parameters.
 */
125 init_default_global_state(void)
127 GlobalState.outputfile = stdout;
128 GlobalState.logfile = stderr;
129 set_output_line_length(MAX_LINE_LENGTH);
/* Program entry point.
 * Sets up the default global state, then walks the command line:
 * an argument beginning with '-' is dispatched on its flag letter to
 * process_argument() (or to process_long_form_argument() for long-form
 * arguments); anything else is taken to be the name of a file of games
 * and is added to the source list as a NORMALFILE.
 * After the arguments have been handled, the duplicate hash table is
 * initialised, the ECO file is opened if GlobalState.add_ECO is set,
 * the first input file is opened and yyparse() is run on it.
 * Finally the duplicate hash table is cleared, the matched/processed
 * game counts are written to the log file when GlobalState.verbose is
 * set, and the log file is closed unless it is stderr or NULL.
 *
 * argc, argv: the command-line arguments; argv[0] is skipped.
 */
133 main(int argc, char *argv[])
136 /* Prepare global state. */
137 init_default_global_state();
138 /* Prepare the Game_Header. */
140 /* Prepare the tag lists for -t/-T matching. */
142 /* Prepare the hash tables for transposition detection. */
144 /* Initialise the lexical analyser's tables. */
146 /* Allow for some arguments. The loop header has no increment step: argnum is advanced within the body (for example by args_processed for long-form arguments). */
147 for(argnum = 1; argnum < argc; ){
148 const char *argument = argv[argnum];
149 if(argument[0] == '-'){
151 /* Arguments with no additional component. */
152 case SEVEN_TAG_ROSTER_ARGUMENT:
153 case DONT_KEEP_COMMENTS_ARGUMENT:
154 case DONT_KEEP_DUPLICATES_ARGUMENT:
155 case DONT_KEEP_VARIATIONS_ARGUMENT:
156 case DONT_KEEP_NAGS_ARGUMENT:
157 case DONT_MATCH_PERMUTATIONS_ARGUMENT:
158 case CHECK_ONLY_ARGUMENT:
159 case KEEP_SILENT_ARGUMENT:
160 case USE_SOUNDEX_ARGUMENT:
161 case MATCH_CHECKMATE_ARGUMENT:
162 case SUPPRESS_ORIGINALS_ARGUMENT:
163 case OUTPUT_FEN_STRING_ARGUMENT:
164 case USE_VIRTUAL_HASH_TABLE_ARGUMENT:
165 process_argument(argument[1], "");
169 /* Argument rewritten as a different one. */
170 case ALTERNATIVE_HELP_ARGUMENT:
171 process_argument(HELP_ARGUMENT, "");
175 /* Arguments where an additional component is required.
176 * It must be adjacent to the argument and not separated from it.
178 case TAG_EXTRACTION_ARGUMENT:
179 process_argument(argument[1], &(argument[2]));
183 /* Arguments where an additional component is optional.
184 * If it is present, it must be adjacent to the argument
185 * letter and not separated from it.
187 case USE_ECO_FILE_ARGUMENT:
188 case OUTPUT_FORMAT_ARGUMENT:
190 process_argument(argument[1], &(argument[2]));
194 /* Long form arguments. */
195 case LONG_FORM_ARGUMENT:
197 /* How many args (1 or 2) are processed. */
199 /* This argument might need the following argument
200 * as an associated value.
202 const char *possible_associated_value = "";
203 if(argnum + 1 < argc) {
204 possible_associated_value = argv[argnum+1];
206 /* Find out how many arguments were consumed
210 process_long_form_argument(&argument[2],
211 possible_associated_value);
212 argnum += args_processed;
216 /* Arguments with a required filename component. */
217 case FILE_OF_ARGUMENTS_ARGUMENT:
218 case APPEND_TO_OUTPUT_FILE_ARGUMENT:
219 case CHECK_FILE_ARGUMENT:
220 case DUPLICATES_FILE_ARGUMENT:
221 case FILE_OF_FILES_ARGUMENT:
222 case WRITE_TO_LOG_FILE_ARGUMENT:
223 case APPEND_TO_LOG_FILE_ARGUMENT:
224 case NON_MATCHING_GAMES_ARGUMENT:
225 case WRITE_TO_OUTPUT_FILE_ARGUMENT:
226 case TAG_ROSTER_ARGUMENT:
227 { /* We require an associated file argument. */
228 const char argument_letter = argument[1];
229 const char *filename = &(argument[2]);
230 if(*filename == '\0'){
231 /* Try to pick it up from the next argument. */
234 filename = argv[argnum];
237 /* Make sure the associated_value does not look
238 * like the next argument.
240 if((*filename == '\0') || (*filename == '-')){
241 fprintf(GlobalState.logfile,
242 "Usage: -%c filename\n",
250 process_argument(argument[1], filename);
254 /* Arguments with a required following value. */
255 case BOUNDS_ARGUMENT:
256 case ECO_OUTPUT_LEVEL_ARGUMENT:
257 case LINE_WIDTH_ARGUMENT:
258 case GAMES_PER_FILE_ARGUMENT:
259 { /* We require an associated value argument. */
260 const char argument_letter = argument[1];
261 const char *associated_value = &(argument[2]);
262 if(*associated_value == '\0'){
263 /* Try to pick it up from the next argument. */
266 associated_value = argv[argnum];
269 /* Make sure the associated_value does not look
270 * like the next argument.
272 if((*associated_value == '\0') ||
273 (*associated_value == '-')){
274 fprintf(GlobalState.logfile,
275 "Usage: -%c value\n",
283 process_argument(argument[1], associated_value);
287 /* Arguments that require different treatment because they
288 * are present on the command line rather than an argsfile.
292 case POSITIONS_ARGUMENT:
293 case ENDINGS_ARGUMENT:
294 { /* From the command line, we require an
295 * associated file argument.
296 * Check this here, as it is not the case
297 * when reading arguments from an argument file.
299 const char *filename = &(argument[2]);
300 const char argument_letter = argument[1];
301 if(*filename == '\0'){
302 /* Try to pick it up from the next argument. */
305 filename = argv[argnum];
308 /* Make sure the filename does not look
309 * like the next argument.
311 if((*filename == '\0') || (*filename == '-')){
312 fprintf(GlobalState.logfile,
313 "Usage: -%cfilename or -%c filename\n",
314 argument_letter,argument_letter);
321 process_argument(argument_letter,filename);
325 fprintf(GlobalState.logfile,
326 "Unknown flag %s. Use -%c for usage details.\n",
327 argument,HELP_ARGUMENT);
333 /* Should be a file name containing games. */
334 add_filename_to_source_list(argument,NORMALFILE);
338 /* Prepare the hash tables for duplicate detection. */
339 init_duplicate_hash_table();
341 if(GlobalState.add_ECO){
342 /* Read in a list of ECO lines in order to classify the games. */
343 if(open_eco_file(GlobalState.eco_file)){
344 /* Indicate that the ECO file is currently being parsed. */
345 GlobalState.parsing_ECO_file = TRUE;
348 GlobalState.parsing_ECO_file = FALSE;
351 fprintf(GlobalState.logfile,"Unable to open the ECO file %s.\n",
352 GlobalState.eco_file);
357 /* Open up the first file to act as Lex's source of input. */
358 if(!open_first_file()){
361 yyparse(GlobalState.current_file_type);
362 /* Remove any temporary files. */
363 clear_duplicate_hash_table();
364 if(GlobalState.verbose){
365 fprintf(GlobalState.logfile,"%lu game%s matched out of %lu.\n",
366 GlobalState.num_games_matched,
367 GlobalState.num_games_matched == 1?"":"s",
368 GlobalState.num_games_processed);
370 if((GlobalState.logfile != stderr) && (GlobalState.logfile != NULL)){
371 (void) fclose(GlobalState.logfile);