2 * Program: pgn-extract: a Portable Game Notation (PGN) extractor.
3 * Copyright (C) 1994-2014 David Barnes
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 1, or (at your option)
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 * David Barnes may be contacted as D.J.Barnes@kent.ac.uk
19 * http://www.cs.kent.ac.uk/people/staff/djb/
23 /* Type definitions required by multiple files. */
25 /* Define a type to represent different output formats.
26 * Currently represented are:
27 * SOURCE: the original source notation.
29 * CM: Chess Master input format.
30 * LALG: Long-algebraic, e.g. e2e4.
31 * HALG: Hyphenated long-algebraic, e.g. e2-e4.
32 * ELALG: Enhanced long-algebraic. Includes piece names, e.g. Ng1f3,
33 * and en-passant notation.
34 * UCI: UCI-compatible format - actually LALG.
/* Enumerators take the default consecutive values, SOURCE == 0
 * through SESSE_BIN == 8, so inserting or reordering names changes
 * every later value.
 * NOTE(review): SAN, EPD and SESSE_BIN are not described in the
 * visible part of the list above; SESSE_BIN presumably selects the
 * binary packed position format mentioned for moves - confirm.
 */
36 typedef enum { SOURCE, SAN, EPD, CM, LALG, HALG, ELALG, UCI, SESSE_BIN } OutputFormat;
38 /* Define a type to specify whether a move gives check, checkmate,
40 * checkmate implies check, but check does not imply that a move
/* NOCHECK == 0, CHECK == 1, CHECKMATE == 2 by default enumeration
 * order, so a zero-initialised CheckStatus is NOCHECK.
 */
43 typedef enum { NOCHECK, CHECK, CHECKMATE } CheckStatus;
45 /* Permit lists of strings, e.g. lists of comments,
48 typedef struct string_list {
50 struct string_list *next;
53 /* The following function is used for linking list items together. */
/* list: the existing list that the new item is to be linked with.
 * str:  the text for the new item; it is not modified through this
 *       pointer (const-qualified).
 * Returns a StringList pointer, presumably the head of the resulting
 * list - the definition is not in this file, so confirm there, along
 * with whether str is copied or stored by reference.
 */
54 StringList *save_string_list_item(StringList *list,const char *str);
56 typedef struct comment_list{
58 struct comment_list *next;
61 typedef struct variation{
62 CommentList *prefix_comment;
64 CommentList *suffix_comment;
65 struct variation *next;
68 /* Define a maximum length for the text of moves.
/* The move text array below is declared with MAX_MOVE_LEN+1
 * elements, i.e. 16 bytes in total for the current value.
 */
71 #define MAX_MOVE_LEN 15
/* Forward declaration only: struct EcoLog is an incomplete type here,
 * so this header can refer to it through pointers without needing
 * its definition.
 */
74 typedef struct EcoLog EcoLog;
76 /* Retain the text of a move and any associated
80 /* @@@ This array is of type unsigned char,
81 * in order to accommodate full 8-bit letters without
84 unsigned char move[MAX_MOVE_LEN+1];
85 /* Class of move, e.g. PAWN_MOVE, PIECE_MOVE. */
92 /* captured_piece is EMPTY if there is no capture. */
94 /* promoted_piece is EMPTY if class is not PAWN_MOVE_WITH_PROMOTION. */
96 /* Whether this move gives check. */
97 CheckStatus check_status;
98 /* An EPD representation of the board immediately after this move
102 /* Same as epd, but in our special binary packed format.
103 * Not zero-terminated, since it is binary.
108 CommentList *Comment;
109 /* terminating_result holds the result of the current list of moves. */
110 char *terminating_result;
113 /* Pointers to the previous and next move.
114 * The extraction program does not need the prev field, but my
115 * intention is to build other interfaces that might need it.
116 * For instance, a game viewer would need to be able to move backwards
117 * and forwards through a game.
119 struct move *prev, *next;
123 /* Tags for this game. */
125 /* The maximum number of strings in tags. */
127 /* Any comment prefixing the game, between
128 * the tags and the moves.
130 CommentList *prefix_comment;
131 /* The hash value of the final position. */
132 HashCode final_hash_value;
133 /* An accumulated hash value, used to disambiguate false clashes
134 * of final_hash_value.
136 HashCode cumulative_hash_value;
137 /* Board hash value at fuzzy_move_depth, if required. */
138 HashCode fuzzy_duplicate_hash;
139 /* The move list of the game. */
141 /* Whether the moves have been checked, or not. */
142 Boolean moves_checked;
143 /* Whether the moves are ok, or not. */
145 /* if !moves_ok, the first ply at which an error was found.
146 * 0 => no error found.
149 /* File position of the first byte of the game, or -1
155 /* Define a type to distinguish between CHECK files, NORMAL files,
157 * CHECKFILEs are those whose contents are not output.
158 * Their contents are used to check for duplicates in NORMALFILEs.
159 * An ECOFILE consists of ECO lines for classification.
/* NORMALFILE == 0, CHECKFILE == 1, ECOFILE == 2 by default enumeration
 * order, so a zero-initialised SourceFileType is NORMALFILE.
 */
161 typedef enum { NORMALFILE, CHECKFILE, ECOFILE } SourceFileType;
163 /* 0 = don't divide on ECO code.
164 * 1 = divide by letter.
165 * 2 = divide by letter and single digit.
166 * N > 1 = divide by letter and N-1 digits.
167 * In principle, it should be possible to expand the ECO classification
168 * with an arbitrary number of digits.
171 DONT_DIVIDE = 0, MIN_ECO_LEVEL = 1, MAX_ECO_LEVEL = 10
174 /* Define a type to describe which tags are to be output.
175 * This used to be handled by the Boolean seven_tag_roster field
176 * in GlobalState but there are now different forms of output
180 ALL_TAGS = 0, SEVEN_TAG_ROSTER = 1, NO_TAGS = 2
183 /* This structure holds details of the program state.
184 * Most of these fields are set from the program's arguments.
187 /* Whether we are skipping the current game - typically because
188 * of an error in its text.
190 Boolean skipping_current_game;
191 /* Whether to check, but not write the converted output. */
193 /* Whether to print a running commentary to logfile. */
195 /* Whether to keep NAGs along with moves. */
197 /* Whether to keep comments along with moves. */
198 Boolean keep_comments;
199 /* Whether to keep variations along with moves. */
200 Boolean keep_variations;
201 /* Which tags are to be output. */
202 TagOutputForm tag_output_format;
203 /* Whether to match permutations of textual variations or not. */
204 Boolean match_permutations;
205 /* Whether we are matching positional variations or not. */
206 Boolean positional_variations;
207 /* Whether we are using Soundex matching or not. */
209 /* Whether to suppress duplicate game scores. */
210 Boolean suppress_duplicates;
211 /* Whether to suppress unique game scores. */
212 Boolean suppress_originals;
213 /* Whether to use fuzzy matching for duplicates. */
214 Boolean fuzzy_match_duplicates;
215 /* At what depth to use fuzzy matching. */
216 int fuzzy_match_depth;
217 /* Whether to check the tags for matches. */
219 /* Whether to add ECO codes. */
221 /* Whether an ECO file is currently being parsed. */
222 Boolean parsing_ECO_file;
223 /* Which level to divide the output. */
224 EcoDivision ECO_level;
225 /* What form to write the output in. */
226 OutputFormat output_format;
227 /* Maximum output line length. */
228 unsigned max_line_length;
229 /* Whether to use a virtual hash table or not. */
230 Boolean use_virtual_hash_table;
231 /* Whether to match on the number of moves in a game. */
232 Boolean check_move_bounds;
233 /* Whether to match only games ending in checkmate. */
234 Boolean match_only_checkmate;
235 /* Whether to match only games ending in stalemate. */
236 Boolean match_only_stalemate;
237 /* Whether to output move numbers in the output. */
238 Boolean keep_move_numbers;
239 /* Whether to output results in the output. */
240 Boolean keep_results;
241 /* Whether to keep check and mate characters in the output. */
243 /* Whether to output an evaluation value after each move. */
244 Boolean output_evaluation;
245 /* Whether to keep games which have incorrect moves. */
246 Boolean keep_broken_games;
247 /* Maximum depth to which to search for positional variations.
248 * This is picked up from the length of variations in the positional
251 unsigned depth_of_positional_search;
252 unsigned long num_games_processed;
253 unsigned long num_games_matched;
254 /* How many games to store in each file. */
255 unsigned games_per_file;
256 /* Which is the next file number. */
257 unsigned next_file_number;
258 /* Lower and upper bounds for moves if check_move_bounds. */
259 unsigned lower_move_bound, upper_move_bound;
260 /* Limit to the number of plies to appear in the output. */
261 int output_ply_limit;
262 /* Which single game to output (matching_game_number > 0) */
263 unsigned long matching_game_number;
264 /* Whether to output a FEN string at the end of the game. */
265 Boolean output_FEN_string;
266 /* Whether to add a FEN comment after every move. */
267 Boolean add_FEN_comments;
268 /* Whether to add a 'matching position' comment. */
269 Boolean add_position_match_comments;
270 /* Whether to include a tag with the total ply count of the game. */
271 Boolean output_total_plycount;
272 /* Whether to add a HashCode tag. */
273 Boolean add_hashcode_tag;
274 /* Whether to dump ECO data to stdout after loading. */
276 /* The comment to use for position matches, if required. */
277 const char *position_match_comment;
278 /* Current input file name. */
279 const char *current_input_file;
280 /* Whether this is a CHECKFILE or a NORMALFILE. */
281 SourceFileType current_file_type;
282 /* Byte positions to scan to and from in the PGN file.
283 * Starting in the middle of a game will yield unexpected
288 /* File of ECO lines. */
289 const char *eco_file;
290 /* Where to write the extracted games. */
292 /* Output file name. */
293 const char *output_filename;
294 /* Where to write errors and running commentary. */
296 /* Where to write duplicate games. */
297 FILE *duplicate_file;
298 /* Where to write games that don't match the criteria. */
299 FILE *non_matching_file;
302 /* Provide access to the global state that has been set
303 * through command line arguments.
/* Declaration only - no storage is allocated by this line. The single
 * definition of GlobalState must be supplied by one source file.
 */
305 extern StateInfo GlobalState;
/* Takes a file name and a mode string and returns a FILE pointer.
 * NOTE(review): the parameters mirror fopen(), and the name suggests
 * that failure to open is treated as fatal rather than reported by a
 * NULL return, but the definition is not in this file - confirm
 * before relying on either assumption.
 */
306 FILE *must_open_file(const char *filename,const char *mode);