2 * Program: pgn-extract: a Portable Game Notation (PGN) extractor.
3 * Copyright (C) 1994-2014 David Barnes
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 1, or (at your option)
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 * David Barnes may be contacted as D.J.Barnes@kent.ac.uk
19 * http://www.cs.kent.ac.uk/people/staff/djb/
34 /* Define a type to permit tag strings to be associated with
35 * a TagOperator for selecting relationships between them
36 * and a game to be matched.
43 /* Definitions for maintaining arrays of tag strings.
44 * These arrays are used for various purposes:
45 * lists of white/black players to extract on.
46 * lists of other criteria to extract on.
50 /* How many elements we have currently allocated for.
51 * If this is > 0 then we should always have allocated exactly
52 * one more than this number to simplify the (char **)NULL
53 * termination of the list.
55 unsigned num_allocated_elements;
56 /* num_used_elements should always be <= num_allocated elements. */
57 unsigned num_used_elements;
58 /* The list of elements.
59 * Elements 0 .. num_used_elements point to null-terminated strings.
60 * list[num_used_elements] == (char **)NULL once the list is complete.
62 TagSelection *tag_strings;
65 /* Functions to allow creation of string lists. */
66 /* Allow a string list for every known tag.
67 * It is important that these lists should be initialised to
68 * { 0, 0, (TagSelection *) NULL }
69 * which happens by default, anyway.
70 * This array, and its length (tag_list_length) are initialised
71 * by calling init_tag_lists.
73 static StringArray *TagLists;
74 static int tag_list_length = 0;
76 static char *soundex(const char *str);
77 static Boolean CheckList(int tag,const char *tag_string,StringArray *list);
79 void init_tag_lists(void)
82 tag_list_length = ORIGINAL_NUMBER_OF_TAGS;
83 TagLists = (StringArray *) MallocOrDie(tag_list_length*sizeof(*TagLists));
84 for(i = 0; i < tag_list_length; i++){
85 TagLists[i].num_allocated_elements = 0;
86 TagLists[i].num_used_elements = 0;
87 TagLists[i].tag_strings = (TagSelection *) NULL;
92 * Extend the tag list to the new length.
95 extend_tag_list_length(int new_length)
97 if(new_length < tag_list_length){
98 fprintf(GlobalState.logfile,
99 "Internal error: inappropriate call to extend_tag_list_length().\n");
100 fprintf(GlobalState.logfile,
101 "New length of %d is not greater than existing length of %d\n",
102 new_length,tag_list_length);
107 TagLists = (StringArray *) ReallocOrDie((void *) TagLists,
108 new_length*sizeof(*TagLists));
109 for(i = tag_list_length; i < new_length; i++){
110 TagLists[i].num_allocated_elements = 0;
111 TagLists[i].num_used_elements = 0;
112 TagLists[i].tag_strings = (TagSelection *) NULL;
114 tag_list_length = new_length;
118 /* Add str to the list of strings in list.
119 * List may be a new list, in which case space is allocated
121 * Return the index on success, otherwise -1.
124 add_to_taglist(const char *str,StringArray *list)
125 { Boolean everything_ok = TRUE;
127 if(list->num_allocated_elements == list->num_used_elements){
128 /* We need more space. */
129 if(list->num_allocated_elements == 0){
130 /* No elements in the list. */
131 list->tag_strings = (TagSelection *)MallocOrDie((INIT_LIST_SPACE+1)*
132 sizeof(TagSelection));
133 if(list->tag_strings != NULL){
134 list->num_allocated_elements = INIT_LIST_SPACE;
135 list->num_used_elements = 0;
138 everything_ok = FALSE;
142 list->tag_strings = (TagSelection *)realloc((void *)list->tag_strings,
143 (list->num_allocated_elements+MORE_LIST_SPACE+1)*
144 sizeof(TagSelection));
145 if(list->tag_strings != NULL){
146 list->num_allocated_elements += MORE_LIST_SPACE;
149 everything_ok = FALSE;
154 /* There is space. */
155 unsigned ix = list->num_used_elements;
157 list->tag_strings[ix].operator = NONE;
158 list->tag_strings[ix].tag_string = (char *) MallocOrDie(strlen(str)+1);
159 if(list->tag_strings[ix].tag_string != NULL){
160 strcpy(list->tag_strings[ix].tag_string,str);
161 list->num_used_elements++;
162 /* Make sure that the list is properly terminated at all times. */
163 list->tag_strings[ix+1].tag_string = NULL;
175 /* Simple soundex code supplied by John Brogan
176 * (jwbrogan@unix2.netaxs.com), 26th Aug 1994.
178 * "In recognition of the large number of strong players from countries
179 * with Slavic based languages, I tried to tailor the soundex code
180 * to match any reasonable transliteration of a Slavic name into
181 * English. Thus, Nimzovich will match Nimsowitsch, Tal will match
182 * Talj, etc. Unfortunately, in order to be sure not to miss any
183 * valid matches, I had to make the code so tolerant that it sometimes
184 * comes up with some wildly false matches. This, to me, is better
185 * than missing some games, but your mileage may differ."
187 * This looks like it was originally derived from the public domain
188 * version released by N. Dean Pentcheff, 1989, which was, itself,
189 * based on that in D.E. Knuth's "The art of computer programming.",
190 * Volume 3: Sorting and searching. Addison-Wesley Publishing Company:
191 * Reading, Mass. Page 392.
192 * Amended by David Barnes, 2nd Sep 1994.
195 /* Define a maximum length for the soundex result. */
196 #define MAXSOUNDEX 50
198 /* Calculate a soundex string for instr.
199 * The space used is statically allocated, so the caller
200 * will have to allocate its own for the result if it
201 * to be retained across different calls.
204 soundex(const char *str)
205 { static char sbuf[MAXSOUNDEX+1];
206 /* An index into sbuf. */
208 /* Keep track of the last character to compress repeated letters. */
210 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
211 const char *mapping = "01230120002455012622011202";
212 char initial_letter = *str;
214 /* Special case for names that begin with 'J',
215 * otherwise Janosevic == Nimzovich.
216 * In addition, we really want Yusupov to match Jusupov.
218 if(islower((int) initial_letter)){
219 initial_letter = toupper(initial_letter);
221 if((initial_letter == 'Y') || (initial_letter == 'J')){
227 while((*str != '\0') && (sindex < MAXSOUNDEX)){
230 /* We are only interested in alphabetics, and duplicate
231 * characters are reduced to singletons.
233 if(isalpha((int) ch) && (ch != lastc)){
236 if(islower((int) ch)){
239 /* Pick up the translation. */
240 translation = mapping[ch - 'A'];
241 if((translation != '0') && (translation != lastc)){
242 sbuf[sindex] = translation;
253 /* Return TRUE if tag is one on which soundex matching should
254 * be used, if requested.
258 { Boolean use_soundex = FALSE;
263 case PSEUDO_PLAYER_TAG:
273 /* Add tagstr to the list of tags to be matched.
274 * If we are using soundex matching, then store
275 * its soundex version rather than its plain text.
278 add_tag_to_list(int tag,const char *tagstr,TagOperator operator)
280 if(tag >= tag_list_length){
281 /* A new tag - one without a pre-defined _TAG #define.
282 * Make sure there is room for it in TagList.
284 extend_tag_list_length(tag+1);
286 if((tag >= 0) && (tag < tag_list_length)){
287 const char *string_to_store = tagstr;
290 if(GlobalState.use_soundex){
291 if(soundex_tag(tag)){
292 string_to_store = soundex(tagstr);
295 ix = add_to_taglist(string_to_store,&TagLists[tag]);
297 TagLists[tag].tag_strings[ix].operator = operator;
299 /* Ensure that we know we are checking tags. */
300 GlobalState.check_tags = TRUE;
303 fprintf(GlobalState.logfile,
304 "Illegal tag number %d in add_tag_to_list.\n",tag);
308 /* Argstr is an extraction argument.
309 * The type of argument is indicated by the first letter of
311 * a -- annotator of the game
312 * b -- player of the black pieces
313 * d -- date of the game
315 * p -- player of either colour
317 * w -- player of the white pieces
318 * The remainder of argstr is the argument string to be entered
319 * into the appropriate list.
322 extract_tag_argument(const char *argstr)
323 { const char *arg = &(argstr[1]);
327 add_tag_to_list(ANNOTATOR_TAG,arg,NONE);
330 add_tag_to_list(BLACK_TAG,arg,NONE);
333 add_tag_to_list(DATE_TAG,arg,NONE);
336 add_tag_to_list(ECO_TAG,arg,NONE);
339 add_tag_to_list(HASHCODE_TAG,arg,NONE);
342 add_tag_to_list(PSEUDO_PLAYER_TAG,arg,NONE);
345 add_tag_to_list(RESULT_TAG,arg,NONE);
348 add_tag_to_list(WHITE_TAG,arg,NONE);
351 fprintf(GlobalState.logfile,
352 "Unknown type of tag extraction argument: %s\n",
359 /* Check for one of list->strings matching the date_string.
360 * Return TRUE on match, FALSE on failure.
361 * It is only necessary for a prefix of tag to match
365 /* Set limits on the allowable ranges for dates extracted from games.
366 * Because of century changes, it is difficult to know what
367 * best to do with two digit year numbers; so exclude them.
373 CheckDate(const char *date_string,StringArray *list)
374 { unsigned list_index;
376 /* Care needed because dates with operators must be ANDed and
377 * dates without operators must be ORed.
378 * The first match is used to set wanted properly for the first time.
380 Boolean wanted = FALSE;
381 for(list_index = 0; list_index < list->num_used_elements; list_index++){
382 const char *list_string = list->tag_strings[list_index].tag_string;
383 TagOperator operator = list->tag_strings[list_index].operator;
385 if(*list_string == 'b'){
386 operator = LESS_THAN;
389 else if(*list_string == 'a'){
390 operator = GREATER_THAN;
396 if(operator != NONE){
397 /* We have a relational comparison. */
398 unsigned game_date, list_date;
399 /* Try to extract dates from both strings. */
400 if((sscanf(list_string,"%u",&list_date) == 1) &&
401 (sscanf(date_string,"%u",&game_date) == 1) &&
402 (game_date > MINDATE) && (game_date < MAXDATE)){
406 matches = game_date < list_date;
408 case LESS_THAN_OR_EQUAL_TO:
409 matches = game_date <= list_date;
412 matches = game_date > list_date;
414 case GREATER_THAN_OR_EQUAL_TO:
415 matches = game_date >= list_date;
418 matches = game_date == list_date;
421 matches = game_date != list_date;
425 /* Should have been covered above. */
429 if(list_index == 0) {
433 wanted = wanted && matches;
437 /* Bad format, or out of range. Assume not wanted. */
442 /* No need to check if we already have a match. */
443 if(list_index == 0 || !wanted) {
444 /* Just a straight prefix match. */
445 wanted = strncmp(date_string,list_string,strlen(list_string)) == 0;
453 CheckElo(const char *elo_string,StringArray *list)
454 { unsigned list_index;
455 Boolean wanted = TRUE;
457 for(list_index = 0; (list_index < list->num_used_elements) && wanted;
459 const char *list_string = list->tag_strings[list_index].tag_string;
460 TagOperator operator = list->tag_strings[list_index].operator;
462 if(operator != NONE){
463 /* We have a relational comparison. */
464 unsigned game_elo, list_elo;
465 /* Try to extract elos from both strings. */
466 if((sscanf(list_string,"%u",&list_elo) == 1) &&
467 (sscanf(elo_string,"%u",&game_elo) == 1)){
470 wanted = game_elo < list_elo;
472 case LESS_THAN_OR_EQUAL_TO:
473 wanted = game_elo <= list_elo;
476 wanted = game_elo > list_elo;
478 case GREATER_THAN_OR_EQUAL_TO:
479 wanted = game_elo >= list_elo;
482 wanted = game_elo == list_elo;
485 wanted = game_elo != list_elo;
488 /* Already covered above. */
493 /* Bad format, or out of range. Assume not wanted. */
498 /* Just a straight prefix match. */
499 if(strncmp(elo_string,list_string,strlen(list_string)) == 0){
507 /* Check for one of list->strings matching the tag.
508 * Return TRUE on match, FALSE on failure.
509 * It is only necessary for a prefix of tag to match
513 CheckList(int tag,const char *tag_string,StringArray *list)
514 { unsigned list_index;
515 Boolean wanted = FALSE;
516 const char *search_str;
518 if(GlobalState.use_soundex && soundex_tag(tag)){
519 search_str = soundex(tag_string);
522 search_str = tag_string;
524 for(list_index = 0; (list_index < list->num_used_elements) && !wanted;
526 const char *list_string = list->tag_strings[list_index].tag_string;
528 if(strncmp(search_str,list_string,strlen(list_string)) == 0){
535 /* Check the Tag Details of this current game against those in
536 * the wanted lists. Check all details apart from any ECO
537 * tag as this is checked separately by CheckECOTag.
538 * An empty list implies that there is no restriction on
539 * the values in the corresponding tag field.
540 * In consequence, completely empty lists imply that all
541 * games reaching this far are wanted.
542 * Return TRUE if wanted, FALSE otherwise.
545 CheckTagDetailsNotECO(char *Details[],int num_details)
546 { Boolean wanted = TRUE;
549 if(GlobalState.check_tags){
551 if(num_details < tag_list_length){
552 fprintf(GlobalState.logfile,
553 "Internal error: mismatch in tag set lengths in ");
554 fprintf(GlobalState.logfile,
555 "CheckTagDetailsNotECO: %d vs %d\n",
556 num_details,tag_list_length);
560 /* PSEUDO_PLAYER_TAG and PSEUDO_ELO_TAG are treated differently,
561 * since they have the effect of or-ing together the WHITE_ and BLACK_ lists.
562 * Otherwise, different tag lists are and-ed.
564 if(TagLists[PSEUDO_PLAYER_TAG].num_used_elements != 0){
565 /* Check both the White and Black lists. */
566 if(Details[WHITE_TAG] != NULL){
567 wanted = CheckList(WHITE_TAG,Details[WHITE_TAG],
568 &TagLists[PSEUDO_PLAYER_TAG]);
569 /* If we didn't find a player there, try for the opponent. */
570 if(!wanted && (Details[BLACK_TAG] != NULL)){
571 wanted = CheckList(BLACK_TAG,Details[BLACK_TAG],
572 &TagLists[PSEUDO_PLAYER_TAG]);
575 else if(Details[BLACK_TAG] != NULL){
576 wanted = CheckList(BLACK_TAG,Details[BLACK_TAG],
577 &TagLists[PSEUDO_PLAYER_TAG]);
583 else if(TagLists[PSEUDO_ELO_TAG].num_used_elements != 0){
584 /* Check both the White and Black lists. */
585 if(Details[WHITE_ELO_TAG] != NULL){
586 wanted = CheckElo(Details[WHITE_ELO_TAG],
587 &TagLists[PSEUDO_ELO_TAG]);
588 /* If we didn't find a player there, try for the opponent. */
589 if(!wanted && (Details[BLACK_ELO_TAG] != NULL)){
590 wanted = CheckElo(Details[BLACK_ELO_TAG],
591 &TagLists[PSEUDO_ELO_TAG]);
594 else if(Details[BLACK_ELO_TAG] != NULL){
595 wanted = CheckElo(Details[BLACK_ELO_TAG],
596 &TagLists[PSEUDO_ELO_TAG]);
603 /* No PSEUDO_*_TAG info to check. */
606 /* Check the remaining tags in turn as long as we still have a match. */
607 for(tag = 0; (tag < tag_list_length) && wanted; tag++){
608 if(tag == PSEUDO_PLAYER_TAG){
610 else if(tag == PSEUDO_ELO_TAG){
612 else if(tag == ECO_TAG){
613 /* This is handled separately. */
615 else if(TagLists[tag].num_used_elements != 0){
616 if(Details[tag] != NULL){
618 wanted = CheckDate(Details[tag],&TagLists[tag]);
620 else if((tag == WHITE_ELO_TAG) || (tag == BLACK_ELO_TAG)){
621 wanted = CheckElo(Details[tag],&TagLists[tag]);
624 wanted = CheckList(tag,Details[tag],&TagLists[tag]);
628 /* Required tag not present. */
640 /* Check just the ECO tag from the game's tag details. */
642 CheckECOTag(char *Details[])
643 { Boolean wanted = TRUE;
645 if(GlobalState.check_tags){
646 if(TagLists[ECO_TAG].num_used_elements != 0){
647 if(Details[ECO_TAG] != NULL){
648 wanted = CheckList(ECO_TAG,Details[ECO_TAG],&TagLists[ECO_TAG]);
651 /* Required tag not present. */