#!/bin/bash
#
# authors_list.sh -- VLC, extras/misc/authors_list.sh (as published on
# git.sesse.net).
#
# A script checking the git logs for commits. Final goal is updating AUTHORS.
#
# Usage: copy the script into the git checkout and run it from the source
# root, so that ./AUTHORS and the po/ directory can be found.
#
# All intermediate lists are written to the subdirectory temp_update_AUTHORS.
# At the end final.txt is copied into the current directory and the
# subdirectory is removed.  Set AUTHORS_LIST_KEEP_TEMP to a non-empty value to
# keep the subdirectory, e.g. to inspect review.txt and translators.txt, which
# the messages printed by this script refer to.

set -u

readonly WORK_DIR='temp_update_AUTHORS'

# Print an error message on stderr.
err() {
  printf 'authors_list.sh: %s\n' "$*" >&2
}

# Filter: remove the commit count that "git shortlog -s" prints before each
# name.  Only the leading "<blanks><count><blanks>" prefix is stripped, so a
# name that itself starts with a digit is left intact.
strip_commit_counts() {
  sed 's/^[[:space:]]*[0-9]*[[:space:]]*//'
}

# Print the part of the AUTHORS-style file $1 that follows the first line
# matching "Programming", skipping empty lines and lines made of dashes only.
programmers_section() {
  awk '
    !in_section { if (/Programming/) in_section = 1; next }
    /[^-]/
  ' "$1"
}

# Filter: print the stdin lines that do not contain any line of file $1 as a
# literal substring.  Names are matched as fixed strings, never as regular
# expressions, so characters such as "." or "[" in a name are harmless.
drop_lines_containing() {
  local rc=0
  # An empty needle would match every line, so empty lines of $1 are ignored.
  grep -vF -f <(grep -v '^$' -- "$1") || rc=$?
  # grep exits with 1 when it selects no line; only a status above 1 is an
  # actual error.
  ((rc <= 1))
}

main() {
  if ! command -v git >/dev/null 2>&1 ||
    ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    err "must be run inside a git checkout"
    return 1
  fi
  if [[ ! -r AUTHORS ]]; then
    err "AUTHORS not found; run this script from the source root"
    return 1
  fi

  mkdir -p -- "$WORK_DIR" || return 1

  echo "Checking all git logs"
  # The revision is given explicitly: without one, git shortlog summarises a
  # log read from standard input whenever stdin is not a terminal.
  git shortlog -sn HEAD >"$WORK_DIR/all_git.txt" || return 1

  echo 'Checking "po only" git logs'
  git shortlog -sn HEAD -- po/ >"$WORK_DIR/po_git.txt" || return 1

  echo "reading AUTHORS"
  # The bottom part of AUTHORS, starting below the "Programming" heading.
  programmers_section AUTHORS >"$WORK_DIR/programmers_part.txt"

  echo "Removing commit counts from git log"
  strip_commit_counts <"$WORK_DIR/all_git.txt" |
    sort -u >"$WORK_DIR/all_git_namesonly.txt"

  echo "Removing translators from the git log"
  # Remove translators: committers with the same count in po/ and in total
  # have identical lines in both shortlogs, so "uniq -u" drops them.  Then the
  # commit counter is removed.  Someone with commits both inside and outside
  # po/ has two different lines; "sort -u" folds them into one name.
  sort -- "$WORK_DIR/all_git.txt" "$WORK_DIR/po_git.txt" |
    uniq -u |
    strip_commit_counts |
    sort -u >"$WORK_DIR/coders_only.txt"
  # coders_only.txt is everyone who did code commits with git.
  #
  # A similar effect could be had by removing from all_git_namesonly.txt every
  # line that contains the name of a translator.  Matching whole lines instead
  # of substrings may be preferable there (i.e. for contributors
  # "Firstname Secondname aka something_you_want_to_keep").

  # Now, reduce the number of lines the human reader has to check by killing
  # the already listed contributors.

  echo "Finding pre-git contributors in AUTHORS"
  # AUTHORS entries that do not mention any git code committer.
  drop_lines_containing "$WORK_DIR/coders_only.txt" \
    <"$WORK_DIR/programmers_part.txt" |
    sort -u >"$WORK_DIR/pre-git.txt"

  # Git code committers that do not mention any complete AUTHORS entry.
  drop_lines_containing "$WORK_DIR/programmers_part.txt" \
    <"$WORK_DIR/coders_only.txt" |
    sort -u >"$WORK_DIR/new_coders_only.txt"

  # VideoLAN as a contributor can be removed, I think

  # Just remove the tab and the number of commits, keep the order.  This file
  # is going to be the sort order.
  # You can re-order the complete programmers part like that and simply append
  # the pre-git committers.  Ordering the contributors that way is not a bad
  # idea.
  # The question: is it easier/better to check the new committers in this
  # order?  One can find (UPPERCASE issues, middle names, ...) better when
  # listing them alphabetically.
  #
  # I suggest checking manually a file built like that: alphabetically
  # ordered, complete list of contributors/git (code) committers, with an
  # extra marking for new ones.  Example:
  #   Old Commiter
  #   New Commiter           ---XXX---NEW
  #   New COMMITER           ---XXX---NEW   |same name with UPPERCASE part
  #   Very Commiter
  #   Very New Commiter      ---XXX---NEW   |same person with a middle name
  #
  # The uppercase case can be done by script I guess, I did not look up how to
  # make sure the intended version will be picked.
  # How to proceed with manually found problems?  Solve them for the future
  # (.mailmap/own script/...).
  strip_commit_counts <"$WORK_DIR/all_git.txt" \
    >"$WORK_DIR/all_git_namesonly_ordered.txt"

  if grep -Fxq -- 'VideoLAN' "$WORK_DIR/all_git_namesonly_ordered.txt"; then
    echo "VideoLAN is not a person"
  fi
  # Code committers in shortlog order (most commits first).  Whole-line,
  # fixed-string matching keeps a translator out of the list even when the
  # name is a prefix of some coder's name.
  grep -Fx -f "$WORK_DIR/coders_only.txt" \
    -- "$WORK_DIR/all_git_namesonly_ordered.txt" |
    grep -Fxv -- 'VideoLAN' >"$WORK_DIR/ordered_by_commits.txt"

  # Lines present in both shortlogs: people who committed into po/ only.
  sort -- "$WORK_DIR/all_git.txt" "$WORK_DIR/po_git.txt" |
    uniq -d |
    strip_commit_counts >"$WORK_DIR/translators.txt"
  wc -l "$WORK_DIR"/*
  echo "Some contributors only commited into po. Please cross-check that with the localization part. See: temp_update_AUTHORS/translators.txt"
  echo "But first, please check if temp_update_AUTHORS/review.txt contains complete names and other constraints for publishing (i.e. UPPERCASE name parts, broken text, a name and it's abbreviation both present...)"

  # New coders get a marker appended, then they are merged alphabetically with
  # the current AUTHORS entries.  This file contains VideoLAN as a contributor.
  {
    sed 's/$/               ---XXX---NEW/' "$WORK_DIR/new_coders_only.txt"
    cat -- "$WORK_DIR/programmers_part.txt"
  } | sort >"$WORK_DIR/review.txt"

  echo
  echo "For the lazy ones: Have a look at temp_update_AUTHORS/final.txt"
  echo "Contains all git code commiters (the translators are stored somewhere else) sorted by commits, and the pre-git commiters"
  {
    echo "Programming"
    echo "-----------"
    cat -- "$WORK_DIR/ordered_by_commits.txt" "$WORK_DIR/pre-git.txt"
  } >"$WORK_DIR/final.txt"
  echo

  echo "Listing email adresses used with different names..."
  # Keep only the "<address>" part of each line; an address printed here
  # belongs to more than one shortlog entry.
  git shortlog -sne HEAD | sed 's/^[^<]*//' | sort | uniq -d
  echo "If something was listed here you should probably modify .mailmap"

  # The following would put the shortlog lines of those addresses into
  # temp_twice_used_adress/check_for_this.txt.
  # Currently, this is not needed (.mailmap is up to date).
  #
  #mkdir -p temp_twice_used_adress
  #git shortlog -sne HEAD > temp_twice_used_adress/all_shortlog_sne.txt
  #sed 's/^[^<]*//' < temp_twice_used_adress/all_shortlog_sne.txt |
  #  sort | uniq -d > temp_twice_used_adress/all_git_addresses_only.txt
  #grep -F -f temp_twice_used_adress/all_git_addresses_only.txt \
  #  temp_twice_used_adress/all_shortlog_sne.txt \
  #  > temp_twice_used_adress/check_for_this.txt

  # Never remove the working directory unless the result has been saved.
  cp -- "$WORK_DIR/final.txt" . || return 1
  if [[ -z "${AUTHORS_LIST_KEEP_TEMP:-}" ]]; then
    rm -rf -- "${WORK_DIR:?}/"
  fi
}

main "$@"