git.sesse.net Git - vlc/blob - extras/misc/translators_list.sh

   1 #!/bin/bash
   2
# Scans the headers of the po files for translator names. The final goal is to update AUTHORS.

# Copy this script into the top-level git directory and run it there, so that the po/*po files are found.
# It generates a subdirectory named temp_translators.
   7
   8 echo Searching the headers of *.po
   9
  10 mkdir temp_translators
  11 # Should check for existance, for the real use should be running a diff next time for reducing the lines to read
  12
  13 git describe > temp_translators/start.txt
  14 date >> temp_translators/start.txt
  15 # to have the date visible
  16
  17
  18 for i in $( ls po/*po ); do
  19   echo item: $i
  20   echo $i >>temp_translators/start.txt
  21
  22   if [ -h $i ]
  23   then
  24     echo just a link
  25     echo $i is just a link >>temp_translators/start.txt
  26   else
  27
  28
  29 #  translationlangcode=$(echo $i | sed 's/\([a-z]*\).po*/\1/')
  30 #  longlanguage=$(sed -n '1,30 s/[*]*[t,T]ranslati[.]*/&/p' < $i)
  31 #  poeditlanguage=$(sed -n '1,30 s/[.]*X-Poedit-Language:[.]*/&/p' < $i)
  32 #  echo $longlanguage $poeditlanguage
  33 #  echo $translationlangcode >> ausgabe/start.txt
  34 # I used this to have more output on running the script. Gives also the language name instead of the po-file's name only
  35
  36
  37   sed '/#: include/ q' < $i  >> temp_translators/start.txt
  38 # The header should be done by now.
  39 #TODO: A real safe way would take only the wanted meta parts (i.e. "Last-Translator:) and all commented lines #
  40
  41   fi
  42 done
  43
  44
  45
  46 # Removing every \n  The result will have to be read by humans, so there it's no use in keeping them
  47 sed 's:\\n::g' < temp_translators/start.txt > temp_translators/start_without_backslash_n.txt
  48
  49 echo '"Plural-Forms:.*' > temp_translators/doubles.txt
  50 # TODO: This can be more than one line ^^
  51 echo '"&& n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5;"' >> temp_translators/doubles.txt
  52 echo '"%100<10 || n%100>=20) ? 1 : 2);"' >> temp_translators/doubles.txt
  53 echo '"|| n%100>=20) ? 1 : 2);"' >> temp_translators/doubles.txt
  54 echo '"%100==4 ? 3 : 0);"' >> temp_translators/doubles.txt
  55 # The only two liners so far. Quick fix for the above TODO...
  56
  57 echo '"PO-Revision-Date: .*"' >> temp_translators/doubles.txt
  58 echo '"X-Poedit-Bookmark.*' >> temp_translators/doubles.txt
  59 echo '"Project-Id-Version:.*' >> temp_translators/doubles.txt
  60 echo '"X-Generator: .*' >> temp_translators/doubles.txt
  61 echo '"Language: .*' >> temp_translators/doubles.txt
  62 echo ' *[Cc]opyright *([cC])[0-9 ,-]*t*h*e* *VideoLAN$* *t*e*a*m*[0-9 ,-]*\.*' >> temp_translators/doubles.txt
  63 echo '[ ]*\$[ ]*[i,I][d,D][ ]*[:]*[ ]*\$' >> temp_translators/doubles.txt
  64 echo '"X-Poedit-Country:.*' >> temp_translators/doubles.txt
  65 echo '"X-Project-Style:.*'  >> temp_translators/doubles.txt
  66 echo ' *<videolan@videolan.org> *' >> temp_translators/doubles.txt
  67 # Whatever line occurs twice or more is most probably not a translators name. However, you can check it. The file will not be deleted
  68 #TODO: If I knew sed better, I would put all the removed parts in a logfile. sort uniq would give a list to check for everything deleted
  69 sort -r temp_translators/start_without_backslash_n.txt | uniq -d >> temp_translators/doubles.txt
  70 # With the reverse Order the # will be removed at the end. Else the doubles beginning with # will not be matched
  71
  72 # Just check a "sort results.txt|less" for more to remove
  73
  74 # Changing the strings to sed commands removing the doubles
  75 sed 's:.*:s^&^^g:' < temp_translators/doubles.txt > temp_translators/generated_com
  76
  77
  78 # Removing all doubles
  79 sed -f temp_translators/generated_com temp_translators/start_without_backslash_n.txt >temp_translators/results.txt
  80
  81
  82 # Now, we are going to mark the already mentioned translators..
  83 # Some names are written in CAPITALS. A modified marking script would be good.
  84
  85 sed -n  '/Localization/,/^$/ p' <AUTHORS | sed -n '3,$ s/\(.*\) --* .*/\1/p' >temp_translators/localization_part.txt
  86 #sed -n '/Localization/,/^$/ p' <AUTHORS | sed -n '3,$ s/\(.*\) - .*/\1/p' >temp_translators/sect.txt
  87 #The second line is what we want. I added -* to have "Éric Lassauge  -- French" included, too. But I really think this is a typo in AUTHORS
  88
  89 sed -n  '/Programming/,/^$/ p' <AUTHORS | sed  -n '3,$ p' >temp_translators/pro_part.txt
  90 sed 's:.*:s^&^YYY-- & --YYY^g:' < temp_translators/pro_part.txt > temp_translators/replace_prog_names
  91
  92
  93 # Changing the strings to sed commands removing the doubles
  94 sed 's:.*:s^&^XXX-- & --XXX^g:' < temp_translators/localization_part.txt > temp_translators/replacenames
  95
  96
  97
  98 #mkdir -p temp_twice_used_adress
  99 #echo "Checking all git logs"
 100 #git shortlog -sne > temp_twice_used_adress/all_shortlog_sne.txt
 101
 102
 103 #echo "Removing everything but email addresses"
 104 git shortlog -sne |sed 's/[^<]*\(.*\)/\1/g' |sort > temp_translators/git_addresses_only.txt
 105 #uniq -d|
 106 sed 's:.*:s^&^ZZZ-- & --ZZZ^g:' < temp_translators/git_addresses_only.txt > temp_translators/replace_git_commiters
 107
 108
 109
 110 #sed -f temp_translators/replacenames <temp_translators/results.txt |uniq >temp_translators/review.txt
 111
 112 #sed -f temp_translators/replacenames <temp_translators/results.txt |sed -f temp_translators/replace_prog_names |uniq >temp_translators/review.txt
 113 sed -f temp_translators/replacenames <temp_translators/results.txt |sed -f temp_translators/replace_prog_names |sed -f temp_translators/replace_git_commiters |uniq >temp_translators/review.txt
 114
 115 echo "Now temp_translators/review.txt should be reviewed. I don't think this can be done automatically, so I have done it already (not on your computer). Feedback is appreciated."
 116 echo "XXX are named translators, YYY are named programmers, ZZZ commited with git"
 117
# temp_translators/review.txt has to be reviewed manually. Simply removing the names already found in AUTHORS would not help, because you would then
# see an email address without knowing whether it has to be added, or under what name.
# The git committer check script is simpler to handle and gives better names and dates, but of course not for all contributors. Cross-checking is recommended in any case.

# The idea is to use a diff next time, but I have not yet worked out which files would be best for that. Right after finding the doubles would probably be a good time to remove everything already visible in older versions.