checktransupdate.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. #!/usr/bin/env python3
  2. # SPDX-License-Identifier: GPL-2.0
  3. """
  4. This script helps track the translation status of the documentation
  5. in different locales, e.g., zh_CN. More specifically, it uses `git log`
  6. to find the latest English commit that the translation commit is based on
  7. (ordered by author date) and the latest English commit from HEAD. If they
  8. differ, it reports the file and the commits that need to be updated.
  9. The usage is as follows:
  10. - tools/docs/checktransupdate.py -l zh_CN
  11. This will print all the files that need to be updated or translated in the zh_CN locale.
  12. - tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
  13. This will only print the status of the specified file.
  14. The output is something like:
  15. Documentation/dev-tools/kfence.rst
  16. No translation in the locale of zh_CN
  17. Documentation/translations/zh_CN/dev-tools/testing-overview.rst
  18. commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
  19. 1 commits needs resolving in total
  20. """
import logging
import os
import re
import subprocess
import time
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
from datetime import datetime
  27. def get_origin_path(file_path):
  28. """Get the origin path from the translation path"""
  29. paths = file_path.split("/")
  30. tidx = paths.index("translations")
  31. opaths = paths[:tidx]
  32. opaths += paths[tidx + 2 :]
  33. return "/".join(opaths)
  34. def get_latest_commit_from(file_path, commit):
  35. """Get the latest commit from the specified commit for the specified file"""
  36. command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}"
  37. logging.debug(command)
  38. pipe = os.popen(command)
  39. result = pipe.read()
  40. result = result.split("\n")
  41. if len(result) <= 1:
  42. return None
  43. logging.debug("Result: %s", result[0])
  44. return {
  45. "hash": result[0],
  46. "author_date": datetime.strptime(result[1], "%a, %d %b %Y %H:%M:%S %z"),
  47. "commit_date": datetime.strptime(result[2], "%a, %d %b %Y %H:%M:%S %z"),
  48. "message": result[4:],
  49. }
  50. def get_origin_from_trans(origin_path, t_from_head):
  51. """Get the latest origin commit from the translation commit"""
  52. o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"])
  53. while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]:
  54. o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^")
  55. if o_from_t is not None:
  56. logging.debug("tracked origin commit id: %s", o_from_t["hash"])
  57. return o_from_t
  58. def get_origin_from_trans_smartly(origin_path, t_from_head):
  59. """Get the latest origin commit from the formatted translation commit:
  60. (1) update to commit HASH (TITLE)
  61. (2) Update the translation through commit HASH (TITLE)
  62. """
  63. # catch flag for 12-bit commit hash
  64. HASH = r'([0-9a-f]{12})'
  65. # pattern 1: contains "update to commit HASH"
  66. pat_update_to = re.compile(rf'update to commit {HASH}')
  67. # pattern 2: contains "Update the translation through commit HASH"
  68. pat_update_translation = re.compile(rf'Update the translation through commit {HASH}')
  69. origin_commit_hash = None
  70. for line in t_from_head["message"]:
  71. # check if the line matches the first pattern
  72. match = pat_update_to.search(line)
  73. if match:
  74. origin_commit_hash = match.group(1)
  75. break
  76. # check if the line matches the second pattern
  77. match = pat_update_translation.search(line)
  78. if match:
  79. origin_commit_hash = match.group(1)
  80. break
  81. if origin_commit_hash is None:
  82. return None
  83. o_from_t = get_latest_commit_from(origin_path, origin_commit_hash)
  84. if o_from_t is not None:
  85. logging.debug("tracked origin commit id: %s", o_from_t["hash"])
  86. return o_from_t
  87. def get_commits_count_between(opath, commit1, commit2):
  88. """Get the commits count between two commits for the specified file"""
  89. command = f"git log --pretty=format:%H {commit1}...{commit2} -- {opath}"
  90. logging.debug(command)
  91. pipe = os.popen(command)
  92. result = pipe.read().split("\n")
  93. # filter out empty lines
  94. result = list(filter(lambda x: x != "", result))
  95. return result
  96. def pretty_output(commit):
  97. """Pretty print the commit message"""
  98. command = f"git log --pretty='format:%h (\"%s\")' -1 {commit}"
  99. logging.debug(command)
  100. pipe = os.popen(command)
  101. return pipe.read()
  102. def valid_commit(commit):
  103. """Check if the commit is valid or not"""
  104. msg = pretty_output(commit)
  105. return "Merge tag" not in msg
  106. def check_per_file(file_path):
  107. """Check the translation status for the specified file"""
  108. opath = get_origin_path(file_path)
  109. if not os.path.isfile(opath):
  110. logging.error("Cannot find the origin path for {file_path}")
  111. return
  112. o_from_head = get_latest_commit_from(opath, "HEAD")
  113. t_from_head = get_latest_commit_from(file_path, "HEAD")
  114. if o_from_head is None or t_from_head is None:
  115. logging.error("Cannot find the latest commit for %s", file_path)
  116. return
  117. o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
  118. # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
  119. if o_from_t is None:
  120. o_from_t = get_origin_from_trans(opath, t_from_head)
  121. if o_from_t is None:
  122. logging.error("Error: Cannot find the latest origin commit for %s", file_path)
  123. return
  124. if o_from_head["hash"] == o_from_t["hash"]:
  125. logging.debug("No update needed for %s", file_path)
  126. else:
  127. logging.info(file_path)
  128. commits = get_commits_count_between(
  129. opath, o_from_t["hash"], o_from_head["hash"]
  130. )
  131. count = 0
  132. for commit in commits:
  133. if valid_commit(commit):
  134. logging.info("commit %s", pretty_output(commit))
  135. count += 1
  136. logging.info("%d commits needs resolving in total\n", count)
  137. def valid_locales(locale):
  138. """Check if the locale is valid or not"""
  139. script_path = os.path.dirname(os.path.abspath(__file__))
  140. linux_path = os.path.join(script_path, "../..")
  141. if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
  142. raise ArgumentTypeError("Invalid locale: {locale}")
  143. return locale
  144. def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
  145. """List all files with the specified suffix in the folder and its subfolders"""
  146. files = []
  147. stack = [folder]
  148. while stack:
  149. pwd = stack.pop()
  150. # filter out the exclude folders
  151. if os.path.basename(pwd) in exclude_folders:
  152. continue
  153. # list all files and folders
  154. for item in os.listdir(pwd):
  155. ab_item = os.path.join(pwd, item)
  156. if os.path.isdir(ab_item):
  157. stack.append(ab_item)
  158. else:
  159. if ab_item.endswith(include_suffix):
  160. files.append(ab_item)
  161. return files
  162. class DmesgFormatter(logging.Formatter):
  163. """Custom dmesg logging formatter"""
  164. def format(self, record):
  165. timestamp = time.time()
  166. formatted_time = f"[{timestamp:>10.6f}]"
  167. log_message = f"{formatted_time} {record.getMessage()}"
  168. return log_message
  169. def config_logging(log_level, log_file="checktransupdate.log"):
  170. """configure logging based on the log level"""
  171. # set up the root logger
  172. logger = logging.getLogger()
  173. logger.setLevel(log_level)
  174. # Create console handler
  175. console_handler = logging.StreamHandler()
  176. console_handler.setLevel(log_level)
  177. # Create file handler
  178. file_handler = logging.FileHandler(log_file)
  179. file_handler.setLevel(log_level)
  180. # Create formatter and add it to the handlers
  181. formatter = DmesgFormatter()
  182. console_handler.setFormatter(formatter)
  183. file_handler.setFormatter(formatter)
  184. # Add the handler to the logger
  185. logger.addHandler(console_handler)
  186. logger.addHandler(file_handler)
  187. def main():
  188. """Main function of the script"""
  189. script_path = os.path.dirname(os.path.abspath(__file__))
  190. linux_path = os.path.join(script_path, "../..")
  191. parser = ArgumentParser(description="Check the translation update")
  192. parser.add_argument(
  193. "-l",
  194. "--locale",
  195. default="zh_CN",
  196. type=valid_locales,
  197. help="Locale to check when files are not specified",
  198. )
  199. parser.add_argument(
  200. "--print-missing-translations",
  201. action=BooleanOptionalAction,
  202. default=True,
  203. help="Print files that do not have translations",
  204. )
  205. parser.add_argument(
  206. '--log',
  207. default='INFO',
  208. choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
  209. help='Set the logging level')
  210. parser.add_argument(
  211. '--logfile',
  212. default='checktransupdate.log',
  213. help='Set the logging file (default: checktransupdate.log)')
  214. parser.add_argument(
  215. "files", nargs="*", help="Files to check, if not specified, check all files"
  216. )
  217. args = parser.parse_args()
  218. # Configure logging based on the --log argument
  219. log_level = getattr(logging, args.log.upper(), logging.INFO)
  220. config_logging(log_level)
  221. # Get files related to linux path
  222. files = args.files
  223. if len(files) == 0:
  224. offical_files = list_files_with_excluding_folders(
  225. os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst"
  226. )
  227. for file in offical_files:
  228. # split the path into parts
  229. path_parts = file.split(os.sep)
  230. # find the index of the "Documentation" directory
  231. kindex = path_parts.index("Documentation")
  232. # insert the translations and locale after the Documentation directory
  233. new_path_parts = path_parts[:kindex + 1] + ["translations", args.locale] \
  234. + path_parts[kindex + 1 :]
  235. # join the path parts back together
  236. new_file = os.sep.join(new_path_parts)
  237. if os.path.isfile(new_file):
  238. files.append(new_file)
  239. else:
  240. if args.print_missing_translations:
  241. logging.info(os.path.relpath(os.path.abspath(file), linux_path))
  242. logging.info("No translation in the locale of %s\n", args.locale)
  243. files = list(map(lambda x: os.path.relpath(os.path.abspath(x), linux_path), files))
  244. # cd to linux root directory
  245. os.chdir(linux_path)
  246. for file in files:
  247. check_per_file(file)
  248. if __name__ == "__main__":
  249. main()