From 7ba95de35206e1e557b37e91575bf70d3fa7906d Mon Sep 17 00:00:00 2001 From: wizardforcel <562826179@qq.com> Date: Sat, 1 Aug 2020 15:33:56 +0800 Subject: [PATCH] 2020-08-01 15:33:56 --- BookerTrans/__init__.py | 13 ++++++++----- BookerTrans/__main__.py | 4 +++- history.md | 4 ++++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/BookerTrans/__init__.py b/BookerTrans/__init__.py index 4812562..087d816 100644 --- a/BookerTrans/__init__.py +++ b/BookerTrans/__init__.py @@ -10,7 +10,7 @@ from . import config __author__ = "ApacheCN" __email__ = "apachecn@163.com" __license__ = "SATA" -__version__ = "2020.07.25.1" +__version__ = "2020.08.01" RE_CODE = r'<(pre|code|tt|var|kbd)[^>]*?>[\s\S]*?' RE_TAG = r'<[^>]*?>' @@ -97,9 +97,7 @@ def trans_one(html): def trans_html(html): # 预处理 - html = re.sub(r'<\?xml[^>]*\?>', '', html) - html = re.sub(r'xmlns=".+?"', '', html) - html = process_code(html) + html = preprocess(html) root = pq(html) # 处理

@@ -143,7 +141,12 @@ def trans_html(html): return str(root) -def process_code(html): +def preprocess(html): + html = re.sub(r'<\?xml[^>]*\?>', '', html) + html = re.sub(r'xmlns=".+?"', '', html) + html = html.replace(' ', ' ') \ + .replace(' ', ' ') + root = pq(html) pres = root('div.code, div.Code') diff --git a/BookerTrans/__main__.py b/BookerTrans/__main__.py index 8c2cf06..b32da9d 100644 --- a/BookerTrans/__main__.py +++ b/BookerTrans/__main__.py @@ -5,7 +5,9 @@ from os import path from argparse import ArgumentParser from . import trans_html, config, api, __version__ -is_html = lambda f: f.endswith('.html') or f.endswith('.htm') +is_html = lambda f: f.endswith('.html') or \ + f.endswith('.htm') or \ + f.endswith('.xhtml') def process_file(fname): if not is_html(fname): diff --git a/history.md b/history.md index c04a88b..751ea15 100644 --- a/history.md +++ b/history.md @@ -1,5 +1,9 @@ # 历史记录 +v2020.8.1 + ++ 修复文字间空格的翻译问题 + v2020.7.25 + 改进内联代码的判断逻辑 -- GitLab