2020-08-01 15:33:56

7ba95de3 · wizardforcel · faa1670d · 7ba95de3 · 7ba95de3 · 7ba95de3
隐藏空白更改
内联并排

Showing 3 changed files with 15 additions and 6 deletions

BookerTrans/__init__.py BookerTrans/__init__.py +8 -5

BookerTrans/__main__.py BookerTrans/__main__.py +3 -1

history.md history.md +4 -0

未找到文件。
--- a/BookerTrans/__init__.py
+++ b/BookerTrans/__init__.py
@@ -10,7 +10,7 @@ from . import config
 __author__ = "ApacheCN"
 __email__ = "apachecn@163.com"
 __license__ = "SATA"
-__version__ = "2020.07.25.1"
+__version__ = "2020.08.01"

 RE_CODE = r'<(pre|code|tt|var|kbd)[^>]*?>[\s\S]*?</\1>'
 RE_TAG = r'<[^>]*?>'
@@ -97,9 +97,7 @@ def trans_one(html):

 def trans_html(html):
    # 预处理
-    html = re.sub(r'<\?xml[^>]*\?>', '', html)
-    html = re.sub(r'xmlns=".+?"', '', html)
-    html = process_code(html)
+    html = preprocess(html)
    root = pq(html)
    
    # 处理 <p> <h?>
@@ -143,7 +141,12 @@ def trans_html(html):
    
    return str(root)

-def process_code(html):
+def preprocess(html):
+    html = re.sub(r'<\?xml[^>]*\?>', '', html)
+    html = re.sub(r'xmlns=".+?"', '', html)
+    html = html.replace('&#160;', ' ') \
+               .replace('&nbsp;', ' ')
+
    root = pq(html)
    
    pres = root('div.code, div.Code')

--- a/BookerTrans/__main__.py
+++ b/BookerTrans/__main__.py
@@ -5,7 +5,9 @@ from os import path
 from argparse import ArgumentParser
 from . import trans_html, config, api, __version__

-is_html = lambda f: f.endswith('.html') or f.endswith('.htm')
+is_html = lambda f: f.endswith('.html') or \
+                    f.endswith('.htm') or \
+                    f.endswith('.xhtml')

 def process_file(fname):
    if not is_html(fname):

--- a/history.md
+++ b/history.md
 # 历史记录

+v2020.8.1
+
++   修复文字间空格的翻译问题
+
 v2020.7.25

 +   改进内联代码的判断逻辑