From 7ba95de35206e1e557b37e91575bf70d3fa7906d Mon Sep 17 00:00:00 2001
From: wizardforcel <562826179@qq.com>
Date: Sat, 1 Aug 2020 15:33:56 +0800
Subject: [PATCH] 2020-08-01 15:33:56
---
BookerTrans/__init__.py | 13 ++++++++-----
BookerTrans/__main__.py | 4 +++-
history.md | 4 ++++
3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/BookerTrans/__init__.py b/BookerTrans/__init__.py
index 4812562..087d816 100644
--- a/BookerTrans/__init__.py
+++ b/BookerTrans/__init__.py
@@ -10,7 +10,7 @@ from . import config
__author__ = "ApacheCN"
__email__ = "apachecn@163.com"
__license__ = "SATA"
-__version__ = "2020.07.25.1"
+__version__ = "2020.08.01"
RE_CODE = r'<(pre|code|tt|var|kbd)[^>]*?>[\s\S]*?\1>'
RE_TAG = r'<[^>]*?>'
@@ -97,9 +97,7 @@ def trans_one(html):
def trans_html(html):
# 预处理
- html = re.sub(r'<\?xml[^>]*\?>', '', html)
- html = re.sub(r'xmlns=".+?"', '', html)
- html = process_code(html)
+ html = preprocess(html)
root = pq(html)
# 处理
@@ -143,7 +141,12 @@ def trans_html(html):
return str(root)
-def process_code(html):
+def preprocess(html):
+ html = re.sub(r'<\?xml[^>]*\?>', '', html)
+ html = re.sub(r'xmlns=".+?"', '', html)
+ html = html.replace(' ', ' ') \
+ .replace(' ', ' ')
+
root = pq(html)
pres = root('div.code, div.Code')
diff --git a/BookerTrans/__main__.py b/BookerTrans/__main__.py
index 8c2cf06..b32da9d 100644
--- a/BookerTrans/__main__.py
+++ b/BookerTrans/__main__.py
@@ -5,7 +5,9 @@ from os import path
from argparse import ArgumentParser
from . import trans_html, config, api, __version__
-is_html = lambda f: f.endswith('.html') or f.endswith('.htm')
+is_html = lambda f: f.endswith('.html') or \
+ f.endswith('.htm') or \
+ f.endswith('.xhtml')
def process_file(fname):
if not is_html(fname):
diff --git a/history.md b/history.md
index c04a88b..751ea15 100644
--- a/history.md
+++ b/history.md
@@ -1,5 +1,9 @@
# 历史记录
+v2020.8.1
+
++ 修复文字间空格的翻译问题
+
v2020.7.25
+ 改进内联代码的判断逻辑
--
GitLab