提交 fbae17cd 编写于 作者: M Mars Liu

project init

上级 9fecbc13
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
.idea
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
.settings/**
.project
.pydevproject
The MIT License (MIT)
Copyright (c) 2015 Dwarf Artisan
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# skill_tree_parser
技能树结构(章节、元信息、习题)解释器,支持技能树结构和内容的分析、解释。
\ No newline at end of file
技能树结构(章节、元信息、习题)解释器,支持技能树结构和内容的分析、解释。
[aliases]
release = register sdist bdist_egg upload
#!/usr/bin/env python
from setuptools import setup
from pathlib import Path
# Use the README next to this script as the long description of the package.
this_directory = Path(__file__).parent
# The README contains non-ASCII text; decode it explicitly as UTF-8 instead of
# relying on the platform's default encoding.
long_description = (this_directory / "README.md").read_text(encoding="utf-8")

setup(
    name="skill-tree-parser",
    version="0.0.1",
    description="CSDN Skill Tree Parser",
    long_description=long_description,
    long_description_content_type='text/markdown',
    author="Liu Xin",
    author_email="liuxin@csdn.net",
    url="https://gitcode.net/csdn/skill_tree_parser",
    license="MIT",
    # Every listed package needs a matching entry in ``package_dir`` below;
    # the previous "csdn" entry had none and pointed at no source directory.
    packages=["skill_tree", "test"],
    package_dir={
        "skill_tree": "src/skill_tree",
        "test": "src/tests",
    },
    # The sources use f-strings, which require Python 3.6 or newer.
    python_requires=">=3.6",
    install_requires=[
        "pyparsec",
        "GitPython",
    ],
    classifiers=[
        "Topic :: Utilities",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3 :: Only",
        "License :: OSI Approved :: MIT License",
    ],
)
import sys
from parsec import one, eof
from parsec.combinator import *
from parsec.text import *
from .market_math import processor
"""
使用 markdown 编写的问题应该总是一个固定格式,即
# 问题标题
问题描述
## template
```java
package app;
public class App {
public static void main(String[] args){
$code
}
}
```
如果第一个二级标题为 template,则生成一个可选的 template 字段,template 章节中的代码段会作为代码模板,用于被生成的 notebook
## aop
AOP(面向切面)章节定义 notebook 生成时插入的 cell ,如果定义了这个二级标题,其下复合定义的章节会插在必要的位置。
### before
这里的章节会写入到答案代码之前。
### after
这里的章节会写入到答案代码之后
## 答案
这里写正确答案。此处的代码会写入到 notebook 中,如果定义了模板章节,会将其合并后生成 notebook。
```java
```
## 选项
### A 选项标题会被忽略
可选的描述
```
// 代码
```
### B
可选的描述
```
// 代码
```
### C
可选的描述
```
// 代码
```
### D
可选的描述
```
// 代码
```
需要注意的是,选项的标题并不会出现在最终的题目中,仅作为编辑过程中的标注。而第一个选项就是答案。其顺序在最终展现时由服务代码进行混淆。
"""
class Paragraph:
    """A piece of exercise content: its text plus the language it is written in."""

    def __init__(self, source="", language=""):
        self.source, self.language = source, language

    def isEmpty(self):
        """Return True when both the text and the language are empty strings."""
        return (self.source, self.language) == ("", "")

    def to_map(self):
        """Return the paragraph as a ``{"content", "language"}`` dictionary."""
        return dict(content=self.source, language=self.language)
class Option:
    """One answer option, held as a list of paragraph-like items."""

    def __init__(self, paras=None):
        # Any falsy argument (None, empty list) becomes a fresh empty list.
        self.paras = paras or []

    def to_map(self):
        """Return the ``to_map()`` result of every item, in order."""
        return [item.to_map() for item in self.paras]
class Exercise:
    """A parsed exercise: title, answer, description and options.

    Only these four attributes are set here; the parser may attach further
    attributes to an instance afterwards.
    """

    def __init__(self, title, answer, description, options):
        self.title, self.answer = title, answer
        self.description, self.options = description, options
@Parsec
def spaces(state):
    """Apply ``skip1(space)`` to the state and return its result."""
    consume = skip1(space)
    return consume(state)
def maybe_spaces(state):
    """Apply ``skip(space)`` to the state and return its result."""
    consume = skip(space)
    return consume(state)
def inline(state):
    """Read one character and return it; a newline is rejected.

    Raises ``ParsecError`` when the character read is ``'\\n'``.
    """
    char = one(state)
    if char != '\n':
        return char
    raise ParsecError(state, "unexpected newline")
@Parsec
def title(state):
    """
    Parse the exercise title: ``#``, spaces, the title text, then a newline.

    Returns the title text as a single string.
    """
    marker = string("#").then(spaces)
    heading = marker.then(many1(inline)).over(string('\n'))
    return "".join(heading(state))
@Parsec
def chapter_title(state):
    """
    Parse a chapter heading: ``##``, spaces, the heading text, then a newline.

    @param state: parser state to read from
    @return: the heading text as a single string
    """
    marker = string("##").then(spaces)
    heading = marker.then(many1(inline)).over(string('\n'))
    return "".join(heading(state))
@Parsec
def section_title(state):
    """
    Parse a section heading: ``###``, spaces, the heading text, then a newline.

    @param state: parser state to read from
    @return: the heading text as a single string
    """
    marker = string("###").then(spaces)
    heading = marker.then(many1(inline)).over(string('\n'))
    return "".join(heading(state))
@Parsec
def paragraph(state):
    """
    Parse a text paragraph.

    Lines are read one at a time and appended to a buffer, each preceded by
    a newline. Reading ends on an empty line or when a stop marker follows.
    Returns a ``Paragraph`` tagged "markdown" whose text is the buffer passed
    through ``processor``.
    """
    buffer = ""
    # One line: a run of non-newline characters ended by a newline or by eof.
    line = many(inline).over(choice(attempt(string("\n")), eof))
    # Markers that end the paragraph: a newline, a "### " or a "## " heading.
    stop = choices(attempt(string("\n")), attempt(string("### ")), string("## "))
    while True:
        maybe_spaces(state)
        result = "".join(line(state))
        data = result
        if data:
            buffer += '\n' + result
        else:
            # An empty line ends the paragraph.
            break
        # Probe for a stop marker inside a transaction that is always rolled
        # back -- presumably so the marker itself stays unconsumed (TODO confirm
        # against the parsec state API).
        tran = state.begin()
        try:
            stop(state)
        except ParsecError:
            # No stop marker here: keep reading lines.
            continue
        finally:
            state.rollback(tran)
        return Paragraph(processor(buffer), "markdown")
    return Paragraph(processor(buffer), "markdown")
@Parsec
def code(state):
    """
    Parse a fenced code block.

    The text following the opening fence on the same line becomes the
    language. Returns a ``Paragraph`` holding the characters read before the
    closing fence, tagged with that language.
    """
    # Opening fence: three backticks, the rest of the line, then a newline.
    left = attempt(string("```")).then(many(attempt(inline))).over(string("\n"))
    language = ''.join(left(state))
    # Closing fence: a newline, three backticks, then a newline or eof.
    right = attempt(string("\n```").over(choice(attempt(string("\n")), eof)))
    buffer = ""
    while True:
        try:
            right(state)
            return Paragraph(buffer, language)
        except ParsecError:
            # Not at the closing fence yet: take one more character of code.
            buffer += one(state)
@Parsec
def desc(state):
    """Parse the description of an exercise or of an option.

    A description is made of paragraphs and code blocks and follows markdown
    structure. Returns the list of parsed items.
    """
    buffer = []
    # Try a code block first and fall back to a plain paragraph.
    parser = choice(attempt(code), paragraph)
    # The description ends at a "## " or "### " heading or at eof.
    stop = choices(attempt(string("## ")), attempt(string("### ")), eof)
    while True:
        # Probe for the end inside a transaction that is always rolled back,
        # presumably leaving the heading for the caller (TODO confirm).
        tran = state.begin()
        try:
            stop(state)
            return buffer
        except ParsecError:
            pass
        finally:
            state.rollback(tran)
        buffer.append(parser(state))
        maybe_spaces(state)
def option(state):
    """Parse one option: a ``###`` heading line followed by its description.

    The heading text is parsed and discarded; the description becomes the
    content of the returned ``Option``.
    """
    heading = string("###").then(spaces).then(many1(attempt(inline)))
    attempt(heading.over(string('\n')))(state)
    maybe_spaces(state)
    body = desc(state)
    return Option(body)
def template(state):
    """Parse the template chapter and return its code block.

    Returns None and rolls the state back when parsing fails, including when
    the next chapter heading is not "template".
    """
    tran = state.begin()
    try:
        title = chapter_title(state)
        if title == "template":
            state.commit(tran)
            maybe_spaces(state)
            return code(state)
        else:
            # Raised only to reach the rollback in the handler below.
            raise ParsecError(state, "template not found")
    except ParsecError:
        state.rollback(tran)
        return None
@Parsec
def aop_parser(state):
    """
    Parse the AOP chapter and return its sections as a dictionary.

    The result may hold the keys "before" and "after", each mapped to a
    parsed description. Returns None and rolls the state back when parsing
    fails; also returns None when the chapter contains no section.
    @param state: parser state to read from
    @return: dictionary of sections, or None
    """
    result = {}
    # The next chapter heading marks the end of the AOP chapter.
    stop = attempt(chapter_title)
    tt = state.begin()
    try:
        title = chapter_title(state)
        if title == "aop":
            state.commit(tt)
            maybe_spaces(state)
            while True:
                tran = state.begin()
                try:
                    # Probe for the next chapter heading, then roll back --
                    # presumably so the caller can parse it (TODO confirm).
                    stop(state)
                    state.rollback(tran)
                    if len(result) == 0:
                        return None
                    else:
                        return result
                except ParsecError:
                    # No chapter heading here: expect one more section.
                    state.rollback(tran)
                    maybe_spaces(state)
                    st = section_title(state)
                    maybe_spaces(state)
                    if st == "before":
                        result["before"] = desc(state)
                    elif st == "after":
                        result["after"] = desc(state)
                    else:
                        raise ParsecError(state, f"invalid section {st} in aop chapter")
        else:
            # Raised only to reach the rollback in the handler below.
            raise ParsecError(state, "aop not found")
    except ParsecError as err:
        state.rollback(tt)
        return None
@Parsec
def parse(state):
    """Parse a whole exercise document and return an ``Exercise``.

    Order of the document: title, description, optional template chapter,
    optional aop chapter, the answer chapter, then the options chapter.
    ``template`` and ``aop`` are set on the result only when present.
    Raises ``ParsecError`` when the answer or options chapter is missing.
    """
    t = title(state)
    maybe_spaces(state)
    description = desc(state)
    tmpl = template(state)
    maybe_spaces(state)
    aop = aop_parser(state)

    if chapter_title(state) != "答案":
        raise ParsecError(state, "chapter [答案] is required")
    maybe_spaces(state)
    answer = desc(state)

    if chapter_title(state) != "选项":
        raise ParsecError(state, "chapter [选项] not found")
    maybe_spaces(state)

    options = []
    try:
        # Collect options until one fails to parse.
        while True:
            options.append(option(state))
            maybe_spaces(state)
    except ParsecError:
        pass

    result = Exercise(t, answer, description, options)
    if tmpl is not None:
        result.template = tmpl
    if aop is not None:
        result.aop = aop
    return result
from parsec.text import *
from parsec.combinator import *
from parsec.atom import *
from parsec.state import BasicState
# Parser for the "$" delimiter; ``content`` applies it to consume the opening "$".
side = string('$')
@Parsec
def escape(state):
    """Read the character after a backslash and return it.

    Only ``$``, ``\\``, ``{`` and ``}`` may be escaped; any other character
    raises ``ParsecError``.
    """
    c = state.next()
    if c in ("$", "\\", "{", "}"):
        return c
    raise ParsecError(state, f"unknown escape char \\{c}")
@Parsec
def escaped(state):
    """Read one character, resolving a backslash escape.

    A ``$`` raises ``ParsecError``; a backslash delegates to ``escape``; any
    other character is returned unchanged.
    """
    c = state.next()
    if c == "$":
        raise ParsecError(state, "got stop char $")
    if c == '\\':
        return escape(state)
    return c
@Parsec
def token_content(state):
    """Collect characters until a structural character or eof comes next.

    The upcoming item is inspected with ``ahead``; collection stops, without
    reading it, at ``{``, ``}``, ``^``, ``_``, ``$`` or None. Returns the
    collected text, which may be empty.
    """
    stoppers = ("{", "}", "^", "_", "$", None)
    pieces = []
    while True:
        upcoming = ahead(choice(one, eof))(state)
        if upcoming in stoppers:
            return "".join(pieces)
        pieces.append(escaped(state))
@Parsec
def superscript(state):
    """Parse what follows ``^`` and return it wrapped in ``<sup>`` tags.

    A single character is wrapped as is; a ``{`` starts a group whose content
    is read with ``token_content`` and must be closed by ``}``.
    """
    c = state.next()
    if c != "{":
        return f"<sup>{c}</sup>"
    cnt = token_content(state)
    string("}")(state)
    return f"<sup>{cnt}</sup>"
@Parsec
def subscript(state):
    """Parse what follows ``_`` and return it wrapped in ``<sub>`` tags.

    A single character is wrapped as is; a ``{`` starts a group whose content
    is read with ``token_content`` and must be closed by ``}``.
    """
    c = state.next()
    if c != "{":
        return f"<sub>{c}</sub>"
    cnt = token_content(state)
    string("}")(state)
    return f"<sub>{cnt}</sub>"
@Parsec
def content(state):
    """Parse one ``$``-delimited formula and return it as HTML text.

    ``^`` and ``_`` are rendered through ``superscript`` and ``subscript``;
    other text is read with ``token_content``. Raises ``ParsecError`` when
    nothing can be read at the current position or when the input ends
    before the closing ``$``.
    """
    buffer = ""
    # Consume the opening "$".
    side(state)
    while True:
        try:
            tran = state.begin()
            c = state.next()
            if c == "$":
                # Closing delimiter: the formula is complete.
                state.commit(tran)
                return buffer
            elif c == "^":
                state.commit(tran)
                buffer += superscript(state)
            elif c == "_":
                state.commit(tran)
                buffer += subscript(state)
            else:
                # Ordinary text: roll back -- presumably un-reading the
                # character (TODO confirm) -- and read the whole run.
                state.rollback(tran)
                cnt = token_content(state)
                if cnt:
                    buffer += cnt
                else:
                    # token_content read nothing, so no progress is possible.
                    raise ParsecError(state, f"unexpect content {state.data[state.index]}")
        except ParsecEof:
            raise ParsecError(state, f"expect right $ but eof")
def processor(plain):
    """Return ``plain`` with every ``$``-delimited formula converted by ``content``.

    Characters outside formulas are copied unchanged. Raises ``ParsecError``
    when ``content`` returns an empty result.
    """
    st = BasicState(plain)
    buffer = ""
    while True:
        try:
            tran = st.begin()
            c = one(st)
            if c == "$":
                # Roll back -- presumably un-reading the "$" (TODO confirm) --
                # so that ``content`` can consume the opening delimiter itself.
                st.rollback(tran)
                cnt = content(st)
                if cnt:
                    buffer += cnt
                else:
                    raise ParsecError(st, f"unexpect content {st.data[st.index]} ")
            else:
                st.commit(tran)
                buffer += c
        except ParsecEof:
            # End of input: everything has been processed.
            return buffer
import json
import logging
import os
import re
import sys
import uuid
import git
# Ids registered so far; shared by every walker in this process and consulted
# when deciding whether a node or exercise id must be regenerated.
id_set = set()
# Module logger: INFO level, timestamped records written to stdout.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
# Opened at import time on the current working directory.
# NOTE(review): importing this module outside a git work tree presumably
# fails here -- confirm against GitPython's ``Repo`` behavior.
repo = git.Repo(".")
def user_name():
    """Return the ``user.name`` value read from the repository's git config."""
    reader = repo.config_reader()
    return reader.get_value("user", "name")
def read_text(filepath):
    """Return the whole content of ``filepath``, decoded as UTF-8."""
    with open(filepath, 'r', encoding='utf-8') as handle:
        text = handle.read()
    return text
def load_json(p):
    """Read the file at ``p`` and return its content parsed as JSON."""
    text = read_text(p)
    return json.loads(text)
def dump_json(p, j, exist_ok=False, override=False):
    """Write ``j`` to ``p`` as indented, non-ASCII-preserving JSON.

    When ``p`` already exists:
    - ``exist_ok`` false: log an error and terminate the process with a
      non-zero status;
    - ``exist_ok`` true, ``override`` false: leave the file untouched;
    - ``exist_ok`` true, ``override`` true: overwrite it.

    @param p: path of the file to write
    @param j: JSON-serialisable value
    @param exist_ok: whether an existing file is acceptable
    @param override: whether an existing file may be overwritten
    """
    if os.path.exists(p):
        if not exist_ok:
            logger.error(f"{p} already exist")
            # This is a failure, so report it with a non-zero exit status
            # (the status used to be 0, which signals success).
            sys.exit(1)
        if not override:
            return
    with open(p, 'w', encoding="utf8") as f:
        f.write(json.dumps(j, indent=2, ensure_ascii=False))
def ensure_config(path):
    """Return the content of ``<path>/config.json``, creating it when missing.

    A missing file is created with an empty ``keywords`` list, and that new
    configuration is returned.
    """
    config_path = os.path.join(path, "config.json")
    if os.path.exists(config_path):
        return load_json(config_path)
    node = {"keywords": []}
    dump_json(config_path, node, exist_ok=True, override=False)
    return node
def parse_no_name(d):
    """Split a ``<number>.<name>`` string into ``(number, name)``.

    The first ``<digits>.`` occurrence found in ``d`` is used. When ``d``
    contains no such pattern an error is logged and the process terminates
    with a non-zero status.

    @param d: directory name such as ``"3.basics"``
    @return: tuple of the number as an int and the remaining name
    """
    m = re.search(r'(\d+)\.(.*)', d)
    if m is None:
        # Report the failure instead of exiting silently with status 0.
        logging.getLogger(__name__).error(
            f"cannot parse [{d}]: expected the form <number>.<name>")
        sys.exit(1)
    return int(m.group(1)), m.group(2)
def check_export(base, cfg):
    """Drop entries of ``cfg["export"]`` whose file is missing under ``base``.

    ``cfg`` is updated in place only when at least one entry is missing.
    Returns True when something was dropped, False otherwise.
    """
    declared = cfg.get('export', [])
    exports = [name for name in declared
               if os.path.exists(os.path.join(base, name))]
    flag = len(exports) != len(declared)
    if flag:
        cfg["export"] = exports
    return flag
class TreeWalker:
    """Walk a skill-tree directory, normalise it and build its tree structure.

    Below ``root`` the tree is laid out as nested, numbered directories named
    ``<number>.<title>`` (level / chapter / section). Every directory carries
    a ``config.json``; missing configs, node ids and exercise metadata are
    created while walking, and directories are renumbered consecutively.
    """

    def __init__(self, root, tree_name, title=None, log=None):
        """
        @param root: directory holding the tree
        @param tree_name: tree name, also the prefix of generated node ids
        @param title: display title, defaults to ``tree_name``
        @param log: logger to use, defaults to the module logger
        """
        self.name = tree_name
        self.root = root
        self.title = tree_name if title is None else title
        self.tree = {}
        self.logger = logger if log is None else log
        # Node ids generated by this walker. They are unique by construction,
        # so meeting one again only means a config was re-read.
        self._issued_ids = set()

    def walk(self):
        """Build the tree, write it to ``<root>/tree.json`` and return it."""
        root = self.load_root()
        root_node = {
            "node_id": root["node_id"],
            "keywords": root["keywords"],
            "children": []
        }
        self.tree[root["tree_name"]] = root_node
        self.load_levels(root_node)
        self.load_chapters(self.root, root_node)
        for level_index, level in enumerate(root_node["children"]):
            level_title, level_node = next(iter(level.items()))
            level_path = os.path.join(self.root, f"{level_index + 1}.{level_title}")
            self.load_chapters(level_path, level_node)
            for chapter_index, chapter in enumerate(level_node["children"]):
                chapter_title, chapter_node = next(iter(chapter.items()))
                chapter_path = os.path.join(
                    level_path, f"{chapter_index + 1}.{chapter_title}")
                self.load_sections(chapter_path, chapter_node)
                for section_index, section_node in enumerate(chapter_node["children"]):
                    section_title = next(iter(section_node))
                    full_path = os.path.join(
                        chapter_path, f"{section_index + 1}.{section_title}")
                    if os.path.isdir(full_path):
                        self.check_section_keywords(full_path)
                        self.ensure_exercises(full_path)
        tree_path = os.path.join(self.root, "tree.json")
        dump_json(tree_path, self.tree, exist_ok=True, override=True)
        return self.tree

    def sort_dir_list(self, dirs):
        """Return ``(number, title)`` pairs for ``dirs``, sorted by number."""
        result = [self.extract_node_env(path) for path in dirs]
        result.sort(key=lambda item: item[0])
        return result

    def load_levels(self, root_node):
        """Load every level directory under the root into ``root_node``."""
        levels = []
        for level in os.listdir(self.root):
            level_path = os.path.join(self.root, level)
            # ``os.listdir`` yields bare names, so the directory test must use
            # the joined path rather than a path relative to the cwd.
            if not os.path.isdir(level_path):
                continue
            num, config = self.load_level_node(level_path)
            levels.append((num, config))
        levels = self.resort_children(self.root, levels)
        root_node["children"] = [item[1] for item in levels]
        return root_node

    def load_level_node(self, level_path):
        """Return ``(number, {title: node})`` for one level directory."""
        config = self.ensure_level_config(level_path)
        num, name = self.extract_node_env(level_path)
        result = {
            name: {
                "node_id": config["node_id"],
                # Level configs written by earlier versions have no keywords.
                "keywords": config.get("keywords", []),
                "children": [],
            }
        }
        return num, result

    def load_chapters(self, base, level_node):
        """Load every sub-directory of ``base`` as a chapter of ``level_node``."""
        chapters = []
        for name in os.listdir(base):
            full_name = os.path.join(base, name)
            if os.path.isdir(full_name):
                num, chapter = self.load_chapter_node(full_name)
                chapters.append((num, chapter))
        chapters = self.resort_children(base, chapters)
        level_node["children"] = [item[1] for item in chapters]
        return level_node

    def load_sections(self, base, chapter_node):
        """Load every sub-directory of ``base`` as a section of ``chapter_node``."""
        sections = []
        for name in os.listdir(base):
            full_name = os.path.join(base, name)
            if os.path.isdir(full_name):
                num, section = self.load_section_node(full_name)
                sections.append((num, section))
        sections = self.resort_children(base, sections)
        chapter_node["children"] = [item[1] for item in sections]
        return chapter_node

    def resort_children(self, base, children):
        """Sort ``children`` by number and renumber their directories from 1.

        ``children`` is a list of ``(number, {title: node})`` pairs; it is
        sorted in place and returned. A directory whose number differs from
        its position is renamed on disk.
        """
        children.sort(key=lambda item: item[0])
        for index, (number, element) in enumerate(children):
            title = next(iter(element))
            origin = os.path.join(base, f"{number}.{title}")
            posted = os.path.join(base, f"{index + 1}.{title}")
            if origin != posted:
                self.logger.info(f"rename [{origin}] to [{posted}]")
                os.rename(origin, posted)
        return children

    def ensure_chapters(self):
        """Make sure every directory directly under the root has a config."""
        for subdir in os.listdir(self.root):
            # Join with the root: bare names would be resolved against the cwd.
            level_path = os.path.join(self.root, subdir)
            if os.path.isdir(level_path):
                self.ensure_level_config(level_path)

    def load_root(self):
        """Return the root config, creating or repairing ``config.json``."""
        config_path = os.path.join(self.root, "config.json")
        if not os.path.exists(config_path):
            config = {
                "tree_name": self.name,
                "keywords": [],
                "node_id": self.gen_node_id(),
            }
            dump_json(config_path, config, exist_ok=True, override=True)
        else:
            config = load_json(config_path)
            flag, result = self.ensure_node_id(config)
            if flag:
                dump_json(config_path, result, exist_ok=True, override=True)
        return config

    def _ensure_node_config(self, path, defaults):
        """Load ``<path>/config.json``, creating it from ``defaults`` if missing.

        A new config gets a fresh node id followed by ``defaults``; an
        existing one is rewritten only when a node id had to be repaired.
        """
        config_path = os.path.join(path, "config.json")
        if not os.path.exists(config_path):
            config = {"node_id": self.gen_node_id(), **defaults}
            dump_json(config_path, config, exist_ok=True, override=True)
        else:
            config = load_json(config_path)
            flag, result = self.ensure_node_id(config)
            if flag:
                dump_json(config_path, result, exist_ok=True, override=True)
        return config

    def ensure_level_config(self, path):
        """Return the config of a level directory, creating it if missing."""
        # "keywords" is included because the node loaders read it.
        return self._ensure_node_config(path, {"keywords": []})

    def ensure_chapter_config(self, path):
        """Return the config of a chapter directory, creating it if missing."""
        return self._ensure_node_config(path, {"keywords": []})

    def ensure_section_config(self, path):
        """Return the config of a section directory, creating it if missing."""
        return self._ensure_node_config(
            path, {"keywords": [], "children": [], "export": []})

    def ensure_node_id(self, config):
        """Give ``config`` and its inline children valid node ids.

        An id is replaced when it is missing, does not start with
        ``<tree name>-``, or is already registered in ``id_set`` by something
        other than this walker. Returns ``(changed, config)``.
        """
        # Created lazily so instances built without ``__init__`` still work.
        issued = self.__dict__.setdefault("_issued_ids", set())
        node_id = config.get("node_id")
        valid = isinstance(node_id, str) and node_id.startswith(f"{self.name}-")
        # An id this walker generated is found in ``id_set`` whenever the same
        # config is read again; that is not a clash and must not trigger yet
        # another id, or tree.json and config.json end up disagreeing.
        clash = valid and node_id in id_set and node_id not in issued
        flag = False
        if not valid or clash:
            new_id = self.gen_node_id()
            id_set.add(new_id)
            issued.add(new_id)
            config["node_id"] = new_id
            flag = True
        for child in config.get("children", []):
            child_node = list(child.values())[0]
            child_flag, _ = self.ensure_node_id(child_node)
            flag = flag or child_flag
        return flag, config

    def gen_node_id(self):
        """Return a new node id: the tree name, a dash and a random hex uuid."""
        return f"{self.name}-{uuid.uuid4().hex}"

    def extract_node_env(self, path):
        """Split the last component of ``path`` into ``(number, title)``.

        Raises ``ValueError`` (after logging) when the name is not of the
        form ``<number>.<title>``.
        """
        try:
            _, name = os.path.split(path)
            self.logger.info(path)
            number, title = name.split(".", 1)
            return int(number), title
        except ValueError:
            self.logger.error(f"目录 [{path}] 解析失败,结构不合法,可能是缺少序号")
            raise

    def load_chapter_node(self, full_name):
        """Return ``(number, {title: node})`` for one chapter directory."""
        config = self.ensure_chapter_config(full_name)
        num, name = self.extract_node_env(full_name)
        result = {
            name: {
                "node_id": config["node_id"],
                # Configs written by earlier versions may lack keywords.
                "keywords": config.get("keywords", []),
                "children": [],
            }
        }
        return num, result

    def load_section_node(self, full_name):
        """Return ``(number, {title: node})`` for one section directory."""
        config = self.ensure_section_config(full_name)
        num, name = self.extract_node_env(full_name)
        result = {
            name: {
                "node_id": config["node_id"],
                "keywords": config["keywords"],
                "children": config.get("children", [])
            }
        }
        return num, result

    def ensure_exercises(self, section_path):
        """Create exercise metadata for a section and give exercises unique ids.

        Every ``.md`` file gets a sibling ``.json`` metadata file which,
        except for the "algorithm" tree, is added to the section's export
        list. Each exported exercise then gets an id when it has none or when
        its id is already registered.
        """
        config = self.ensure_section_config(section_path)
        flag = False
        # Sorted so that newly exported files are appended in a stable order.
        for entry in sorted(os.listdir(section_path)):
            base, ext = os.path.splitext(entry)
            if ext != ".md":
                continue
            mfile = base + ".json"
            meta_path = os.path.join(section_path, mfile)
            self.ensure_exercises_meta(meta_path, os.path.basename(entry))
            export = config.get("export", [])
            if mfile not in export and self.name != "algorithm":
                export.append(mfile)
                flag = True
            config["export"] = export
        if flag:
            dump_json(os.path.join(section_path, "config.json"), config, True, True)
        for exported in config.get("export", []):
            full_name = os.path.join(section_path, exported)
            exercise = load_json(full_name)
            eid = exercise.get("exercise_id")
            if not eid or eid in id_set:
                eid = uuid.uuid4().hex
                exercise["exercise_id"] = eid
                dump_json(full_name, exercise, True, True)
            # Register kept and regenerated ids alike so later exercises are
            # compared against all of them.
            id_set.add(eid)

    def ensure_exercises_meta(self, meta_path, source):
        """Create or complete the metadata file of one exercise.

        Missing fields of an existing file are filled in; a missing or blank
        file is replaced by a full default record. The file is always
        rewritten.

        @param meta_path: path of the ``.json`` metadata file
        @param source: name of the markdown file the metadata describes
        """
        meta = None
        if os.path.exists(meta_path):
            content = read_text(meta_path)
            if content.strip():
                meta = json.loads(content)
                # Each default is computed only when its field is missing;
                # ``user_name()`` in particular reads the git configuration.
                if "exercise_id" not in meta:
                    meta["exercise_id"] = uuid.uuid4().hex
                if "notebook_enable" not in meta:
                    meta["notebook_enable"] = self.default_notebook()
                if "source" not in meta:
                    meta["source"] = source
                if "author" not in meta:
                    meta["author"] = user_name()
                if "type" not in meta:
                    meta["type"] = "code_options"
        if meta is None:
            meta = {
                "type": "code_options",
                "author": user_name(),
                "source": source,
                "notebook_enable": self.default_notebook(),
                "exercise_id": uuid.uuid4().hex
            }
        dump_json(meta_path, meta, True, True)

    def default_notebook(self):
        """Return True for the "python", "java" and "c" trees, else False."""
        return self.name in ["python", "java", "c"]

    def check_section_keywords(self, full_path):
        """Terminate with status 1 when the section has no keywords."""
        config = self.ensure_section_config(full_path)
        if not config.get("keywords", []):
            self.logger.error(f"节点 [{full_path}] 的关键字为空,请修改配置文件写入关键字")
            sys.exit(1)
def math_processor(context):
    """ math(str)->str
    Pre-process text, turning ``$``-delimited formulas into HTML that a
    front end can display.

    Inside a formula ``_x`` / ``_{...}`` become ``<sub>`` elements and
    ``^x`` / ``^{...}`` become ``<sup>`` elements; the ``$`` delimiters are
    removed. Text outside formulas is copied unchanged. Malformed input is
    kept literally instead of being dropped or raising: an unpaired ``$``
    with the text after it, a trailing ``_`` or ``^``, and a ``{`` without
    its ``}``.
    """
    tags = {"_": "sub", "^": "sup"}

    def render(formula):
        # Convert the text between two "$" into a list of output pieces.
        out = []
        j = 0
        length = len(formula)
        while j < length:
            cc = formula[j]
            tag = tags.get(cc)
            if tag is None or j + 1 >= length:
                # Ordinary character, or a marker with nothing after it.
                out.append(cc)
                j += 1
                continue
            next_c = formula[j + 1]
            if next_c != "{":
                out.append(f"<{tag}>{next_c}</{tag}>")
                j += 2
                continue
            close = formula.find("}", j + 2)
            if close == -1:
                # Unclosed group: keep the marker and go on literally.
                out.append(cc)
                j += 1
                continue
            out.append(f"<{tag}>{formula[j + 2:close]}</{tag}>")
            j = close + 1
        return out

    # Splitting on "$" alternates plain text (even index) and formulas (odd).
    segments = context.split("$")
    last = len(segments) - 1
    # An even number of segments means an odd number of "$": the last one
    # opens a formula that is never closed.
    unpaired = len(segments) % 2 == 0
    new_md = []
    for index, segment in enumerate(segments):
        if index % 2 == 0:
            new_md.append(segment)
        elif unpaired and index == last:
            new_md.append("$" + segment)
        else:
            new_md.extend(render(segment))
    return "".join(new_md)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册