提交 f06e4d0b 编写于 作者: J jiangzoi

add hanzi2num

上级 4a243d7c
# Build script for the wenyan compiler.
# The ANTLR4 tool generates the C++ lexer/parser from $(GRAMMAR).g4 into
# $(GENERATED); the generated sources and $(COMPILER).cpp are compiled into
# $(OUTPUT) and linked against the static ANTLR4 runtime in $(LIBS).

# Directory receiving object files.
OUTPUT=output
# Directory receiving the ANTLR-generated sources.
GENERATED=generated
# Location of the ANTLR4 C++ runtime headers.
RUNTIME=./antlr4-runtime
CCARGS=-c -I $(RUNTIME) -I $(GENERATED) -std=c++11
LDARGS=-g
LIBS=./lib/libantlr4-runtime.a
CC=g++
# Base name of the grammar file and of the generated Lexer/Parser sources.
GRAMMAR=wenyan
ANTLR4=java -jar /usr/local/lib/antlr-4.7.2-complete.jar
# Name of the final executable and of its main source file.
COMPILER=wenyanCompiler
ANTLRGEN=Lexer Parser
OBJS=$(addsuffix .o,$(addprefix $(OUTPUT)/$(GRAMMAR),$(ANTLRGEN)))
GSOURCES=$(addsuffix .cpp,$(addprefix $(GENERATED)/$(GRAMMAR),$(ANTLRGEN)))

# Special target names are case-sensitive in GNU make: the lowercase spelling
# used previously declared an ordinary target and marked nothing as precious.
.PRECIOUS: $(GSOURCES)

# These targets name actions rather than files on disk.
.PHONY: all antlr4 dirs clean

all: ${COMPILER}

# Compile the driver, then link it with the generated lexer/parser objects.
${COMPILER}: dirs antlr4 ${COMPILER}.cpp $(OBJS)
	$(CC) $(CCARGS) ${COMPILER}.cpp -o $(OUTPUT)/${COMPILER}.o
	$(CC) $(LDARGS) $(OUTPUT)/${COMPILER}.o $(OBJS) $(LIBS) -o ${COMPILER}

antlr4: $(GENERATED)/.generated;

# Re-run ANTLR only when the grammar changes; the stamp file records the run.
$(GENERATED)/.generated: $(GRAMMAR).g4
	$(ANTLR4) -Dlanguage=Cpp -no-listener -o $(GENERATED) $(GRAMMAR).g4
	@touch $(GENERATED)/.generated

$(OUTPUT)/%.o : $(GENERATED)/%.cpp
	$(CC) $(CCARGS) $< -o $@

# The generated sources are produced as a side effect of the stamp rule above.
$(GENERATED)/%.cpp: $(GENERATED)/.generated;

dirs:; mkdir -p $(OUTPUT) $(GENERATED)

clean:; rm -rf $(OUTPUT) $(GENERATED)
#ifndef _HANZI2NUM_H_
#define _HANZI2NUM_H_
#include <string>
#include <unordered_map>
#include <vector>
#include <cstdint>
// Lookup table from a single hanzi numeral (as a UTF-8 string) to its numeric
// value: the digits 零..九 map to 0..9 and 十/百/千/萬/億 map to the power of
// ten they denote.
//
// Declared `static const` because this header defines the table at namespace
// scope: with external linkage, including the header from more than one
// translation unit would yield duplicate definitions at link time. `const`
// also prevents accidental modification of the table.
static const std::unordered_map<std::string, int64_t> _hanzi_unit = {
    {"零", 0},
    {"一", 1},
    {"二", 2},
    {"三", 3},
    {"四", 4},
    {"五", 5},
    {"六", 6},
    {"七", 7},
    {"八", 8},
    {"九", 9},
    {"十", 10},
    {"百", 100},
    {"千", 1000},
    {"萬", 10000},
    {"億", 100000000}
};
// One entry of the item stack used while converting a hanzi number.
// An entry stores a value, a unit, the text it was created from, and a flag
// that is set once zerofy() has been called on it.
class HanziItem {
private:
    int64_t _value;       // first constructor argument
    int64_t _unit;        // second constructor argument
    bool _zeroed;         // false until zerofy() succeeds
    std::string _origin;  // text this entry was created from (for messages)
public:
    // Creates an entry with the given value/unit/origin; the zeroed flag
    // starts out false.
    HanziItem(int64_t value, int64_t unit, const std::string& origin)
        : _value(value), _unit(unit), _zeroed(false), _origin(origin) {}

    int64_t get_value() const {
        return _value;
    }

    int64_t get_unit() const {
        return _unit;
    }

    bool get_zeroed() const {
        return _zeroed;
    }

    // Sets the zeroed flag. Returns true if the flag was newly set and false
    // if it had already been set by an earlier call.
    bool zerofy() {
        if (_zeroed) {
            return false;
        }
        _zeroed = true;
        return true;
    }

    std::string get_origin() const {
        return _origin;
    }

    // Debug representation; the zeroed flag is rendered as 0 or 1.
    std::string to_string() const {
        return "value: " + std::to_string(_value) +
               " unit: " + std::to_string(_unit) +
               " zeroed: " + std::to_string(_zeroed);
    }
};
// Returns the numeric value stored in _hanzi_unit for `hanzi`, or -1 when the
// string is not a key of that table.
// `inline` because the function is defined in a header; the argument is taken
// by const reference and looked up once instead of count() followed by at().
inline int get_unit(const std::string& hanzi) {
    const auto found = _hanzi_unit.find(hanzi);
    if (found == _hanzi_unit.end()) {
        return -1;
    }
    return static_cast<int>(found->second);
}
// Decides whether the entry `a` may be followed by the unit `b_unit`.
// Returns false when a's unit is greater than or equal to b_unit, and also
// for the pairs (a unit, b_unit) = (10, 100), (10, 1000) and (100, 1000);
// returns true otherwise.
// `inline` because the function is defined in a header.
inline bool check_unit_legal(HanziItem* a, int64_t b_unit) {
    const int64_t a_unit = a->get_unit();
    if (a_unit >= b_unit) {
        return false;
    }
    const bool ten_before_larger = (a_unit == 10) && (b_unit == 100 || b_unit == 1000);
    const bool hundred_before_thousand = (a_unit == 100) && (b_unit == 1000);
    return !(ten_before_larger || hundred_before_thousand);
}
int64_t hanzi2int(std::string hanzi) {
std::vector<HanziItem*> s; // item stack
HanziItem* last_item;
bool flag = true;
std::string msg;
// std::cout << "hanzi " << hanzi << " len: " << hanzi.size() << std::endl;
// for (int i = 0; i < hanzi.size(); i++) {
// std::cout << std::hex << unsigned(hanzi[i]) << " ";
// }
// std::cout << std::dec << std::endl;
for (int i = 0; i < hanzi.size(); i += 3) {
// get each hanzi e.g. 十、一、百
std::string cur_str = hanzi.substr(i, 3);
// std::cout << "cur_str: " << cur_str << std::endl;
int64_t cur_int = get_unit(cur_str);
// std::cout << "cur_int: " << cur_int << std::endl;
if (s.size() == 0) {
// if the stack is empty
if (cur_int >= 100) {
// starting hanzi cannot be 百
msg = std::string("The first character cannot be ") + cur_str;
flag = false;
break;
}
if (cur_int < 10) {
s.push_back(new HanziItem(cur_int, 1, cur_str));
}
else {
s.push_back(new HanziItem(1, cur_int, cur_str));
}
}
else {
last_item = s[s.size() - 1];
if (cur_int == last_item->get_unit()) {
// repeated unit is illegal e.g. 百百
msg = cur_str + std::string("cannot be repeated");
flag = false;
break;
}
else if (cur_int == 0) {
if (!last_item->zerofy()) {
msg = std::string("零 cannot be repeated");
flag = false;
break;
}
}
else if (cur_int > last_item->get_unit()) {
// merge the units for this section
int64_t acc = 0;
while (s.size() > 0) {
last_item = s[s.size() - 1];
// std::cout << "last_item: " << last_item->get_origin()
// << "cur_item: " << cur_str << std::endl;
if (last_item->get_unit() > cur_int) {
break;
}
if (!check_unit_legal(last_item, cur_int)) {
msg = std::string("neighbor units illegal: ") + last_item->get_origin() + cur_str;
flag = false;
break;
}
if (s.size() > 1) {
HanziItem* last_last_item = s[s.size() - 2];
if ((last_last_item->get_unit() < cur_int)) {
if ((last_last_item->get_unit() > 10 * last_item->get_unit() * last_item->get_value()) &&
(!last_last_item->get_zeroed())) {
// e.g. 八萬八百
msg = std::string("There should be 零 between") +
last_last_item->get_origin() +
std::string(" and ") +
last_item->get_origin();
flag = false;
break;
}
if ((last_last_item->get_unit() < 10 * last_item->get_unit() * last_item->get_value()) &&
(last_last_item->get_zeroed())) {
// e.g. 八萬八百
msg = std::string("There should be no 零 between") +
last_last_item->get_origin() +
std::string(" and ") +
last_item->get_origin();
flag = false;
break;
}
}
}
acc += last_item->get_value() * last_item->get_unit();
delete last_item;
s.pop_back();
}
if (!flag) {
break;
}
if (acc == 0) {
msg = std::string("single ") + cur_str + std::string(" is illegal");
flag = false;
break;
}
s.push_back(new HanziItem(acc, cur_int, cur_str));
}
else {
if (cur_int >= 100) {
msg = std::string("single ") + cur_str + std::string(" is illegal");
flag = false;
break;
}
s.push_back(new HanziItem(1, cur_int, cur_str));
}
}
// for (HanziItem* p : s) {
// std::cout << p->to_string() << std::endl;
// }
}
// get the final value
int64_t result = 0;
while (s.size() > 0) {
last_item = s[s.size() - 1];
if (s.size() > 1) {
HanziItem* last_last_item = s[s.size() - 2];
if ((last_last_item->get_unit() > 10 * last_item->get_unit() * last_item->get_value()) &&
(!last_last_item->get_zeroed())) {
// e.g. 八萬八百
msg = std::string("There should be 零 between") +
last_last_item->get_origin() +
std::string(" and ") +
last_item->get_origin();
flag = false;
break;
}
if ((last_last_item->get_unit() < 10 * last_item->get_unit() * last_item->get_value()) &&
(last_last_item->get_zeroed())) {
// e.g. 八萬八百
msg = std::string("There should be no 零 between") +
last_last_item->get_origin() +
std::string(" and ") +
last_item->get_origin();
flag = false;
break;
}
}
result += last_item->get_value() * last_item->get_unit();
delete last_item;
s.pop_back();
}
// delete the allocated space;
for (HanziItem* p : s) {
delete p;
}
if (!flag) {
throw msg;
}
return result;
}
#endif
\ No newline at end of file
#ifndef _SYMBOL_TABLE_H_
#define _SYMBOL_TABLE_H_
#include <iostream>
#include <memory>
#include <string>
#include <vector>
// A single (identifier, data) pair stored in a symbol table scope.
// The entry takes ownership of `data`: the pointer passed to the constructor
// is deleted when the entry is destroyed.
template <class SYM, class DATA>
class SymbolTableEntry {
private:
    SYM id;
    DATA* data;  // owned; released in the destructor
public:
    SymbolTableEntry(SYM x, DATA* d): id(x), data(d) { }

    // Copying would leave two entries deleting the same `data` pointer, so
    // copy construction and copy assignment are disabled.
    SymbolTableEntry(const SymbolTableEntry&) = delete;
    SymbolTableEntry& operator=(const SymbolTableEntry&) = delete;

    ~SymbolTableEntry() {
        delete data;
        std::cout << "delete symbol table entry" << std::endl;
    }

    SYM get_id() const { return id; }

    // Returns the owned pointer without transferring ownership.
    DATA* get_data() const { return data; }
};
template <class SYM, class DATA>
class SymbolTable {
typedef SymbolTableEntry<SYM, DATA> ScopeEntry;
typedef std::vector<ScopeEntry*> Scope;
typedef std::vector<Scope*> ScopeStack;
private:
ScopeStack table_stack;
public:
SymbolTable() { }
/* start a new nested scope */
void enter_scope() {
table_stack.push_back(new Scope());
}
/* add a symbol x to the table */
ScopeEntry* add_symbol(SYM x, DATA* d) {
ScopeEntry* item = new ScopeEntry(x, d);
table_stack.top()->push_back(item);
return item;
}
/* finds current x in all scope or null */
DATA* check_all_scope(SYM x) {
if (table_stack.size() == 0) {
return NULL;
}
for (int i = table_stack.size(); i >= 0; i--) {
Scope& current_scope = *(table_stack[i]);
int current_idx = current_scope->size();
for (; current_idx >= 0; current_idx--) {
if (current_scope[current_idx]->get_id() == x) {
return current_scope[current_idx]->get_data();
}
}
}
return NULL;
}
/* check_scope: true if x defined in current scope*/
DATA* check_cur_scope(SYM x) {
if (table_stack.size() == 0) {
return NULL;
}
Scope& current_scope = *(table_stack.top());
int current_idx = current_scope->size();
for (; current_idx >= 0; current_idx--) {
if (current_scope[current_idx]->get_id() == x) {
return current_scope[current_idx]->get_data();
}
}
return NULL;
}
void exit_scope() {
if (table_stack.size() == 0) {
std::cerr << "Symbol Table Error: no scope to exit" << std::endl;
return;
}
table_stack.pop_back();
}
};
#endif
\ No newline at end of file
......@@ -79,7 +79,7 @@ int main(int argc, char *argv[]) {
wenyanLexer lexer(&input);
antlr4::CommonTokenStream tokens(&lexer);
WenyanParserErrorListener errorListner;
WenyanParserErrorListener parserErrorListner;
tokens.fill();
......@@ -93,7 +93,7 @@ int main(int argc, char *argv[]) {
wenyanParser parser(&tokens);
parser.removeErrorListeners();
parser.addErrorListener(&errorListner);
parser.addErrorListener(&parserErrorListner);
/* Type Checking */
try {
wenyanVisitor visitor;
......
......@@ -2,11 +2,31 @@
#include "antlr4-runtime.h"
#include "wenyanParser.h"
#include "symbolTable.h"
#include "hanzi2num.h"
// Reports errors found while visiting the parse tree by throwing.
class WenyanVisitorErrorHandler {
public:
    // Always throws std::invalid_argument whose message has the form
    // "Line(<line>:<charPositionInLine>) Error(<msg>)".
    //
    // The text is assembled with std::string instead of std::ostrstream:
    // ostrstream is deprecated, and its str() returns a buffer that is not
    // NUL-terminated unless std::ends was written and that stays frozen
    // (never freed) afterwards.
    [[noreturn]] void syntaxError(
        size_t line,
        size_t charPositionInLine,
        const std::string &msg) {
        const std::string text = "Line(" + std::to_string(line) + ":" +
                                 std::to_string(charPositionInLine) +
                                 ") Error(" + msg + ")";
        throw std::invalid_argument(text);
    }
};
class wenyanVisitor : public antlr4::tree::AbstractParseTreeVisitor {
typedef std::string Symbol;
typedef std::string Type;
private:
SymbolTable<Symbol, Type> symTable;
WenyanVisitorErrorHandler _err_handler;
public:
antlrcpp::Any visitProgram(wenyanParser::ProgramContext *ctx) {
symTable.enter_scope();
std::cout << "visit program" << std::endl;
for (wenyanParser::StatementContext* statement : ctx->statement()) {
bool result = visitStatement(statement);
......@@ -14,6 +34,7 @@ public:
return false;
}
}
symTable.exit_scope();
return true;
}
......@@ -111,6 +132,22 @@ public:
antlrcpp::Any visitBinary_if_expression(wenyanParser::Binary_if_expressionContext *context);
// Visits a declare statement: reads the INT_NUM and TYPE tokens, converts the
// hanzi number with hanzi2int and prints the intermediate values to std::cout.
// A conversion failure (std::string thrown by hanzi2int) is forwarded to
// _err_handler.syntaxError together with the position of the number token.
// Returns true when the end of the method is reached.
antlrcpp::Any visitDeclare_statement(wenyanParser::Declare_statementContext *ctx) {
    antlr4::tree::TerminalNode* declare_num_node = ctx->INT_NUM();
    antlr4::tree::TerminalNode* declare_type_node = ctx->TYPE();
    const std::string declare_num_str = declare_num_node->getText();
    const std::string declare_type_str = declare_type_node->getText();
    std::cout << "str: " << declare_num_str << " : " << declare_type_str << std::endl;

    // Initialised so the value printed below is never indeterminate, whatever
    // the error handler does.
    int64_t declare_num = 0;
    try {
        declare_num = hanzi2int(declare_num_str);
    }
    catch (const std::string& msg) {  // by reference: no copy of the message
        _err_handler.syntaxError(declare_num_node->getSymbol()->getLine(),
                                 declare_num_node->getSymbol()->getCharPositionInLine(),
                                 std::string("[Error] Declare statement: ") + msg);
    }
    std::cout << "declare number: " << declare_num << std::endl;
    return true;
}
......
......@@ -12,41 +12,38 @@ statement : declare_statement
| import_statement
| object_statement
| pick_up_statement
| reference_statement
| array_statement
| clean_statement
| BREAK
| comment;
pick_up_statement : '夫' data ('之' (STRING_LITERAL|INT_NUM|'其餘'|IDENTIFIER|'長'))? reference_single_statement? ;
reference_statement : '名之' ('曰' IDENTIFIER)+ ;
declare_statement : DECLARE_KEYWORD INT_NUM TYPE ('曰' data)*;
define_statement : DEFINE_KEYWORD TYPE data ;
pick_up_statement : '夫' data ('之' data)? ;
array_statement : array_cat_statement|array_push_statement ;
array_cat_statement : '銜' (IDENTIFIER|'其') (PREPOSITION_YI IDENTIFIER)+ reference_single_statement?;
array_push_statement : '充' (IDENTIFIER|'其') (PREPOSITION_YI data)+ reference_single_statement?;
array_cat_statement : '銜' (IDENTIFIER|LAST_IDENTIFIER) (PREPOSITION_YI IDENTIFIER)+ ;
array_push_statement : '充' (IDENTIFIER|LAST_IDENTIFIER) (PREPOSITION_YI data)+ ;
function_statement : function_define_statement|(function_call_statement (reference_single_statement)?) ;
function_statement : function_define_statement|function_call_statement ;
function_call_statement : function_plain_call|function_nested_call ;
function_plain_call : ('施' IDENTIFIER (preposition data)*)|('施其' (preposition data)*) ;
function_plain_call : '施' (IDENTIFIER|LAST_IDENTIFIER) (preposition data)* ;
function_nested_call : ('取' INT_NUM '以施' IDENTIFIER)+ ;
function_define_statement : '吾有' INT_NUM '術' reference_single_statement ('欲行是術' '必先得' (INT_NUM TYPE ('曰' IDENTIFIER)+)+)? ('是術曰'|'乃行是術曰') statement* '是謂' IDENTIFIER '之術也' ;
function_define_statement : DECLARE_KEYWORD INT_NUM '術' reference_statement ('欲行是術' '必先得' (INT_NUM TYPE ('曰' IDENTIFIER)+)+)? ('是術曰'|'乃行是術曰') statement* '是謂' IDENTIFIER '之術也' ;
if_statement : IF if_expression '者' statement+ (ELSE statement+)? FOR_IF_END ;
if_expression : unary_if_expression|binary_if_expression ;
unary_if_expression : data|(IDENTIFIER '之'('長'|STRING_LITERAL|IDENTIFIER))|'其' ;
unary_if_expression : data|(IDENTIFIER '之' data)| ;
binary_if_expression : unary_if_expression IF_LOGIC_OP unary_if_expression ;
declare_statement : ('吾有'|'今有') INT_NUM TYPE ('曰' data)*;
define_statement : (declare_statement reference_multi_statement)|init_define_statement ;
reference_multi_statement : '名之' ('曰' IDENTIFIER)+ ;
reference_single_statement : '名之' ('曰' IDENTIFIER) ;
init_define_statement : '有' TYPE data (reference_single_statement)? ;
for_statement : for_arr_statement
| for_enum_statement
| for_while_statement ;
......@@ -57,16 +54,16 @@ for_while_statement : FOR_START_WHILE statement* FOR_IF_END ;
math_statement : (arith_math_statement|logic_math_statement|mod_math_statement) (reference_multi_statement)? ;
arith_math_statement : arith_binary_math|arith_unary_math ;
arith_binary_math : ARITH_BINARY_OP (data|'其') preposition (data|'其') ;
arith_unary_math : UNARY_OP (IDENTIFIER|'其') ;
mod_math_statement : '除' (INT_NUM|FLOAT_NUM|IDENTIFIER|'其') preposition (INT_NUM|FLOAT_NUM|IDENTIFIER) POST_MOD_MATH_OP? ;
arith_binary_math : ARITH_BINARY_OP data preposition data ;
arith_unary_math : UNARY_OP (IDENTIFIER|LAST_IDENTIFIER) ;
mod_math_statement : '除' (INT_NUM|FLOAT_NUM|IDENTIFIER|LAST_IDENTIFIER) preposition (INT_NUM|FLOAT_NUM|IDENTIFIER) POST_MOD_MATH_OP? ;
logic_math_statement : '夫' IDENTIFIER IDENTIFIER LOGIC_BINARY_OP ;
assign_statement : '昔之' IDENTIFIER ('之' (INT_NUM|STRING_LITERAL|IDENTIFIER))? '者' (('今' ((data ('之' INT_NUM)?)|'其') '是矣')|'今不復存矣') ;
return_statement : '乃得' (data|'其')|'乃歸空無'|'乃得矣' ;
return_statement : '乃得' data|'乃歸空無'|'乃得矣' ;
import_statement : '吾嘗觀' STRING_LITERAL '之書' ('方悟' IDENTIFIER+ '之義')? ;
......@@ -76,11 +73,15 @@ object_statement : '吾有' INT_NUM '物' reference_multi_statement (
object_define_statement : '其物如是' ('物之' STRING_LITERAL '者' TYPE '曰' data)+ '是謂' IDENTIFIER '之物也' ;
data : STRING_LITERAL|BOOL_VALUE|IDENTIFIER|INT_NUM|FLOAT_NUM ;
data : STRING_LITERAL|BOOL_VALUE|IDENTIFIER|INT_NUM|FLOAT_NUM|LAST_IDENTIFIER ;
LAST_IDENTIFIER : '其' ;
STRING_LITERAL : '「「' ( ~('」') )* '」」' ;
IDENTIFIER : '「' ( ~('」') )+ '」' ;
OTHER : '其餘' ;
ARITH_BINARY_OP : '加'|'減'|'乘' ;
LOGIC_BINARY_OP : '中有陽乎'|'中無陰乎' ;
POST_MOD_MATH_OP : '所餘幾何' ;
......@@ -106,7 +107,7 @@ FLOAT_NUM_KEYWORDS : '分'|'釐'|'毫'|'絲'|'忽'|'微'|'塵'|'埃'|'
INT_NUM : INT_NUM_KEYWORDS+ ;
INT_NUM_KEYWORDS : '零'|'一'|'二'|'三'|'四'|'五'|'六'|'七'|'八'|'九'|'十'|'百'|'千'|'萬'|'億'|'兆'|'京'|'垓'|'秭'|'穣'|'溝'|'澗'|'正'|'載'|'極' ;
TYPE : '數'|'列'|'言'|'爻' ;
TYPE : '數'|'列'|'言'|'爻'|IDENTIFIER ;
BOOL_VALUE : '陰'|'陽' ;
print_statement : '書之' ;
......@@ -114,4 +115,7 @@ WS : ([ \t\r\n]|'。'|',')+ -> skip ;
comment : ('注曰'|'疏曰'|'批曰') STRING_LITERAL ;
clean_statement : '噫' ;
DECLARE_KEYWORD : '吾有'|'今有' ;
DEFINE_KEYWORD : '有' ;
BREAK : '乃止' ;
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册