提交 c629f5dc 编写于 作者: S Stephen Dolan

Move tail call optimisations to compile.c.

Needs more testing.
上级 3a647d3e
......@@ -74,6 +74,7 @@ static inst* inst_new(opcode op) {
i->subfn = gen_noop();
i->arglist = gen_noop();
i->source = UNKNOWN_LOCATION;
i->compiled = 0;
return i;
}
......@@ -474,6 +475,12 @@ static int count_cfunctions(block b) {
}
static int returns_immediately(inst* i) {
while (i->op == JUMP)
i = i->imm.target;
return i->op == RET;
}
// Expands call instructions into a calling sequence
static int expand_call_arglist(struct locfile* locations, block* b) {
int errors = 0;
......@@ -498,6 +505,12 @@ static int expand_call_arglist(struct locfile* locations, block* b) {
case CLOSURE_CREATE:
case CLOSURE_PARAM: {
block callargs = gen_noop();
/* A call is a tail call if:
a) the next instruction is a RET or a chain of unconditional JUMPs
that ends in a RET, and
b) none of the closures -callee included- are in the current bytecode block. */
int is_tail_call = b->first && returns_immediately(b->first);
if (!curr->bound_by->compiled) is_tail_call = 0;
for (inst* i; (i = block_take(&curr->arglist));) {
assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
block b = inst_block(i);
......@@ -511,10 +524,12 @@ static int expand_call_arglist(struct locfile* locations, block* b) {
block_append(&callargs, gen_op_bound(CLOSURE_REF, b));
break;
}
if (!i->bound_by->compiled) is_tail_call = 0;
actual_args++;
}
curr->imm.intval = actual_args;
curr->arglist = callargs;
if (is_tail_call) curr->op = TAIL_CALL_JQ;
if (curr->bound_by->op == CLOSURE_CREATE) {
for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
......@@ -559,13 +574,13 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
int pos = 0;
int var_frame_idx = 0;
bc->nsubfunctions = 0;
errors += expand_call_arglist(locations, &b);
b = BLOCK(b, gen_op_simple(RET));
errors += expand_call_arglist(locations, &b);
jv localnames = jv_array();
for (inst* curr = b.first; curr; curr = curr->next) {
if (!curr->next) assert(curr == b.last);
int length = opcode_describe(curr->op)->length;
if (curr->op == CALL_JQ) {
if (curr->op == CALL_JQ || curr->op == TAIL_CALL_JQ) {
for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
length += 2;
}
......@@ -639,7 +654,7 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
assert(!curr->arglist.first);
code[pos++] = (uint16_t)curr->imm.intval;
code[pos++] = curr->bound_by->imm.intval;
} else if (curr->op == CALL_JQ) {
} else if (curr->op == CALL_JQ || curr->op == TAIL_CALL_JQ) {
assert(curr->bound_by->op == CLOSURE_CREATE ||
curr->bound_by->op == CLOSURE_PARAM);
code[pos++] = (uint16_t)curr->imm.intval;
......
......@@ -796,76 +796,6 @@ void jq_teardown(jq_state **jq) {
jv_mem_free(old_jq);
}
static int ret_follows(uint16_t *pc) {
if (*pc == RET)
return 1;
if (*pc++ != JUMP)
return 0;
return ret_follows(pc + *pc + 1); // FIXME, might be ironic
}
/*
* Look for tail calls that can be optimized: tail calls with no
* references left to the current frame.
*
* We're staring at this bytecode layout:
*
* CALL_JQ
* <nclosures>
* <callee closure> (2 units)
* <nclosures closures> (2 units each)
* <next instruction>
*
* A closure is:
*
* <level> (a relative frame count chased via the current frame's env)
* <index> (an index of a subfunction or closure in that frame)
*
* We're looking for:
*
* a) the next instruction is a RET or a chain of unconditional JUMPs
* that ends in a RET, and
*
* b) none of the closures -callee included- have level == 0.
*/
static uint16_t tail_call_analyze(uint16_t *pc) {
assert(*pc == CALL_JQ);
pc++;
// + 1 for the callee closure
for (uint16_t nclosures = *pc++ + 1; nclosures > 0; pc++, nclosures--) {
if (*pc++ == 0)
return CALL_JQ;
}
if (ret_follows(pc))
return TAIL_CALL_JQ;
return CALL_JQ;
}
static struct bytecode *optimize_code(struct bytecode *bc) {
uint16_t *pc = bc->code;
// FIXME: Don't mutate bc->code...
while (pc < bc->code + bc->codelen) {
switch (*pc) {
case CALL_JQ:
*pc = tail_call_analyze(pc);
break;
// Other bytecode optimizations here. A peephole optimizer would
// fit right in.
default: break;
}
pc += bytecode_operation_length(pc);
}
return bc;
}
static struct bytecode *optimize(struct bytecode *bc) {
for (int i=0; i<bc->nsubfunctions; i++) {
bc->subfunctions[i] = optimize(bc->subfunctions[i]);
}
return optimize_code(bc);
}
int jq_compile_args(jq_state *jq, const char* str, jv args) {
jv_nomem_handler(jq->nomem_handler, jq->nomem_handler_data);
assert(jv_get_kind(args) == JV_KIND_ARRAY);
......@@ -903,8 +833,6 @@ int jq_compile_args(jq_state *jq, const char* str, jv args) {
fprintf(stderr, "%s\n", jv_string_value(s));
jv_free(s);
}
if (jq->bc)
jq->bc = optimize(jq->bc);
locfile_free(&locations);
return jq->bc != NULL;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册