未验证 提交 aac4464e 编写于 作者: Y Yinan Xu 提交者: GitHub

Add support for a simple version of move elimination (#682)

In this commit, we add support for a simpler version of move elimination.

The original instruction sequences are:
move r1, r0
add r2, r1, r3

The optimized sequences are:
move pr1, pr0
add pr2, pr0, pr3 # instead of add pr2, pr1, pr3

In this way, the add can be issued as soon as r0 is ready instead of waiting for the move to write r1, so the move is effectively eliminated from the add's dependency chain.
上级 e6e4a58d
......@@ -256,6 +256,7 @@ class CtrlSignals extends XSBundle {
val imm = UInt(ImmUnion.maxLen.W)
val commitType = CommitType()
val fpu = new FPUCtrlSignals
val isMove = Bool()
def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = {
val decoder = freechips.rocketchip.rocket.DecodeLogic(inst, XDecode.decodeDefault, table)
......@@ -274,6 +275,8 @@ class CfCtrl extends XSBundle {
}
class PerfDebugInfo extends XSBundle {
val src1MoveElim = Bool()
val src2MoveElim = Bool()
// val fetchTime = UInt(64.W)
val renameTime = UInt(64.W)
val dispatchTime = UInt(64.W)
......@@ -543,4 +546,6 @@ class CustomCSRCtrlIO extends XSBundle {
val bp_ctrl = Output(new BPUCtrl)
// Memory Block
val sbuffer_threshold = Output(UInt(4.W))
// Rename
val move_elim_enable = Output(Bool())
}
......@@ -323,6 +323,7 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
rename.io.out <> dispatch.io.fromRename
rename.io.renameBypass <> dispatch.io.renameBypass
rename.io.dispatchInfo <> dispatch.io.preDpInfo
rename.io.csrCtrl <> RegNext(io.csrCtrl)
dispatch.io.redirect <> backendRedirect
dispatch.io.flush := flushReg
......
......@@ -33,7 +33,7 @@ abstract trait DecodeConstants {
// | | | | | | | | | | | isRVF
// | | | | | | | | | | | | selImm
List(SrcType.DC, SrcType.DC, SrcType.DC, FuType.alu, ALUOpType.sll, N, N, N, N, N, N, N, SelImm.INVALID_INSTR) // Use SelImm to indicate invalid instr
val table: Array[(BitPat, List[BitPat])]
}
......@@ -427,7 +427,7 @@ class DecodeUnit extends XSModule with DecodeUnitConstants {
// fill in exception vector
cf_ctrl.cf.exceptionVec := io.enq.ctrl_flow.exceptionVec
cf_ctrl.cf.exceptionVec(illegalInstr) := cs.selImm === SelImm.INVALID_INSTR
// fix frflags
// fflags zero csrrs rd csr
val isFrflags = BitPat("b000000000001_00000_010_?????_1110011") === ctrl_flow.instr
......@@ -448,6 +448,8 @@ class DecodeUnit extends XSModule with DecodeUnitConstants {
}
))
cs.isMove := BitPat("b000000000000_?????_000_?????_0010011") === ctrl_flow.instr
cf_ctrl.ctrl := cs
// TODO: do we still need this?
......
......@@ -77,26 +77,37 @@ class Dispatch1 extends XSModule with HasExceptionNO {
for (i <- 0 until RenameWidth) {
updatedCommitType(i) := Cat(isLs(i), (isStore(i) && !isAMO(i)) | isBranch(i))
updatedPsrc1(i) := io.fromRename.take(i).map(_.bits.pdest)
val pdestBypassedPsrc1 = io.fromRename.take(i).map(_.bits.pdest)
.zip(if (i == 0) Seq() else io.renameBypass.lsrc1_bypass(i-1).asBools)
.foldLeft(io.fromRename(i).bits.psrc1) {
(z, next) => Mux(next._2, next._1, z)
}
updatedPsrc2(i) := io.fromRename.take(i).map(_.bits.pdest)
val pdestBypassedPsrc2 = io.fromRename.take(i).map(_.bits.pdest)
.zip(if (i == 0) Seq() else io.renameBypass.lsrc2_bypass(i-1).asBools)
.foldLeft(io.fromRename(i).bits.psrc2) {
(z, next) => Mux(next._2, next._1, z)
}
updatedPsrc3(i) := io.fromRename.take(i).map(_.bits.pdest)
val pdestBypassedPsrc3 = io.fromRename.take(i).map(_.bits.pdest)
.zip(if (i == 0) Seq() else io.renameBypass.lsrc3_bypass(i-1).asBools)
.foldLeft(io.fromRename(i).bits.psrc3) {
(z, next) => Mux(next._2, next._1, z)
}
updatedOldPdest(i) := io.fromRename.take(i).map(_.bits.pdest)
val pdestBypassedOldPdest = io.fromRename.take(i).map(_.bits.pdest)
.zip(if (i == 0) Seq() else io.renameBypass.ldest_bypass(i-1).asBools)
.foldLeft(io.fromRename(i).bits.old_pdest) {
(z, next) => Mux(next._2, next._1, z)
}
if (i == 0) {
updatedPsrc1(i) := pdestBypassedPsrc1
updatedPsrc2(i) := pdestBypassedPsrc2
}
else {
// for move elimination, the psrc1/psrc2 of consumer instruction always come from psrc1 of move
updatedPsrc1(i) := Mux(io.renameBypass.move_eliminated_src1(i-1), updatedPsrc1(i-1), pdestBypassedPsrc1)
updatedPsrc2(i) := Mux(io.renameBypass.move_eliminated_src2(i-1), updatedPsrc1(i-1), pdestBypassedPsrc2)
}
updatedPsrc3(i) := pdestBypassedPsrc3
updatedOldPdest(i) := pdestBypassedOldPdest
updatedUop(i) := io.fromRename(i).bits
// update bypass psrc1/psrc2/psrc3/old_pdest
......@@ -104,6 +115,8 @@ class Dispatch1 extends XSModule with HasExceptionNO {
updatedUop(i).psrc2 := updatedPsrc2(i)
updatedUop(i).psrc3 := updatedPsrc3(i)
updatedUop(i).old_pdest := updatedOldPdest(i)
updatedUop(i).debugInfo.src1MoveElim := (if (i == 0) false.B else io.renameBypass.move_eliminated_src1(i-1))
updatedUop(i).debugInfo.src2MoveElim := (if (i == 0) false.B else io.renameBypass.move_eliminated_src2(i-1))
// update commitType
updatedUop(i).ctrl.commitType := updatedCommitType(i)
// update roqIdx, lqIdx, sqIdx
......
......@@ -360,6 +360,9 @@ class CSR extends FunctionUnit with HasCSRConst
val smblockctl = RegInit(UInt(XLEN.W), "h7".U)
csrio.customCtrl.sbuffer_threshold := smblockctl(3, 0)
val srnctl = RegInit(UInt(XLEN.W), "h1".U)
csrio.customCtrl.move_elim_enable := srnctl(0)
val tlbBundle = Wire(new TlbCsrBundle)
tlbBundle.satp := satp.asTypeOf(new SatpStruct)
csrio.tlb := tlbBundle
......@@ -492,6 +495,7 @@ class CSR extends FunctionUnit with HasCSRConst
MaskedRegMap(Sdsid, sdsid),
MaskedRegMap(Slvpredctl, slvpredctl),
MaskedRegMap(Smblockctl, smblockctl),
MaskedRegMap(Srnctl, srnctl),
//--- Machine Information Registers ---
MaskedRegMap(Mvendorid, mvendorid, 0.U, MaskedRegMap.Unwritable),
......
......@@ -55,6 +55,7 @@ trait HasCSRConst {
val Spfctl = 0x5C1
val Slvpredctl = 0x5C2
val Smblockctl = 0x5C3
val Srnctl = 0x5C4
val Sdsid = 0x9C0
......@@ -93,7 +94,7 @@ trait HasCSRConst {
// 0xB80 - 0x89F are also used as perfcnt csr
val Mcycle = 0xb00
val Minstret = 0xb02
val Mhpmcounter3 = 0xB03
val Mhpmcounter4 = 0xB04
val Mhpmcounter5 = 0xB05
......@@ -123,7 +124,7 @@ trait HasCSRConst {
val Mhpmcounter29 = 0xB1D
val Mhpmcounter30 = 0xB1E
val Mhpmcounter31 = 0xB1F
// Machine Counter Setup (not implemented)
val Mcountinhibit = 0x320
val Mhpmevent3 = 0x323
......
......@@ -12,6 +12,8 @@ class RenameBypassInfo extends XSBundle {
val lsrc2_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
val lsrc3_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
val ldest_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
val move_eliminated_src1 = Vec(RenameWidth-1, Bool())
val move_eliminated_src2 = Vec(RenameWidth-1, Bool())
}
class Rename extends XSModule with HasCircularQueuePtrHelper {
......@@ -25,6 +27,7 @@ class Rename extends XSModule with HasCircularQueuePtrHelper {
val out = Vec(RenameWidth, DecoupledIO(new MicroOp))
val renameBypass = Output(new RenameBypassInfo)
val dispatchInfo = Output(new PreDispatchInfo)
val csrCtrl = Flipped(new CustomCSRCtrlIO)
})
def printRenameInfo(in: DecoupledIO[CfCtrl], out: DecoupledIO[MicroOp]) = {
......@@ -202,6 +205,20 @@ class Rename extends XSModule with HasCircularQueuePtrHelper {
val intMatch = needIntDest(j) && needIntDest(i)
(fpMatch || intMatch) && io.in(j).bits.ctrl.ldest === io.in(i).bits.ctrl.ldest
}).reverse)
io.renameBypass.move_eliminated_src1(i-1) :=
// the producer move instruction writes to non-zero register
io.in(i-1).bits.ctrl.isMove && io.in(i-1).bits.ctrl.ldest =/= 0.U &&
// the consumer instruction uses the move's destination register
io.in(i).bits.ctrl.src1Type === SrcType.reg && io.in(i).bits.ctrl.lsrc1 === io.in(i-1).bits.ctrl.ldest &&
// CSR control (by srnctl)
io.csrCtrl.move_elim_enable
io.renameBypass.move_eliminated_src2(i-1) :=
// the producer move instruction writes to non-zero register
io.in(i-1).bits.ctrl.isMove && io.in(i-1).bits.ctrl.ldest =/= 0.U &&
// the consumer instruction uses the move's destination register
io.in(i).bits.ctrl.src2Type === SrcType.reg && io.in(i).bits.ctrl.lsrc2 === io.in(i-1).bits.ctrl.ldest &&
// CSR control (by srnctl)
io.csrCtrl.move_elim_enable
}
val isLs = VecInit(uops.map(uop => FuType.isLoadStore(uop.ctrl.fuType)))
......
......@@ -797,6 +797,12 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
XSPerf("clock_cycle", 1.U)
XSPerf("utilization", PopCount((0 until RoqSize).map(valid(_))))
XSPerf("commitInstr", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid)))
val commitIsMove = deqPtrVec.map(_.value).map(ptr => debug_microOp(ptr).ctrl.isMove)
XSPerf("commitInstrMove", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(commitIsMove).map{ case (v, m) => v && m })))
val commitSrc1MoveElim = deqPtrVec.map(_.value).map(ptr => debug_microOp(ptr).debugInfo.src1MoveElim)
XSPerf("commitInstrSrc1MoveElim", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(commitSrc1MoveElim).map{ case (v, e) => v && e })))
val commitSrc2MoveElim = deqPtrVec.map(_.value).map(ptr => debug_microOp(ptr).debugInfo.src2MoveElim)
XSPerf("commitInstrSrc2MoveElim", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(commitSrc2MoveElim).map{ case (v, e) => v && e })))
val commitIsLoad = io.commits.info.map(_.commitType).map(_ === CommitType.LOAD)
val commitLoadValid = io.commits.valid.zip(commitIsLoad).map{ case (v, t) => v && t }
XSPerf("commitInstrLoad", Mux(io.commits.isWalk, 0.U, PopCount(commitLoadValid)))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册