未验证 提交 29a0599e 编写于 作者: Y Yinan Xu 提交者: GitHub

backend, rs: support multiple enqueue and dequeue instructions (#820)

* backend,RS: add numEnq parameter to allow multiple enqueue instructions

* backend,RS: support multiple issue instructions at each cycle
上级 b6a21a24
......@@ -131,29 +131,29 @@ class FloatBlock
slowPorts.length,
fixedDelay = certainLatency,
fastWakeup = certainLatency >= 0,
feedback = false
feedback = false,1, 1
))
rs.io.redirect <> redirect // TODO: remove it
rs.io.flush <> flush // TODO: remove it
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl(i)
rs.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl(i))
rs.io.srcRegValue := DontCare
val src1Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3).data))
val src2Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3 + 1).data))
val src3Value = VecInit((0 until 4).map(i => fpRf.io.readPorts(i * 3 + 2).data))
rs.io.srcRegValue(0) := src1Value(readPortIndex(i))
rs.io.srcRegValue(1) := src2Value(readPortIndex(i))
if (cfg.fpSrcCnt > 2) rs.io.srcRegValue(2) := src3Value(readPortIndex(i))
rs.io.srcRegValue(0)(0) := src1Value(readPortIndex(i))
rs.io.srcRegValue(0)(1) := src2Value(readPortIndex(i))
if (cfg.fpSrcCnt > 2) rs.io.srcRegValue(0)(2) := src3Value(readPortIndex(i))
rs.io.fastDatas <> inBlockFastPorts.map(_._2)
rs.io.slowPorts <> slowPorts
exeUnits(i).io.redirect <> redirect
exeUnits(i).io.flush <> flush
exeUnits(i).io.fromFp <> rs.io.deq
exeUnits(i).io.fromFp <> rs.io.deq(0)
// rs.io.memfeedback := DontCare
rs.suggestName(s"rs_${cfg.name}")
......@@ -165,8 +165,8 @@ class FloatBlock
val inBlockUops = reservationStations.filter(x =>
x.exuCfg.hasCertainLatency && x.exuCfg.writeFpRf
).map(x => {
val raw = WireInit(x.io.fastUopOut)
raw.valid := x.io.fastUopOut.valid && raw.bits.ctrl.fpWen
val raw = WireInit(x.io.fastUopOut(0))
raw.valid := x.io.fastUopOut(0).valid && raw.bits.ctrl.fpWen
raw
})
rs.io.fastUopsIn <> inBlockUops
......@@ -234,7 +234,7 @@ class FloatBlock
difftest.io.fpr := VecInit(fpRf.io.debug_rports.map(p => ieee(p.data)))
}
val rsDeqCount = PopCount(reservationStations.map(_.io.deq.valid))
val rsDeqCount = PopCount(reservationStations.map(_.io.deq(0).valid))
XSPerfAccumulate("fp_rs_deq_count", rsDeqCount)
XSPerfHistogram("fp_rs_deq_count", rsDeqCount, true.B, 0, 6, 1)
}
......@@ -124,89 +124,88 @@ class IntegerBlock
isFp = false
))
io.intWbOut := VecInit(intWbArbiter.io.out.drop(4))
/** Tells whether an execution-unit config both reads and writes the same kind of register file.
  *
  * @param cfg the execution-unit configuration to inspect
  * @return true when `readIntRf` and `writeIntRf` are both set, or when
  *         `readFpRf` and `writeFpRf` are both set
  */
def needWakeup(cfg: ExuConfig): Boolean = {
  val intReadAndWrite = cfg.readIntRf && cfg.writeIntRf
  // Only consult the FP flags when the integer pair did not already match.
  if (intReadAndWrite) true else cfg.readFpRf && cfg.writeFpRf
}
/** Tells whether config `a` reads a register file that config `b` writes.
  *
  * @param a the consuming execution-unit configuration (its read flags are checked)
  * @param b the producing execution-unit configuration (its write flags are checked)
  * @return true when `a.readIntRf` and `b.writeIntRf` are both set, or when
  *         `a.readFpRf` and `b.writeFpRf` are both set
  */
def needData(a: ExuConfig, b: ExuConfig): Boolean = {
  val viaIntRf = a.readIntRf && b.writeIntRf
  // Fall back to the FP pairing only when the integer pairing does not hold.
  if (viaIntRf) true else a.readFpRf && b.writeFpRf
}
// val readPortIndex = RegNext(io.fromCtrlBlock.readPortIndex)
val readPortIndex = Seq(1, 2, 3, 0, 1, 2, 3)
val reservationStations = exeUnits.map(_.config).zipWithIndex.map({ case (cfg, i) =>
var certainLatency = -1
if (cfg == MulDivExeUnitCfg) {// NOTE: dirty code, add mul to fast wake up, but leave div
certainLatency = mulCfg.latency.latencyVal.get
} else if (cfg.hasCertainLatency) {
certainLatency = cfg.latency.latencyVal.get
}
val readIntRf = cfg.readIntRf
val inBlockWbData = exeUnits.filter(e => e.config.hasCertainLatency && readIntRf).map(a => (a.config, a.io.out.bits.data))
val fastDatas = inBlockWbData ++ fastWakeUpIn.zip(io.wakeUpIn.fast.map(_.bits.data)) ++
(if (cfg == AluExeUnitCfg && EnableLoadFastWakeUp) memFastWakeUpIn.zip(io.memFastWakeUp.fast.map(_.bits.data)) else Seq())
val fastPortsCnt = fastDatas.length
val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency && readIntRf).map(a => (a.config, a.io.out))
// only load+mul need slowPorts
val slowPorts = intWbArbiter.io.out.drop(4)
val extraListenPortsCnt = slowPorts.length
val feedback = (cfg == LdExeUnitCfg) || (cfg == StExeUnitCfg)
for (exe <- exeUnits) {
exe.io.redirect <> redirect
exe.io.flush <> flush
}
println(s"${i}: exu:${cfg.name} fastPortsCnt: ${fastPortsCnt} slowPorts: ${extraListenPortsCnt} delay:${certainLatency} feedback:${feedback}")
val jmp_rs = Module(new ReservationStation("rs_jmp", JumpExeUnitCfg, IssQueSize, XLEN, 6, 4, -1, false, false, 1, 1))
val mul_rs_0 = Module(new ReservationStation("rs_mul_0", MulDivExeUnitCfg, IssQueSize, XLEN, 6, 4, 2, false, false, 1, 1))
val mul_rs_1 = Module(new ReservationStation("rs_mul_1", MulDivExeUnitCfg, IssQueSize, XLEN, 6, 4, 2, false, false, 1, 1))
val alu_rs_0 = Module(new ReservationStation("rs_alu_0", AluExeUnitCfg, 2*IssQueSize, XLEN,
8, 4, 0, true, false, 2, 2
))
val alu_rs_1 = Module(new ReservationStation("rs_alu_1", AluExeUnitCfg, 2*IssQueSize, XLEN,
8, 4, 0, true, false, 2, 2
))
val rs = Module(new ReservationStation(s"rs_${cfg.name}", cfg, IssQueSize, XLEN,
fastDatas.map(_._1).length,
slowPorts.length,
fixedDelay = certainLatency,
fastWakeup = certainLatency >= 0,
feedback = feedback
))
val aluFastData = VecInit(exeUnits.drop(3).map(_.io.out.bits.data))
val mulFastData = VecInit(exeUnits.drop(1).take(2).map(_.io.out.bits.data))
val memFastData = VecInit(io.memFastWakeUp.fast.map(_.bits.data))
val slowPorts = intWbArbiter.io.out.drop(4)
jmp_rs.io.numExist <> io.toCtrlBlock.numExist(0)
jmp_rs.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl(0))
jmp_rs.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.drop(2).take(2).map(_.data))
jmp_rs.io.jumpPc := io.fromCtrlBlock.jumpPc
jmp_rs.io.jalr_target := io.fromCtrlBlock.jalr_target
jmp_rs.io.fastDatas <> mulFastData ++ aluFastData
jmp_rs.io.deq(0) <> jmpExeUnit.io.fromInt
mul_rs_0.io.numExist <> io.toCtrlBlock.numExist(1)
mul_rs_0.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl(1))
mul_rs_0.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.drop(4).take(2).map(_.data))
mul_rs_0.io.fastDatas <> mulFastData ++ aluFastData
mul_rs_0.io.deq(0) <> mduExeUnits(0).io.fromInt
mul_rs_1.io.numExist <> io.toCtrlBlock.numExist(2)
mul_rs_1.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl(2))
mul_rs_1.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.drop(6).take(2).map(_.data))
mul_rs_1.io.fastDatas <> mulFastData ++ aluFastData
mul_rs_1.io.deq(0) <> mduExeUnits(1).io.fromInt
io.toCtrlBlock.numExist(3) := alu_rs_0.io.numExist >> 1
io.toCtrlBlock.numExist(4) := alu_rs_0.io.numExist >> 1
alu_rs_0.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl.drop(3).take(2))
alu_rs_0.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.take(2).map(_.data))
alu_rs_0.io.srcRegValue(1) <> VecInit(intRf.io.readPorts.drop(2).take(2).map(_.data))
alu_rs_0.io.fastDatas <> mulFastData ++ aluFastData ++ memFastData
alu_rs_0.io.deq(0) <> aluExeUnits(0).io.fromInt
alu_rs_0.io.deq(1) <> aluExeUnits(1).io.fromInt
io.toCtrlBlock.numExist(5) := alu_rs_1.io.numExist >> 1
io.toCtrlBlock.numExist(6) := alu_rs_1.io.numExist >> 1
alu_rs_1.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl.drop(5))
alu_rs_1.io.srcRegValue(0) <> VecInit(intRf.io.readPorts.drop(4).take(2).map(_.data))
alu_rs_1.io.srcRegValue(1) <> VecInit(intRf.io.readPorts.drop(6).take(2).map(_.data))
alu_rs_1.io.fastDatas <> mulFastData ++ aluFastData ++ memFastData
alu_rs_1.io.deq(0) <> aluExeUnits(2).io.fromInt
alu_rs_1.io.deq(1) <> aluExeUnits(3).io.fromInt
val reservationStations = Seq(jmp_rs, mul_rs_0, mul_rs_1, alu_rs_0, alu_rs_1)
val aluFastUop = Wire(Vec(4, ValidIO(new MicroOp)))
val mulFastUop = Wire(Vec(2, ValidIO(new MicroOp)))
val memFastUop = io.memFastWakeUp.fastUops
aluFastUop(0) := alu_rs_0.io.fastUopOut(0)
aluFastUop(1) := alu_rs_0.io.fastUopOut(1)
aluFastUop(2) := alu_rs_1.io.fastUopOut(0)
aluFastUop(3) := alu_rs_1.io.fastUopOut(1)
mulFastUop(0) := mul_rs_0.io.fastUopOut(0)
mulFastUop(1) := mul_rs_1.io.fastUopOut(0)
for (rs <- reservationStations) {
rs.io.redirect <> redirect
rs.io.flush <> flush // TODO: remove it
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl(i)
rs.io.srcRegValue := DontCare
val src1Value = VecInit((0 until 4).map(i => intRf.io.readPorts(i * 2).data))
val src2Value = VecInit((0 until 4).map(i => intRf.io.readPorts(i * 2 + 1).data))
rs.io.srcRegValue(0) := src1Value(readPortIndex(i))
if (cfg.intSrcCnt > 1) rs.io.srcRegValue(1) := src2Value(readPortIndex(i))
if (cfg == JumpExeUnitCfg) {
rs.io.jumpPc := io.fromCtrlBlock.jumpPc
rs.io.jalr_target := io.fromCtrlBlock.jalr_target
}
rs.io.fastDatas <> fastDatas.map(_._2)
rs.io.redirect <> redirect
rs.io.flush <> flush
rs.io.slowPorts := slowPorts
exeUnits(i).io.redirect <> redirect
exeUnits(i).io.fromInt <> rs.io.deq
exeUnits(i).io.flush <> flush
// rs.io.memfeedback := DontCare
rs.suggestName(s"rs_${cfg.name}")
rs
})
for (rs <- reservationStations) {
val inBlockUops = reservationStations.filter(x =>
x.exuCfg.hasCertainLatency && x.exuCfg.writeIntRf
).map(x => {
val raw = WireInit(x.io.fastUopOut)
raw.valid := x.io.fastUopOut.valid && raw.bits.ctrl.rfWen
raw
})
rs.io.fastUopsIn <> inBlockUops ++ io.wakeUpIn.fastUops ++
(if (rs.exuCfg == AluExeUnitCfg && EnableLoadFastWakeUp) io.memFastWakeUp.fastUops else Seq())
}
jmp_rs.io.fastUopsIn := mulFastUop ++ aluFastUop
mul_rs_0.io.fastUopsIn := mulFastUop ++ aluFastUop
mul_rs_1.io.fastUopsIn := mulFastUop ++ aluFastUop
alu_rs_0.io.fastUopsIn := mulFastUop ++ aluFastUop ++ memFastUop
alu_rs_1.io.fastUopsIn := mulFastUop ++ aluFastUop ++ memFastUop
io.wakeUpOut.fastUops <> reservationStations.filter(
rs => rs.exuCfg.hasCertainLatency
).map(_.io.fastUopOut).map(intUopValid)
io.wakeUpOut.fastUops := mulFastUop ++ aluFastUop
io.wakeUpOut.fast <> exeUnits.filter(
x => x.config.hasCertainLatency
......@@ -280,7 +279,7 @@ class IntegerBlock
difftest.io.gpr := VecInit(intRf.io.debug_rports.map(_.data))
}
val rsDeqCount = PopCount(reservationStations.map(_.io.deq.valid))
val rsDeqCount = PopCount(reservationStations.map(_.io.deq(0).valid))
XSPerfAccumulate("int_rs_deq_count", rsDeqCount)
XSPerfHistogram("int_rs_deq_count", rsDeqCount, true.B, 0, 7, 1)
}
......@@ -153,17 +153,17 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
slowPorts.length,
fixedDelay = certainLatency,
fastWakeup = certainLatency >= 0,
feedback = feedback)
feedback = feedback, 1, 1)
)
rs.io.redirect <> redirect // TODO: remove it
rs.io.flush <> io.fromCtrlBlock.flush // TODO: remove it
rs.io.numExist <> io.toCtrlBlock.numExist(i)
rs.io.fromDispatch <> io.fromCtrlBlock.enqIqCtrl(i)
rs.io.fromDispatch <> VecInit(io.fromCtrlBlock.enqIqCtrl(i))
rs.io.srcRegValue(0) := io.fromIntBlock.readIntRf(readPortIndex(i)).data
rs.io.srcRegValue(0)(0) := io.fromIntBlock.readIntRf(readPortIndex(i)).data
if (i >= exuParameters.LduCnt) {
rs.io.srcRegValue(1) := io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data
rs.io.srcRegValue(0)(1) := io.fromIntBlock.readIntRf(readPortIndex(i) + 1).data
rs.io.fpRegValue := io.fromFpBlock.readFpRf(i - exuParameters.LduCnt).data
}
......@@ -222,7 +222,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits(i).io.isFirstIssue := reservationStations(i).io.isFirstIssue // NOTE: just for dtlb's perf cnt
loadUnits(i).io.dtlb <> dtlb.io.requestor(i)
// get input from dispatch
loadUnits(i).io.ldin <> reservationStations(i).io.deq
loadUnits(i).io.ldin <> reservationStations(i).io.deq(0)
// dcache access
loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
// forward
......@@ -255,7 +255,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
stu.io.rsIdx <> rs.io.rsIdx
stu.io.isFirstIssue <> rs.io.isFirstIssue // NOTE: just for dtlb's perf cnt
stu.io.dtlb <> dtlbReq
stu.io.stin <> rs.io.deq
stu.io.stin <> rs.io.deq(0)
stu.io.lsq <> lsq.io.storeIn(i)
// Lsq to load unit's rs
......@@ -264,12 +264,12 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
lsq.io.storeDataIn(i) := rs.io.stData
// sync issue info to rs
lsq.io.storeIssue(i).valid := rs.io.deq.valid
lsq.io.storeIssue(i).bits := rs.io.deq.bits
lsq.io.storeIssue(i).valid := rs.io.deq(0).valid
lsq.io.storeIssue(i).bits := rs.io.deq(0).bits
// sync issue info to store set LFST
io.toCtrlBlock.stIn(i).valid := rs.io.deq.valid
io.toCtrlBlock.stIn(i).bits := rs.io.deq.bits
io.toCtrlBlock.stIn(i).valid := rs.io.deq(0).valid
io.toCtrlBlock.stIn(i).bits := rs.io.deq(0).bits
io.toCtrlBlock.stOut(i).valid := stu.io.stout.valid
io.toCtrlBlock.stOut(i).bits := stu.io.stout.bits
......@@ -323,21 +323,21 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val atomic_rs0 = exuParameters.LduCnt + 0
val atomic_rs1 = exuParameters.LduCnt + 1
val st0_atomics = reservationStations(atomic_rs0).io.deq.valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.deq.bits.uop.ctrl.fuType)
val st1_atomics = reservationStations(atomic_rs1).io.deq.valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.deq.bits.uop.ctrl.fuType)
val st0_atomics = reservationStations(atomic_rs0).io.deq(0).valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.deq(0).bits.uop.ctrl.fuType)
val st1_atomics = reservationStations(atomic_rs1).io.deq(0).valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.deq(0).bits.uop.ctrl.fuType)
val st0_data_atomics = reservationStations(atomic_rs0).io.stData.valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.stData.bits.uop.ctrl.fuType)
val st1_data_atomics = reservationStations(atomic_rs1).io.stData.valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.stData.bits.uop.ctrl.fuType)
when (st0_atomics) {
reservationStations(atomic_rs0).io.deq.ready := atomicsUnit.io.in.ready
reservationStations(atomic_rs0).io.deq(0).ready := atomicsUnit.io.in.ready
storeUnits(0).io.stin.valid := false.B
state := s_atomics_0
assert(!st1_atomics)
}
when (st1_atomics) {
reservationStations(atomic_rs1).io.deq.ready := atomicsUnit.io.in.ready
reservationStations(atomic_rs1).io.deq(0).ready := atomicsUnit.io.in.ready
storeUnits(1).io.stin.valid := false.B
state := s_atomics_1
......@@ -349,7 +349,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
}
atomicsUnit.io.in.valid := st0_atomics || st1_atomics
atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq.bits, reservationStations(atomic_rs1).io.deq.bits)
atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq(0).bits, reservationStations(atomic_rs1).io.deq(0).bits)
atomicsUnit.io.storeDataIn.valid := st0_data_atomics || st1_data_atomics
atomicsUnit.io.storeDataIn.bits := Mux(st0_data_atomics, reservationStations(atomic_rs0).io.stData.bits, reservationStations(atomic_rs1).io.stData.bits)
atomicsUnit.io.rsIdx := Mux(st0_atomics, reservationStations(atomic_rs0).io.rsIdx, reservationStations(atomic_rs1).io.rsIdx)
......@@ -394,8 +394,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io.memInfo.lqFull := RegNext(lsq.io.lqFull)
io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)
val ldDeqCount = PopCount(reservationStations.take(2).map(_.io.deq.valid))
val stDeqCount = PopCount(reservationStations.drop(2).map(_.io.deq.valid))
val ldDeqCount = PopCount(reservationStations.take(2).map(_.io.deq(0).valid))
val stDeqCount = PopCount(reservationStations.drop(2).map(_.io.deq(0).valid))
val rsDeqCount = ldDeqCount + stDeqCount
XSPerfAccumulate("load_rs_deq_count", ldDeqCount)
XSPerfHistogram("load_rs_deq_count", ldDeqCount, true.B, 1, 2, 1)
......
......@@ -41,6 +41,8 @@ class ReservationStation
fixedDelay: Int,
fastWakeup: Boolean,
feedback: Boolean,
enqNum: Int,
deqNum: Int
)(implicit p: Parameters) extends XSModule {
val iqIdxWidth = log2Up(iqSize+1)
val nonBlocked = if (exuCfg == MulDivExeUnitCfg) false else fixedDelay >= 0
......@@ -50,8 +52,8 @@ class ReservationStation
val config = RSConfig(
name = myName,
numEntries = iqSize,
numEnq = 1,
numDeq = 1,
numEnq = enqNum,
numDeq = deqNum,
numSrc = srcNum,
dataBits = srcLen,
dataIdBits = PhyRegIdxWidth,
......@@ -68,18 +70,20 @@ class ReservationStation
val io = IO(new Bundle {
val numExist = Output(UInt(iqIdxWidth.W))
val fromDispatch = Flipped(DecoupledIO(new MicroOp))
val deq = DecoupledIO(new ExuInput)
// enq
val fromDispatch = Vec(config.numEnq, Flipped(DecoupledIO(new MicroOp)))
val srcRegValue = Vec(config.numEnq, Input(Vec(srcNum, UInt(srcLen.W))))
val fpRegValue = if (config.delayedRf) Input(UInt(srcLen.W)) else null
// deq
val deq = Vec(config.numDeq, DecoupledIO(new ExuInput))
val stData = if (exuCfg == StExeUnitCfg) ValidIO(new StoreDataBundle) else null
val srcRegValue = Input(Vec(srcNum, UInt(srcLen.W)))
val stIssuePtr = if (config.checkWaitBit) Input(new SqPtr()) else null
val fpRegValue = if (config.delayedRf) Input(UInt(srcLen.W)) else null
val jumpPc = if(exuCfg == JumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val jalr_target = if(exuCfg == JumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
val fastUopOut = ValidIO(new MicroOp)
val fastUopOut = Vec(config.numDeq, ValidIO(new MicroOp))
val fastUopsIn = Vec(config.numFastWakeup, Flipped(ValidIO(new MicroOp)))
val fastDatas = Vec(config.numFastWakeup, Input(UInt(srcLen.W)))
val slowPorts = Vec(slowPortsCnt, Flipped(ValidIO(new ExuOutput)))
......@@ -106,28 +110,33 @@ class ReservationStation
*/
// enqueue from dispatch
select.io.validVec := statusArray.io.isValid
io.fromDispatch.ready := select.io.allocate(0).valid
// agreement with dispatch: don't enqueue when io.redirect.valid
val do_enqueue = io.fromDispatch.fire() && !io.redirect.valid && !io.flush
select.io.allocate(0).ready := do_enqueue
statusArray.io.update(0).enable := do_enqueue
statusArray.io.update(0).addr := select.io.allocate(0).bits
statusArray.io.update(0).data.valid := true.B
val needFpSource = io.fromDispatch.bits.needRfRPort(1, 1, false)
statusArray.io.update(0).data.scheduled := (if (config.delayedRf) needFpSource else false.B)
statusArray.io.update(0).data.blocked := (if (config.checkWaitBit) io.fromDispatch.bits.cf.loadWaitBit else false.B)
statusArray.io.update(0).data.credit := (if (config.delayedRf) Mux(needFpSource, 2.U, 0.U) else 0.U)
statusArray.io.update(0).data.srcState := VecInit(io.fromDispatch.bits.srcIsReady.take(config.numSrc))
statusArray.io.update(0).data.psrc := VecInit(io.fromDispatch.bits.psrc.take(config.numSrc))
statusArray.io.update(0).data.srcType := VecInit(io.fromDispatch.bits.ctrl.srcType.take(config.numSrc))
statusArray.io.update(0).data.roqIdx := io.fromDispatch.bits.roqIdx
statusArray.io.update(0).data.sqIdx := io.fromDispatch.bits.sqIdx
val doEnqueue = Wire(Vec(config.numEnq, Bool()))
val needFpSource = Wire(Vec(config.numEnq, Bool()))
for (i <- 0 until config.numEnq) {
io.fromDispatch(i).ready := select.io.allocate(i).valid
// agreement with dispatch: don't enqueue when io.redirect.valid
doEnqueue(i) := io.fromDispatch(i).fire() && !io.redirect.valid && !io.flush
select.io.allocate(i).ready := doEnqueue(i)
statusArray.io.update(i).enable := doEnqueue(i)
statusArray.io.update(i).addr := select.io.allocate(i).bits
statusArray.io.update(i).data.valid := true.B
needFpSource(i) := io.fromDispatch(i).bits.needRfRPort(1, 1, false)
statusArray.io.update(i).data.scheduled := (if (config.delayedRf) needFpSource(i) else false.B)
statusArray.io.update(i).data.blocked := (if (config.checkWaitBit) io.fromDispatch(i).bits.cf.loadWaitBit else false.B)
statusArray.io.update(i).data.credit := (if (config.delayedRf) Mux(needFpSource(i), 2.U, 0.U) else 0.U)
statusArray.io.update(i).data.srcState := VecInit(io.fromDispatch(i).bits.srcIsReady.take(config.numSrc))
statusArray.io.update(i).data.psrc := VecInit(io.fromDispatch(i).bits.psrc.take(config.numSrc))
statusArray.io.update(i).data.srcType := VecInit(io.fromDispatch(i).bits.ctrl.srcType.take(config.numSrc))
statusArray.io.update(i).data.roqIdx := io.fromDispatch(i).bits.roqIdx
statusArray.io.update(i).data.sqIdx := io.fromDispatch(i).bits.sqIdx
payloadArray.io.write(i).enable := doEnqueue(i)
payloadArray.io.write(i).addr := select.io.allocate(i).bits
payloadArray.io.write(i).data := io.fromDispatch(i).bits
}
// when config.checkWaitBit is set, we need to block issue until the corresponding store issues
if (config.checkWaitBit) {
statusArray.io.stIssuePtr := io.stIssuePtr
}
payloadArray.io.write(0).enable := do_enqueue
payloadArray.io.write(0).addr := select.io.allocate(0).bits
payloadArray.io.write(0).data := io.fromDispatch.bits
// wakeup from other RS or function units
val fastNotInSlowWakeup = exuCfg match {
case LdExeUnitCfg => io.fastUopsIn.drop(2).take(4)
......@@ -159,36 +168,38 @@ class ReservationStation
*/
// select the issue instructions
select.io.request := statusArray.io.canIssue
select.io.grant(0).ready := io.deq.ready
if (config.hasFeedback) {
statusArray.io.issueGranted(0).valid := select.io.grant(0).fire
statusArray.io.issueGranted(0).bits := select.io.grant(0).bits
statusArray.io.deqResp(0).valid := io.memfeedback.valid
statusArray.io.deqResp(0).bits.rsMask := UIntToOH(io.memfeedback.bits.rsIdx)
statusArray.io.deqResp(0).bits.success := io.memfeedback.bits.hit
}
else {
statusArray.io.issueGranted(0).valid := select.io.grant(0).fire
statusArray.io.issueGranted(0).bits := select.io.grant(0).bits
statusArray.io.deqResp(0).valid := select.io.grant(0).fire
statusArray.io.deqResp(0).bits.rsMask := select.io.grant(0).bits
statusArray.io.deqResp(0).bits.success := io.deq.ready
}
payloadArray.io.read(0).addr := select.io.grant(0).bits
if (fixedDelay >= 0) {
val wakeupQueue = Module(new WakeupQueue(fixedDelay))
val fuCheck = (if (exuCfg == MulDivExeUnitCfg) payloadArray.io.read(0).data.ctrl.fuType === FuType.mul else true.B)
wakeupQueue.io.in.valid := select.io.grant(0).fire && fuCheck
wakeupQueue.io.in.bits := payloadArray.io.read(0).data
wakeupQueue.io.redirect := io.redirect
wakeupQueue.io.flush := io.flush
io.fastUopOut := wakeupQueue.io.out
}
else {
io.fastUopOut.valid := false.B
io.fastUopOut.bits := DontCare
for (i <- 0 until config.numDeq) {
select.io.grant(i).ready := io.deq(i).ready
if (config.hasFeedback) {
require(config.numDeq == 1)
statusArray.io.issueGranted(0).valid := select.io.grant(0).fire
statusArray.io.issueGranted(0).bits := select.io.grant(0).bits
statusArray.io.deqResp(0).valid := io.memfeedback.valid
statusArray.io.deqResp(0).bits.rsMask := UIntToOH(io.memfeedback.bits.rsIdx)
statusArray.io.deqResp(0).bits.success := io.memfeedback.bits.hit
}
else {
statusArray.io.issueGranted(i).valid := select.io.grant(i).fire
statusArray.io.issueGranted(i).bits := select.io.grant(i).bits
statusArray.io.deqResp(i).valid := select.io.grant(i).fire
statusArray.io.deqResp(i).bits.rsMask := select.io.grant(i).bits
statusArray.io.deqResp(i).bits.success := io.deq(i).ready
}
payloadArray.io.read(i).addr := select.io.grant(i).bits
if (fixedDelay >= 0) {
val wakeupQueue = Module(new WakeupQueue(fixedDelay))
val fuCheck = (if (exuCfg == MulDivExeUnitCfg) payloadArray.io.read(i).data.ctrl.fuType === FuType.mul else true.B)
wakeupQueue.io.in.valid := select.io.grant(i).fire && fuCheck
wakeupQueue.io.in.bits := payloadArray.io.read(i).data
wakeupQueue.io.redirect := io.redirect
wakeupQueue.io.flush := io.flush
io.fastUopOut(i) := wakeupQueue.io.out
}
else {
io.fastUopOut(i).valid := false.B
io.fastUopOut(i).bits := DontCare
}
}
// select where the source data comes from (regfile or imm)
// for read-after-issue, it's done over the selected uop
// for read-before-issue, it's done over the enqueue uop (and store the imm in dataArray to save space)
......@@ -213,10 +224,14 @@ class ReservationStation
}
data
}
val lastAllocateUop = RegNext(io.fromDispatch.bits)
val immBypassedData = VecInit(extractImm(lastAllocateUop).zip(io.srcRegValue).map {
case (imm, reg_data) => Mux(imm.valid, imm.bits, reg_data)
})
// lastAllocateUop: Vec(config.numEnq, new MicroOp)
val lastAllocateUop = RegNext(VecInit(io.fromDispatch.map(_.bits)))
val immBypassedData = Wire(Vec(config.numEnq, Vec(config.numSrc, UInt(config.dataBits.W))))
for (((uop, data), bypass) <- lastAllocateUop.zip(io.srcRegValue).zip(immBypassedData)) {
bypass := extractImm(uop).zip(data).map {
case (imm, reg_data) => Mux(imm.valid, imm.bits, reg_data)
}
}
/**
* S1: Data broadcast (from Regfile and FUs) and read
......@@ -224,13 +239,15 @@ class ReservationStation
* Note: this is only needed when read-before-issue
*/
// dispatch data: the next cycle after enqueue
dataArray.io.write(0).enable := RegNext(do_enqueue)
dataArray.io.write(0).mask := RegNext(statusArray.io.update(0).data.srcState)
dataArray.io.write(0).addr := RegNext(select.io.allocate(0).bits)
dataArray.io.write(0).data := immBypassedData
if (config.delayedRf) {
dataArray.io.delayedWrite(0).valid := RegNext(RegNext(do_enqueue && needFpSource))
dataArray.io.delayedWrite(0).bits := io.fpRegValue
for (i <- 0 until config.numEnq) {
dataArray.io.write(i).enable := RegNext(doEnqueue(i))
dataArray.io.write(i).mask := RegNext(statusArray.io.update(i).data.srcState)
dataArray.io.write(i).addr := RegNext(select.io.allocate(i).bits)
dataArray.io.write(i).data := immBypassedData(i)
if (config.delayedRf) {
dataArray.io.delayedWrite(i).valid := RegNext(RegNext(doEnqueue(i) && needFpSource(i)))
dataArray.io.delayedWrite(i).bits := io.fpRegValue
}
}
// data broadcast: from function units (only slow wakeup data is needed)
val broadcastValid = RegNext(VecInit(fastNotInSlowWakeup.map(_.valid))) ++ io.slowPorts.map(_.valid)
......@@ -254,27 +271,38 @@ class ReservationStation
/**
* S1: read data from regfile
*/
dataArray.io.read(0).addr := select.io.grant(0).bits
// for read-before-issue, we need to bypass the enqueue data here
// for read-after-issue, we need to bypass the imm here
// check enq data bypass (another form of broadcast except that we know where it hits) here
val enqRegSelected = RegNext(select.io.allocate(0).bits) === select.io.grant(0).bits
val enqSrcStateReg = RegNext(statusArray.io.update(0).data.srcState)
val enqBypassValid = enqSrcStateReg.map(_ && enqRegSelected)
// dequeue data should be bypassed
val deqUop = payloadArray.io.read(0).data
val deqDataRead = dataArray.io.read(0).data
val deqData = VecInit(enqBypassValid.zip(immBypassedData).zip(deqDataRead).map {
case ((v, d), r) => Mux(v, d, r)
})
val s1_out = Wire(Vec(config.numDeq, Decoupled(new ExuInput)))
for (i <- 0 until config.numDeq) {
dataArray.io.read(i).addr := select.io.grant(i).bits
// for read-before-issue, we need to bypass the enqueue data here
// for read-after-issue, we need to bypass the imm here
// check enq data bypass (another form of broadcast except that we know where it hits) here
// enqRegSelected: Vec(config.numEnq, Bool())
val enqRegSelected = VecInit(select.io.allocate.map(a => RegNext(a.bits) === select.io.grant(i).bits))
// enqSrcStateReg: Vec(config.numEnq, Vec(config.numSrc, Bool()))
// [i][j]: i-th enqueue, j-th source state
val enqSrcStateReg = RegNext(VecInit(statusArray.io.update.map(_.data.srcState)))
// enqBypassValid: Vec(config.numEnq, Vec(config.numSrc, Bool()))
val enqBypassValid = enqSrcStateReg.zip(enqRegSelected).map{ case (state, sel) => VecInit(state.map(_ && sel)) }
// bypass data for config.numDeq
val deqBypassValid = Mux1H(enqRegSelected, enqBypassValid)
val deqBypassData = Mux1H(enqRegSelected, immBypassedData)
// dequeue data should be bypassed
val deqUop = payloadArray.io.read(i).data
val deqDataRead = dataArray.io.read(i).data
val deqData = VecInit(deqBypassValid.zip(deqBypassData).zip(deqDataRead).map {
case ((v, d), r) => Mux(v, d, r)
})
val s1_out = Wire(Decoupled(new ExuInput))
s1_out.valid := select.io.grant(0).valid && !deqUop.roqIdx.needFlush(io.redirect, io.flush)
s1_out.bits := DontCare
for (i <- 0 until config.numSrc) {
s1_out.bits.src(i) := deqData(i)
s1_out(i).valid := select.io.grant(i).valid && !deqUop.roqIdx.needFlush(io.redirect, io.flush)
s1_out(i).bits := DontCare
for (j <- 0 until config.numSrc) {
s1_out(i).bits.src(j) := deqData(j)
}
s1_out(i).bits.uop := deqUop
}
s1_out.bits.uop := deqUop
/**
* S1: detect bypass from fast wakeup
......@@ -287,47 +315,53 @@ class ReservationStation
}
}
val fastWakeupMatchRegVec = RegNext(fastWakeupMatchVec)
val targetFastWakeupMatch = Mux1H(select.io.grant(0).bits, fastWakeupMatchRegVec)
val wakeupBypassMask = Wire(Vec(config.numFastWakeup, Vec(config.numSrc, Bool())))
for (i <- 0 until config.numFastWakeup) {
wakeupBypassMask(i) := VecInit(targetFastWakeupMatch.map(_(i)))
}
// data: send to bypass network
// TODO: these should be done outside RS
val bypassNetwork = Module(new BypassNetwork(config.numSrc, config.numFastWakeup, config.dataBits, config.optBuf))
bypassNetwork.io.hold := !io.deq.ready
bypassNetwork.io.source := s1_out.bits.src.take(config.numSrc)
bypassNetwork.io.bypass.zip(wakeupBypassMask.zip(io.fastDatas)).map { case (by, (m, d)) =>
by.valid := m
by.data := d
}
for (i <- 0 until config.numDeq) {
val targetFastWakeupMatch = Mux1H(select.io.grant(i).bits, fastWakeupMatchRegVec)
val wakeupBypassMask = Wire(Vec(config.numFastWakeup, Vec(config.numSrc, Bool())))
for (j <- 0 until config.numFastWakeup) {
wakeupBypassMask(j) := VecInit(targetFastWakeupMatch.map(_(j)))
}
// data: send to bypass network
// TODO: these should be done outside RS
val bypassNetwork = Module(new BypassNetwork(config.numSrc, config.numFastWakeup, config.dataBits, config.optBuf))
bypassNetwork.io.hold := !io.deq(i).ready
bypassNetwork.io.source := s1_out(i).bits.src.take(config.numSrc)
bypassNetwork.io.bypass.zip(wakeupBypassMask.zip(io.fastDatas)).map { case (by, (m, d)) =>
by.valid := m
by.data := d
}
/**
* S2: to function units
*/
// payload: send to function units
// TODO: these should be done outside RS
PipelineConnect(s1_out, io.deq, io.deq.ready || io.deq.bits.uop.roqIdx.needFlush(io.redirect, io.flush), false.B)
val pipeline_fire = s1_out.valid && io.deq.ready
if (config.hasFeedback) {
io.rsIdx := RegEnable(OHToUInt(select.io.grant(0).bits), pipeline_fire)
io.isFirstIssue := false.B
}
/**
* S2: to function units
*/
// payload: send to function units
// TODO: these should be done outside RS
PipelineConnect(s1_out(i), io.deq(i), io.deq(i).ready || io.deq(i).bits.uop.roqIdx.needFlush(io.redirect, io.flush), false.B)
val pipeline_fire = s1_out(i).valid && io.deq(i).ready
if (config.hasFeedback) {
io.rsIdx := RegEnable(OHToUInt(select.io.grant(i).bits), pipeline_fire)
io.isFirstIssue := false.B
}
for (i <- 0 until config.numSrc) {
io.deq.bits.src(i) := bypassNetwork.io.target(i)
}
for (j <- 0 until config.numSrc) {
io.deq(i).bits.src(j) := bypassNetwork.io.target(j)
}
// legacy things
if (exuCfg == StExeUnitCfg) {
io.stData.valid := io.deq.valid
io.stData.bits.data := io.deq.bits.src(1)
io.stData.bits.uop := io.deq.bits.uop
// legacy things
if (exuCfg == StExeUnitCfg) {
io.stData.valid := io.deq(i).valid
io.stData.bits.data := io.deq(i).bits.src(1)
io.stData.bits.uop := io.deq(i).bits.uop
}
}
// logs
XSDebug(io.fromDispatch.valid && !io.fromDispatch.ready, p"enq blocked, roqIdx ${io.fromDispatch.bits.roqIdx}\n")
XSDebug(io.fromDispatch.fire(), p"enq fire, roqIdx ${io.fromDispatch.bits.roqIdx}, srcState ${Binary(io.fromDispatch.bits.srcState.asUInt)}\n")
XSDebug(io.deq.fire(), p"deq fire, roqIdx ${io.deq.bits.uop.roqIdx}\n")
XSDebug(io.deq.valid && !io.deq.ready, p"deq blocked, roqIdx ${io.deq.bits.uop.roqIdx}\n")
for (dispatch <- io.fromDispatch) {
XSDebug(dispatch.valid && !dispatch.ready, p"enq blocked, roqIdx ${dispatch.bits.roqIdx}\n")
XSDebug(dispatch.fire(), p"enq fire, roqIdx ${dispatch.bits.roqIdx}, srcState ${Binary(dispatch.bits.srcState.asUInt)}\n")
}
for (deq <- io.deq) {
XSDebug(deq.fire(), p"deq fire, roqIdx ${deq.bits.uop.roqIdx}\n")
XSDebug(deq.valid && !deq.ready, p"deq blocked, roqIdx ${deq.bits.uop.roqIdx}\n")
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册