Unverified commit 64886eef authored by William Wang, committed by GitHub

mem: disable l2l forward by default (#1283)

Parent 9d4e1137
......@@ -161,6 +161,7 @@ case class XSCoreParameters
  StorePipelineWidth: Int = 2,
  StoreBufferSize: Int = 16,
  StoreBufferThreshold: Int = 7,
  EnableLoadToLoadForward: Boolean = false,
  EnableFastForward: Boolean = false,
  EnableLdVioCheckAfterReset: Boolean = true,
  RefillSize: Int = 512,
......@@ -377,6 +378,7 @@ trait HasXSParameter {
  val StorePipelineWidth = coreParams.StorePipelineWidth
  val StoreBufferSize = coreParams.StoreBufferSize
  val StoreBufferThreshold = coreParams.StoreBufferThreshold
  val EnableLoadToLoadForward = coreParams.EnableLoadToLoadForward
  val EnableFastForward = coreParams.EnableFastForward
  val EnableLdVioCheckAfterReset = coreParams.EnableLdVioCheckAfterReset
  val RefillSize = coreParams.RefillSize
......
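The two hunks above add an EnableLoadToLoadForward parameter to XSCoreParameters (default false) and expose it through HasXSParameter, so the forwarding hardware below is only generated when a configuration opts in. A minimal standalone Scala sketch of that case-class parameter pattern (a hypothetical CoreParams stand-in, not the actual XiangShan config machinery):

// Hypothetical, simplified stand-in for XSCoreParameters; the real class has
// many more fields and is consumed through the HasXSParameter trait.
case class CoreParams(
  LoadPipelineWidth: Int = 2,
  EnableLoadToLoadForward: Boolean = false // off by default, as in this commit
)

object CoreParamsDemo extends App {
  val default = CoreParams()
  // A configuration that wants load-to-load forwarding back would override
  // the field, for example via copy() on the default parameter set.
  val withL2L = default.copy(EnableLoadToLoadForward = true)
  println(s"default = ${default.EnableLoadToLoadForward}, withL2L = ${withL2L.EnableLoadToLoadForward}")
}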
......@@ -577,34 +577,38 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
    // we reduce its latency for one cycle since it does not need to read
    // from data array. Timing to be optimized later.
    if (params.isLoad) {
      val ldFastDeq = Wire(io.deq(i).cloneType)
      // Condition: wakeup by load (to select load wakeup bits)
      val ldCanBeFast = VecInit(
        wakeupBypassMask.drop(exuParameters.AluCnt).take(exuParameters.LduCnt).map(_.asUInt.orR)
      ).asUInt
      ldFastDeq.valid := issueVec(i).valid && ldCanBeFast.orR
      ldFastDeq.ready := true.B
      ldFastDeq.bits.src := DontCare
      ldFastDeq.bits.uop := s1_out(i).bits.uop
      // when last cycle load has fast issue, cancel this cycle's normal issue and let it go
      val lastCycleLdFire = RegNext(ldFastDeq.valid && !s2_deq(i).valid && io.deq(i).ready)
      when (lastCycleLdFire) {
        s2_deq(i).valid := false.B
        s2_deq(i).ready := true.B
      }
      // For now, we assume deq.valid has higher priority than ldFastDeq.
      when (!s2_deq(i).valid) {
        io.deq(i).valid := ldFastDeq.valid
        io.deq(i).bits := ldFastDeq.bits
        s2_deq(i).ready := true.B
      }
      io.load.get.fastMatch(i) := Mux(s2_deq(i).valid, 0.U, ldCanBeFast)
      when (!s2_deq(i).valid) {
        io.feedback.get(i).rsIdx := s1_issue_index(i)
        io.feedback.get(i).isFirstIssue := s1_first_issue(i)
      if (EnableLoadToLoadForward) {
        val ldFastDeq = Wire(io.deq(i).cloneType)
        // Condition: wakeup by load (to select load wakeup bits)
        val ldCanBeFast = VecInit(
          wakeupBypassMask.drop(exuParameters.AluCnt).take(exuParameters.LduCnt).map(_.asUInt.orR)
        ).asUInt
        ldFastDeq.valid := issueVec(i).valid && ldCanBeFast.orR
        ldFastDeq.ready := true.B
        ldFastDeq.bits.src := DontCare
        ldFastDeq.bits.uop := s1_out(i).bits.uop
        // when last cycle load has fast issue, cancel this cycle's normal issue and let it go
        val lastCycleLdFire = RegNext(ldFastDeq.valid && !s2_deq(i).valid && io.deq(i).ready)
        when (lastCycleLdFire) {
          s2_deq(i).valid := false.B
          s2_deq(i).ready := true.B
        }
        // For now, we assume deq.valid has higher priority than ldFastDeq.
        when (!s2_deq(i).valid) {
          io.deq(i).valid := ldFastDeq.valid
          io.deq(i).bits := ldFastDeq.bits
          s2_deq(i).ready := true.B
        }
        io.load.get.fastMatch(i) := Mux(s2_deq(i).valid, 0.U, ldCanBeFast)
        when (!s2_deq(i).valid) {
          io.feedback.get(i).rsIdx := s1_issue_index(i)
          io.feedback.get(i).isFirstIssue := s1_first_issue(i)
        }
        XSPerfAccumulate(s"fast_load_deq_valid_$i", !s2_deq(i).valid && ldFastDeq.valid)
        XSPerfAccumulate(s"fast_load_deq_fire_$i", !s2_deq(i).valid && ldFastDeq.valid && io.deq(i).ready)
      } else {
        io.load.get.fastMatch(i) := DontCare
      }
      XSPerfAccumulate(s"fast_load_deq_valid_$i", !s2_deq(i).valid && ldFastDeq.valid)
      XSPerfAccumulate(s"fast_load_deq_fire_$i", !s2_deq(i).valid && ldFastDeq.valid && io.deq(i).ready)
    }
    io.deq(i).bits.uop.debugInfo.issueTime := GTimer()
......
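The reservation-station hunk above wraps the existing fast-dequeue logic in a Scala-level if (EnableLoadToLoadForward), so the hardware is simply not elaborated when the parameter is false and fastMatch is tied off with DontCare. A minimal Chisel sketch of that elaboration-time pattern, using hypothetical module and port names unrelated to the real RS:

import chisel3._

// Hypothetical, simplified module (not XiangShan code) illustrating the pattern:
// a Scala-level `if` decides at elaboration time whether the fast-path logic
// exists at all, unlike `when`, which would build a runtime mux in both cases.
class FastPathDemo(enableFastPath: Boolean) extends Module {
  val io = IO(new Bundle {
    val slowValid = Input(Bool())
    val fastValid = Input(Bool())
    val outValid  = Output(Bool())
    val fastMatch = Output(Bool())
  })

  io.outValid := io.slowValid
  if (enableFastPath) {
    // Fast path only generated when the parameter is true; the slow path
    // still wins when it is valid, mirroring the priority comment in the RS code.
    when (!io.slowValid) {
      io.outValid := io.fastValid
    }
    io.fastMatch := !io.slowValid && io.fastValid
  } else {
    // No fast-path hardware at all; tie the match signal off.
    io.fastMatch := DontCare
  }
}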
......@@ -59,24 +59,29 @@ class LoadUnit_S0(implicit p: Parameters) extends XSModule with HasDCacheParamet
  val s0_uop = io.in.bits.uop
  val imm12 = WireInit(s0_uop.ctrl.imm(11,0))
  // slow vaddr from non-load insts
  val slowpath_vaddr = io.in.bits.src(0) + SignExt(s0_uop.ctrl.imm(11,0), VAddrBits)
  val slowpath_mask = genWmask(slowpath_vaddr, s0_uop.ctrl.fuOpType(1,0))
  // fast vaddr from load insts
  val fastpath_vaddrs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
    io.fastpath(i).data + SignExt(s0_uop.ctrl.imm(11,0), VAddrBits)
  })))
  val fastpath_masks = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
    genWmask(fastpath_vaddrs(i), s0_uop.ctrl.fuOpType(1,0))
  })))
  val fastpath_vaddr = Mux1H(io.loadFastMatch, fastpath_vaddrs)
  val fastpath_mask = Mux1H(io.loadFastMatch, fastpath_masks)
  // select vaddr from 2 alus
  val s0_vaddr = Mux(io.loadFastMatch.orR, fastpath_vaddr, slowpath_vaddr)
  val s0_mask = Mux(io.loadFastMatch.orR, fastpath_mask, slowpath_mask)
  XSPerfAccumulate("load_to_load_forward", io.loadFastMatch.orR && io.in.fire())
  val s0_vaddr = WireInit(io.in.bits.src(0) + SignExt(s0_uop.ctrl.imm(11,0), VAddrBits))
  val s0_mask = WireInit(genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0)))
  if (EnableLoadToLoadForward) {
    // slow vaddr from non-load insts
    val slowpath_vaddr = io.in.bits.src(0) + SignExt(s0_uop.ctrl.imm(11,0), VAddrBits)
    val slowpath_mask = genWmask(slowpath_vaddr, s0_uop.ctrl.fuOpType(1,0))
    // fast vaddr from load insts
    val fastpath_vaddrs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      io.fastpath(i).data + SignExt(s0_uop.ctrl.imm(11,0), VAddrBits)
    })))
    val fastpath_masks = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      genWmask(fastpath_vaddrs(i), s0_uop.ctrl.fuOpType(1,0))
    })))
    val fastpath_vaddr = Mux1H(io.loadFastMatch, fastpath_vaddrs)
    val fastpath_mask = Mux1H(io.loadFastMatch, fastpath_masks)
    // select vaddr from 2 alus
    s0_vaddr := Mux(io.loadFastMatch.orR, fastpath_vaddr, slowpath_vaddr)
    s0_mask := Mux(io.loadFastMatch.orR, fastpath_mask, slowpath_mask)
    XSPerfAccumulate("load_to_load_forward", io.loadFastMatch.orR && io.in.fire())
  }
  val isSoftPrefetch = LSUOpType.isPrefetch(s0_uop.ctrl.fuOpType)
  val isSoftPrefetchRead = s0_uop.ctrl.fuOpType === LSUOpType.prefetch_r
......
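In LoadUnit_S0, the fast path forms the load address from another load's result selected by the one-hot loadFastMatch vector, and falls back to the register operand plus the sign-extended 12-bit immediate when no bit is set. A simplified, standalone Chisel sketch of that selection (hypothetical widths and port names; the real code computes one sum per load pipe and also derives the write mask):

import chisel3._
import chisel3.util._

// Hypothetical address-generation sketch: fastMatch is assumed one-hot (at most
// one bit set). Mux1H picks the matching load pipe's data as the base address;
// with no bit set, the ordinary register-file operand is used instead.
class LoadAddrGenDemo(numLoadPipes: Int = 2, vaddrBits: Int = 39) extends Module {
  val io = IO(new Bundle {
    val src       = Input(UInt(vaddrBits.W))                     // base from register file
    val fastData  = Input(Vec(numLoadPipes, UInt(vaddrBits.W)))  // data forwarded from load pipes
    val fastMatch = Input(UInt(numLoadPipes.W))                  // one-hot load-pipe select
    val imm12     = Input(UInt(12.W))
    val vaddr     = Output(UInt(vaddrBits.W))
  })

  // sign-extend the 12-bit immediate to the virtual address width
  val offset = Cat(Fill(vaddrBits - 12, io.imm12(11)), io.imm12)
  val base   = Mux(io.fastMatch.orR, Mux1H(io.fastMatch, io.fastData), io.src)
  io.vaddr := base + offset
}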