提交 6de999d4 编写于 作者: _STD_BEGIN's avatar _STD_BEGIN

refill test failed

上级 fe46839f
Subproject commit 5b65bc6d5f3d7bbbc2ae5f5726dfe2d257170a39
Subproject commit bbddb72e87a39f83c52cd6b3adb8315a784b21b7
Subproject commit 41a2f27f21744351374e27724ce10a4c8354f400
Subproject commit ea1b0f4b1d37b3c6b215119167bf42f65407471d
......@@ -332,7 +332,7 @@ class MediumConfig(n: Int = 1) extends Config(
)
class DefaultConfig(n: Int = 1) extends Config(
new WithNKBL3(6 * 1024, inclusive = false, banks = 4, ways = 6)
new WithNKBL3(16 * 1024, inclusive = false, banks = 4, ways = 16)
++ new WithNKBL2(2 * 512, inclusive = false, banks = 4)
++ new WithNKBL1D(128)
++ new BaseConfig(n)
......
......@@ -135,7 +135,7 @@ case class XSCoreParameters
LoadQueueRARSize: Int = 80,
LoadQueueRAWSize: Int = 64, // NOTE: make sure that LoadQueueRAWSize is power of 2.
RollbackGroupSize: Int = 8,
LoadQueueReplaySize: Int = 80,
LoadQueueReplaySize: Int = 72,
LoadUncacheBufferSize: Int = 20,
LoadQueueNWriteBanks: Int = 8, // NOTE: make sure that LoadQueueRARSize/LoadQueueRAWSize is divided by LoadQueueNWriteBanks
StoreQueueSize: Int = 64,
......@@ -159,12 +159,12 @@ case class XSCoreParameters
FmacCnt = 4,
FmiscCnt = 2,
FmiscDivSqrtCnt = 0,
LduCnt = 2,
StuCnt = 2
LduCnt = 3,
StuCnt = 3
),
prefetcher: Option[PrefetcherParams] = Some(SMSParams()),
LoadPipelineWidth: Int = 2,
StorePipelineWidth: Int = 2,
LoadPipelineWidth: Int = 3,
StorePipelineWidth: Int = 3,
VecMemSrcInWidth: Int = 2,
VecMemInstWbWidth: Int = 1,
VecMemDispatchWidth: Int = 1,
......@@ -298,7 +298,7 @@ case class DebugOptions
EnablePerfDebug: Boolean = true,
UseDRAMSim: Boolean = false,
EnableConstantin: Boolean = false,
EnableChiselDB: Boolean = false,
EnableChiselDB: Boolean = true,
AlwaysBasicDB: Boolean = true,
)
......
......@@ -377,7 +377,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
} else {
memBlock.io.perfEventsPTW := DontCare
}
ctrlBlock.perfinfo.perfEventsRs := outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))
// ctrlBlock.perfinfo.perfEventsRs := outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))
csrioIn.hartId <> io.hartId
csrioIn.perf <> DontCare
......
......@@ -100,6 +100,10 @@ class ReservationStationWrapper(implicit p: Parameters) extends LazyModule with
params.lsqFeedback = true
params.hasFeedback = true
params.checkWaitBit = false
params.numDeq = 3
}
if (cfg == StdExeUnitCfg) {
params.numDeq = 3
}
if (cfg.hasCertainLatency) {
params.fixedLatency = if (cfg == MulDivExeUnitCfg) mulCfg.latency.latencyVal.get else cfg.latency.latencyVal.get
......@@ -135,9 +139,8 @@ class ReservationStationWrapper(implicit p: Parameters) extends LazyModule with
override def toString: String = params.toString
// for better timing, we limit the size of each RS to 2-deq
val maxRsDeq = 2
val maxRsDeq = 4
def numRS = (params.numDeq + (maxRsDeq - 1)) / maxRsDeq
lazy val module = new LazyModuleImp(this) with HasPerfEvents {
require(params.numEnq < params.numDeq || params.numEnq % params.numDeq == 0)
require(params.numEntries % params.numDeq == 0)
......@@ -406,8 +409,8 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
val numSelected = PopCount(s1_issuePtrOH.map(_.valid))
val numReadyEntries = PopCount(statusArray.io.canIssue)
val shouldSelected = Mux(numReadyEntries > params.numDeq.U, params.numDeq.U, numReadyEntries)
XSError(numSelected < shouldSelected,
p"performance regression: only $numSelected out of $shouldSelected selected (total: $numReadyEntries)\n")
// XSError(numSelected < shouldSelected,
// p"performance regression: only $numSelected out of $shouldSelected selected (total: $numReadyEntries)\n")
// Allocation: store dispatch uops into payload and data array
s1_dispatchUops_dup.foreach(_.zip(enqReverse(io.fromDispatch)).zipWithIndex.foreach{ case ((uop, in), i) =>
......@@ -495,7 +498,9 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
// However, in this case, the select policy always selects at maximum numDeq instructions to issue.
// Thus, we need an arbitration between the numDeq + 1 possibilities.
val oldestSelection = Module(new OldestSelection(params))
oldestSelection.io.in := s1_in_selectPtr
oldestSelection.io.in.zip(s1_in_selectPtr).map { case (in, ptr) => {
in := ptr
}}
oldestSelection.io.oldest := s1_in_oldestPtrOH
// By default, we use the default victim index set in parameters.
oldestSelection.io.canOverride := (0 until params.numDeq).map(_ == params.oldestFirst._3).map(_.B)
......
......@@ -291,7 +291,6 @@ class ExceptionGen(implicit p: Parameters) extends XSModule with HasCircularQueu
def getOldest(valid: Seq[Bool], bits: Seq[RobExceptionInfo]): (Seq[Bool], Seq[RobExceptionInfo]) = {
assert(valid.length == bits.length)
assert(isPow2(valid.length))
if (valid.length == 1) {
(valid, bits)
} else if (valid.length == 2) {
......@@ -304,7 +303,7 @@ class ExceptionGen(implicit p: Parameters) extends XSModule with HasCircularQueu
(Seq(oldest.valid), Seq(oldest.bits))
} else {
val left = getOldest(valid.take(valid.length / 2), bits.take(valid.length / 2))
val right = getOldest(valid.takeRight(valid.length / 2), bits.takeRight(valid.length / 2))
val right = getOldest(valid.takeRight(valid.length - valid.length / 2), bits.takeRight(valid.length - valid.length / 2))
getOldest(left._1 ++ right._1, left._2 ++ right._2)
}
}
......
......@@ -61,8 +61,6 @@ class FreeList(size: Int, allocWidth: Int, freeWidth: Int, enablePreAlloc: Boole
val tailPtrNext = Wire(new FreeListPtr)
// legality check
require(isPow2(freeWidth))
require((size % freeWidth) == 0)
// Collects the bits of `input` whose index is congruent to `rem` modulo freeWidth.
// Element i of the intermediate Vec is input(freeWidth * i + rem), for i in
// [0, size / freeWidth); the Vec is then flattened into a UInt.
def getRemBits(input: UInt)(rem: Int): UInt = {
  val strideBits = for (slot <- 0 until size / freeWidth) yield input(freeWidth * slot + rem)
  VecInit(strideBits).asUInt
}
......@@ -73,28 +71,34 @@ class FreeList(size: Int, allocWidth: Int, freeWidth: Int, enablePreAlloc: Boole
val freeSelMaskVec = Wire(Vec(freeWidth, UInt(size.W)))
// update freeMask
require((size % freeWidth) == 0)
freeSelMask := freeSelMaskVec.reduce(_|_)
freeMask := (io.free | freeMask) & ~freeSelMask
val remFreeSelMaskVec = VecInit(Seq.tabulate(freeWidth)(rem => getRemBits((freeMask & ~freeSelMask))(rem)))
val remFreeSelIndexVec = VecInit(Seq.tabulate(freeWidth)(fport => {
val highIndex = PriorityEncoder(remFreeSelMaskVec(fport))
Cat(highIndex, fport.U(log2Ceil(freeWidth).W))
val remFreeSelIndexOHVec = VecInit(Seq.tabulate(freeWidth)(fport => {
val highIndexOH = PriorityEncoderOH(remFreeSelMaskVec(fport))
val freeIndexOHVec = Wire(Vec(size, Bool()))
freeIndexOHVec.foreach(e => e := false.B)
for (i <- 0 until size / freeWidth) {
freeIndexOHVec(i * freeWidth + fport) := highIndexOH(i)
}
freeIndexOHVec.asUInt
}))
val freeReq = RegNext(VecInit(remFreeSelMaskVec.map(_.asUInt.orR)))
val freeSlot = RegNext(remFreeSelIndexVec)
val freeSlotOH = RegNext(remFreeSelIndexOHVec)
val doFree = freeReq.asUInt.orR
for (i <- 0 until freeWidth) {
val offset = PopCount(freeReq.take(i))
val enqPtr = tailPtr + offset
val enqPtr = tailPtr + offset
when (freeReq(i)) {
freeList(enqPtr.value) := freeSlot(i)
freeList(enqPtr.value) := OHToUInt(freeSlotOH(i))
}
freeSelMaskVec(i) := Mux(freeReq(i), UIntToOH(freeSlot(i)), 0.U)
freeSelMaskVec(i) := Mux(freeReq(i), freeSlotOH(i), 0.U)
}
tailPtrNext := tailPtr + PopCount(freeReq)
......
......@@ -107,7 +107,7 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
dontTouch(loadQueue.io.tlbReplayDelayCycleCtrl)
val tlbReplayDelayCycleCtrl = WireInit(VecInit(Seq(15.U(ReSelectLen.W), 0.U(ReSelectLen.W), 126.U(ReSelectLen.W), 0.U(ReSelectLen.W))))
val tlbReplayDelayCycleCtrl = WireInit(VecInit(Seq(14.U(ReSelectLen.W), 0.U(ReSelectLen.W), 125.U(ReSelectLen.W), 0.U(ReSelectLen.W))))
loadQueue.io.tlbReplayDelayCycleCtrl := tlbReplayDelayCycleCtrl
// io.enq logic
......
......@@ -31,6 +31,7 @@ import utility._
// These raw data modules are like SyncDataModuleTemplate, but support cam-like ops
abstract class LqRawDataModule[T <: Data] (gen: T, numEntries: Int, numRead: Int, numWrite: Int, numWBank: Int, numWDelay: Int, numCamPort: Int = 0)(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
val ren = Input(Vec(numRead, Bool()))
val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
val rdata = Output(Vec(numRead, gen))
val wen = Input(Vec(numWrite, Bool()))
......@@ -51,13 +52,14 @@ abstract class LqRawDataModule[T <: Data] (gen: T, numEntries: Int, numRead: Int
require(numWBank >= 2, "write bank must be greater than or equal to two!")
require(numWDelay >= 1, "write delay must be greater than or equal to one!")
require(numCamPort >= 0, "camport must be greater than or equal to zero!")
require((numEntries % numWBank) == 0)
val numEntryPerBank = numEntries / numWBank
val data = Reg(Vec(numEntries, gen))
// read ports
for (i <- 0 until numRead) {
io.rdata(i) := data(RegNext(io.raddr(i)))
io.rdata(i) := RegEnable(data(io.raddr(i)), io.ren(i))
}
// write ports
......
......@@ -180,7 +180,7 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
// Cause : replay cause
// Flags : rar/raw queue allocate flags
val allocated = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B))) // The control signals need to explicitly indicate the initial value
val sleep = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
val scheduled = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
val uop = Reg(Vec(LoadQueueReplaySize, new MicroOp))
val vaddrModule = Module(new LqVAddrModule(
gen = UInt(VAddrBits.W),
......@@ -320,6 +320,7 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
})
// Replay is split into 3 stages
require((LoadQueueReplaySize % LoadPipelineWidth) == 0)
// Collects the bits of `input` whose index is congruent to `rem` modulo
// LoadPipelineWidth. Element i of the intermediate Vec is
// input(LoadPipelineWidth * i + rem), for i in
// [0, LoadQueueReplaySize / LoadPipelineWidth); the Vec is then flattened into a UInt.
def getRemBits(input: UInt)(rem: Int): UInt = {
  val entriesPerPort = LoadQueueReplaySize / LoadPipelineWidth
  val strideBits = for (slot <- 0 until entriesPerPort) yield input(LoadPipelineWidth * slot + rem)
  VecInit(strideBits).asUInt
}
......@@ -329,44 +330,37 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
}
// stage1: select 2 entries and read their vaddr
val s1_oldestSel = Wire(Vec(LoadPipelineWidth, Valid(UInt(log2Up(LoadQueueReplaySize).W))))
val s2_oldestSel = Wire(Vec(LoadPipelineWidth, Valid(UInt(log2Up(LoadQueueReplaySize).W))))
val s0_oldestSel = Wire(Vec(LoadPipelineWidth, Valid(UInt(log2Up(LoadQueueReplaySize + 1).W))))
val s1_can_go = Wire(Vec(LoadPipelineWidth, Bool()))
val s1_oldestSel = Wire(Vec(LoadPipelineWidth, Valid(UInt(log2Up(LoadQueueReplaySize + 1).W))))
val s2_can_go = Wire(Vec(LoadPipelineWidth, Bool()))
val s2_oldestSel = Wire(Vec(LoadPipelineWidth, Valid(UInt(log2Up(LoadQueueReplaySize + 1).W))))
// generate mask
val needCancel = Wire(Vec(LoadQueueReplaySize, Bool()))
// generate enq mask
val selectIndexOH = Wire(Vec(LoadPipelineWidth, UInt(LoadQueueReplaySize.W)))
val loadEnqFireMask = io.enq.map(x => x.fire && !x.bits.isLoadReplay).zip(selectIndexOH).map(x => Mux(x._1, x._2, 0.U))
val remLoadEnqFireVec = loadEnqFireMask.map(x => VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(x)(rem))))
val remEnqSelVec = Seq.tabulate(LoadPipelineWidth)(w => VecInit(remLoadEnqFireVec.map(x => x(w))))
val s0_loadEnqFireMask = io.enq.map(x => x.fire && !x.bits.isLoadReplay).zip(selectIndexOH).map(x => Mux(x._1, x._2, 0.U))
val s0_remLoadEnqFireVec = s0_loadEnqFireMask.map(x => VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(x)(rem))))
val s0_remEnqSelVec = Seq.tabulate(LoadPipelineWidth)(w => VecInit(s0_remLoadEnqFireVec.map(x => x(w))))
// generate free mask
val loadReplayFreeMask = io.enq.map(_.bits).zip(canFreeVec).map(x => Mux(x._2, UIntToOH(x._1.sleepIndex), 0.U)).reduce(_|_)
val loadFreeSelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
needCancel(i) || loadReplayFreeMask(i)
})).asUInt
val remFreeSelVec = VecInit(Seq.tabulate(LoadPipelineWidth)(rem => getRemBits(loadFreeSelMask)(rem)))
// generate cancel mask
val loadReplayFireMask = (0 until LoadPipelineWidth).map(w => Mux(io.replay(w).fire, UIntToOH(s2_oldestSel(w).bits), 0.U)).reduce(_|_)
val loadCancelSelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
needCancel(i) || loadReplayFireMask(i)
})).asUInt
val remCancelSelVec = VecInit(Seq.tabulate(LoadPipelineWidth)(rem => getRemBits(loadCancelSelMask)(rem)))
val s0_loadFreeSelMask = needCancel.asUInt
val s0_remFreeSelVec = VecInit(Seq.tabulate(LoadPipelineWidth)(rem => getRemBits(s0_loadFreeSelMask)(rem)))
// l2 hint wakes up cache missed load
// l2 will send GrantData in the next 2/3 cycles, so wake up the missed loads early and send them to the load pipe, so that they will hit the data in the D channel or mshr in load S1
val loadHintWakeMask = VecInit((0 until LoadQueueReplaySize).map(i => {
allocated(i) && sleep(i) && blockByCacheMiss(i) && missMSHRId(i) === io.l2Hint.bits.sourceId && io.l2Hint.valid
val s0_loadHintWakeMask = VecInit((0 until LoadQueueReplaySize).map(i => {
allocated(i) && !scheduled(i) && blockByCacheMiss(i) && missMSHRId(i) === io.l2Hint.bits.sourceId && io.l2Hint.valid
})).asUInt()
// l2 will send 2 beats of data in 2 cycles, so if the data needed by this load is in the first beat, select it this cycle; otherwise select it next cycle
val loadHintSelMask = loadHintWakeMask & VecInit(dataInLastBeatReg.map(!_)).asUInt
val remLoadHintSelMask = VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(loadHintSelMask)(rem)))
val hintSelValid = loadHintSelMask.orR
val s0_loadHintSelMask = s0_loadHintWakeMask & VecInit(dataInLastBeatReg.map(!_)).asUInt
val s0_remLoadHintSelMask = VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(s0_loadHintSelMask)(rem)))
val s0_hintSelValid = s0_loadHintSelMask.orR
// wake up cache missed load
(0 until LoadQueueReplaySize).foreach(i => {
when(loadHintWakeMask(i)) {
when(s0_loadHintWakeMask(i)) {
blockByCacheMiss(i) := false.B
creditUpdate(i) := 0.U
}
......@@ -377,54 +371,64 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
// 1. hint wake up load
// 2. higher priority load
// 3. lower priority load
val loadHigherPriorityReplaySelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
val s0_loadHigherPriorityReplaySelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
val blocked = selBlocked(i) || blockByWaitStore(i) || blockByRARReject(i) || blockByRAWReject(i) || blockByOthers(i) || blockByForwardFail(i) || blockByCacheMiss(i) || blockByTlbMiss(i)
val hasHigherPriority = cause(i)(LoadReplayCauses.dcacheMiss) || cause(i)(LoadReplayCauses.forwardFail)
allocated(i) && sleep(i) && !blocked && !loadCancelSelMask(i) && hasHigherPriority
allocated(i) && !scheduled(i) && !blocked && hasHigherPriority
})).asUInt // use uint instead vec to reduce verilog lines
val loadLowerPriorityReplaySelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
val s0_loadLowerPriorityReplaySelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
val blocked = selBlocked(i) || blockByWaitStore(i) || blockByRARReject(i) || blockByRAWReject(i) || blockByOthers(i) || blockByForwardFail(i) || blockByCacheMiss(i) || blockByTlbMiss(i)
val hasLowerPriority = !cause(i)(LoadReplayCauses.dcacheMiss) && !cause(i)(LoadReplayCauses.forwardFail)
allocated(i) && sleep(i) && !blocked && !loadCancelSelMask(i) && hasLowerPriority
allocated(i) && !scheduled(i) && !blocked && hasLowerPriority
})).asUInt // use uint instead vec to reduce verilog lines
val loadNormalReplaySelMask = loadLowerPriorityReplaySelMask | loadHigherPriorityReplaySelMask | loadHintSelMask
val remNormalReplaySelVec = VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(loadNormalReplaySelMask)(rem)))
val loadPriorityReplaySelMask = Mux(hintSelValid, loadHintSelMask, Mux(loadHigherPriorityReplaySelMask.orR, loadHigherPriorityReplaySelMask, loadLowerPriorityReplaySelMask))
val remPriorityReplaySelVec = VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(loadPriorityReplaySelMask)(rem)))
/******************************************************************************************
* WARNING: Make sure that OldestSelectStride must less than or equal stages of load unit.*
******************************************************************************************
val s0_loadNormalReplaySelMask = s0_loadLowerPriorityReplaySelMask | s0_loadHigherPriorityReplaySelMask | s0_loadHintSelMask
val s0_remNormalReplaySelVec = VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(s0_loadNormalReplaySelMask)(rem)))
val s0_loadPriorityReplaySelMask = Mux(s0_hintSelValid, s0_loadHintSelMask, Mux(s0_loadHigherPriorityReplaySelMask.orR, s0_loadHigherPriorityReplaySelMask, s0_loadLowerPriorityReplaySelMask))
val s0_remPriorityReplaySelVec = VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(s0_loadPriorityReplaySelMask)(rem)))
/******************************************************************************************************
* WARNING: Make sure that OldestSelectStride must less than or equal stages of load pipeline. *
******************************************************************************************************
*/
val OldestSelectStride = 4
val oldestPtrExt = (0 until OldestSelectStride).map(i => io.ldWbPtr + i.U)
val oldestMatchMaskVec = (0 until LoadQueueReplaySize).map(i => (0 until OldestSelectStride).map(j => loadNormalReplaySelMask(i) && uop(i).lqIdx === oldestPtrExt(j)))
val remOldsetMatchMaskVec = (0 until LoadPipelineWidth).map(rem => getRemSeq(oldestMatchMaskVec.map(_.take(1)))(rem))
val remOlderMatchMaskVec = (0 until LoadPipelineWidth).map(rem => getRemSeq(oldestMatchMaskVec.map(_.drop(1)))(rem))
val remOldestSelVec = VecInit(Seq.tabulate(LoadPipelineWidth)(rem => {
val s0_oldestMatchMaskVec = (0 until LoadQueueReplaySize).map(i => (0 until OldestSelectStride).map(j => s0_loadNormalReplaySelMask(i) && uop(i).lqIdx === oldestPtrExt(j)))
val s0_remOldsetMatchMaskVec = (0 until LoadPipelineWidth).map(rem => getRemSeq(s0_oldestMatchMaskVec.map(_.take(1)))(rem))
val s0_remOlderMatchMaskVec = (0 until LoadPipelineWidth).map(rem => getRemSeq(s0_oldestMatchMaskVec.map(_.drop(1)))(rem))
val s0_remOldestSelVec = VecInit(Seq.tabulate(LoadPipelineWidth)(rem => {
VecInit((0 until LoadQueueReplaySize / LoadPipelineWidth).map(i => {
Mux(VecInit(remOldsetMatchMaskVec(rem).map(_(0))).asUInt.orR, remOldsetMatchMaskVec(rem)(i)(0), remOlderMatchMaskVec(rem)(i).reduce(_|_))
Mux(VecInit(s0_remOldsetMatchMaskVec(rem).map(_(0))).asUInt.orR, s0_remOldsetMatchMaskVec(rem)(i)(0), s0_remOlderMatchMaskVec(rem)(i).reduce(_|_))
})).asUInt
}))
val remOldestHintSelVec = remOldestSelVec.zip(remLoadHintSelMask).map {
val s0_remOldestHintSelVec = s0_remOldestSelVec.zip(s0_remLoadHintSelMask).map {
case(oldestVec, hintVec) => oldestVec & hintVec
}
// select oldest logic
s1_oldestSel := VecInit((0 until LoadPipelineWidth).map(rport => {
s0_oldestSel := VecInit((0 until LoadPipelineWidth).map(rport => {
// select the earliest enqueued inst
val ageOldest = AgeDetector(LoadQueueReplaySize / LoadPipelineWidth, remEnqSelVec(rport), remFreeSelVec(rport), remPriorityReplaySelVec(rport))
val ageOldest = AgeDetector(LoadQueueReplaySize / LoadPipelineWidth, s0_remEnqSelVec(rport), s0_remFreeSelVec(rport), s0_remPriorityReplaySelVec(rport))
assert(!(ageOldest.valid && PopCount(ageOldest.bits) > 1.U), "oldest index must be one-hot!")
val ageOldestValid = ageOldest.valid
val ageOldestIndex = OHToUInt(ageOldest.bits)
val ageOldestIndexOH = ageOldest.bits
// select program order oldest
val issOldestValid = Mux(io.l2Hint.valid, remOldestHintSelVec(rport).orR, remOldestSelVec(rport).orR)
val issOldestIndex = Mux(io.l2Hint.valid, OHToUInt(PriorityEncoderOH(remOldestHintSelVec(rport))), OHToUInt(PriorityEncoderOH(remOldestSelVec(rport))))
val l2HintFirst = io.l2Hint.valid && s0_remOldestHintSelVec(rport).orR
val issOldestValid = l2HintFirst || s0_remOldestSelVec(rport).orR
val issOldestIndexOH = Mux(l2HintFirst, PriorityEncoderOH(s0_remOldestHintSelVec(rport)), PriorityEncoderOH(s0_remOldestSelVec(rport)))
val oldest = Wire(Valid(UInt()))
val oldestSel = Mux(issOldestValid, issOldestIndexOH, ageOldestIndexOH)
val oldestBitsVec = Wire(Vec(LoadQueueReplaySize, Bool()))
require((LoadQueueReplaySize % LoadPipelineWidth) == 0)
oldestBitsVec.foreach(e => e := false.B)
for (i <- 0 until LoadQueueReplaySize / LoadPipelineWidth) {
oldestBitsVec(i * LoadPipelineWidth + rport) := oldestSel(i)
}
oldest.valid := ageOldest.valid || issOldestValid
oldest.bits := Cat(Mux(issOldestValid, issOldestIndex, ageOldestIndex), rport.U(log2Ceil(LoadPipelineWidth).W))
oldest.bits := OHToUInt(oldestBitsVec.asUInt)
oldest
}))
......@@ -477,26 +481,43 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
wrapper.bits.port := i.U
wrapper
})
val s1_balanceOldestSel = balanceReOrder(s1_balanceOldestSelExt)
(0 until LoadPipelineWidth).map(w => {
vaddrModule.io.raddr(w) := s1_balanceOldestSel(w).bits.index
})
val s1_balanceOldestSel = VecInit(balanceReOrder(s1_balanceOldestSelExt))
for (i <- 0 until LoadPipelineWidth) {
val s0_can_go = s1_can_go(s1_balanceOldestSel(i).bits.port) || uop(s1_oldestSel(i).bits).robIdx.needFlush(io.redirect)
val s0_cancel = uop(s0_oldestSel(i).bits).robIdx.needFlush(io.redirect)
val s0_oldestSelV = s0_oldestSel(i).valid && !s0_cancel
s1_oldestSel(i).valid := RegEnable(s0_oldestSelV, s0_can_go)
s1_oldestSel(i).bits := RegEnable(s0_oldestSel(i).bits, s0_can_go)
when (s0_can_go && s0_oldestSelV) {
scheduled(s0_oldestSel(i).bits) := true.B
}
}
val s2_cancelReplay = Wire(Vec(LoadPipelineWidth, Bool()))
for (i <- 0 until LoadPipelineWidth) {
val s1_cancel = uop(s1_balanceOldestSel(i).bits.index).robIdx.needFlush(io.redirect)
val s1_oldestSelV = s1_balanceOldestSel(i).valid && !s1_cancel
s1_can_go(i) := Mux(s2_oldestSel(i).valid && !s2_cancelReplay(i), io.replay(i).ready && replayCanFire(i), true.B)
s2_oldestSel(i).valid := RegEnable(s1_oldestSelV, s1_can_go(i))
s2_oldestSel(i).bits := RegEnable(s1_balanceOldestSel(i).bits.index, s1_can_go(i))
vaddrModule.io.ren(i) := s1_balanceOldestSel(i).valid && s1_can_go(i)
vaddrModule.io.raddr(i) := s1_balanceOldestSel(i).bits.index
}
for (i <- 0 until LoadPipelineWidth) {
val s2_replayIdx = RegNext(s1_balanceOldestSel(i).bits.index)
val s2_replayUop = uop(s2_replayIdx)
val s2_replayMSHRId = missMSHRId(s2_replayIdx)
val s2_replacementUpdated = replacementUpdated(s2_replayIdx)
val s2_replayCauses = cause(s2_replayIdx)
val s2_replayCarry = replayCarryReg(s2_replayIdx)
val s2_replayCacheMissReplay = trueCacheMissReplay(s2_replayIdx)
val cancelReplay = s2_replayUop.robIdx.needFlush(io.redirect)
val s2_loadCancelSelMask = RegNext(loadCancelSelMask)
s2_oldestSel(i).valid := RegNext(s1_balanceOldestSel(i).valid) && !s2_loadCancelSelMask(s2_replayIdx)
s2_oldestSel(i).bits := s2_replayIdx
io.replay(i).valid := s2_oldestSel(i).valid && !cancelReplay && replayCanFire(i)
val s1_replayIdx = s1_balanceOldestSel(i).bits.index
val s2_replayUop = RegEnable(uop(s1_replayIdx), s1_can_go(i))
val s2_replayMSHRId = RegEnable(missMSHRId(s1_replayIdx), s1_can_go(i))
val s2_replacementUpdated = RegEnable(replacementUpdated(s1_replayIdx), s1_can_go(i))
val s2_replayCauses = RegEnable(cause(s1_replayIdx), s1_can_go(i))
val s2_replayCarry = RegEnable(replayCarryReg(s1_replayIdx), s1_can_go(i))
val s2_replayCacheMissReplay = RegEnable(trueCacheMissReplay(s1_replayIdx), s1_can_go(i))
s2_cancelReplay(i) := s2_replayUop.robIdx.needFlush(io.redirect)
s2_can_go(i) := DontCare
io.replay(i).valid := s2_oldestSel(i).valid && !s2_cancelReplay(i) && replayCanFire(i)
io.replay(i).bits := DontCare
io.replay(i).bits.uop := s2_replayUop
io.replay(i).bits.vaddr := vaddrModule.io.rdata(i)
......@@ -509,8 +530,7 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
io.replay(i).bits.sleepIndex := s2_oldestSel(i).bits
when (io.replay(i).fire) {
sleep(s2_oldestSel(i).bits) := false.B
assert(allocated(s2_oldestSel(i).bits), s"LoadQueueReplay: why replay an invalid entry ${s2_oldestSel(i).bits} ?\n")
XSError(!allocated(s2_oldestSel(i).bits), p"LoadQueueReplay: why replay an invalid entry ${s2_oldestSel(i).bits} ?")
}
}
......@@ -567,7 +587,7 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
// Allocate new entry
allocated(enqIndex) := true.B
sleep(enqIndex) := true.B
scheduled(enqIndex) := false.B
uop(enqIndex) := enq.bits.uop
vaddrModule.io.wen(w) := true.B
......@@ -668,7 +688,7 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
allocated(sleepIndex) := false.B
freeMaskVec(sleepIndex) := true.B
} .otherwise {
sleep(sleepIndex) := true.B
scheduled(sleepIndex) := false.B
}
}
}
......@@ -762,7 +782,7 @@ class LoadQueueReplay(implicit p: Parameters) extends XSModule
XSPerfAccumulate("replay_dcache_replay", replayDCacheReplayCount)
XSPerfAccumulate("replay_forward_fail", replayForwardFailCount)
XSPerfAccumulate("replay_dcache_miss", replayDCacheMissCount)
XSPerfAccumulate("replay_hint_wakeup", hintSelValid)
XSPerfAccumulate("replay_hint_wakeup", s0_hintSelValid)
val perfEvents: Seq[(String, UInt)] = Seq(
("enq", enqCount),
......
......@@ -624,7 +624,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule
!s2_is_prefetch
val s2_data_invalid = io.lsq.dataInvalid && !s2_exception
val s2_fullForward = WireInit(false.B)
val s2_bank_conflict = io.dcacheBankConflict && !forward_D_or_mshr_valid
io.s2_forward_fail := s2_forward_fail
io.dcache_kill := pmp.ld || pmp.mmio // move pmp resp kill to outside
......@@ -648,7 +648,8 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule
(!s2_wait_store &&
!s2_tlb_miss &&
s2_cache_replay) ||
(io.out.bits.miss && io.l2Hint.valid && (io.out.bits.replayInfo.missMSHRId === io.l2Hint.bits.sourceId))) &&
(io.out.bits.miss && io.l2Hint.valid && (io.out.bits.replayInfo.missMSHRId === io.l2Hint.bits.sourceId)) ||
s2_bank_conflict) &&
!s2_exception &&
!s2_mmio &&
!s2_is_prefetch
......@@ -819,7 +820,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule
io.out.bits.replayInfo.cause(LoadReplayCauses.waitStore) := s2_wait_store && !s2_mmio && !s2_is_prefetch
io.out.bits.replayInfo.cause(LoadReplayCauses.tlbMiss) := s2_tlb_miss
io.out.bits.replayInfo.cause(LoadReplayCauses.schedError) := (io.in.bits.replayInfo.cause(LoadReplayCauses.schedError) || s2_schedError) && !s2_mmio && !s2_is_prefetch
io.out.bits.replayInfo.cause(LoadReplayCauses.bankConflict) := io.dcacheBankConflict && !s2_mmio && !s2_is_prefetch
io.out.bits.replayInfo.cause(LoadReplayCauses.bankConflict) := s2_bank_conflict && !s2_mmio && !s2_is_prefetch
io.out.bits.replayInfo.cause(LoadReplayCauses.dcacheMiss) := io.out.bits.miss
if (EnableFastForward) {
io.out.bits.replayInfo.cause(LoadReplayCauses.dcacheReplay) := s2_cache_replay && !s2_is_prefetch && !s2_mmio && !s2_exception && !fullForward
......
......@@ -949,7 +949,7 @@ class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModul
class SMSPrefetcher()(implicit p: Parameters) extends BasePrefecher with HasSMSModuleHelper {
require(exuParameters.LduCnt == 2)
require(exuParameters.LduCnt == 3)
val io_agt_en = IO(Input(Bool()))
val io_stride_en = IO(Input(Bool()))
......
......@@ -466,12 +466,14 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
val sbuffer_empty = Cat(invalidMask).andR()
val sq_empty = !Cat(io.in.map(_.valid)).orR()
val empty = sbuffer_empty && sq_empty
val threshold = RegNext(io.csrCtrl.sbuffer_threshold +& 1.U)
val threshold = Wire(UInt(5.W)) // RegNext(io.csrCtrl.sbuffer_threshold +& 1.U)
threshold := Constantin.createRecord("StoreBufferThreshold_"+p(XSCoreParamsKey).HartId.toString(), initValue = 7.U)
val ActiveCount = PopCount(activeMask)
val ValidCount = PopCount(validMask)
val do_eviction = RegNext(ActiveCount >= threshold || ActiveCount === (StoreBufferSize-1).U || ValidCount === (StoreBufferSize).U, init = false.B)
require((StoreBufferThreshold + 1) <= StoreBufferSize)
XSDebug(p"ActiveCount[$ActiveCount]\n")
io.flush.empty := RegNext(empty && io.sqempty)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册