未验证 提交 1b7adedc 编写于 作者: W William Wang 提交者: GitHub

MemBlock: split store addr and store data (#781)

* RSFeedback: add source type

* StoreQueue: split store addr and store data

* StoreQueue: update ls forward logic

* Now it supports split addr and data

* Chore: force assign name for load/store unit

* RS: add RS support for store a-d split

* StoreQueue: fix stlf logic

* StoreQueue: fix addr wb sq update logic

* AtomicsUnit: support split a/d

* StoreQueue: add sbuffer enq condition assertion

Store data op (std) may still be invalid after store addr op's (sta)
commitment, so datavalid needs to be checked before committing
store data to sbuffer

Note that at current commit a non-completed std op for a
committed store may exist. We should make sure that uop
will not be cancelled by a later branch mispredict. More work
to be done!

* Roq: add std/sta split writeback logic

Now store will commit only if both sta & std have been written back
Co-authored-by: NZhangZifei <zhangzifei20z@ict.ac.cn>
上级 68f25d38
...@@ -51,6 +51,14 @@ object ValidUndirectioned { ...@@ -51,6 +51,14 @@ object ValidUndirectioned {
} }
} }
// Encodings for the `sourceType` field of the reservation-station feedback
// bundle (RSFeedback), i.e. the reason a memory op reports back to its RS.
// All encodings are 2-bit UInt literals.
object RSFeedbackType {
// NOTE(review): presumably "the access missed in the TLB" — confirm against the producers.
val tlbMiss = 0.U(2.W)
// NOTE(review): presumably "no free MSHR (miss status holding register) was available" — confirm.
val mshrFull = 1.U(2.W)
// Store-to-load forwarding found an address match, but the store data is not valid yet.
val dataInvalid = 2.U(2.W)
// Hardware type of a feedback-type field: a 2-bit UInt (value 3 is not assigned here).
def apply() = UInt(2.W)
}
class SCMeta(val useSC: Boolean)(implicit p: Parameters) extends XSBundle with HasSCParameter { class SCMeta(val useSC: Boolean)(implicit p: Parameters) extends XSBundle with HasSCParameter {
val tageTaken = if (useSC) Bool() else UInt(0.W) val tageTaken = if (useSC) Bool() else UInt(0.W)
val scUsed = if (useSC) Bool() else UInt(0.W) val scUsed = if (useSC) Bool() else UInt(0.W)
...@@ -407,14 +415,13 @@ class RoqCommitIO(implicit p: Parameters) extends XSBundle { ...@@ -407,14 +415,13 @@ class RoqCommitIO(implicit p: Parameters) extends XSBundle {
def hasCommitInstr = !isWalk && valid.asUInt.orR def hasCommitInstr = !isWalk && valid.asUInt.orR
} }
class TlbFeedback(implicit p: Parameters) extends XSBundle { class RSFeedback(implicit p: Parameters) extends XSBundle {
val rsIdx = UInt(log2Up(IssQueSize).W) val rsIdx = UInt(log2Up(IssQueSize).W)
val hit = Bool() val hit = Bool()
val flushState = Bool() val flushState = Bool()
val sourceType = RSFeedbackType()
} }
class RSFeedback(implicit p: Parameters) extends TlbFeedback
class FrontendToBackendIO(implicit p: Parameters) extends XSBundle { class FrontendToBackendIO(implicit p: Parameters) extends XSBundle {
// to backend end // to backend end
val cfVec = Vec(DecodeWidth, DecoupledIO(new CtrlFlow)) val cfVec = Vec(DecodeWidth, DecoupledIO(new CtrlFlow))
......
...@@ -101,6 +101,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -101,6 +101,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val storeUnits = Seq.fill(exuParameters.StuCnt)(Module(new StoreUnit)) val storeUnits = Seq.fill(exuParameters.StuCnt)(Module(new StoreUnit))
val exeUnits = loadUnits ++ storeUnits val exeUnits = loadUnits ++ storeUnits
loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))
val atomicsUnit = Module(new AtomicsUnit) val atomicsUnit = Module(new AtomicsUnit)
val loadWritebackOverride = Mux(atomicsUnit.io.out.valid, atomicsUnit.io.out.bits, loadUnits.head.io.ldout.bits) val loadWritebackOverride = Mux(atomicsUnit.io.out.valid, atomicsUnit.io.out.bits, loadUnits.head.io.ldout.bits)
...@@ -221,7 +224,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -221,7 +224,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
for (i <- 0 until exuParameters.LduCnt) { for (i <- 0 until exuParameters.LduCnt) {
loadUnits(i).io.redirect <> io.fromCtrlBlock.redirect loadUnits(i).io.redirect <> io.fromCtrlBlock.redirect
loadUnits(i).io.flush <> io.fromCtrlBlock.flush loadUnits(i).io.flush <> io.fromCtrlBlock.flush
loadUnits(i).io.tlbFeedback <> reservationStations(i).io.memfeedback loadUnits(i).io.rsFeedback <> reservationStations(i).io.memfeedback
loadUnits(i).io.rsIdx := reservationStations(i).io.rsIdx // TODO: beautify it loadUnits(i).io.rsIdx := reservationStations(i).io.rsIdx // TODO: beautify it
loadUnits(i).io.isFirstIssue := reservationStations(i).io.isFirstIssue // NOTE: just for dtlb's perf cnt loadUnits(i).io.isFirstIssue := reservationStations(i).io.isFirstIssue // NOTE: just for dtlb's perf cnt
loadUnits(i).io.dtlb <> dtlb.io.requestor(i) loadUnits(i).io.dtlb <> dtlb.io.requestor(i)
...@@ -255,13 +258,16 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -255,13 +258,16 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
stu.io.redirect <> io.fromCtrlBlock.redirect stu.io.redirect <> io.fromCtrlBlock.redirect
stu.io.flush <> io.fromCtrlBlock.flush stu.io.flush <> io.fromCtrlBlock.flush
stu.io.tlbFeedback <> rs.io.memfeedback stu.io.rsFeedback <> rs.io.memfeedback
stu.io.rsIdx <> rs.io.rsIdx stu.io.rsIdx <> rs.io.rsIdx
stu.io.isFirstIssue <> rs.io.isFirstIssue // NOTE: just for dtlb's perf cnt stu.io.isFirstIssue <> rs.io.isFirstIssue // NOTE: just for dtlb's perf cnt
stu.io.dtlb <> dtlbReq stu.io.dtlb <> dtlbReq
stu.io.stin <> rs.io.deq stu.io.stin <> rs.io.deq
stu.io.lsq <> lsq.io.storeIn(i) stu.io.lsq <> lsq.io.storeIn(i)
// rs.io.storeData <> lsq.io.storeDataIn(i)
lsq.io.storeDataIn(i) := rs.io.stData
// sync issue info to rs // sync issue info to rs
lsq.io.storeIssue(i).valid := rs.io.deq.valid lsq.io.storeIssue(i).valid := rs.io.deq.valid
lsq.io.storeIssue(i).bits := rs.io.deq.bits lsq.io.storeIssue(i).bits := rs.io.deq.bits
...@@ -321,6 +327,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -321,6 +327,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val st0_atomics = reservationStations(atomic_rs0).io.deq.valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.deq.bits.uop.ctrl.fuType) val st0_atomics = reservationStations(atomic_rs0).io.deq.valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.deq.bits.uop.ctrl.fuType)
val st1_atomics = reservationStations(atomic_rs1).io.deq.valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.deq.bits.uop.ctrl.fuType) val st1_atomics = reservationStations(atomic_rs1).io.deq.valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.deq.bits.uop.ctrl.fuType)
val st0_data_atomics = reservationStations(atomic_rs0).io.stData.valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.stData.bits.uop.ctrl.fuType)
val st1_data_atomics = reservationStations(atomic_rs1).io.stData.valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.stData.bits.uop.ctrl.fuType)
when (st0_atomics) { when (st0_atomics) {
reservationStations(atomic_rs0).io.deq.ready := atomicsUnit.io.in.ready reservationStations(atomic_rs0).io.deq.ready := atomicsUnit.io.in.ready
storeUnits(0).io.stin.valid := false.B storeUnits(0).io.stin.valid := false.B
...@@ -342,6 +351,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -342,6 +351,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
atomicsUnit.io.in.valid := st0_atomics || st1_atomics atomicsUnit.io.in.valid := st0_atomics || st1_atomics
atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq.bits, reservationStations(atomic_rs1).io.deq.bits) atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq.bits, reservationStations(atomic_rs1).io.deq.bits)
atomicsUnit.io.storeDataIn.valid := st0_data_atomics || st1_data_atomics
atomicsUnit.io.storeDataIn.bits := Mux(st0_data_atomics, reservationStations(atomic_rs0).io.stData.bits, reservationStations(atomic_rs1).io.stData.bits)
atomicsUnit.io.rsIdx := Mux(st0_atomics, reservationStations(atomic_rs0).io.rsIdx, reservationStations(atomic_rs1).io.rsIdx) atomicsUnit.io.rsIdx := Mux(st0_atomics, reservationStations(atomic_rs0).io.rsIdx, reservationStations(atomic_rs1).io.rsIdx)
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
atomicsUnit.io.flush <> io.fromCtrlBlock.flush atomicsUnit.io.flush <> io.fromCtrlBlock.flush
...@@ -366,14 +377,14 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -366,14 +377,14 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
} }
when (state === s_atomics_0) { when (state === s_atomics_0) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs0).io.memfeedback atomicsUnit.io.rsFeedback <> reservationStations(atomic_rs0).io.memfeedback
assert(!storeUnits(0).io.tlbFeedback.valid) assert(!storeUnits(0).io.rsFeedback.valid)
} }
when (state === s_atomics_1) { when (state === s_atomics_1) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs1).io.memfeedback atomicsUnit.io.rsFeedback <> reservationStations(atomic_rs1).io.memfeedback
assert(!storeUnits(1).io.tlbFeedback.valid) assert(!storeUnits(1).io.rsFeedback.valid)
} }
lsq.io.exceptionAddr.lsIdx := io.lsqio.exceptionAddr.lsIdx lsq.io.exceptionAddr.lsIdx := io.lsqio.exceptionAddr.lsIdx
......
...@@ -8,7 +8,7 @@ import utils._ ...@@ -8,7 +8,7 @@ import utils._
import xiangshan.backend.decode.{ImmUnion, Imm_U} import xiangshan.backend.decode.{ImmUnion, Imm_U}
import xiangshan.backend.exu.{Exu, ExuConfig} import xiangshan.backend.exu.{Exu, ExuConfig}
import xiangshan.backend.roq.RoqPtr import xiangshan.backend.roq.RoqPtr
import xiangshan.mem.SqPtr import xiangshan.mem.{SqPtr, StoreDataBundle}
import scala.math.max import scala.math.max
...@@ -102,6 +102,7 @@ class ReservationStation ...@@ -102,6 +102,7 @@ class ReservationStation
val numExist = Output(UInt(iqIdxWidth.W)) val numExist = Output(UInt(iqIdxWidth.W))
val fromDispatch = Flipped(DecoupledIO(new MicroOp)) val fromDispatch = Flipped(DecoupledIO(new MicroOp))
val deq = DecoupledIO(new ExuInput) val deq = DecoupledIO(new ExuInput)
val stData = if (exuCfg == StExeUnitCfg) ValidIO(new StoreDataBundle) else null
val srcRegValue = Input(Vec(srcNum, UInt(srcLen.W))) val srcRegValue = Input(Vec(srcNum, UInt(srcLen.W)))
val stIssuePtr = if (exuCfg == LdExeUnitCfg) Input(new SqPtr()) else null val stIssuePtr = if (exuCfg == LdExeUnitCfg) Input(new SqPtr()) else null
...@@ -143,6 +144,11 @@ class ReservationStation ...@@ -143,6 +144,11 @@ class ReservationStation
select.io.memfeedback := io.memfeedback select.io.memfeedback := io.memfeedback
select.io.flushState := io.memfeedback.bits.flushState select.io.flushState := io.memfeedback.bits.flushState
} }
if (exuCfg == StExeUnitCfg) {
select.io.dataReadyVec := ctrl.io.dataReadyVec
} else {
select.io.dataReadyVec := DontCare
}
ctrl.io.in.valid := select.io.enq.ready && io.fromDispatch.valid // NOTE: ctrl doesnt care redirect for timing optimization ctrl.io.in.valid := select.io.enq.ready && io.fromDispatch.valid // NOTE: ctrl doesnt care redirect for timing optimization
ctrl.io.flush := io.flush ctrl.io.flush := io.flush
...@@ -162,6 +168,10 @@ class ReservationStation ...@@ -162,6 +168,10 @@ class ReservationStation
if (exuCfg == LdExeUnitCfg) { if (exuCfg == LdExeUnitCfg) {
ctrl.io.stIssuePtr := RegNext(io.stIssuePtr) ctrl.io.stIssuePtr := RegNext(io.stIssuePtr)
} }
if (exuCfg == StExeUnitCfg) {
ctrl.io.selData.valid := select.io.deqData.valid
ctrl.io.selData.bits := select.io.deqData.bits
}
data.io.in.valid := select.io.enq.fire() data.io.in.valid := select.io.enq.fire()
data.io.in.addr := select.io.enq.bits data.io.in.addr := select.io.enq.bits
...@@ -174,6 +184,7 @@ class ReservationStation ...@@ -174,6 +184,7 @@ class ReservationStation
} }
if (exuCfg == StExeUnitCfg) { if (exuCfg == StExeUnitCfg) {
data.io.fpRegValue := io.fpRegValue data.io.fpRegValue := io.fpRegValue
data.io.selData := select.io.deqData.bits
} }
data.io.sel := select.io.deq.bits data.io.sel := select.io.deq.bits
data.io.listen.wen := ctrl.io.listen data.io.listen.wen := ctrl.io.listen
...@@ -196,6 +207,12 @@ class ReservationStation ...@@ -196,6 +207,12 @@ class ReservationStation
if (srcNum > 1) { io.deq.bits.src2 := data.io.out(1) } if (srcNum > 1) { io.deq.bits.src2 := data.io.out(1) }
if (srcNum > 2) { io.deq.bits.src3 := data.io.out(2) } if (srcNum > 2) { io.deq.bits.src3 := data.io.out(2) }
if (exuCfg == JumpExeUnitCfg) { io.deq.bits.uop.cf.pc := data.io.pc } if (exuCfg == JumpExeUnitCfg) { io.deq.bits.uop.cf.pc := data.io.pc }
if (exuCfg == StExeUnitCfg) {
io.stData.bits.uop := ctrl.io.stData.bits
io.stData.bits.data := data.io.stData
io.stData.valid := ctrl.io.stData.valid
}
} }
class ReservationStationSelect class ReservationStationSelect
...@@ -225,6 +242,7 @@ class ReservationStationSelect ...@@ -225,6 +242,7 @@ class ReservationStationSelect
val redirectVec = Input(Vec(iqSize, Bool())) val redirectVec = Input(Vec(iqSize, Bool()))
val readyVec = Input(Vec(iqSize, Bool())) val readyVec = Input(Vec(iqSize, Bool()))
val dataReadyVec = Input(Vec(iqSize, Bool())) // NOTE: wanna dead code elimination eliminates the codes
val validVec = Output(Vec(iqSize, Bool())) val validVec = Output(Vec(iqSize, Bool()))
val indexVec = Output(Vec(iqSize, UInt(iqIdxWidth.W))) val indexVec = Output(Vec(iqSize, UInt(iqIdxWidth.W)))
...@@ -236,6 +254,7 @@ class ReservationStationSelect ...@@ -236,6 +254,7 @@ class ReservationStationSelect
def fire() = valid && ready def fire() = valid && ready
} }
val deq = DecoupledIO(UInt(iqIdxWidth.W)) val deq = DecoupledIO(UInt(iqIdxWidth.W))
val deqData = if (exuCfg == StExeUnitCfg) ValidIO(UInt(iqIdxWidth.W)) else null
val flushState = if (feedback) Input(Bool()) else null val flushState = if (feedback) Input(Bool()) else null
val isFirstIssue = if (feedback) Output(Bool()) else null val isFirstIssue = if (feedback) Output(Bool()) else null
...@@ -251,7 +270,8 @@ class ReservationStationSelect ...@@ -251,7 +270,8 @@ class ReservationStationSelect
* count queue : record replay cycle * count queue : record replay cycle
*/ */
val s_idle :: s_valid :: s_wait :: s_replay :: Nil = Enum(4) val s_idle :: s_valid :: s_wait :: s_replay :: s_sent :: Nil = Enum(5)
val d_idle :: d_sent :: Nil = Enum(2)
/* state machine /* state machine
* s_idle : empty slot, init state, set when deq * s_idle : empty slot, init state, set when deq
* s_valid : ready to be secleted * s_valid : ready to be secleted
...@@ -270,6 +290,11 @@ class ReservationStationSelect ...@@ -270,6 +290,11 @@ class ReservationStationSelect
val emptyIdxQueue = widthMap(i => emptyQueue(indexQueue(i))) val emptyIdxQueue = widthMap(i => emptyQueue(indexQueue(i)))
val countIdxQueue = widthMap(i => countQueue(indexQueue(i))) val countIdxQueue = widthMap(i => countQueue(indexQueue(i)))
// NOTE: we want dead code elimination to remove the code below
val dataStateQueue = RegInit(VecInit(Seq.fill(iqSize)(d_idle)))
val dataValidQueue = VecInit(dataStateQueue.zip(stateQueue).map(a => a._1 === d_idle && a._2 =/= s_idle))
val dataReadyIdxQueue = widthMap(i => dataValidQueue(indexQueue(i)) && io.dataReadyVec(indexQueue(i)))
// select ready // select ready
// for no replay, select just equal to deq (attached) // for no replay, select just equal to deq (attached)
// with replay, select is just two stage with deq. // with replay, select is just two stage with deq.
...@@ -305,6 +330,19 @@ class ReservationStationSelect ...@@ -305,6 +330,19 @@ class ReservationStationSelect
(if(feedback) ~(0.U(iqSize.W)) else (if(feedback) ~(0.U(iqSize.W)) else
Mux(RegNext(selectValid && (io.redirect.valid || io.flush)), 0.U, ~(0.U(iqSize.W)))) Mux(RegNext(selectValid && (io.redirect.valid || io.flush)), 0.U, ~(0.U(iqSize.W))))
// store deq data, receiver(the sq) must be ready
// NOTE: we want dead code elimination to remove the code below
val lastDataMask = Wire(UInt(iqSize.W))
val dataMask = WireInit(VecInit((0 until iqSize).map(i => dataReadyIdxQueue(i)))).asUInt & lastDataMask
val dataIdx = ParallelPriorityMux(dataMask.asBools zip indexQueue)
val dataPtr = ParallelPriorityMux(dataMask.asBools.zipWithIndex.map{ case (a,i) => (a, i.U)}) // NOTE: the idx of indexQueue
val haveData = Cat(dataMask).orR
val dataIdxReg = RegNext(dataIdx, init = 0.U)
val dataValid = haveData
val dataReg = RegNext(dataValid, init = false.B)
val dataPtrReg = RegNext(Mux(moveMask(dataPtr), dataPtr-1.U, dataPtr), init = 0.U)
lastDataMask := ~Mux(dataReg, UIntToOH(dataPtrReg), 0.U)
// deq // deq
val dequeue = Mux(RegNext(io.flush), false.B, val dequeue = Mux(RegNext(io.flush), false.B,
if (feedback) bubbleReg else bubbleReg || issueFire) if (feedback) bubbleReg else bubbleReg || issueFire)
...@@ -327,11 +365,28 @@ class ReservationStationSelect ...@@ -327,11 +365,28 @@ class ReservationStationSelect
if (feedback) { if (feedback) {
when (io.memfeedback.valid) { when (io.memfeedback.valid) {
when (stateQueue(io.memfeedback.bits.rsIdx) === s_wait) { when (stateQueue(io.memfeedback.bits.rsIdx) === s_wait) {
stateQueue(io.memfeedback.bits.rsIdx) := Mux(io.memfeedback.bits.hit, s_idle, s_replay) val s_finish_state = if (exuCfg == StExeUnitCfg) {
Mux(dataStateQueue(io.memfeedback.bits.rsIdx) === d_sent || (dataReg && dataIdxReg === io.memfeedback.bits.rsIdx),
s_idle, s_sent)
} else { s_idle }
stateQueue(io.memfeedback.bits.rsIdx) := Mux(io.memfeedback.bits.hit, s_finish_state, s_replay)
} }
when (!io.memfeedback.bits.hit) { when (!io.memfeedback.bits.hit) {
countQueue(io.memfeedback.bits.rsIdx) := replayDelay(cntCountQueue(io.memfeedback.bits.rsIdx)) countQueue(io.memfeedback.bits.rsIdx) := replayDelay(cntCountQueue(io.memfeedback.bits.rsIdx))
} }
assert(stateQueue(io.memfeedback.bits.rsIdx) === s_wait, "mem feedback but rs dont wait for it")
}
}
if (exuCfg == StExeUnitCfg) {
when (dataReg) {
dataStateQueue(dataIdxReg) := d_sent
}
when (dataReg && stateQueue(dataIdxReg) === s_sent) {
stateQueue(dataIdxReg) := s_idle
}
for (i <- 0 until iqSize) {
assert(stateQueue(i) =/= s_sent || dataStateQueue(i) =/= d_sent, "dont want the state that addr and data both sent, but still not idle")
} }
} }
...@@ -383,6 +438,7 @@ class ReservationStationSelect ...@@ -383,6 +438,7 @@ class ReservationStationSelect
val enqIdx = indexQueue(enqPtr) val enqIdx = indexQueue(enqPtr)
when (enqueue) { when (enqueue) {
stateQueue(enqIdx) := s_valid stateQueue(enqIdx) := s_valid
dataStateQueue(enqIdx) := d_idle
cntCountQueue(enqIdx) := 0.U cntCountQueue(enqIdx) := 0.U
} }
...@@ -394,6 +450,11 @@ class ReservationStationSelect ...@@ -394,6 +450,11 @@ class ReservationStationSelect
io.deq.valid := selectValid io.deq.valid := selectValid
io.deq.bits := selectIndex io.deq.bits := selectIndex
if (exuCfg == StExeUnitCfg) {
io.deqData.valid := dataValid
io.deqData.bits := dataIdx
}
io.numExist := RegNext(Mux(nextTailPtr.flag, if(isPow2(iqSize)) (iqSize-1).U else iqSize.U, nextTailPtr.value), init = (iqSize - 1).U) io.numExist := RegNext(Mux(nextTailPtr.flag, if(isPow2(iqSize)) (iqSize-1).U else iqSize.U, nextTailPtr.value), init = (iqSize - 1).U)
assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B))) assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B)))
...@@ -463,10 +524,13 @@ class ReservationStationCtrl ...@@ -463,10 +524,13 @@ class ReservationStationCtrl
val uop = new MicroOp val uop = new MicroOp
})) }))
val sel = Flipped(ValidIO(UInt(iqIdxWidth.W))) val sel = Flipped(ValidIO(UInt(iqIdxWidth.W)))
val selData = if (exuCfg == StExeUnitCfg) Flipped(ValidIO(UInt(iqIdxWidth.W))) else null
val out = ValidIO(new MicroOp) val out = ValidIO(new MicroOp)
val stData = if (exuCfg == StExeUnitCfg) ValidIO(new MicroOp) else null
val redirectVec = Output(Vec(iqSize, Bool())) val redirectVec = Output(Vec(iqSize, Bool()))
val readyVec = Output(Vec(iqSize, Bool())) val readyVec = Output(Vec(iqSize, Bool()))
val dataReadyVec = if (exuCfg == StExeUnitCfg) Output(Vec(IssQueSize, Bool())) else null
val validVec = Input(Vec(iqSize, Bool())) val validVec = Input(Vec(iqSize, Bool()))
val indexVec = Input(Vec(iqSize, UInt(iqIdxWidth.W))) val indexVec = Input(Vec(iqSize, UInt(iqIdxWidth.W)))
...@@ -486,7 +550,6 @@ class ReservationStationCtrl ...@@ -486,7 +550,6 @@ class ReservationStationCtrl
val enqEn = io.in.valid val enqEn = io.in.valid
val enqEnReg = RegNext(enqEn && !(io.redirect.valid || io.flush), init = false.B) val enqEnReg = RegNext(enqEn && !(io.redirect.valid || io.flush), init = false.B)
val enqUop = io.in.bits.uop val enqUop = io.in.bits.uop
val enqUopReg = RegEnable(enqUop, selValid)
val selPtr = io.sel.bits val selPtr = io.sel.bits
val selPtrReg = RegEnable(selPtr, selValid) val selPtrReg = RegEnable(selPtr, selValid)
val data = io.listen val data = io.listen
...@@ -547,7 +610,12 @@ class ReservationStationCtrl ...@@ -547,7 +610,12 @@ class ReservationStationCtrl
} }
// load wait store // load wait store
io.readyVec := srcQueueWire.map(Cat(_).andR) if (exuCfg == StExeUnitCfg) {
io.readyVec := srcQueueWire.map(a => a(0))
io.dataReadyVec := srcQueueWire.map(a => a(1))
} else {
io.readyVec := srcQueueWire.map(Cat(_).andR)
}
if (exuCfg == LdExeUnitCfg) { if (exuCfg == LdExeUnitCfg) {
val ldWait = Reg(Vec(iqSize, Bool())) val ldWait = Reg(Vec(iqSize, Bool()))
val sqIdx = Reg(Vec(iqSize, new SqPtr())) val sqIdx = Reg(Vec(iqSize, new SqPtr()))
...@@ -566,7 +634,7 @@ class ReservationStationCtrl ...@@ -566,7 +634,7 @@ class ReservationStationCtrl
} }
val redirectHit = io.redirectVec(selPtr) val redirectHit = io.redirectVec(selPtr)
val uop = Module(new SyncDataModuleTemplate(new MicroOp, iqSize, 1, 1)) val uop = Module(new SyncDataModuleTemplate(new MicroOp, iqSize, if (exuCfg == StExeUnitCfg) 2 else 1, 1))
uop.io.raddr(0) := selPtr uop.io.raddr(0) := selPtr
io.out.valid := RegNext(selValid && ~redirectHit) io.out.valid := RegNext(selValid && ~redirectHit)
...@@ -575,7 +643,14 @@ class ReservationStationCtrl ...@@ -575,7 +643,14 @@ class ReservationStationCtrl
uop.io.waddr(0) := enqPtr uop.io.waddr(0) := enqPtr
uop.io.wdata(0) := enqUop uop.io.wdata(0) := enqUop
class fastSendUop extends Bundle { if (exuCfg == StExeUnitCfg) { // NOTE: send data part of st
uop.io.raddr(1) := io.selData.bits
io.stData.bits := uop.io.rdata(1)
io.stData.valid := RegNext(io.selData.valid && ~io.redirectVec(io.selData.bits))
}
// NOTE: st dont fast wake others, dont care override
class fastSendUop extends XSBundle {
val pdest = UInt(PhyRegIdxWidth.W) val pdest = UInt(PhyRegIdxWidth.W)
val rfWen = Bool() val rfWen = Bool()
val fpWen = Bool() val fpWen = Bool()
...@@ -595,6 +670,9 @@ class ReservationStationCtrl ...@@ -595,6 +670,9 @@ class ReservationStationCtrl
red := roq.needFlush(io.redirect, io.flush) red := roq.needFlush(io.redirect, io.flush)
} }
io.out.bits.roqIdx := roqIdx(selPtrReg) io.out.bits.roqIdx := roqIdx(selPtrReg)
if (exuCfg == StExeUnitCfg) {
io.stData.bits.roqIdx := roqIdx(RegEnable(io.selData.bits, io.selData.valid))
}
io.fastUopOut := DontCare io.fastUopOut := DontCare
if (fastWakeup) { if (fastWakeup) {
...@@ -790,7 +868,10 @@ class ReservationStationData ...@@ -790,7 +868,10 @@ class ReservationStationData
} }
val sel = Input(UInt(iqIdxWidth.W)) val sel = Input(UInt(iqIdxWidth.W))
val selData = if(exuCfg == StExeUnitCfg) Input(UInt(iqIdxWidth.W)) else null
val out = Output(Vec(srcNum, UInt(srcLen.W))) val out = Output(Vec(srcNum, UInt(srcLen.W)))
val stData = if(exuCfg == StExeUnitCfg) Output(UInt(srcLen.W)) else null
val pc = if(exuCfg == JumpExeUnitCfg) Output(UInt(VAddrBits.W)) else null val pc = if(exuCfg == JumpExeUnitCfg) Output(UInt(VAddrBits.W)) else null
}) })
...@@ -870,8 +951,18 @@ class ReservationStationData ...@@ -870,8 +951,18 @@ class ReservationStationData
(0 until srcNum).foreach(i => data(i).w(0).wdata := io.srcRegValue(i) ) (0 until srcNum).foreach(i => data(i).w(0).wdata := io.srcRegValue(i) )
} }
// deq // deq
data.map(_.r.addr := io.sel) if (exuCfg == StExeUnitCfg) {
data(0).r.addr := io.sel
data(1).r.addr := io.selData
io.stData := data(1).r.rdata
} else {
data.map(_.r.addr := io.sel)
}
io.out := data.map(_.r.rdata) io.out := data.map(_.r.rdata)
if (exuCfg == StExeUnitCfg) {
io.out(1) := DontCare
}
if(pcMem.nonEmpty){ if(pcMem.nonEmpty){
pcMem.get.io.raddr(0) := io.sel pcMem.get.io.raddr(0) := io.sel
io.pc := pcMem.get.io.rdata(0) io.pc := pcMem.get.io.rdata(0)
......
...@@ -48,6 +48,7 @@ class RoqLsqIO(implicit p: Parameters) extends XSBundle { ...@@ -48,6 +48,7 @@ class RoqLsqIO(implicit p: Parameters) extends XSBundle {
val pendingld = Output(Bool()) val pendingld = Output(Bool())
val pendingst = Output(Bool()) val pendingst = Output(Bool())
val commit = Output(Bool()) val commit = Output(Bool())
val storeDataRoqWb = Input(Vec(StorePipelineWidth, Valid(new RoqPtr)))
} }
class RoqEnqIO(implicit p: Parameters) extends XSBundle { class RoqEnqIO(implicit p: Parameters) extends XSBundle {
...@@ -266,6 +267,7 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc ...@@ -266,6 +267,7 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
// writeback status // writeback status
// val writebacked = Reg(Vec(RoqSize, Bool())) // val writebacked = Reg(Vec(RoqSize, Bool()))
val writebacked = Mem(RoqSize, Bool()) val writebacked = Mem(RoqSize, Bool())
val store_data_writebacked = Mem(RoqSize, Bool())
// data for redirect, exception, etc. // data for redirect, exception, etc.
// val flagBkup = RegInit(VecInit(List.fill(RoqSize)(false.B))) // val flagBkup = RegInit(VecInit(List.fill(RoqSize)(false.B)))
val flagBkup = Mem(RoqSize, Bool()) val flagBkup = Mem(RoqSize, Bool())
...@@ -460,7 +462,8 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc ...@@ -460,7 +462,8 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
io.commits.isWalk := state =/= s_idle io.commits.isWalk := state =/= s_idle
val commit_v = Mux(state === s_idle, VecInit(deqPtrVec.map(ptr => valid(ptr.value))), VecInit(walkPtrVec.map(ptr => valid(ptr.value)))) val commit_v = Mux(state === s_idle, VecInit(deqPtrVec.map(ptr => valid(ptr.value))), VecInit(walkPtrVec.map(ptr => valid(ptr.value))))
val commit_w = VecInit(deqPtrVec.map(ptr => writebacked(ptr.value))) // store will be commited iff both sta & std have been writebacked
val commit_w = VecInit(deqPtrVec.map(ptr => writebacked(ptr.value) && store_data_writebacked(ptr.value)))
val commit_exception = exceptionDataRead.valid && !isAfter(exceptionDataRead.bits.roqIdx, deqPtrVec.last) val commit_exception = exceptionDataRead.valid && !isAfter(exceptionDataRead.bits.roqIdx, deqPtrVec.last)
val commit_block = VecInit((0 until CommitWidth).map(i => !commit_w(i))) val commit_block = VecInit((0 until CommitWidth).map(i => !commit_w(i)))
val allowOnlyOneCommit = commit_exception || intrBitSetReg val allowOnlyOneCommit = commit_exception || intrBitSetReg
...@@ -655,11 +658,14 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc ...@@ -655,11 +658,14 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
for (i <- 0 until RenameWidth) { for (i <- 0 until RenameWidth) {
when (canEnqueue(i)) { when (canEnqueue(i)) {
writebacked(enqPtrVec(i).value) := false.B writebacked(enqPtrVec(i).value) := false.B
val isStu = io.enq.req(i).bits.ctrl.fuType === FuType.stu
store_data_writebacked(enqPtrVec(i).value) := !isStu
} }
} }
when (exceptionGen.io.out.valid) { when (exceptionGen.io.out.valid) {
val wbIdx = exceptionGen.io.out.bits.roqIdx.value val wbIdx = exceptionGen.io.out.bits.roqIdx.value
writebacked(wbIdx) := true.B writebacked(wbIdx) := true.B
store_data_writebacked(wbIdx) := true.B
} }
// writeback logic set numWbPorts writebacked to true // writeback logic set numWbPorts writebacked to true
for (i <- 0 until numWbPorts) { for (i <- 0 until numWbPorts) {
...@@ -669,6 +675,12 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc ...@@ -669,6 +675,12 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
writebacked(wbIdx) := !block_wb writebacked(wbIdx) := !block_wb
} }
} }
// store data writeback logic: mark the store as data_writebacked
for (i <- 0 until StorePipelineWidth) {
when(io.lsq.storeDataRoqWb(i).valid) {
store_data_writebacked(io.lsq.storeDataRoqWb(i).bits.value) := true.B
}
}
// flagBkup // flagBkup
// enqueue logic set 6 flagBkup at most // enqueue logic set 6 flagBkup at most
......
...@@ -50,6 +50,11 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundle { ...@@ -50,6 +50,11 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundle {
val forwardData = Vec(8, UInt(8.W)) val forwardData = Vec(8, UInt(8.W))
} }
// Payload of the store-data (std) path: the data operand of a store together
// with the micro-op it belongs to. It is sent from the store reservation
// station to the store queue separately from the store-address (sta) op.
class StoreDataBundle(implicit p: Parameters) extends XSBundle {
// Store data value, XLEN+1 bits wide.
// NOTE(review): the extra bit beyond XLEN presumably accommodates the FP register format — confirm.
val data = UInt((XLEN+1).W)
// Micro-op of the store this data belongs to.
val uop = new MicroOp
}
class LoadForwardQueryIO(implicit p: Parameters) extends XSBundle { class LoadForwardQueryIO(implicit p: Parameters) extends XSBundle {
val paddr = Output(UInt(PAddrBits.W)) val paddr = Output(UInt(PAddrBits.W))
val mask = Output(UInt(8.W)) val mask = Output(UInt(8.W))
...@@ -62,9 +67,17 @@ class LoadForwardQueryIO(implicit p: Parameters) extends XSBundle { ...@@ -62,9 +67,17 @@ class LoadForwardQueryIO(implicit p: Parameters) extends XSBundle {
// val lqIdx = Output(UInt(LoadQueueIdxWidth.W)) // val lqIdx = Output(UInt(LoadQueueIdxWidth.W))
val sqIdx = Output(new SqPtr) val sqIdx = Output(new SqPtr)
val dataInvalid = Input(Bool()) // Addr match, but data is not valid for now
// If dataInvalid, load inst should sleep for a while
// Feedback type should be RSFeedbackType.dataInvalid
} }
class MaskedLoadForwardQueryIO(implicit p: Parameters) extends XSBundle { // LoadForwardQueryIO used in load pipeline
//
// Difference between PipeLoadForwardQueryIO and LoadForwardQueryIO:
// PipeIO use predecoded sqIdxMask for better forward timing
class PipeLoadForwardQueryIO(implicit p: Parameters) extends XSBundle {
val paddr = Output(UInt(PAddrBits.W)) val paddr = Output(UInt(PAddrBits.W))
val mask = Output(UInt(8.W)) val mask = Output(UInt(8.W))
val uop = Output(new MicroOp) // for replay val uop = Output(new MicroOp) // for replay
...@@ -74,7 +87,13 @@ class MaskedLoadForwardQueryIO(implicit p: Parameters) extends XSBundle { ...@@ -74,7 +87,13 @@ class MaskedLoadForwardQueryIO(implicit p: Parameters) extends XSBundle {
val forwardMask = Input(Vec(8, Bool())) val forwardMask = Input(Vec(8, Bool()))
val forwardData = Input(Vec(8, UInt(8.W))) val forwardData = Input(Vec(8, UInt(8.W)))
val sqIdx = Output(new SqPtr) // for debug val sqIdx = Output(new SqPtr) // for debug, should not be used in pipeline for timing reasons
// sqIdxMask is calcuated in earlier stage for better timing // sqIdxMask is calcuated in earlier stage for better timing
val sqIdxMask = Output(UInt(StoreQueueSize.W)) val sqIdxMask = Output(UInt(StoreQueueSize.W))
// dataInvalid: addr match, but data is not valid for now
val dataInvalidFast = Input(Bool()) // resp to load_s1
val dataInvalid = Input(Bool()) // resp to load_s2
// If dataInvalid, load inst should sleep for a while
// Feedback type should be RSFeedbackType.dataInvalid
} }
...@@ -42,12 +42,13 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet ...@@ -42,12 +42,13 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
val flush = Input(Bool()) val flush = Input(Bool())
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle))) val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new StoreDataBundle))) // store data, send to sq from rs
val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool())) val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool()))
val needReplayFromRS = Vec(LoadPipelineWidth, Input(Bool())) val needReplayFromRS = Vec(LoadPipelineWidth, Input(Bool()))
val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq)) val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq))
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
val forward = Vec(LoadPipelineWidth, Flipped(new MaskedLoadForwardQueryIO)) val forward = Vec(LoadPipelineWidth, Flipped(new PipeLoadForwardQueryIO))
val roq = Flipped(new RoqLsqIO) val roq = Flipped(new RoqLsqIO)
val rollback = Output(Valid(new Redirect)) val rollback = Output(Valid(new Redirect))
val dcache = Flipped(ValidIO(new Refill)) val dcache = Flipped(ValidIO(new Refill))
...@@ -101,6 +102,7 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet ...@@ -101,6 +102,7 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
storeQueue.io.brqRedirect <> io.brqRedirect storeQueue.io.brqRedirect <> io.brqRedirect
storeQueue.io.flush <> io.flush storeQueue.io.flush <> io.flush
storeQueue.io.storeIn <> io.storeIn storeQueue.io.storeIn <> io.storeIn
storeQueue.io.storeDataIn <> io.storeDataIn
storeQueue.io.sbuffer <> io.sbuffer storeQueue.io.sbuffer <> io.sbuffer
storeQueue.io.mmioStout <> io.mmioStout storeQueue.io.mmioStout <> io.mmioStout
storeQueue.io.roq <> io.roq storeQueue.io.roq <> io.roq
......
...@@ -75,7 +75,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule ...@@ -75,7 +75,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool())) val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool()))
val needReplayFromRS = Vec(LoadPipelineWidth, Input(Bool())) val needReplayFromRS = Vec(LoadPipelineWidth, Input(Bool()))
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
val load_s1 = Vec(LoadPipelineWidth, Flipped(new MaskedLoadForwardQueryIO)) val load_s1 = Vec(LoadPipelineWidth, Flipped(new PipeLoadForwardQueryIO))
val roq = Flipped(new RoqLsqIO) val roq = Flipped(new RoqLsqIO)
val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
val dcache = Flipped(ValidIO(new Refill)) val dcache = Flipped(ValidIO(new Refill))
...@@ -644,6 +644,11 @@ class LoadQueue(implicit p: Parameters) extends XSModule ...@@ -644,6 +644,11 @@ class LoadQueue(implicit p: Parameters) extends XSModule
allowEnqueue := validCount + enqNumber <= (LoadQueueSize - RenameWidth).U allowEnqueue := validCount + enqNumber <= (LoadQueueSize - RenameWidth).U
/**
* misc
*/
io.roq.storeDataRoqWb := DontCare // will be overwriten by store queue's result
// perf counter // perf counter
QueuePerf(LoadQueueSize, validCount, !allowEnqueue) QueuePerf(LoadQueueSize, validCount, !allowEnqueue)
io.lqFull := !allowEnqueue io.lqFull := !allowEnqueue
......
...@@ -7,7 +7,7 @@ import utils._ ...@@ -7,7 +7,7 @@ import utils._
import xiangshan._ import xiangshan._
import xiangshan.cache._ import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants} import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
import xiangshan.backend.roq.RoqLsqIO import xiangshan.backend.roq.{RoqLsqIO, RoqPtr}
import difftest._ import difftest._
class SqPtr(implicit p: Parameters) extends CircularQueuePtr[SqPtr]( class SqPtr(implicit p: Parameters) extends CircularQueuePtr[SqPtr](
...@@ -39,24 +39,25 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -39,24 +39,25 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
val enq = new SqEnqIO val enq = new SqEnqIO
val brqRedirect = Flipped(ValidIO(new Redirect)) val brqRedirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool()) val flush = Input(Bool())
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // store addr, data is not included
val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq)) val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new StoreDataBundle))) // store data, send to sq from rs
val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq)) // write commited store to sbuffer
val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
val forward = Vec(LoadPipelineWidth, Flipped(new MaskedLoadForwardQueryIO)) val forward = Vec(LoadPipelineWidth, Flipped(new PipeLoadForwardQueryIO))
val roq = Flipped(new RoqLsqIO) val roq = Flipped(new RoqLsqIO)
val uncache = new DCacheWordIO val uncache = new DCacheWordIO
// val refill = Flipped(Valid(new DCacheLineReq )) // val refill = Flipped(Valid(new DCacheLineReq ))
val exceptionAddr = new ExceptionAddrIO val exceptionAddr = new ExceptionAddrIO
val sqempty = Output(Bool()) val sqempty = Output(Bool())
val issuePtrExt = Output(new SqPtr) val issuePtrExt = Output(new SqPtr) // used to wake up delayed load/store
val storeIssue = Vec(StorePipelineWidth, Flipped(Valid(new ExuInput))) val storeIssue = Vec(StorePipelineWidth, Flipped(Valid(new ExuInput))) // used to update issuePtrExt
val sqFull = Output(Bool()) val sqFull = Output(Bool())
}) })
// data modules // data modules
val uop = Reg(Vec(StoreQueueSize, new MicroOp)) val uop = Reg(Vec(StoreQueueSize, new MicroOp))
// val data = Reg(Vec(StoreQueueSize, new LsqEntry)) // val data = Reg(Vec(StoreQueueSize, new LsqEntry))
val dataModule = Module(new StoreQueueData(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth)) val dataModule = Module(new SQDataModule(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth))
dataModule.io := DontCare dataModule.io := DontCare
val paddrModule = Module(new SQPaddrModule(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth)) val paddrModule = Module(new SQPaddrModule(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth))
paddrModule.io := DontCare paddrModule.io := DontCare
...@@ -65,8 +66,9 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -65,8 +66,9 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
// state & misc // state & misc
val allocated = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // sq entry has been allocated val allocated = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // sq entry has been allocated
val addrvalid = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // non-mmio addr is valid
val datavalid = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // non-mmio data is valid val datavalid = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // non-mmio data is valid
val writebacked = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // inst has been writebacked to CDB val allvalid = VecInit((0 until StoreQueueSize).map(i => addrvalid(i) && datavalid(i))) // non-mmio data & addr is valid
val issued = Reg(Vec(StoreQueueSize, Bool())) // inst has been issued by rs val issued = Reg(Vec(StoreQueueSize, Bool())) // inst has been issued by rs
val commited = Reg(Vec(StoreQueueSize, Bool())) // inst has been commited by roq val commited = Reg(Vec(StoreQueueSize, Bool())) // inst has been commited by roq
val pending = Reg(Vec(StoreQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq val pending = Reg(Vec(StoreQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq
...@@ -123,7 +125,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -123,7 +125,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
uop(index) := io.enq.req(i).bits uop(index) := io.enq.req(i).bits
allocated(index) := true.B allocated(index) := true.B
datavalid(index) := false.B datavalid(index) := false.B
writebacked(index) := false.B addrvalid(index) := false.B
issued(index) := false.B issued(index) := false.B
commited(index) := false.B commited(index) := false.B
pending(index) := false.B pending(index) := false.B
...@@ -168,7 +170,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -168,7 +170,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
* *
* Most store instructions writeback to regfile in the previous cycle. * Most store instructions writeback to regfile in the previous cycle.
* However, * However,
* (1) For an mmio instruction with exceptions, we need to mark it as datavalid * (1) For an mmio instruction with exceptions, we need to mark it as addrvalid
* (in this way it will trigger an exception when it reaches ROB's head) * (in this way it will trigger an exception when it reaches ROB's head)
* instead of pending to avoid sending them to lower level. * instead of pending to avoid sending them to lower level.
* (2) For an mmio instruction without exceptions, we mark it as pending. * (2) For an mmio instruction without exceptions, we mark it as pending.
...@@ -176,39 +178,33 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -176,39 +178,33 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
* Upon receiving the response, StoreQueue writes back the instruction * Upon receiving the response, StoreQueue writes back the instruction
* through arbiter with store units. It will later commit as normal. * through arbiter with store units. It will later commit as normal.
*/ */
// Write addr to sq
for (i <- 0 until StorePipelineWidth) { for (i <- 0 until StorePipelineWidth) {
dataModule.io.wen(i) := false.B
paddrModule.io.wen(i) := false.B paddrModule.io.wen(i) := false.B
dataModule.io.mask.wen(i) := false.B
val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value
when (io.storeIn(i).fire()) { when (io.storeIn(i).fire()) {
datavalid(stWbIndex) := !io.storeIn(i).bits.mmio addrvalid(stWbIndex) := true.B//!io.storeIn(i).bits.mmio
writebacked(stWbIndex) := !io.storeIn(i).bits.mmio
pending(stWbIndex) := io.storeIn(i).bits.mmio pending(stWbIndex) := io.storeIn(i).bits.mmio
val storeWbData = Wire(new SQDataEntry) dataModule.io.mask.waddr(i) := stWbIndex
storeWbData := DontCare dataModule.io.mask.wdata(i) := io.storeIn(i).bits.mask
storeWbData.mask := io.storeIn(i).bits.mask dataModule.io.mask.wen(i) := true.B
storeWbData.data := io.storeIn(i).bits.data
dataModule.io.waddr(i) := stWbIndex
dataModule.io.wdata(i) := storeWbData
dataModule.io.wen(i) := true.B
paddrModule.io.waddr(i) := stWbIndex paddrModule.io.waddr(i) := stWbIndex
paddrModule.io.wdata(i) := io.storeIn(i).bits.paddr paddrModule.io.wdata(i) := io.storeIn(i).bits.paddr
paddrModule.io.wen(i) := true.B paddrModule.io.wen(i) := true.B
mmio(stWbIndex) := io.storeIn(i).bits.mmio mmio(stWbIndex) := io.storeIn(i).bits.mmio
XSInfo("store write to sq idx %d pc 0x%x vaddr %x paddr %x data %x mmio %x\n", XSInfo("store addr write to sq idx %d pc 0x%x vaddr %x paddr %x mmio %x\n",
io.storeIn(i).bits.uop.sqIdx.value, io.storeIn(i).bits.uop.sqIdx.value,
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.cf.pc,
io.storeIn(i).bits.vaddr, io.storeIn(i).bits.vaddr,
io.storeIn(i).bits.paddr, io.storeIn(i).bits.paddr,
io.storeIn(i).bits.data,
io.storeIn(i).bits.mmio io.storeIn(i).bits.mmio
) )
} }
// vaddrModule write is delayed, as vaddrModule will not be read right after write // vaddrModule write is delayed, as vaddrModule will not be read right after write
vaddrModule.io.waddr(i) := RegNext(stWbIndex) vaddrModule.io.waddr(i) := RegNext(stWbIndex)
...@@ -216,6 +212,31 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -216,6 +212,31 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
vaddrModule.io.wen(i) := RegNext(io.storeIn(i).fire()) vaddrModule.io.wen(i) := RegNext(io.storeIn(i).fire())
} }
// Write data to sq
// One iteration per store-data port (StorePipelineWidth ports in total).
// When a port fires, the store data is written into dataModule, the matching
// sq entry is flagged in datavalid, and the uop's roqIdx is reported on
// io.roq.storeDataRoqWb. Nothing in this loop touches addrvalid or the mask
// write port of dataModule.
for (i <- 0 until StorePipelineWidth) {
// Defaults for a cycle in which this port does not fire: no data write,
// no writeback notification.
dataModule.io.data.wen(i) := false.B
io.roq.storeDataRoqWb(i).valid := false.B
io.roq.storeDataRoqWb(i).bits := DontCare
// Store queue entry targeted by the incoming store-data uop
val stWbIndex = io.storeDataIn(i).bits.uop.sqIdx.value
when (io.storeDataIn(i).fire()) {
// Data for this entry is now available
datavalid(stWbIndex) := true.B
dataModule.io.data.waddr(i) := stWbIndex
// NOTE(review): genWdata is defined outside this view; presumably it formats
// the raw data according to the size encoded in fuOpType(1,0) -- confirm.
dataModule.io.data.wdata(i) := genWdata(io.storeDataIn(i).bits.data, io.storeDataIn(i).bits.uop.ctrl.fuOpType(1,0))
dataModule.io.data.wen(i) := true.B
// Report the store-data writeback for this uop, identified by its roqIdx
io.roq.storeDataRoqWb(i).valid := true.B
io.roq.storeDataRoqWb(i).bits := io.storeDataIn(i).bits.uop.roqIdx
// Log both the raw incoming data and the value actually driven to dataModule
XSInfo("store data write to sq idx %d pc 0x%x data %x -> %x\n",
io.storeDataIn(i).bits.uop.sqIdx.value,
io.storeDataIn(i).bits.uop.cf.pc,
io.storeDataIn(i).bits.data,
dataModule.io.data.wdata(i)
)
}
}
/** /**
* load forward query * load forward query
* *
...@@ -235,25 +256,33 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -235,25 +256,33 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
// i.e. forward1 is the target entries with the same flag bits and forward2 otherwise // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
val differentFlag = deqPtrExt(0).flag =/= io.forward(i).sqIdx.flag val differentFlag = deqPtrExt(0).flag =/= io.forward(i).sqIdx.flag
val forwardMask = io.forward(i).sqIdxMask val forwardMask = io.forward(i).sqIdxMask
val storeWritebackedVec = WireInit(VecInit(Seq.fill(StoreQueueSize)(false.B))) // all addrvalid terms need to be checked
for (j <- 0 until StoreQueueSize) { val addrValidVec = WireInit(VecInit((0 until StoreQueueSize).map(i => addrvalid(i) && allocated(i))))
storeWritebackedVec(j) := datavalid(j) && allocated(j) // all datavalid terms need to be checked val dataValidVec = WireInit(VecInit((0 until StoreQueueSize).map(i => datavalid(i))))
} val allValidVec = WireInit(VecInit((0 until StoreQueueSize).map(i => addrvalid(i) && datavalid(i) && allocated(i))))
val needForward1 = Mux(differentFlag, ~deqMask, deqMask ^ forwardMask) & storeWritebackedVec.asUInt val canForward1 = Mux(differentFlag, ~deqMask, deqMask ^ forwardMask) & allValidVec.asUInt
val needForward2 = Mux(differentFlag, forwardMask, 0.U(StoreQueueSize.W)) & storeWritebackedVec.asUInt val canForward2 = Mux(differentFlag, forwardMask, 0.U(StoreQueueSize.W)) & allValidVec.asUInt
val needForward = Mux(differentFlag, ~deqMask | forwardMask, deqMask ^ forwardMask)
XSDebug(p"$i f1 ${Binary(needForward1)} f2 ${Binary(needForward2)} " +
XSDebug(p"$i f1 ${Binary(canForward1)} f2 ${Binary(canForward2)} " +
p"sqIdx ${io.forward(i).sqIdx} pa ${Hexadecimal(io.forward(i).paddr)}\n" p"sqIdx ${io.forward(i).sqIdx} pa ${Hexadecimal(io.forward(i).paddr)}\n"
) )
// do real fwd query // do real fwd query (cam lookup in load_s1)
dataModule.io.needForward(i)(0) := needForward1 & paddrModule.io.forwardMmask(i).asUInt dataModule.io.needForward(i)(0) := canForward1 & paddrModule.io.forwardMmask(i).asUInt
dataModule.io.needForward(i)(1) := needForward2 & paddrModule.io.forwardMmask(i).asUInt dataModule.io.needForward(i)(1) := canForward2 & paddrModule.io.forwardMmask(i).asUInt
paddrModule.io.forwardMdata(i) := io.forward(i).paddr paddrModule.io.forwardMdata(i) := io.forward(i).paddr
// Forward result will be generated 1 cycle later (load_s2)
io.forward(i).forwardMask := dataModule.io.forwardMask(i) io.forward(i).forwardMask := dataModule.io.forwardMask(i)
io.forward(i).forwardData := dataModule.io.forwardData(i) io.forward(i).forwardData := dataModule.io.forwardData(i)
// If addr match, data not ready, mark it as dataInvalid
// load_s1: generate dataInvalid in load_s1 (dataInvalidFast), used to set fastUop
io.forward(i).dataInvalidFast := (addrValidVec.asUInt & ~dataValidVec.asUInt & paddrModule.io.forwardMmask(i).asUInt & needForward).orR
// load_s2
io.forward(i).dataInvalid := RegNext(io.forward(i).dataInvalidFast)
} }
/** /**
...@@ -262,7 +291,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -262,7 +291,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
* States: * States:
* (1) writeback from store units: mark as pending * (1) writeback from store units: mark as pending
* (2) when they reach ROB's head, they can be sent to uncache channel * (2) when they reach ROB's head, they can be sent to uncache channel
* (3) response from uncache channel: mark as datavalid * (3) response from uncache channel: mark as datavalid
* (4) writeback to ROB (and other units): mark as writebacked * (4) writeback to ROB (and other units): mark as writebacked
* (5) ROB commits the instruction: same as normal instructions * (5) ROB commits the instruction: same as normal instructions
*/ */
...@@ -271,7 +300,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -271,7 +300,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
val uncacheState = RegInit(s_idle) val uncacheState = RegInit(s_idle)
switch(uncacheState) { switch(uncacheState) {
is(s_idle) { is(s_idle) {
when(io.roq.pendingst && pending(deqPtr) && allocated(deqPtr)) { when(io.roq.pendingst && pending(deqPtr) && allocated(deqPtr) && datavalid(deqPtr) && addrvalid(deqPtr)) {
uncacheState := s_req uncacheState := s_req
} }
} }
...@@ -306,6 +335,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -306,6 +335,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
io.uncache.req.bits.id := DontCare io.uncache.req.bits.id := DontCare
when(io.uncache.req.fire()){ when(io.uncache.req.fire()){
// mmio store should not be committed until uncache req is sent
pending(deqPtr) := false.B pending(deqPtr) := false.B
XSDebug( XSDebug(
...@@ -319,12 +349,9 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -319,12 +349,9 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
// (3) response from uncache channel: mark as datavalid // (3) response from uncache channel: mark as datavalid
io.uncache.resp.ready := true.B io.uncache.resp.ready := true.B
when (io.uncache.resp.fire()) {
datavalid(deqPtr) := true.B
}
// (4) writeback to ROB (and other units): mark as writebacked // (4) writeback to ROB (and other units): mark as writebacked
io.mmioStout.valid := uncacheState === s_wb // allocated(deqPtr) && datavalid(deqPtr) && !writebacked(deqPtr) io.mmioStout.valid := uncacheState === s_wb
io.mmioStout.bits.uop := uop(deqPtr) io.mmioStout.bits.uop := uop(deqPtr)
io.mmioStout.bits.uop.sqIdx := deqPtrExt(0) io.mmioStout.bits.uop.sqIdx := deqPtrExt(0)
io.mmioStout.bits.data := dataModule.io.rdata(0).data // dataModule.io.rdata.read(deqPtr) io.mmioStout.bits.data := dataModule.io.rdata(0).data // dataModule.io.rdata.read(deqPtr)
...@@ -335,7 +362,6 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -335,7 +362,6 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
io.mmioStout.bits.debug.isPerfCnt := false.B io.mmioStout.bits.debug.isPerfCnt := false.B
io.mmioStout.bits.fflags := DontCare io.mmioStout.bits.fflags := DontCare
when (io.mmioStout.fire()) { when (io.mmioStout.fire()) {
writebacked(deqPtr) := true.B
allocated(deqPtr) := false.B allocated(deqPtr) := false.B
} }
...@@ -360,6 +386,8 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -360,6 +386,8 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
// if !sbuffer.fire(), read the same ptr // if !sbuffer.fire(), read the same ptr
// if sbuffer.fire(), read next // if sbuffer.fire(), read next
io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio(ptr) io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio(ptr)
// Note that store data/addr should both be valid after store's commit
assert(!io.sbuffer(i).valid || allvalid(ptr))
io.sbuffer(i).bits.cmd := MemoryOpConstants.M_XWR io.sbuffer(i).bits.cmd := MemoryOpConstants.M_XWR
io.sbuffer(i).bits.addr := paddrModule.io.rdata(i) io.sbuffer(i).bits.addr := paddrModule.io.rdata(i)
io.sbuffer(i).bits.data := dataModule.io.rdata(i).data io.sbuffer(i).bits.data := dataModule.io.rdata(i).data
...@@ -460,10 +488,11 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete ...@@ -460,10 +488,11 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
if (i % 4 == 0) XSDebug("") if (i % 4 == 0) XSDebug("")
XSDebug(false, true.B, "%x ", uop(i).cf.pc) XSDebug(false, true.B, "%x ", uop(i).cf.pc)
PrintFlag(allocated(i), "a") PrintFlag(allocated(i), "a")
PrintFlag(allocated(i) && datavalid(i), "v") PrintFlag(allocated(i) && addrvalid(i), "a")
PrintFlag(allocated(i) && writebacked(i), "w") PrintFlag(allocated(i) && datavalid(i), "d")
PrintFlag(allocated(i) && commited(i), "c") PrintFlag(allocated(i) && commited(i), "c")
PrintFlag(allocated(i) && pending(i), "p") PrintFlag(allocated(i) && pending(i), "p")
PrintFlag(allocated(i) && mmio(i), "m")
XSDebug(false, true.B, " ") XSDebug(false, true.B, " ")
if (i % 4 == 3 || i == StoreQueueSize - 1) XSDebug(false, true.B, "\n") if (i % 4 == 3 || i == StoreQueueSize - 1) XSDebug(false, true.B, "\n")
} }
......
...@@ -63,9 +63,16 @@ class SQData8Module(size: Int, numRead: Int, numWrite: Int, numForward: Int)(imp ...@@ -63,9 +63,16 @@ class SQData8Module(size: Int, numRead: Int, numWrite: Int, numForward: Int)(imp
val io = IO(new Bundle() { val io = IO(new Bundle() {
val raddr = Vec(numRead, Input(UInt(log2Up(size).W))) val raddr = Vec(numRead, Input(UInt(log2Up(size).W)))
val rdata = Vec(numRead, Output(new SQData8Entry)) val rdata = Vec(numRead, Output(new SQData8Entry))
val wen = Vec(numWrite, Input(Bool())) val data = new Bundle() {
val waddr = Vec(numWrite, Input(UInt(log2Up(size).W))) val wen = Vec(numWrite, Input(Bool()))
val wdata = Vec(numWrite, Input(new SQData8Entry)) val waddr = Vec(numWrite, Input(UInt(log2Up(size).W)))
val wdata = Vec(numWrite, Input(UInt((XLEN/8).W)))
}
val mask = new Bundle() {
val wen = Vec(numWrite, Input(Bool()))
val waddr = Vec(numWrite, Input(UInt(log2Up(size).W)))
val wdata = Vec(numWrite, Input(Bool()))
}
val needForward = Input(Vec(numForward, Vec(2, UInt(size.W)))) val needForward = Input(Vec(numForward, Vec(2, UInt(size.W))))
val forwardValid = Vec(numForward, Output(Bool())) val forwardValid = Vec(numForward, Output(Bool()))
...@@ -76,10 +83,15 @@ class SQData8Module(size: Int, numRead: Int, numWrite: Int, numForward: Int)(imp ...@@ -76,10 +83,15 @@ class SQData8Module(size: Int, numRead: Int, numWrite: Int, numForward: Int)(imp
val data = Reg(Vec(size, new SQData8Entry)) val data = Reg(Vec(size, new SQData8Entry))
// writeback to lq/sq // writeback to sq
(0 until numWrite).map(i => { (0 until numWrite).map(i => {
when(io.wen(i)){ when(io.data.wen(i)){
data(io.waddr(i)) := io.wdata(i) data(io.data.waddr(i)).data := io.data.wdata(i)
}
})
(0 until numWrite).map(i => {
when(io.mask.wen(i)){
data(io.mask.waddr(i)).valid := io.mask.wdata(i)
} }
}) })
...@@ -91,7 +103,12 @@ class SQData8Module(size: Int, numRead: Int, numWrite: Int, numForward: Int)(imp ...@@ -91,7 +103,12 @@ class SQData8Module(size: Int, numRead: Int, numWrite: Int, numForward: Int)(imp
// DataModuleTemplate should not be used when there're any write conflicts // DataModuleTemplate should not be used when there're any write conflicts
for (i <- 0 until numWrite) { for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) { for (j <- i+1 until numWrite) {
assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j))) assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
}
}
for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) {
assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
} }
} }
...@@ -150,13 +167,20 @@ class SQDataEntry(implicit p: Parameters) extends XSBundle { ...@@ -150,13 +167,20 @@ class SQDataEntry(implicit p: Parameters) extends XSBundle {
val data = UInt(XLEN.W) val data = UInt(XLEN.W)
} }
class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper { class SQDataModule(size: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
val io = IO(new Bundle() { val io = IO(new Bundle() {
val raddr = Vec(numRead, Input(UInt(log2Up(size).W))) val raddr = Vec(numRead, Input(UInt(log2Up(size).W)))
val rdata = Vec(numRead, Output(new SQDataEntry)) val rdata = Vec(numRead, Output(new SQDataEntry))
val wen = Vec(numWrite, Input(Bool())) val data = new Bundle() {
val waddr = Vec(numWrite, Input(UInt(log2Up(size).W))) val wen = Vec(numWrite, Input(Bool()))
val wdata = Vec(numWrite, Input(new SQDataEntry)) val waddr = Vec(numWrite, Input(UInt(log2Up(size).W)))
val wdata = Vec(numWrite, Input(UInt(XLEN.W)))
}
val mask = new Bundle() {
val wen = Vec(numWrite, Input(Bool()))
val waddr = Vec(numWrite, Input(UInt(log2Up(size).W)))
val wdata = Vec(numWrite, Input(UInt(8.W)))
}
val needForward = Input(Vec(numForward, Vec(2, UInt(size.W)))) val needForward = Input(Vec(numForward, Vec(2, UInt(size.W))))
val forwardMask = Vec(numForward, Output(Vec(8, Bool()))) val forwardMask = Vec(numForward, Output(Vec(8, Bool())))
...@@ -169,10 +193,12 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int)(im ...@@ -169,10 +193,12 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int)(im
for (i <- 0 until numWrite) { for (i <- 0 until numWrite) {
// write to data8 // write to data8
for (j <- 0 until 8) { for (j <- 0 until 8) {
data8(j).io.waddr(i) := io.waddr(i) data8(j).io.mask.waddr(i) := io.mask.waddr(i)
data8(j).io.wdata(i).valid := io.wdata(i).mask(j) data8(j).io.mask.wdata(i) := io.mask.wdata(i)(j)
data8(j).io.wdata(i).data := io.wdata(i).data(8*(j+1)-1, 8*j) data8(j).io.mask.wen(i) := io.mask.wen(i)
data8(j).io.wen(i) := io.wen(i) data8(j).io.data.waddr(i) := io.data.waddr(i)
data8(j).io.data.wdata(i) := io.data.wdata(i)(8*(j+1)-1, 8*j)
data8(j).io.data.wen(i) := io.data.wen(i)
} }
} }
...@@ -188,7 +214,12 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int)(im ...@@ -188,7 +214,12 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int)(im
// DataModuleTemplate should not be used when there're any write conflicts // DataModuleTemplate should not be used when there're any write conflicts
for (i <- 0 until numWrite) { for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) { for (j <- i+1 until numWrite) {
assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j))) assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
}
}
for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) {
assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
} }
} }
......
...@@ -11,12 +11,13 @@ import difftest._ ...@@ -11,12 +11,13 @@ import difftest._
class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstants{ class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstants{
val io = IO(new Bundle() { val io = IO(new Bundle() {
val in = Flipped(Decoupled(new ExuInput)) val in = Flipped(Decoupled(new ExuInput))
val storeDataIn = Flipped(Valid(new StoreDataBundle)) // src2 from rs
val out = Decoupled(new ExuOutput) val out = Decoupled(new ExuOutput)
val dcache = new DCacheWordIO val dcache = new DCacheWordIO
val dtlb = new TlbRequestIO val dtlb = new TlbRequestIO
val rsIdx = Input(UInt(log2Up(IssQueSize).W)) val rsIdx = Input(UInt(log2Up(IssQueSize).W))
val flush_sbuffer = new SbufferFlushBundle val flush_sbuffer = new SbufferFlushBundle
val tlbFeedback = ValidIO(new TlbFeedback) val rsFeedback = ValidIO(new RSFeedback)
val redirect = Flipped(ValidIO(new Redirect)) val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool()) val flush = Input(Bool())
val exceptionAddr = ValidIO(UInt(VAddrBits.W)) val exceptionAddr = ValidIO(UInt(VAddrBits.W))
...@@ -27,6 +28,8 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstant ...@@ -27,6 +28,8 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstant
//------------------------------------------------------- //-------------------------------------------------------
val s_invalid :: s_tlb :: s_flush_sbuffer_req :: s_flush_sbuffer_resp :: s_cache_req :: s_cache_resp :: s_finish :: Nil = Enum(7) val s_invalid :: s_tlb :: s_flush_sbuffer_req :: s_flush_sbuffer_resp :: s_cache_req :: s_cache_resp :: s_finish :: Nil = Enum(7)
val state = RegInit(s_invalid) val state = RegInit(s_invalid)
val addr_valid = RegInit(false.B)
val data_valid = RegInit(false.B)
val in = Reg(new ExuInput()) val in = Reg(new ExuInput())
val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec())) val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
val atom_override_xtval = RegInit(false.B) val atom_override_xtval = RegInit(false.B)
...@@ -68,18 +71,30 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstant ...@@ -68,18 +71,30 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstant
io.in.ready := true.B io.in.ready := true.B
when (io.in.fire()) { when (io.in.fire()) {
in := io.in.bits in := io.in.bits
in.src2 := in.src2 // leave src2 unchanged
addr_valid := true.B
}
when (io.storeDataIn.fire()) {
in.src2 := io.storeDataIn.bits.data
data_valid := true.B
}
when(data_valid && addr_valid) {
state := s_tlb state := s_tlb
addr_valid := false.B
data_valid := false.B
} }
} }
// Send TLB feedback to store issue queue // Send TLB feedback to store issue queue
// we send feedback right after we receive the request // we send feedback right after we receive the request
// also, we always treat amo as tlb hit // also, we always treat amo as tlb hit
// since we will continue polling tlb by ourselves // since we will continue polling tlb by ourselves
io.tlbFeedback.valid := RegNext(RegNext(io.in.valid)) io.rsFeedback.valid := RegNext(RegNext(io.in.valid))
io.tlbFeedback.bits.hit := true.B io.rsFeedback.bits.hit := true.B
io.tlbFeedback.bits.rsIdx := RegEnable(io.rsIdx, io.in.valid) io.rsFeedback.bits.rsIdx := RegEnable(io.rsIdx, io.in.valid)
io.tlbFeedback.bits.flushState := DontCare io.rsFeedback.bits.flushState := DontCare
io.rsFeedback.bits.sourceType := DontCare
// tlb translation, manipulating signals && deal with exception // tlb translation, manipulating signals && deal with exception
when (state === s_tlb) { when (state === s_tlb) {
......
...@@ -13,7 +13,7 @@ class LoadToLsqIO(implicit p: Parameters) extends XSBundle { ...@@ -13,7 +13,7 @@ class LoadToLsqIO(implicit p: Parameters) extends XSBundle {
val ldout = Flipped(DecoupledIO(new ExuOutput)) val ldout = Flipped(DecoupledIO(new ExuOutput))
val loadDataForwarded = Output(Bool()) val loadDataForwarded = Output(Bool())
val needReplayFromRS = Output(Bool()) val needReplayFromRS = Output(Bool())
val forward = new MaskedLoadForwardQueryIO val forward = new PipeLoadForwardQueryIO
} }
// Load Pipeline Stage 0 // Load Pipeline Stage 0
...@@ -99,7 +99,7 @@ class LoadUnit_S1(implicit p: Parameters) extends XSModule { ...@@ -99,7 +99,7 @@ class LoadUnit_S1(implicit p: Parameters) extends XSModule {
val dcachePAddr = Output(UInt(PAddrBits.W)) val dcachePAddr = Output(UInt(PAddrBits.W))
val dcacheKill = Output(Bool()) val dcacheKill = Output(Bool())
val sbuffer = new LoadForwardQueryIO val sbuffer = new LoadForwardQueryIO
val lsq = new MaskedLoadForwardQueryIO val lsq = new PipeLoadForwardQueryIO
}) })
val s1_uop = io.in.bits.uop val s1_uop = io.in.bits.uop
...@@ -156,7 +156,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper { ...@@ -156,7 +156,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
val io = IO(new Bundle() { val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle)) val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle) val out = Decoupled(new LsPipelineBundle)
val tlbFeedback = ValidIO(new TlbFeedback) val rsFeedback = ValidIO(new RSFeedback)
val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp)) val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
val lsq = new LoadForwardQueryIO val lsq = new LoadForwardQueryIO
val sbuffer = new LoadForwardQueryIO val sbuffer = new LoadForwardQueryIO
...@@ -168,6 +168,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper { ...@@ -168,6 +168,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
val s2_mask = io.in.bits.mask val s2_mask = io.in.bits.mask
val s2_paddr = io.in.bits.paddr val s2_paddr = io.in.bits.paddr
val s2_tlb_miss = io.in.bits.tlbMiss val s2_tlb_miss = io.in.bits.tlbMiss
val s2_data_invalid = io.lsq.dataInvalid
val s2_exception = selectLoad(io.in.bits.uop.cf.exceptionVec, false).asUInt.orR val s2_exception = selectLoad(io.in.bits.uop.cf.exceptionVec, false).asUInt.orR
val s2_mmio = io.in.bits.mmio && !s2_exception val s2_mmio = io.in.bits.mmio && !s2_exception
val s2_cache_miss = io.dcacheResp.bits.miss val s2_cache_miss = io.dcacheResp.bits.miss
...@@ -178,10 +179,18 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper { ...@@ -178,10 +179,18 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
assert(!(io.in.valid && dcacheShouldResp && !io.dcacheResp.valid), "DCache response got lost") assert(!(io.in.valid && dcacheShouldResp && !io.dcacheResp.valid), "DCache response got lost")
// feedback tlb result to RS // feedback tlb result to RS
io.tlbFeedback.valid := io.in.valid io.rsFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s2_tlb_miss && (!s2_cache_replay || s2_mmio || s2_exception) io.rsFeedback.bits.hit := !s2_tlb_miss && (!s2_cache_replay || s2_mmio || s2_exception) && !s2_data_invalid
io.tlbFeedback.bits.rsIdx := io.in.bits.rsIdx io.rsFeedback.bits.rsIdx := io.in.bits.rsIdx
io.tlbFeedback.bits.flushState := io.in.bits.ptwBack io.rsFeedback.bits.flushState := io.in.bits.ptwBack
io.rsFeedback.bits.sourceType := Mux(s2_tlb_miss, RSFeedbackType.tlbMiss,
Mux(io.lsq.dataInvalid,
RSFeedbackType.dataInvalid,
RSFeedbackType.mshrFull
)
)
// s2_cache_replay is quite slow to generate, send it separately to LQ
io.needReplayFromRS := s2_cache_replay io.needReplayFromRS := s2_cache_replay
// merge forward result // merge forward result
...@@ -189,7 +198,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper { ...@@ -189,7 +198,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
val forwardMask = Wire(Vec(8, Bool())) val forwardMask = Wire(Vec(8, Bool()))
val forwardData = Wire(Vec(8, UInt(8.W))) val forwardData = Wire(Vec(8, UInt(8.W)))
val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U && !io.lsq.dataInvalid
io.lsq := DontCare io.lsq := DontCare
io.sbuffer := DontCare io.sbuffer := DontCare
...@@ -221,7 +230,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper { ...@@ -221,7 +230,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
)) ))
val rdataPartialLoad = rdataHelper(s2_uop, rdataSel) val rdataPartialLoad = rdataHelper(s2_uop, rdataSel)
io.out.valid := io.in.valid && !s2_tlb_miss io.out.valid := io.in.valid && !s2_tlb_miss && !s2_data_invalid
// Inst will be canceled in store queue / lsq, // Inst will be canceled in store queue / lsq,
// so we do not need to care about flush in load / store unit's out.valid // so we do not need to care about flush in load / store unit's out.valid
io.out.bits := io.in.bits io.out.bits := io.in.bits
...@@ -253,9 +262,9 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper { ...@@ -253,9 +262,9 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
XSPerfAccumulate("dcache_miss", io.in.valid && s2_cache_miss) XSPerfAccumulate("dcache_miss", io.in.valid && s2_cache_miss)
XSPerfAccumulate("full_forward", io.in.valid && fullForward) XSPerfAccumulate("full_forward", io.in.valid && fullForward)
XSPerfAccumulate("dcache_miss_full_forward", io.in.valid && s2_cache_miss && fullForward) XSPerfAccumulate("dcache_miss_full_forward", io.in.valid && s2_cache_miss && fullForward)
XSPerfAccumulate("replay", io.tlbFeedback.valid && !io.tlbFeedback.bits.hit) XSPerfAccumulate("replay", io.rsFeedback.valid && !io.rsFeedback.bits.hit)
XSPerfAccumulate("replay_tlb_miss", io.tlbFeedback.valid && !io.tlbFeedback.bits.hit && s2_tlb_miss) XSPerfAccumulate("replay_tlb_miss", io.rsFeedback.valid && !io.rsFeedback.bits.hit && s2_tlb_miss)
XSPerfAccumulate("replay_cache", io.tlbFeedback.valid && !io.tlbFeedback.bits.hit && !s2_tlb_miss && s2_cache_replay) XSPerfAccumulate("replay_cache", io.rsFeedback.valid && !io.rsFeedback.bits.hit && !s2_tlb_miss && s2_cache_replay)
XSPerfAccumulate("stall_out", io.out.valid && !io.out.ready) XSPerfAccumulate("stall_out", io.out.valid && !io.out.ready)
} }
...@@ -265,7 +274,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper { ...@@ -265,7 +274,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
val ldout = Decoupled(new ExuOutput) val ldout = Decoupled(new ExuOutput)
val redirect = Flipped(ValidIO(new Redirect)) val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool()) val flush = Input(Bool())
val tlbFeedback = ValidIO(new TlbFeedback) val rsFeedback = ValidIO(new RSFeedback)
val rsIdx = Input(UInt(log2Up(IssQueSize).W)) val rsIdx = Input(UInt(log2Up(IssQueSize).W))
val isFirstIssue = Input(Bool()) val isFirstIssue = Input(Bool())
val dcache = new DCacheLoadIO val dcache = new DCacheLoadIO
...@@ -298,11 +307,13 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper { ...@@ -298,11 +307,13 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
load_s2.io.dcacheResp <> io.dcache.resp load_s2.io.dcacheResp <> io.dcache.resp
load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
load_s2.io.lsq.dataInvalid <> io.lsq.forward.dataInvalid
load_s2.io.sbuffer.forwardData <> io.sbuffer.forwardData load_s2.io.sbuffer.forwardData <> io.sbuffer.forwardData
load_s2.io.sbuffer.forwardMask <> io.sbuffer.forwardMask load_s2.io.sbuffer.forwardMask <> io.sbuffer.forwardMask
load_s2.io.sbuffer.dataInvalid <> io.sbuffer.dataInvalid // always false
load_s2.io.dataForwarded <> io.lsq.loadDataForwarded load_s2.io.dataForwarded <> io.lsq.loadDataForwarded
io.tlbFeedback.bits := RegNext(load_s2.io.tlbFeedback.bits) io.rsFeedback.bits := RegNext(load_s2.io.rsFeedback.bits)
io.tlbFeedback.valid := RegNext(load_s2.io.tlbFeedback.valid && !load_s2.io.out.bits.uop.roqIdx.needFlush(io.redirect, io.flush)) io.rsFeedback.valid := RegNext(load_s2.io.rsFeedback.valid && !load_s2.io.out.bits.uop.roqIdx.needFlush(io.redirect, io.flush))
io.lsq.needReplayFromRS := load_s2.io.needReplayFromRS io.lsq.needReplayFromRS := load_s2.io.needReplayFromRS
// pre-calculate sqIdx mask in s0, then send it to lsq in s1 for forwarding // pre-calculate sqIdx mask in s0, then send it to lsq in s1 for forwarding
...@@ -313,7 +324,8 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper { ...@@ -313,7 +324,8 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
// load_s2.io.dcacheResp.bits.data := Mux1H(RegNext(io.dcache.s1_hit_way), RegNext(io.dcache.s1_data)) // load_s2.io.dcacheResp.bits.data := Mux1H(RegNext(io.dcache.s1_hit_way), RegNext(io.dcache.s1_data))
// assert(load_s2.io.dcacheResp.bits.data === io.dcache.resp.bits.data) // assert(load_s2.io.dcacheResp.bits.data === io.dcache.resp.bits.data)
io.fastUop.valid := io.dcache.s1_hit_way.orR && !io.dcache.s1_disable_fast_wakeup && load_s1.io.in.valid && !load_s1.io.dcacheKill io.fastUop.valid := io.dcache.s1_hit_way.orR && !io.dcache.s1_disable_fast_wakeup && load_s1.io.in.valid &&
!load_s1.io.dcacheKill && !io.lsq.forward.dataInvalidFast
io.fastUop.bits := load_s1.io.out.bits.uop io.fastUop.bits := load_s1.io.out.bits.uop
XSDebug(load_s0.io.out.valid, XSDebug(load_s0.io.out.valid,
......
...@@ -39,7 +39,9 @@ class StoreUnit_S0(implicit p: Parameters) extends XSModule { ...@@ -39,7 +39,9 @@ class StoreUnit_S0(implicit p: Parameters) extends XSModule {
io.out.bits := DontCare io.out.bits := DontCare
io.out.bits.vaddr := saddr io.out.bits.vaddr := saddr
io.out.bits.data := genWdata(io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType(1,0)) // Now data use its own io
// io.out.bits.data := genWdata(io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.bits.data := io.in.bits.src2 // FIXME: remove data from pipeline
io.out.bits.uop := io.in.bits.uop io.out.bits.uop := io.in.bits.uop
io.out.bits.miss := DontCare io.out.bits.miss := DontCare
io.out.bits.rsIdx := io.rsIdx io.out.bits.rsIdx := io.rsIdx
...@@ -70,7 +72,7 @@ class StoreUnit_S1(implicit p: Parameters) extends XSModule { ...@@ -70,7 +72,7 @@ class StoreUnit_S1(implicit p: Parameters) extends XSModule {
val out = Decoupled(new LsPipelineBundle) val out = Decoupled(new LsPipelineBundle)
val lsq = ValidIO(new LsPipelineBundle) val lsq = ValidIO(new LsPipelineBundle)
val dtlbResp = Flipped(DecoupledIO(new TlbResp)) val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback) val rsFeedback = ValidIO(new RSFeedback)
}) })
val s1_paddr = io.dtlbResp.bits.paddr val s1_paddr = io.dtlbResp.bits.paddr
...@@ -83,14 +85,15 @@ class StoreUnit_S1(implicit p: Parameters) extends XSModule { ...@@ -83,14 +85,15 @@ class StoreUnit_S1(implicit p: Parameters) extends XSModule {
io.dtlbResp.ready := true.B // TODO: why dtlbResp needs a ready? io.dtlbResp.ready := true.B // TODO: why dtlbResp needs a ready?
// Send TLB feedback to store issue queue // Send TLB feedback to store issue queue
io.tlbFeedback.valid := io.in.valid io.rsFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s1_tlb_miss io.rsFeedback.bits.hit := !s1_tlb_miss
io.tlbFeedback.bits.flushState := io.dtlbResp.bits.ptwBack io.rsFeedback.bits.flushState := io.dtlbResp.bits.ptwBack
io.tlbFeedback.bits.rsIdx := io.in.bits.rsIdx io.rsFeedback.bits.rsIdx := io.in.bits.rsIdx
XSDebug(io.tlbFeedback.valid, io.rsFeedback.bits.sourceType := RSFeedbackType.tlbMiss
XSDebug(io.rsFeedback.valid,
"S1 Store: tlbHit: %d roqIdx: %d\n", "S1 Store: tlbHit: %d roqIdx: %d\n",
io.tlbFeedback.bits.hit, io.rsFeedback.bits.hit,
io.tlbFeedback.bits.rsIdx io.rsFeedback.bits.rsIdx
) )
...@@ -146,7 +149,7 @@ class StoreUnit(implicit p: Parameters) extends XSModule { ...@@ -146,7 +149,7 @@ class StoreUnit(implicit p: Parameters) extends XSModule {
val stin = Flipped(Decoupled(new ExuInput)) val stin = Flipped(Decoupled(new ExuInput))
val redirect = Flipped(ValidIO(new Redirect)) val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool()) val flush = Input(Bool())
val tlbFeedback = ValidIO(new TlbFeedback) val rsFeedback = ValidIO(new RSFeedback)
val dtlb = new TlbRequestIO() val dtlb = new TlbRequestIO()
val rsIdx = Input(UInt(log2Up(IssQueSize).W)) val rsIdx = Input(UInt(log2Up(IssQueSize).W))
val isFirstIssue = Input(Bool()) val isFirstIssue = Input(Bool())
...@@ -168,7 +171,7 @@ class StoreUnit(implicit p: Parameters) extends XSModule { ...@@ -168,7 +171,7 @@ class StoreUnit(implicit p: Parameters) extends XSModule {
store_s1.io.lsq <> io.lsq // send result to sq store_s1.io.lsq <> io.lsq // send result to sq
store_s1.io.dtlbResp <> io.dtlb.resp store_s1.io.dtlbResp <> io.dtlb.resp
store_s1.io.tlbFeedback <> io.tlbFeedback store_s1.io.rsFeedback <> io.rsFeedback
PipelineConnect(store_s1.io.out, store_s2.io.in, true.B, store_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect, io.flush)) PipelineConnect(store_s1.io.out, store_s2.io.in, true.B, store_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect, io.flush))
......
...@@ -387,6 +387,7 @@ class NewSbuffer(implicit p: Parameters) extends XSModule with HasSbufferConst { ...@@ -387,6 +387,7 @@ class NewSbuffer(implicit p: Parameters) extends XSModule with HasSbufferConst {
val selectedInflightMask = Mux1H(line_offset_reg, Mux1H(inflight_tag_match_reg, mask).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool())))) val selectedInflightMask = Mux1H(line_offset_reg, Mux1H(inflight_tag_match_reg, mask).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool()))))
val selectedInflightData = Mux1H(line_offset_reg, Mux1H(inflight_tag_match_reg, data).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))) val selectedInflightData = Mux1H(line_offset_reg, Mux1H(inflight_tag_match_reg, data).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
forward.dataInvalid := false.B // data in store line merge buffer is always ready
for (j <- 0 until DataBytes) { for (j <- 0 until DataBytes) {
forward.forwardMask(j) := false.B forward.forwardMask(j) := false.B
forward.forwardData(j) := DontCare forward.forwardData(j) := DontCare
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册