Commit 743bc277 authored by: A Allen

L1DCache: a complete rewrite.

Now, it can compile.
Parent 98c3bf7a
......@@ -58,6 +58,8 @@ object OneHot {
def OH1ToUInt(x: UInt): UInt = OHToUInt(OH1ToOH(x))
def UIntToOH1(x: UInt, width: Int): UInt = ~((-1).S(width.W).asUInt << x)(width-1, 0)
def UIntToOH1(x: UInt): UInt = UIntToOH1(x, (1 << x.getWidth) - 1)
def checkOneHot(in: Bits): Unit = assert(PopCount(in) <= 1.U)
def checkOneHot(in: Iterable[Bool]): Unit = assert(PopCount(in) <= 1.U)
}
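// Illustrative usage (a sketch, not part of this patch): checkOneHot raises an
// assertion whenever more than one of its inputs is asserted, e.g.
//   OneHot.checkOneHot(Seq(io.primary_ready, io.secondary_ready)) // at most one may be high
//   OneHot.checkOneHot(someOneHotVec.asUInt)                      // UInt overload
// Note that an all-zero input also passes, since PopCount(in) <= 1.U holds then;
// the names io.primary_ready/io.secondary_ready above are just examples.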
object LowerMask {
......
......@@ -191,8 +191,9 @@ trait HasXSParameter {
tagECC = Some("secded"),
dataECC = Some("secded"),
nMissEntries = 16,
nLoadMissEntries = 8,
nStoreMissEntries = 8
nProbeEntries = 16,
nReleaseEntries = 16,
nStoreReplayEntries = 16
)
val LRSCCycles = 100
......
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
class AtomicsReplayEntry extends DCacheModule
{
val io = IO(new Bundle {
val lsu = Flipped(new DCacheWordIO)
val pipe_req = Decoupled(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
val block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_pipe_req :: s_pipe_resp :: s_resp :: Nil = Enum(4)
val state = RegInit(s_invalid)
// atomics are word-granularity requests, so buffer a DCacheWordReq
val req = Reg(new DCacheWordReq)
// assign default values to output signals
io.lsu.req.ready := state === s_invalid
io.lsu.resp.valid := false.B
io.lsu.resp.bits := DontCare
io.pipe_req.valid := false.B
io.pipe_req.bits := DontCare
io.block_addr.valid := state =/= s_invalid
io.block_addr.bits := req.addr
when (state =/= s_invalid) {
XSDebug("AtomicsReplayEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)
}
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
when (io.lsu.req.fire()) {
req := io.lsu.req.bits
state := s_pipe_req
}
}
// --------------------------------------------
// replay
when (state === s_pipe_req) {
io.pipe_req.valid := true.B
val pipe_req = io.pipe_req.bits
pipe_req := DontCare
pipe_req.miss := false.B
pipe_req.probe := false.B
pipe_req.source := AMO_SOURCE.U
pipe_req.cmd := req.cmd
pipe_req.addr := get_block_addr(req.addr)
pipe_req.word_idx := get_word(req.addr)
pipe_req.amo_data := req.data
pipe_req.amo_mask := req.mask
when (io.pipe_req.fire()) {
state := s_pipe_resp
}
}
val resp_data = Reg(UInt())
when (state === s_pipe_resp) {
// when not miss:
// everything is OK, simply send the response back to the atomics unit
// when miss and not replay:
// wait for missQueue to handle the miss and replay our request
// when miss and replay:
// req missed and failed to enter missQueue, manually replay it later
// TODO: add assertions:
// 1. add a replay delay counter?
// 2. when req gets into MissQueue, it should not miss any more
when (io.pipe_resp.fire()) {
when (io.pipe_resp.bits.miss) {
when (io.pipe_resp.bits.replay) {
state := s_pipe_req
}
} .otherwise {
resp_data := io.pipe_resp.bits.data
state := s_resp
}
}
}
// --------------------------------------------
when (state === s_resp) {
io.lsu.resp.valid := true.B
io.lsu.resp.bits := DontCare
io.lsu.resp.bits.data := resp_data
io.lsu.resp.bits.id := req.id
when (io.lsu.resp.fire()) {
state := s_invalid
}
}
// debug output
when (io.lsu.req.fire()) {
io.lsu.req.bits.dump()
}
when (io.lsu.resp.fire()) {
io.lsu.resp.bits.dump()
}
when (io.pipe_req.fire()) {
io.pipe_req.bits.dump()
}
when (io.pipe_resp.fire()) {
io.pipe_resp.bits.dump()
}
}
......@@ -20,12 +20,10 @@ case class DCacheParameters
tagECC: Option[String] = None,
dataECC: Option[String] = None,
nMissEntries: Int = 1,
nLoadMissEntries: Int = 1,
nStoreMissEntries: Int = 1,
nMiscMissEntries: Int = 1,
nProbeEntries: Int = 1,
nReleaseEntries: Int = 1,
nStoreReplayEntries: Int = 1,
nMMIOEntries: Int = 1,
nSDQ: Int = 17,
nRPQ: Int = 16,
nMMIOs: Int = 1,
blockBytes: Int = 64
) extends L1CacheParameters {
......@@ -48,23 +46,12 @@ trait HasDCacheParameters extends HasL1CacheParameters {
def nIOMSHRs = cacheParams.nMMIOs
def maxUncachedInFlight = cacheParams.nMMIOs
def missQueueEntryIdWidth = log2Up(cfg.nMissEntries)
def loadMissQueueEntryIdWidth = log2Up(cfg.nLoadMissEntries)
def storeMissQueueEntryIdWidth = log2Up(cfg.nStoreMissEntries)
def miscMissQueueEntryIdWidth = log2Up(cfg.nMiscMissEntries)
def clientMissQueueEntryIdWidth = max(
max(loadMissQueueEntryIdWidth,
storeMissQueueEntryIdWidth),
miscMissQueueEntryIdWidth)
// clients: ldu 0, ldu1, stu, atomics
def nClientMissQueues = 4
def clientIdWidth = log2Up(nClientMissQueues)
def missQueueClientIdWidth = clientIdWidth + clientMissQueueEntryIdWidth
def clientIdMSB = missQueueClientIdWidth - 1
def clientIdLSB = clientMissQueueEntryIdWidth
def entryIdMSB = clientMissQueueEntryIdWidth - 1
def entryIdLSB = 0
def nSourceType = 3
def sourceTypeWidth = log2Up(nSourceType)
def LOAD_SOURCE = 0
def STORE_SOURCE = 1
def AMO_SOURCE = 2
// each source uses an id to distinguish its multiple reqs
def reqIdWidth = 64
require(isPow2(nSets), s"nSets($nSets) must be pow2")
......@@ -73,6 +60,7 @@ trait HasDCacheParameters extends HasL1CacheParameters {
require(full_divide(beatBits, rowBits), s"beatBits($beatBits) must be multiple of rowBits($rowBits)")
// this is a VIPT L1 cache
require(pgIdxBits >= untagBits, s"page aliasing problem: pgIdxBits($pgIdxBits) < untagBits($untagBits)")
require(rowWords == 1, "Our DCache Implementation assumes rowWords == 1")
}
abstract class DCacheModule extends L1CacheModule
......
package xiangshan.cache
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLClientNode, TLClientParameters,
TLMasterParameters, TLMasterPortParameters, TLArbiter, TLMessages}
// memory request in word granularity (load, mmio, lr/sc, atomics)
class DCacheWordReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val data = UInt(DataBits.W)
val mask = UInt((DataBits/8).W)
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("DCacheWordReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
cmd, addr, data, mask, id)
}
}
// memory request in cache line granularity (store)
class DCacheLineReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val data = UInt((cfg.blockBytes * 8).W)
val mask = UInt(cfg.blockBytes.W)
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("DCacheLineReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
cmd, addr, data, mask, id)
}
}
class DCacheWordResp extends DCacheBundle
{
val data = UInt(DataBits.W)
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val replay = Bool()
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("DCacheWordResp: data: %x id: %d miss: %b replay: %b\n",
data, id, miss, replay)
}
}
class DCacheLineResp extends DCacheBundle
{
val data = UInt((cfg.blockBytes * 8).W)
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val replay = Bool()
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("DCacheLineResp: data: %x id: %d miss: %b replay: %b\n",
data, id, miss, replay)
}
}
class Refill extends DCacheBundle
{
val addr = UInt(PAddrBits.W)
val data = UInt((cfg.blockBytes * 8).W)
def dump() = {
XSDebug("Refill: addr: %x data: %x\n", addr, data)
}
}
class DCacheWordIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheWordReq)
val resp = Flipped(DecoupledIO(new DCacheWordResp))
}
// used by load unit
class DCacheLoadIO extends DCacheWordIO
{
// kill previous cycle's req
val s1_kill = Output(Bool())
// cycle 0: virtual address: req.addr
// cycle 1: physical address: s1_paddr
val s1_paddr = Output(UInt(PAddrBits.W))
}
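// Illustrative driver, assuming a hypothetical load unit (s0_vaddr, s1_paddr,
// s1_tlb_miss are placeholder names, not part of this patch): since this is a
// VIPT cache, the req carries the virtual address in cycle 0 for indexing, the
// translated physical address arrives on s1_paddr in cycle 1 for tag compare,
// and s1_kill cancels the previous cycle's req, e.g. on a TLB miss:
//   io.dcache.req.valid := s0_valid
//   io.dcache.req.bits.addr := s0_vaddr // cycle 0: index with vaddr
//   io.dcache.s1_paddr := s1_paddr // cycle 1: compare tags with paddr
//   io.dcache.s1_kill := s1_tlb_miss // kill previous cycle's req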
class DCacheLineIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheLineReq )
val resp = Flipped(DecoupledIO(new DCacheLineResp))
}
class DCacheToLsuIO extends DCacheBundle {
val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
val lsq = ValidIO(new Refill) // refill to load queue, wake up load misses
val store = Flipped(new DCacheLineIO) // for sbuffer
val atomics = Flipped(new DCacheWordIO) // atomics reqs
}
class DCacheIO extends DCacheBundle {
val lsu = new DCacheToLsuIO
val prefetch = DecoupledIO(new MissReq)
}
class DCache()(implicit p: Parameters) extends LazyModule with HasDCacheParameters {
val clientParameters = TLMasterPortParameters.v1(
Seq(TLMasterParameters.v1(
name = "dcache",
sourceId = IdRange(0, cfg.nMissEntries+1),
supportsProbe = TransferSizes(cfg.blockBytes)
))
)
val clientNode = TLClientNode(Seq(clientParameters))
lazy val module = new DCacheImp(this)
}
class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters with HasXSLog {
val io = IO(new DCacheIO)
val (bus, edge) = outer.clientNode.out.head
require(bus.d.bits.data.getWidth == l1BusDataWidth, "DCache: tilelink width does not match")
//----------------------------------------
// core data structures
val dataArray = Module(new DuplicatedDataArray)
val metaArray = Module(new DuplicatedMetaArray)
/*
dataArray.dump()
metaArray.dump()
*/
//----------------------------------------
// core modules
val ldu = Seq.fill(LoadPipelineWidth) { Module(new LoadPipe) }
val storeReplayUnit = Module(new StoreReplayQueue)
val atomicsReplayUnit = Module(new AtomicsReplayEntry)
val mainPipe = Module(new MainPipe)
val missQueue = Module(new MissQueue(edge))
val probeQueue = Module(new ProbeQueue(edge))
val wb = Module(new WritebackUnit(edge))
//----------------------------------------
// meta array
val MetaWritePortCount = 1
val MainPipeMetaWritePort = 0
metaArray.io.write <> mainPipe.io.meta_write
// MainPipe contends with Load 0 for MetaRead;
// give priority to Load
val MetaReadPortCount = 2
val LoadPipeMetaReadPort = 0
val MainPipeMetaReadPort = 1
val metaReadArb = Module(new Arbiter(new L1MetaReadReq, MetaReadPortCount))
metaReadArb.io.in(LoadPipeMetaReadPort) <> ldu(0).io.meta_read
metaReadArb.io.in(MainPipeMetaReadPort) <> mainPipe.io.meta_read
metaArray.io.read(0) <> metaReadArb.io.out
ldu(0).io.meta_resp <> metaArray.io.resp(0)
mainPipe.io.meta_resp <> metaArray.io.resp(0)
for (w <- 1 until LoadPipelineWidth) {
metaArray.io.read(w) <> ldu(w).io.meta_read
ldu(w).io.meta_resp <> metaArray.io.resp(w)
}
//----------------------------------------
// data array
val DataWritePortCount = 1
val MainPipeDataWritePort = 0
dataArray.io.write <> mainPipe.io.data_write
// give priority to load
val DataReadPortCount = 2
val LoadPipeDataReadPort = 0
val MainPipeDataReadPort = 1
val dataReadArb = Module(new Arbiter(new L1DataReadReq, DataReadPortCount))
dataReadArb.io.in(LoadPipeDataReadPort) <> ldu(0).io.data_read
dataReadArb.io.in(MainPipeDataReadPort) <> mainPipe.io.data_read
dataArray.io.read(0) <> dataReadArb.io.out
dataArray.io.resp(0) <> ldu(0).io.data_resp
dataArray.io.resp(0) <> mainPipe.io.data_resp
for (w <- 1 until LoadPipelineWidth) {
dataArray.io.read(w) <> ldu(w).io.data_read
dataArray.io.resp(w) <> ldu(w).io.data_resp
}
//----------------------------------------
// load pipe
// the s1 kill signal
// only lsu uses this, replay never kills
for (w <- 0 until LoadPipelineWidth) {
ldu(w).io.lsu <> io.lsu.load(w)
// replay and nack not needed anymore
// TODO: remove replay and nack
ldu(w).io.nack := false.B
}
//----------------------------------------
// store pipe and store miss queue
storeReplayUnit.io.lsu <> io.lsu.store
//----------------------------------------
// atomics
// atomics not finished yet
io.lsu.atomics := DontCare
atomicsReplayUnit.io := DontCare
// sanity check
val atomicsReq = io.lsu.atomics.req
//----------------------------------------
// miss queue
val MissReqPortCount = LoadPipelineWidth + 1
val MainPipeMissReqPort = 0
// Request
val missReqArb = Module(new Arbiter(new MissReq, MissReqPortCount))
missReqArb.io.in(MainPipeMissReqPort) <> mainPipe.io.miss_req
for (w <- 0 until LoadPipelineWidth) { missReqArb.io.in(w + 1) <> ldu(w).io.miss_req }
missQueue.io.req <> missReqArb.io.out
// refill to load queue
io.lsu.lsq <> missQueue.io.refill
// tilelink stuff
bus.a <> missQueue.io.mem_acquire
bus.e <> missQueue.io.mem_finish
//----------------------------------------
// probe
probeQueue.io.mem_probe <> bus.b
//----------------------------------------
// mainPipe
val MainPipeReqPortCount = 4
val MissMainPipeReqPort = 0
val StoreMainPipeReqPort = 1
val AtomicsMainPipeReqPort = 2
val ProbeMainPipeReqPort = 3
val mainPipeReqArb = Module(new Arbiter(new MainPipeReq, MainPipeReqPortCount))
mainPipeReqArb.io.in(MissMainPipeReqPort) <> missQueue.io.pipe_req
mainPipeReqArb.io.in(StoreMainPipeReqPort) <> storeReplayUnit.io.pipe_req
mainPipeReqArb.io.in(AtomicsMainPipeReqPort) <> atomicsReplayUnit.io.pipe_req
mainPipeReqArb.io.in(ProbeMainPipeReqPort) <> probeQueue.io.pipe_req
mainPipe.io.req <> mainPipeReqArb.io.out
missQueue.io.pipe_resp <> mainPipe.io.miss_resp
storeReplayUnit.io.pipe_resp <> mainPipe.io.store_resp
atomicsReplayUnit.io.pipe_resp <> mainPipe.io.amo_resp
probeQueue.io.lrsc_locked_block <> mainPipe.io.lrsc_locked_block
//----------------------------------------
// wb
// add a queue between MainPipe and WritebackUnit to reduce MainPipe stalls due to WritebackUnit busy
val wb_queue = Module(new Queue(new WritebackReq, cfg.nReleaseEntries, flow = true))
wb_queue.io.enq <> mainPipe.io.wb_req
wb.io.req <> wb_queue.io.deq
bus.c <> wb.io.mem_release
// connect bus d
missQueue.io.mem_grant.valid := false.B
missQueue.io.mem_grant.bits := DontCare
wb.io.mem_grant.valid := false.B
wb.io.mem_grant.bits := DontCare
// in L1DCache, we only expect Grant[Data] and ReleaseAck
bus.d.ready := false.B
when (bus.d.bits.opcode === TLMessages.Grant || bus.d.bits.opcode === TLMessages.GrantData) {
missQueue.io.mem_grant <> bus.d
} .elsewhen (bus.d.bits.opcode === TLMessages.ReleaseAck) {
wb.io.mem_grant <> bus.d
} .otherwise {
assert (!bus.d.fire())
}
// dcache should only deal with DRAM addresses
when (bus.a.fire()) {
assert(bus.a.bits.address >= 0x80000000L.U)
}
when (bus.b.fire()) {
assert(bus.b.bits.address >= 0x80000000L.U)
}
when (bus.c.fire()) {
assert(bus.c.bits.address >= 0x80000000L.U)
}
io.prefetch.valid := missQueue.io.req.fire()
io.prefetch.bits := missQueue.io.req.bits
}
......@@ -2,6 +2,7 @@ package xiangshan.cache
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.ClientMetadata
import utils.XSDebug
......@@ -24,9 +25,6 @@ class LoadPipe extends DCacheModule
})
// LSU requests
// replayed req should never be nacked
assert(!(io.lsu.req.valid && io.lsu.req.bits.meta.replay && io.nack))
// if you got nacked, you can directly pass down
val not_nacked_ready = io.meta_read.ready && io.data_read.ready
val nacked_ready = true.B
......@@ -73,54 +71,35 @@ class LoadPipe extends DCacheModule
val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_addr))).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
val s1_tag_match = s1_tag_match_way.orR
val s1_hit_meta = Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w)))
val s1_hit_state = s1_hit_meta.coh
// replacement policy
val replacer = cacheParams.replacement
val s1_repl_way_en = UIntToOH(replacer.way)
val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
when (io.miss_req.fire()) {
replacer.miss
}
assert(!(s1_valid && s1_req.meta.replay && io.lsu.s1_kill),
"lsq tried to kill a replayed request!")
val s1_fake_meta = Wire(new L1Metadata)
s1_fake_meta.tag := get_tag(s1_addr)
s1_fake_meta.coh := ClientMetadata.onReset
// when there is no tag match, we give it a fake meta:
// this simplifies our logic in the s2 stage
val s1_hit_meta = Mux(s1_tag_match, Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w))), s1_fake_meta)
val s1_hit_coh = s1_hit_meta.coh
// stage 2
val s2_req = RegNext(s1_req)
val s2_valid = RegNext(s1_valid && !io.lsu.s1_kill, init = false.B)
val s2_addr = RegNext(s1_addr)
dump_pipeline_reqs("LoadPipe s2", s2_valid, s2_req)
val s2_addr = RegNext(s1_addr)
// hit, miss, nack, permission checking
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = RegNext(s1_tag_match)
val s2_hit_meta = RegNext(s1_hit_meta)
val s2_hit_state = RegNext(s1_hit_state)
val s2_has_permission = s2_hit_state.onAccess(s2_req.cmd)._1
val s2_new_hit_state = s2_hit_state.onAccess(s2_req.cmd)._3
val s2_repl_meta = RegNext(s1_repl_meta)
val s2_repl_way_en = RegNext(s1_repl_way_en)
val s2_old_meta = Mux(s2_tag_match, s2_hit_meta, s2_repl_meta)
val s2_way_en = Mux(s2_tag_match, s2_tag_match_way, s2_repl_way_en)
// we not only need permissions
// we also require that state does not change on hit
// thus we require new_hit_state === old_hit_state
//
// If state changes on hit,
// we should treat it as not hit, and let mshr deal with it,
// since we can not write meta data on the main pipeline.
// It's possible that we had permission but the state changes on hit,
// e.g. a write to an exclusive but clean block
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state
// nacked or not
val s2_nack = Wire(Bool())
val s2_hit_coh = RegNext(s1_hit_coh)
val s2_has_permission = s2_hit_coh.onAccess(s2_req.cmd)._1
val s2_new_hit_coh = s2_hit_coh.onAccess(s2_req.cmd)._3
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_coh === s2_new_hit_coh
// generate data
val s2_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
for (w <- 0 until nWays) {
......@@ -138,15 +117,9 @@ class LoadPipe extends DCacheModule
val s2_data_word = s2_data_words(s2_word_idx)
val s2_decoded = cacheParams.dataCode.decode(s2_data_word)
val s2_data_word_decoded = s2_decoded.corrected
// this assertion is commented out:
// when the TLB misses, s2_hit may still be true,
// which may cause spurious assertion failures
// assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
// when req got nacked, upper levels should replay this request
// the same set is busy
// nacked or not
val s2_nack_hit = RegNext(s1_nack)
// can not allocate mshr for load miss
val s2_nack_no_mshr = io.miss_req.valid && !io.miss_req.ready
......@@ -154,7 +127,7 @@ class LoadPipe extends DCacheModule
// For now, we use DuplicatedDataArray, so no bank conflicts
val s2_nack_data = false.B
s2_nack := s2_nack_hit || s2_nack_no_mshr || s2_nack_data
val s2_nack = s2_nack_hit || s2_nack_no_mshr || s2_nack_data
// only dump these signals when they are actually valid
dump_pipeline_valids("LoadPipe s2", "s2_hit", s2_valid && s2_hit)
......@@ -163,19 +136,18 @@ class LoadPipe extends DCacheModule
dump_pipeline_valids("LoadPipe s2", "s2_nack_no_mshr", s2_valid && s2_nack_no_mshr)
// send load miss to miss queue
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_addr)
io.miss_req.bits.tag_match := s2_tag_match
io.miss_req.bits.way_en := s2_way_en
io.miss_req.bits.old_meta := s2_old_meta
io.miss_req.bits.client_id := 0.U
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits := DontCare
io.miss_req.bits.source := LOAD_SOURCE.U
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_addr)
io.miss_req.bits.coh := s2_hit_coh
// send back response
val resp = Wire(ValidIO(new DCacheWordResp))
resp.valid := s2_valid
resp.bits := DontCare
resp.bits.data := s2_data_word_decoded
resp.bits.meta := s2_req.meta
// on miss or nack, upper level should replay request
// but if we successfully sent the request to miss queue
// upper level does not need to replay request
......@@ -188,8 +160,7 @@ class LoadPipe extends DCacheModule
assert(!(resp.valid && !io.lsu.resp.ready))
when (resp.valid) {
XSDebug(s"LoadPipe resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
resp.bits.dump()
}
// -------
......@@ -197,8 +168,8 @@ class LoadPipe extends DCacheModule
def dump_pipeline_reqs(pipeline_stage_name: String, valid: Bool,
req: DCacheWordReq ) = {
when (valid) {
XSDebug(s"$pipeline_stage_name cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
req.cmd, req.addr, req.data, req.mask, req.meta.id, req.meta.replay)
XSDebug("$pipeline_stage_name: ")
req.dump()
}
}
......
package xiangshan.cache
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.{ClientMetadata, ClientStates, TLPermissions}
import utils.{XSDebug, OneHot}
class MainPipeReq extends DCacheBundle
{
// for request that comes from MissQueue
// does this req come from MissQueue
val miss = Bool()
// which MissQueueEntry send this req?
val miss_id = UInt(log2Up(cfg.nMissEntries).W)
// what permission are we granted with?
val miss_param = UInt(TLPermissions.bdWidth.W)
// for request that comes from Probe
// does this req come from Probe
val probe = Bool()
val probe_param = UInt(TLPermissions.bdWidth.W)
// request info
// reqs from MissQueue, Store, AMO use this
// probe does not use this
val source = UInt(sourceTypeWidth.W)
val cmd = UInt(M_SZ.W)
// must be aligned to block
val addr = UInt(PAddrBits.W)
// store
val store_data = UInt((cfg.blockBytes * 8).W)
val store_mask = UInt(cfg.blockBytes.W)
// which word does amo work on?
val word_idx = UInt(log2Up(cfg.blockBytes * 8 / DataBits).W)
val amo_data = UInt(DataBits.W)
val amo_mask = UInt((DataBits/8).W)
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("MainPipeReq: miss: %b miss_id: %d miss_param: %d probe: %b probe_param: %d source: %d cmd: %d addr: %x store_data: %x store_mask: %x word_idx: %d data: %x mask: %x id: %d\n",
miss, miss_id, miss_param, probe, probe_param, source, cmd, addr, store_data, store_mask, word_idx, amo_data, amo_mask, id)
}
}
class MainPipeResp extends DCacheBundle
{
val id = UInt(reqIdWidth.W)
// AMO resp data
val data = UInt(DataBits.W)
val miss = Bool()
val replay = Bool()
def dump() = {
XSDebug("MainPipeResp: id: %d data: %x miss: %b replay: %b\n",
id, data, miss, replay)
}
}
class MainPipe extends DCacheModule
{
val io = IO(new DCacheBundle {
// req and resp
val req = Flipped(DecoupledIO(new MainPipeReq))
val miss_req = DecoupledIO(new MissReq)
val miss_resp = ValidIO(new MainPipeResp)
val store_resp = ValidIO(new MainPipeResp)
val amo_resp = ValidIO(new MainPipeResp)
// meta/data read/write
val data_read = DecoupledIO(new L1DataReadReq)
val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
val data_write = DecoupledIO(new L1DataWriteReq)
val meta_read = DecoupledIO(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
val meta_write = DecoupledIO(new L1MetaWriteReq)
// write back
val wb_req = DecoupledIO(new WritebackReq)
// lrsc locked block should block probe
val lrsc_locked_block = Output(Valid(UInt(PAddrBits.W)))
})
// assign default value to output signals
io.req.ready := false.B
io.miss_resp.valid := false.B
io.store_resp.valid := false.B
io.amo_resp.valid := false.B
io.data_read.valid := false.B
io.data_write.valid := false.B
io.data_write.bits := DontCare
io.meta_read.valid := false.B
io.meta_write.valid := false.B
io.meta_write.bits := DontCare
io.wb_req.valid := false.B
io.wb_req.bits := DontCare
io.lrsc_locked_block.valid := false.B
io.lrsc_locked_block.bits := DontCare
// Pipeline
// TODO: add full bypass for meta and data, bypass should be based on block address match
val stall = Wire(Bool())
stall := DontCare
// --------------------------------------------------------------------------------
// stage 0
// read meta and data
// valid: this pipeline has valid req
// fire: req fired and will appear in next pipeline stage
val s0_valid = io.req.valid
val s0_fire = io.req.fire()
val s0_req = io.req.bits
val word_full_overwrite = Wire(Vec(blockRows, Bits(rowWords.W)))
for (i <- 0 until blockRows) {
word_full_overwrite(i) := VecInit((0 until rowWords) map { r =>
val rowMask = s0_req.store_mask((i + 1) * rowBytes - 1, i * rowBytes)
rowMask((r + 1) * wordBytes - 1, r * wordBytes).andR
}).asUInt
}
val row_full_overwrite = VecInit(word_full_overwrite.map(w => w.andR)).asUInt
val full_overwrite = row_full_overwrite.andR
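// Worked example (assuming 64B blocks, 64-bit rows and rowWords == 1, the
// configuration this implementation requires): blockRows == 8, and each
// word_full_overwrite(i) is a single bit set iff all 8 byte-mask bits of row i
// are set; full_overwrite is then the AND over all 8 rows, i.e. the store
// covers the entire block.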
// If req comes from MissQueue, it must be a full overwrite,
// but we still need to read the data array,
// since we may do a replacement.
// If it's a store (not from MissQueue):
// if it's a full mask, no need to read the data array;
// if it's a partial mask, only the not-fully-masked words need to be read.
// If it's an AMO (not from MissQueue), only the specific word needs to be read.
// If it's a probe, read it all.
// Do not leave out !s0_req.probe:
// if it's a probe, all data mask fields are useless.
// don't worry about duplicate conditions
// backend tools will remove them
val miss_need_data = s0_req.miss
val store_need_data = !s0_req.miss && !s0_req.probe && s0_req.source === STORE_SOURCE.U && !full_overwrite
val amo_need_data = !s0_req.miss && !s0_req.probe && s0_req.source === AMO_SOURCE.U
val probe_need_data = s0_req.probe
val need_data = miss_need_data || store_need_data || amo_need_data || probe_need_data
val meta_read = io.meta_read.bits
val data_read = io.data_read.bits
val s1_s0_set_conflict = Wire(Bool())
val s2_s0_set_conflict = Wire(Bool())
val set_conflict = s1_s0_set_conflict || s2_s0_set_conflict
// sanity check
when (s0_fire) {
when (s0_req.miss) {
assert (full_overwrite)
}
// the AMO path is not finished yet, so no AMO reqs should reach the main pipe
assert (s0_req.source =/= AMO_SOURCE.U)
OneHot.checkOneHot(Seq(s0_req.miss, s0_req.probe))
}
val meta_ready = io.meta_read.ready
val data_ready = !need_data || io.data_read.ready
io.req.ready := meta_ready && data_ready && !set_conflict
io.meta_read.valid := io.req.valid && !set_conflict
io.data_read.valid := io.req.valid && need_data && !set_conflict
// Tag read for new requests
meta_read.idx := get_idx(s0_req.addr)
meta_read.way_en := ~0.U(nWays.W)
meta_read.tag := DontCare
// Data read for new requests
data_read.addr := s0_req.addr
data_read.way_en := ~0.U(nWays.W)
val rowWordBits = log2Floor(rowWords)
val amo_row = s0_req.word_idx >> rowWordBits
val amo_word = if (rowWordBits == 0) 0.U else s0_req.word_idx(rowWordBits - 1, 0)
val store_rmask = ~row_full_overwrite
val amo_rmask = UIntToOH(amo_row)
val full_rmask = ~0.U(blockRows.W)
val none_rmask = 0.U(blockRows.W)
// generate wmask here and use it in stage 2
val store_wmask = word_full_overwrite
val amo_wmask = WireInit(VecInit((0 until blockRows) map (i => 0.U(rowWords.W))))
amo_wmask(amo_row) := VecInit((0 until rowWords) map (w => w.U === amo_word)).asUInt
val full_wmask = VecInit((0 until blockRows) map (i => ~0.U(rowWords.W)))
val none_wmask = VecInit((0 until blockRows) map (i => 0.U(rowWords.W)))
data_read.rmask := Mux(store_need_data, store_rmask,
Mux(amo_need_data, amo_rmask,
Mux(probe_need_data || miss_need_data, full_rmask, none_rmask)))
dump_pipeline_reqs("MainPipe s0", s0_valid, s0_req)
// --------------------------------------------------------------------------------
// stage 1
// read out meta, check hit or miss
// TODO: add stalling
val s1_valid = RegInit(false.B)
val s1_fire = s1_valid
val s1_req = RegEnable(s0_req, s0_fire)
val s1_store_wmask = RegEnable(store_wmask, s0_fire)
val s1_amo_wmask = RegEnable(amo_wmask, s0_fire)
val s1_full_wmask = RegEnable(full_wmask, s0_fire)
val s1_none_wmask = RegEnable(none_wmask, s0_fire)
s1_s0_set_conflict := s1_valid && get_idx(s1_req.addr) === get_idx(s0_req.addr)
when (s0_fire) { s1_valid := true.B }
when (!s0_fire && s1_fire) { s1_valid := false.B }
dump_pipeline_reqs("MainPipe s1", s1_valid, s1_req)
val meta_resp = io.meta_resp
// tag check
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_req.addr))).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
val s1_tag_match = s1_tag_match_way.orR
val s1_fake_meta = Wire(new L1Metadata)
s1_fake_meta.tag := get_tag(s1_req.addr)
s1_fake_meta.coh := ClientMetadata.onReset
// when there is no tag match, we give it a fake meta:
// this simplifies our logic in the s2 stage
val s1_hit_meta = Mux(s1_tag_match, Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w))), s1_fake_meta)
val s1_hit_coh = s1_hit_meta.coh
// replacement policy
val replacer = cacheParams.replacement
val s1_repl_way_en = UIntToOH(replacer.way)
val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
val s1_repl_coh = s1_repl_meta.coh
// for now, since we are using random replacement
// we only need to update replacement states after every valid replacement decision
// we only do replacement on a true miss (not a permission miss)
when (s1_fire) {
when (s1_req.miss && !s1_tag_match) {
replacer.miss
}
}
// --------------------------------------------------------------------------------
// stage 2
// check permissions
// read out data, do write/amo stuff
val s2_valid = RegInit(false.B)
val s2_fire = s2_valid
val s2_req = RegEnable(s1_req, s1_fire)
val s2_store_wmask = RegEnable(s1_store_wmask, s1_fire)
val s2_amo_wmask = RegEnable(s1_amo_wmask, s1_fire)
val s2_full_wmask = RegEnable(s1_full_wmask, s1_fire)
val s2_none_wmask = RegEnable(s1_none_wmask, s1_fire)
s2_s0_set_conflict := s2_valid && get_idx(s2_req.addr) === get_idx(s0_req.addr)
when (s1_fire) { s2_valid := true.B }
when (!s1_fire && s2_fire) { s2_valid := false.B }
dump_pipeline_reqs("MainPipe s2", s2_valid, s2_req)
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = RegNext(s1_tag_match)
val s2_hit_meta = RegNext(s1_hit_meta)
val s2_hit_coh = RegNext(s1_hit_coh)
val s2_has_permission = s2_hit_coh.onAccess(s2_req.cmd)._1
val s2_new_hit_coh = s2_hit_coh.onAccess(s2_req.cmd)._3
val s2_repl_meta = RegNext(s1_repl_meta)
val s2_repl_coh = RegNext(s1_repl_coh)
val s2_repl_way_en = RegNext(s1_repl_way_en)
// only a true miss request (not a permission miss) needs to do replacement
// we use repl meta only when we really need to do a replacement
val need_replacement = s2_req.miss && !s2_tag_match
val s2_way_en = Mux(need_replacement, s2_repl_way_en, s2_tag_match_way)
val s2_meta = Mux(need_replacement, s2_repl_meta, s2_hit_meta)
val s2_coh = Mux(need_replacement, s2_repl_coh, s2_hit_coh)
// --------------------------------------------------------------------------------
// Permission checking
val miss_new_coh = s2_coh.onGrant(s2_req.cmd, s2_req.miss_param)
when (s2_valid) {
// permission checking for miss refill
when (s2_req.miss) {
// if miss refill req hits in dcache
// make sure it has enough permission to complete this cmd
assert (miss_new_coh.isValid())
when (s2_tag_match) {
// if miss refill req hits in dcache
// then the old permission should be lower than new permission
// otherwise we would not miss
assert (s2_hit_coh.state < miss_new_coh.state)
}
}
}
// Determine what state to go to based on Probe param
val (probe_has_dirty_data, probe_shrink_param, probe_new_coh) = s2_coh.onProbe(s2_req.probe_param)
// as long as we have permission,
// we will treat it as a hit:
// if we need to update meta from Trunk to Dirty,
// we just go update it
val s2_hit = s2_tag_match && s2_has_permission
val s2_store_hit = s2_hit && !s2_req.miss && !s2_req.probe && s2_req.source === STORE_SOURCE.U
val s2_amo_hit = s2_hit && !s2_req.miss && !s2_req.probe && s2_req.source === AMO_SOURCE.U
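// Illustrative case: a store to a Trunk (exclusive but clean) block has
// permission, so s2_hit is true even though onAccess returns Dirty as the new
// state; the meta write below then records the Trunk -> Dirty transition in
// place, without involving the miss queue.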
when (s2_valid) {
XSDebug("MainPipe: s2 s2_tag_match: %b s2_has_permission: %b s2_hit: %b need_replacement: %b s2_way_en: %x s2_state: %d\n",
s2_tag_match, s2_has_permission, s2_hit, need_replacement, s2_way_en, s2_coh.state)
}
// --------------------------------------------------------------------------------
// Write to MetaArray
// whether we need to update meta
// miss should always update meta
val miss_update_meta = s2_req.miss
val probe_update_meta = s2_req.probe && s2_tag_match && s2_coh =/= probe_new_coh
// store only updates meta when it hits and needs to update Trunk to Dirty
val store_update_meta = s2_store_hit && s2_hit_coh =/= s2_new_hit_coh
val amo_update_meta = s2_amo_hit && s2_hit_coh =/= s2_new_hit_coh
val update_meta = miss_update_meta || probe_update_meta || store_update_meta || amo_update_meta
val new_coh = Mux(miss_update_meta, miss_new_coh,
Mux(probe_update_meta, probe_new_coh,
Mux(store_update_meta || amo_update_meta, s2_new_hit_coh, ClientMetadata.onReset)))
io.meta_write.valid := s2_valid && update_meta
io.meta_write.bits.idx := get_idx(s2_req.addr)
io.meta_write.bits.data.coh := new_coh
io.meta_write.bits.data.tag := get_tag(s2_req.addr)
io.meta_write.bits.way_en := s2_way_en
// --------------------------------------------------------------------------------
// Write to DataArray
// Miss:
// 1. not store and not amo, data: store_data mask: store_mask(full_mask)
// 2. store, data: store_data mask: store_mask(full_mask)
// 3. amo, data: merge(store_data, amo_data, amo_mask) mask: store_mask(full_mask)
//
// Probe: do not write data, DontCare
// Store hit: data: merge(s2_data, store_data, store_mask) mask: store_mask
// AMO hit: data: merge(s2_data, amo_data, amo_mask) mask: amo_wmask
// so we can first generate store data and then merge with amo_data
// generate write mask
val wmask = Mux(s2_req.miss, s2_full_wmask,
Mux(s2_store_hit, s2_store_wmask,
Mux(s2_amo_hit, s2_amo_wmask,
s2_none_wmask)))
// write data if any bit of the write mask is set
val need_write_data = VecInit(wmask.map(w => w.orR)).asUInt.orR
// generate write data
val store_data_merged = Wire(Vec(blockRows, UInt(rowBits.W)))
def mergePutData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
val full_wmask = FillInterleaved(8, wmask)
((~full_wmask & old_data) | (full_wmask & new_data))
}
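// Worked example (illustrative): for a 32-bit word with wmask = "b0011".U,
// FillInterleaved(8, wmask) = 0x0000ffff, so
//   mergePutData(0xaabbccdd.U, 0x11223344.U, "b0011".U) = 0xaabb3344.U,
// i.e. only the two low bytes selected by the mask are taken from new_data.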
val s2_data = Mux1H(s2_way_en, io.data_resp)
val s2_data_decoded = (0 until blockRows) map { r =>
(0 until rowWords) map { w =>
val data = s2_data(r)(encWordBits * (w + 1) - 1, encWordBits * w)
val decoded = cacheParams.dataCode.decode(data)
assert(!(s2_valid && s2_hit && decoded.uncorrectable))
decoded.corrected
}
}
// TODO: deal with ECC errors
for (i <- 0 until blockRows) {
store_data_merged(i) := Cat((0 until rowWords).reverse map { w =>
val old_data = s2_data_decoded(i)(w)
val new_data = s2_req.store_data(rowBits * (i + 1) - 1, rowBits * i)(wordBits * (w + 1) - 1, wordBits * w)
val wmask = s2_req.store_mask(rowBytes * (i + 1) - 1, rowBytes * i)(wordBytes * (w + 1) - 1, wordBytes * w)
val store_data = mergePutData(old_data, new_data, wmask)
store_data
})
}
val amo_data_merged = Wire(Vec(blockRows, UInt(rowBits.W)))
for (i <- 0 until blockRows) {
amo_data_merged(i) := store_data_merged(i)
}
// TODO: do amo calculation
// and merge amo data
/*
for (i <- 0 until blockRows) {
store_data_merged(i) := Cat((0 until rowWords).reverse map { w =>
val old_data = store_data_merged(i)(w)
val wmask = Mux(s2_req.source === AMO_SOURCE.U && (s2_req.miss || s2_hit) && s2_req.word_idx === i.U, s2_req.amo_mask, 0.U)
val store_data = mergePutData(old_data, new_data, wmask)
})
}
*/
// ECC encode data
val wdata_merged = Wire(Vec(blockRows, UInt(encRowBits.W)))
for (i <- 0 until blockRows) {
wdata_merged(i) := Cat((0 until rowWords).reverse map { w =>
val wdata = amo_data_merged(i)(wordBits * (w + 1) - 1, wordBits * w)
val wdata_encoded = cacheParams.dataCode.encode(wdata)
wdata_encoded
})
}
val data_write = io.data_write.bits
io.data_write.valid := s2_valid && need_write_data
data_write.rmask := DontCare
data_write.way_en := s2_way_en
data_write.addr := s2_req.addr
data_write.wmask := wmask
data_write.data := wdata_merged
assert(!(io.data_write.valid && !io.data_write.ready))
// --------------------------------------------------------------------------------
// Writeback
// whether we need to write back a block
// TODO: add support for ProbePerm
// Now, we only deal with ProbeBlock
val miss_writeback = need_replacement && s2_coh.state === ClientStates.Dirty
// even if the probe missed, we still need the writeback unit to send a ProbeAck NtoN response
// val probe_writeback = s2_req.probe && s2_tag_match && s2_coh.state =/= probe_new_coh.state
val probe_writeback = s2_req.probe
val need_writeback = miss_writeback || probe_writeback
val writeback_addr = Cat(s2_meta.tag, get_idx(s2_req.addr)) << blockOffBits
val (_, miss_shrink_param, _) = s2_coh.onCacheControl(M_FLUSH)
val writeback_param = Mux(miss_writeback, miss_shrink_param, probe_shrink_param)
val writeback_data = s2_coh.state === ClientStates.Dirty
val wb_req = io.wb_req.bits
io.wb_req.valid := s2_valid && need_writeback
wb_req.addr := writeback_addr
wb_req.param := writeback_param
wb_req.voluntary := miss_writeback
wb_req.hasData := writeback_data
wb_req.data := VecInit(s2_data_decoded.flatten).asUInt
assert(!(io.wb_req.valid && !io.wb_req.ready))
// --------------------------------------------------------------------------------
// send store/amo miss to miss queue
val store_amo_miss = !s2_req.miss && !s2_req.probe && !s2_hit && (s2_req.source === STORE_SOURCE.U || s2_req.source === AMO_SOURCE.U)
io.miss_req.valid := s2_valid && store_amo_miss
io.miss_req.bits.source := s2_req.source
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := s2_req.addr
io.miss_req.bits.store_data := s2_req.store_data
io.miss_req.bits.store_mask := s2_req.store_mask
io.miss_req.bits.word_idx := s2_req.word_idx
io.miss_req.bits.amo_data := s2_req.amo_data
io.miss_req.bits.amo_mask := s2_req.amo_mask
io.miss_req.bits.coh := s2_coh
io.miss_req.bits.id := s2_req.id
// --------------------------------------------------------------------------------
// send response
val resp = Wire(new MainPipeResp)
// TODO: add amo data out
resp.data := DontCare
resp.id := s2_req.id
resp.miss := store_amo_miss
resp.replay := io.miss_req.valid && !io.miss_req.ready
io.miss_resp.valid := s2_valid && s2_req.miss
io.miss_resp.bits := resp
io.miss_resp.bits.id := s2_req.miss_id
io.store_resp.valid := s2_valid && s2_req.source === STORE_SOURCE.U
io.store_resp.bits := resp
io.amo_resp.valid := s2_valid && s2_req.source === AMO_SOURCE.U
io.amo_resp.bits := resp
when (io.miss_resp.fire()) {
io.miss_resp.bits.dump()
}
when (io.store_resp.fire()) {
io.store_resp.bits.dump()
}
when (io.amo_resp.fire()) {
io.amo_resp.bits.dump()
}
// -------
// Debug logging functions
def dump_pipeline_reqs(pipeline_stage_name: String, valid: Bool, req: MainPipeReq) = {
when (valid) {
XSDebug(s"$pipeline_stage_name ")
req.dump()
}
}
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import chisel3.ExcitingUtils._
import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleA, TLBundleD, TLBundleE, TLPermissions, TLArbiter, ClientMetadata}
import utils.{HasTLDump, XSDebug, BoolStopWatch, OneHot}
class MissReq extends DCacheBundle
{
val source = UInt(sourceTypeWidth.W)
val cmd = UInt(M_SZ.W)
// must be aligned to block
val addr = UInt(PAddrBits.W)
// store
val store_data = UInt((cfg.blockBytes * 8).W)
val store_mask = UInt(cfg.blockBytes.W)
// which word does amo work on?
val word_idx = UInt(log2Up(blockWords).W)
val amo_data = UInt(DataBits.W)
val amo_mask = UInt((DataBits/8).W)
// coherence state
val coh = new ClientMetadata
val id = UInt(reqIdWidth.W)
def dump() = {
XSDebug("MissReq source: %d cmd: %d addr: %x store_data: %x store_mask: %x word_idx: %d amo_data: %x amo_mask: %x coh: %d id: %d\n",
source, cmd, addr, store_data, store_mask, word_idx, amo_data, amo_mask, coh.state, id)
}
}
// One miss entry deals with one missed block
class MissEntry(edge: TLEdgeOut) extends DCacheModule
{
val io = IO(new Bundle {
// MSHR ID
val id = Input(UInt())
// client requests
val req_valid = Input(Bool())
// this entry is free and can be allocated to new reqs
val primary_ready = Output(Bool())
// this entry is busy, but it can merge the new req
val secondary_ready = Output(Bool())
// this entry is busy and it can not merge the new req
val secondary_reject = Output(Bool())
val req = Input((new MissReq))
val refill = ValidIO(new Refill)
// bus
val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
val mem_finish = DecoupledIO(new TLBundleE(edge.bundle))
val pipe_req = DecoupledIO(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
})
// MSHR:
// 1. receive req
// 2. send acquire req
// 3. receive grant resp
// 4. let main pipe do refill and replace
// 5. wait for resp
// 6. send finish to end the tilelink transaction
// We only send finish after data is written into cache.
// This prevents L2 from probing the block down.
// See Tilelink spec 1.8.1 page 69
// A slave should not issue a Probe if there is a pending GrantAck on the block. Once the Probe is
// issued, the slave should not issue further Probes on that block until it receives a ProbeAck.
val s_invalid :: s_refill_req :: s_refill_resp :: s_main_pipe_req :: s_main_pipe_resp :: s_mem_finish :: Nil = Enum(6)
val state = RegInit(s_invalid)
// --------------------------------------------
// internal registers
val req = Reg(new MissReq)
// param of grant
val grant_param = Reg(UInt(TLPermissions.bdWidth.W))
// recording the source/sink info from Grant
// so that we can use it in GrantAck
val grantack = Reg(Valid(new TLBundleE(edge.bundle)))
// should we refill the data to load queue to wake up any missed load?
val should_refill_data = Reg(Bool())
// --------------------------------------------
// merge reqs
// see whether we can merge requests
// do not count the s_invalid state in,
// since we can not merge requests in that state
val acquire_not_sent = state === s_refill_req && !io.mem_acquire.ready
val data_not_refilled = state === s_refill_req || state === s_refill_resp
def can_merge(new_req: MissReq): Bool = {
// caution: do not merge with AMO;
// we can not do amoalu calculation in MissQueue,
// so we would not know the result after the AMO calculation,
// hence AMO reqs are never merged
// before read acquire is fired, we can merge read or write
val before_read_sent = acquire_not_sent && req.source === LOAD_SOURCE.U && (new_req.source === LOAD_SOURCE.U || new_req.source === STORE_SOURCE.U)
// before read/write refills data to LoadQueue, we can merge any read
val before_data_refill = data_not_refilled && (req.source === LOAD_SOURCE.U || req.source === STORE_SOURCE.U) && new_req.source === LOAD_SOURCE.U
before_read_sent || before_data_refill
}
def should_merge(new_req: MissReq): Bool = {
val block_match = req.addr === new_req.addr
block_match && can_merge(new_req)
}
def should_reject(new_req: MissReq): Bool = {
val block_match = req.addr === new_req.addr
// do not reject any req when we are in s_invalid
block_match && !can_merge(new_req) && state =/= s_invalid
}
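// Illustrative cases, assuming matching block addresses: a load arriving while
// this entry's load Acquire is still queued merges (before_read_sent); a load
// arriving while a load/store miss has not yet refilled its data also merges
// (before_data_refill); a store arriving after the Acquire has fired, or any
// AMO req, is rejected and must be replayed after this entry finishes.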
io.primary_ready := state === s_invalid
io.secondary_ready := should_merge(io.req)
io.secondary_reject := should_reject(io.req)
// should not allocate, merge or reject at the same time
// one at a time
OneHot.checkOneHot(Seq(io.primary_ready, io.secondary_ready, io.secondary_reject))
// --------------------------------------------
// assign default values to output signals
io.refill.valid := false.B
io.refill.bits := DontCare
io.mem_acquire.valid := false.B
io.mem_acquire.bits := DontCare
io.mem_grant.ready := false.B
io.mem_finish.valid := false.B
io.mem_finish.bits := DontCare
io.pipe_req.valid := false.B
io.pipe_req.bits := DontCare
when (state =/= s_invalid) {
XSDebug("entry: %d state: %d\n", io.id, state)
req.dump()
}
// --------------------------------------------
// State Machine
// --------------------------------------------
// receive requests
// primary request: allocate for a new request
when (io.req_valid && io.primary_ready) {
assert (state === s_invalid)
// re init some fields
req := io.req
grantack.valid := false.B
// only miss req from load needs a refill to LoadQueue
should_refill_data := io.req.source === LOAD_SOURCE.U
state := s_refill_req
}
// secondary request: merge with existing request
when (io.req_valid && io.secondary_ready) {
// The merged req should never have higher permissions:
// that would mean the cache silently upgraded the permission of our block
// without merging with this miss queue request!
// Either our req came in with stale meta, or the req that upgraded the permission did not merge with this req.
// Both cases are bugs of DCache.
//
// DCache can silently drop permissions (e.g. when probed or evicted);
// it should never silently upgrade permissions.
//
// TODO: please check Tilelink Metadata.scala
// and make sure that lower permissions are encoded as smaller numbers
assert (io.req.coh.state <= req.coh.state)
// use the most up-to-date meta
req.coh := io.req.coh
// when merging with a store,
// we should record its info in our req,
// or we will not be able to replay the store
when (io.req.source === STORE_SOURCE.U) {
req := io.req
}
should_refill_data := io.req.source === LOAD_SOURCE.U
}
// --------------------------------------------
// refill
when (state === s_refill_req) {
val grow_param = req.coh.onAccess(req.cmd)._2
// for full overwrite, we can use AcquirePerm to save memory bandwidth
val full_overwrite = req.source === STORE_SOURCE.U && req.store_mask.andR
val acquireBlock = edge.AcquireBlock(
fromSource = io.id,
toAddress = req.addr,
lgSize = (log2Up(cfg.blockBytes)).U,
growPermissions = grow_param)._2
val acquirePerm = edge.AcquirePerm(
fromSource = io.id,
toAddress = req.addr,
lgSize = (log2Up(cfg.blockBytes)).U,
growPermissions = grow_param)._2
io.mem_acquire.valid := true.B
io.mem_acquire.bits := Mux(full_overwrite, acquirePerm, acquireBlock)
when (io.mem_acquire.fire()) {
state := s_refill_resp
}
}
val (_, _, refill_done, refill_count) = edge.count(io.mem_grant)
// raw data
val refill_data = Reg(Vec(blockRows, UInt(rowBits.W)))
val new_data = Wire(Vec(blockRows, UInt(rowBits.W)))
val new_mask = Wire(Vec(blockRows, UInt(rowBytes.W)))
for (i <- 0 until blockRows) {
new_data(i) := req.store_data(rowBits * (i + 1) - 1, rowBits * i)
// we only need to merge data for Store
new_mask(i) := Mux(req.source === STORE_SOURCE.U,
req.store_mask(rowBytes * (i + 1) - 1, rowBytes * i), 0.U(rowBytes.W))
}
def mergePutData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
val full_wmask = FillInterleaved(8, wmask)
((~full_wmask & old_data) | (full_wmask & new_data))
}
when (state === s_refill_resp) {
io.mem_grant.ready := true.B
when (io.mem_grant.fire()) {
when (edge.hasData(io.mem_grant.bits)) {
refill_data(refill_count) := mergePutData(io.mem_grant.bits.data, new_data(refill_count), new_mask(refill_count))
} .otherwise {
// when we only acquire perm, not data
// use Store's data
for (i <- 0 until blockRows) {
refill_data(i) := new_data(i)
}
}
}
when (refill_done) {
grantack.valid := edge.isRequest(io.mem_grant.bits)
grantack.bits := edge.GrantAck(io.mem_grant.bits)
grant_param := io.mem_grant.bits.param
state := s_main_pipe_req
}
}
io.refill.valid := RegNext(state === s_refill_resp && refill_done && should_refill_data)
io.refill.bits.addr := req.addr
io.refill.bits.data := refill_data.asUInt
when (state === s_main_pipe_req) {
io.pipe_req.valid := true.B
val pipe_req = io.pipe_req.bits
pipe_req.miss := true.B
pipe_req.miss_id := io.id
pipe_req.miss_param := grant_param
pipe_req.probe := false.B
pipe_req.probe_param := DontCare
pipe_req.source := req.source
pipe_req.cmd := req.cmd
pipe_req.addr := req.addr
pipe_req.store_data := refill_data.asUInt
// full overwrite
pipe_req.store_mask := Fill(cfg.blockBytes, "b1".U)
pipe_req.word_idx := req.word_idx
pipe_req.amo_data := req.amo_data
pipe_req.amo_mask := req.amo_mask
pipe_req.id := req.id
when (io.pipe_req.fire()) {
state := s_main_pipe_resp
}
}
when (state === s_main_pipe_resp) {
when (io.pipe_resp.fire()) {
grantack.valid := false.B
state := s_mem_finish
}
}
when (state === s_mem_finish) {
io.mem_finish.valid := grantack.valid
io.mem_finish.bits := grantack.bits
when (io.mem_finish.fire()) {
grantack.valid := false.B
state := s_invalid
}
}
}
class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
{
val io = IO(new Bundle {
val req = Flipped(DecoupledIO(new MissReq))
val refill = ValidIO(new Refill)
val mem_acquire = Decoupled(new TLBundleA(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
val mem_finish = Decoupled(new TLBundleE(edge.bundle))
val pipe_req = DecoupledIO(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
})
val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nMissEntries))
val refill_arb = Module(new Arbiter(new Refill, cfg.nMissEntries))
// dispatch req to MSHR
val primary_ready = Wire(Vec(cfg.nMissEntries, Bool()))
val secondary_ready = Wire(Vec(cfg.nMissEntries, Bool()))
val secondary_reject = Wire(Vec(cfg.nMissEntries, Bool()))
// try merging with existing reqs
val merge = secondary_ready.asUInt.orR
val merge_idx = PriorityEncoder(secondary_ready)
// some entry says the request can not be merged
val reject = secondary_reject.asUInt.orR
// allocate a new entry for this req
val allocate = !reject && !merge && primary_ready.asUInt.orR
val alloc_idx = PriorityEncoder(primary_ready)
// will this req be accepted
val accept = (merge || allocate) && !reject
// if it's accepted, which entry will it enter
val entry_idx = Mux(allocate, alloc_idx, merge_idx)
// for one block, there should be only one MSHR:
// one block should not stay in multiple MSHRs;
// if a req can not merge with existing reqs,
// block it!
OneHot.checkOneHot(secondary_ready)
OneHot.checkOneHot(secondary_reject)
// should not merge and reject at the same time
OneHot.checkOneHot(Seq(merge, reject))
io.req.ready := accept
io.mem_grant.ready := false.B
val entries = (0 until cfg.nMissEntries) map { i =>
val entry = Module(new MissEntry(edge))
entry.io.id := i.U(log2Up(cfg.nMissEntries).W)
// entry req
entry.io.req_valid := (i.U === entry_idx) && accept && io.req.valid
primary_ready(i) := entry.io.primary_ready
secondary_ready(i) := entry.io.secondary_ready
secondary_reject(i) := entry.io.secondary_reject
entry.io.req := io.req.bits
// entry refill
refill_arb.io.in(i).valid := entry.io.refill.valid
refill_arb.io.in(i).bits := entry.io.refill.bits
// pipe_req
pipe_req_arb.io.in(i) <> entry.io.pipe_req
// pipe_resp
entry.io.pipe_resp.valid := false.B
entry.io.pipe_resp.bits := DontCare
when (io.pipe_resp.bits.id === i.U) {
entry.io.pipe_resp <> io.pipe_resp
}
entry.io.mem_grant.valid := false.B
entry.io.mem_grant.bits := DontCare
when (io.mem_grant.bits.source === i.U) {
entry.io.mem_grant <> io.mem_grant
}
/*
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(
BoolStopWatch(
start = entry.io.req.fire(),
stop = entry.io.resp.fire(),
startHighPriority = true),
"perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10),
Perf
)
}
*/
entry
}
io.refill.valid := refill_arb.io.out.valid
io.refill.bits := refill_arb.io.out.bits
refill_arb.io.out.ready := true.B
// one refill at a time
OneHot.checkOneHot(refill_arb.io.in.map(r => r.valid))
TLArbiter.lowestFromSeq(edge, io.mem_acquire, entries.map(_.io.mem_acquire))
TLArbiter.lowestFromSeq(edge, io.mem_finish, entries.map(_.io.mem_finish))
io.pipe_req <> pipe_req_arb.io.out
// print all input/output requests for debug purposes
when (io.req.fire()) {
io.req.bits.dump()
// sanity check
val source = io.req.bits.source
val cmd = io.req.bits.cmd
when (source === LOAD_SOURCE.U) {
assert (cmd === M_XRD)
}
when (source === STORE_SOURCE.U) {
assert (cmd === M_XWR)
}
when (source === AMO_SOURCE.U) {
assert (
cmd === M_XA_SWAP ||
cmd === M_XLR ||
cmd === M_XSC ||
cmd === M_XA_ADD ||
cmd === M_XA_XOR ||
cmd === M_XA_OR ||
cmd === M_XA_AND ||
cmd === M_XA_MIN ||
cmd === M_XA_MAX ||
cmd === M_XA_MINU ||
cmd === M_XA_MAXU)
}
// req addr must be aligned to block boundary
assert (io.req.bits.addr(blockOffBits - 1, 0) === 0.U)
}
when (io.refill.fire()) {
io.refill.bits.dump()
}
when (io.mem_acquire.fire()) {
XSDebug("mem_acquire ")
io.mem_acquire.bits.dump
}
when (io.mem_grant.fire()) {
XSDebug("mem_grant ")
io.mem_grant.bits.dump
}
when (io.mem_finish.fire()) {
XSDebug("mem_finish ")
io.mem_finish.bits.dump
}
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(io.req.fire(), "perfCntDCacheMiss", Perf)
}
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleB, TLMessages, TLPermissions}
import utils.{HasTLDump, XSDebug}
class ProbeReq extends DCacheBundle
{
val source = UInt()
val opcode = UInt()
val addr = UInt(PAddrBits.W)
val param = UInt(TLPermissions.bdWidth.W)
def dump() = {
XSDebug("ProbeReq source: %d opcode: %d addr: %x param: %d\n",
source, opcode, addr, param)
}
}
class ProbeEntry extends DCacheModule {
val io = IO(new Bundle {
val req = Flipped(Decoupled(new ProbeReq))
val pipe_req = DecoupledIO(new MainPipeReq)
val lrsc_locked_block = Input(Valid(UInt()))
// the block we are probing
val block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_pipe_req :: Nil = Enum(2)
val state = RegInit(s_invalid)
val req = Reg(new ProbeReq)
// assign default values to signals
io.req.ready := false.B
io.pipe_req.valid := false.B
io.pipe_req.bits := DontCare
io.block_addr.valid := state =/= s_invalid
io.block_addr.bits := req.addr
when (state =/= s_invalid) {
XSDebug("state: %d\n", state)
}
when (state === s_invalid) {
io.req.ready := true.B
when (io.req.fire()) {
req := io.req.bits
state := s_pipe_req
}
}
when (state === s_pipe_req) {
val lrsc_blocked = io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr
io.pipe_req.valid := !lrsc_blocked
val pipe_req = io.pipe_req.bits
pipe_req := DontCare
pipe_req.miss := false.B
pipe_req.probe := true.B
pipe_req.probe_param := req.param
pipe_req.addr := req.addr
when (io.pipe_req.fire()) {
state := s_invalid
}
}
}
class ProbeQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
{
val io = IO(new Bundle {
val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
val pipe_req = DecoupledIO(new MainPipeReq)
val lrsc_locked_block = Input(Valid(UInt()))
})
val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))
// allocate a free entry for incoming request
val primary_ready = Wire(Vec(cfg.nProbeEntries, Bool()))
val allocate = primary_ready.asUInt.orR
val alloc_idx = PriorityEncoder(primary_ready)
// translate to inner req
val req = Wire(new ProbeReq)
req.source := io.mem_probe.bits.source
req.opcode := io.mem_probe.bits.opcode
req.addr := io.mem_probe.bits.address
req.param := io.mem_probe.bits.param
io.mem_probe.ready := allocate
val entries = (0 until cfg.nProbeEntries) map { i =>
val entry = Module(new ProbeEntry)
// entry req
entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
primary_ready(i) := entry.io.req.ready
entry.io.req.bits := req
// pipe_req
pipe_req_arb.io.in(i) <> entry.io.pipe_req
entry.io.lrsc_locked_block := io.lrsc_locked_block
entry
}
io.pipe_req <> pipe_req_arb.io.out
// print all input/output requests for debug purposes
when (io.mem_probe.valid) {
// before a probe finishes, L2 should not further issue probes on this block
val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.mem_probe.bits.address)).asUInt.orR
assert (!probe_conflict)
// for now, we can only deal with ProbeBlock
assert (io.mem_probe.bits.opcode === TLMessages.Probe)
}
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
import bus.tilelink._
class StoreReplayEntry extends DCacheModule
{
val io = IO(new Bundle {
val id = Input(UInt())
val lsu = Flipped(new DCacheLineIO)
val pipe_req = Decoupled(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
val block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_pipe_req :: s_pipe_resp :: s_resp :: Nil = Enum(4)
val state = RegInit(s_invalid)
val req = Reg(new DCacheLineReq)
// assign default values to output signals
io.lsu.req.ready := state === s_invalid
io.lsu.resp.valid := false.B
io.lsu.resp.bits := DontCare
io.pipe_req.valid := false.B
io.pipe_req.bits := DontCare
io.block_addr.valid := state =/= s_invalid
io.block_addr.bits := req.addr
when (state =/= s_invalid) {
XSDebug("StoreReplayEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
}
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
when (io.lsu.req.fire()) {
req := io.lsu.req.bits
state := s_pipe_req
}
}
// --------------------------------------------
// replay
when (state === s_pipe_req) {
io.pipe_req.valid := true.B
val pipe_req = io.pipe_req.bits
pipe_req := DontCare
pipe_req.miss := false.B
pipe_req.probe := false.B
pipe_req.source := STORE_SOURCE.U
pipe_req.cmd := req.cmd
pipe_req.addr := req.addr
pipe_req.store_data := req.data
pipe_req.store_mask := req.mask
pipe_req.id := io.id
when (io.pipe_req.fire()) {
state := s_pipe_resp
}
}
when (state === s_pipe_resp) {
// when not miss
// everything is OK, simply send response back to sbuffer
// when miss and not replay
// wait for missQueue to handle the miss and replay our request
// when miss and replay
// req missed and failed to enter missQueue, manually replay it later
// TODO: add assertions:
// 1. add a replay delay counter?
// 2. when req gets into MissQueue, it should not miss any more
when (io.pipe_resp.fire()) {
when (io.pipe_resp.bits.miss) {
when (io.pipe_resp.bits.replay) {
state := s_pipe_req
}
} .otherwise {
state := s_resp
}
}
}
// --------------------------------------------
when (state === s_resp) {
io.lsu.resp.valid := true.B
io.lsu.resp.bits := DontCare
io.lsu.resp.bits.id := req.id
when (io.lsu.resp.fire()) {
state := s_invalid
}
}
// debug output
when (io.lsu.req.fire()) {
XSDebug(s"StoreReplayEntryTransaction req %d\n", io.id)
}
when (io.lsu.resp.fire()) {
XSDebug(s"StoreReplayEntryTransaction resp %d\n", io.id)
}
}
class StoreReplayQueue extends DCacheModule
{
val io = IO(new Bundle {
val lsu = Flipped(new DCacheLineIO)
val pipe_req = Decoupled(new MainPipeReq)
val pipe_resp = Flipped(ValidIO(new MainPipeResp))
})
val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nStoreReplayEntries))
val resp_arb = Module(new Arbiter(new DCacheLineResp, cfg.nStoreReplayEntries))
// allocate a free entry for incoming request
val primary_ready = Wire(Vec(cfg.nStoreReplayEntries, Bool()))
val allocate = primary_ready.asUInt.orR
val alloc_idx = PriorityEncoder(primary_ready)
val req = io.lsu.req
req.ready := allocate
val entries = (0 until cfg.nStoreReplayEntries) map { i =>
val entry = Module(new StoreReplayEntry)
entry.io.id := i.U
// entry req
entry.io.lsu.req.valid := (i.U === alloc_idx) && allocate && req.valid
primary_ready(i) := entry.io.lsu.req.ready
entry.io.lsu.req.bits := req.bits
// lsu req and resp
resp_arb.io.in(i) <> entry.io.lsu.resp
// replay req and resp
pipe_req_arb.io.in(i) <> entry.io.pipe_req
entry.io.pipe_resp.valid := (i.U === io.pipe_resp.bits.id) && io.pipe_resp.valid
entry.io.pipe_resp.bits := io.pipe_resp.bits
entry
}
io.lsu.resp <> resp_arb.io.out
io.pipe_req <> pipe_req_arb.io.out
// sanity check
when (io.lsu.req.valid) {
assert(io.lsu.req.bits.cmd === M_XWR)
val block_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.lsu.req.bits.addr)).asUInt.orR
assert (!block_conflict)
}
// debug output
when (io.lsu.req.fire()) {
io.lsu.req.bits.dump()
}
when (io.lsu.resp.fire()) {
io.lsu.resp.bits.dump()
}
when (io.pipe_req.fire()) {
io.pipe_req.bits.dump()
}
when (io.pipe_resp.fire()) {
io.pipe_resp.bits.dump()
}
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
import freechips.rocketchip.tilelink.{TLBundleC, TLBundleD, TLEdgeOut, TLPermissions}
class WritebackReq extends DCacheBundle {
val addr = UInt(PAddrBits.W)
val param = UInt(TLPermissions.cWidth.W)
val voluntary = Bool()
val hasData = Bool()
val data = UInt((cfg.blockBytes * 8).W)
def dump() = {
XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
addr, param, voluntary, hasData, data)
}
}
class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
val io = IO(new Bundle {
val req = Flipped(DecoupledIO(new WritebackReq))
val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
})
// remaining beats
val remain = RegInit(0.U(refillCycles.W))
val remain_set = WireInit(0.U(refillCycles.W))
val remain_clr = WireInit(0.U(refillCycles.W))
remain := (remain | remain_set) & ~remain_clr
// used source id
// source id 0 is reserved for ProbeAck[Data]
val used = RegInit(0.U((cfg.nReleaseEntries - 1).W))
val used_set = WireInit(0.U((cfg.nReleaseEntries - 1).W))
val used_clr = WireInit(0.U((cfg.nReleaseEntries - 1).W))
used := (used | used_set) & ~used_clr
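// set/clear register idiom: e.g. with used = b0001, used_set = b0010 and
// used_clr = b0001, used becomes b0010 next cycle; clear wins over set
// only when both target the same bit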
val busy = remain.orR
val all_used = used.andR
val req_reg = Reg(new WritebackReq)
val req = Mux(busy, req_reg, io.req.bits)
// --------------------------------------------------------------------------------
// new req entering
// source to use for this transaction
val source = Reg(UInt())
io.req.ready := !busy && (!io.req.bits.voluntary || !all_used)
when (io.req.fire()) {
remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
used_set := Mux(io.req.bits.voluntary, PriorityEncoderOH(~used), 0.U)
// source 0 is reserved for ProbeAck[Data]
source := Mux(io.req.bits.voluntary, PriorityEncoder(~used) + 1.U, 0.U)
}
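// source id mapping sketch: bit i of `used` tracks source id i + 1, so
// voluntary Releases use sources 1 .. nReleaseEntries - 1 while source 0
// is dedicated to ProbeAck[Data]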
// --------------------------------------------------------------------------------
// while there are beats remaining to be sent, we keep sending
// which beat to send in this cycle?
val beat = PriorityEncoder(remain)
val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
for (i <- 0 until refillCycles) {
beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
}
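// beat_data just reslices the line into bus beats; e.g. assuming a 512-bit
// block and a 256-bit bus (refillCycles = 2), beat_data(0) = req.data(255, 0)
// and beat_data(1) = req.data(511, 256)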
val probeResponse = edge.ProbeAck(
fromSource = source,
toAddress = req.addr,
lgSize = log2Ceil(cfg.blockBytes).U,
reportPermissions = req.param
)
val probeResponseData = edge.ProbeAck(
fromSource = source,
toAddress = req.addr,
lgSize = log2Ceil(cfg.blockBytes).U,
reportPermissions = req.param,
data = beat_data(beat)
)
val voluntaryRelease = edge.Release(
fromSource = source,
toAddress = req.addr,
lgSize = log2Ceil(cfg.blockBytes).U,
shrinkPermissions = req.param
)._2
val voluntaryReleaseData = edge.Release(
fromSource = source,
toAddress = req.addr,
lgSize = log2Ceil(cfg.blockBytes).U,
shrinkPermissions = req.param,
data = beat_data(beat)
)._2
io.mem_release.valid := busy
io.mem_release.bits := Mux(req.voluntary,
Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
Mux(req.hasData, probeResponseData, probeResponse))
when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }
// --------------------------------------------------------------------------------
// receive ReleaseAck for Releases
// we only accept grants for source ids we have allocated
// TODO: assert on any invalid grant
io.mem_grant.ready := used(io.mem_grant.bits.source - 1.U)
when (io.mem_grant.fire()) {
used_clr := UIntToOH(io.mem_grant.bits.source - 1.U)
}
// print all input/output requests for debug purpose
// print req
when (io.req.fire()) {
io.req.bits.dump()
}
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.{XSDebug}
// this is a traditional cache pipeline:
// it handles load/store/amo/lr,sc
class AtomicsPipe extends DCacheModule
{
val io = IO(new DCacheBundle{
val lsu = Flipped(new DCacheWordIO)
val data_read = DecoupledIO(new L1DataReadReq)
val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
val data_write = DecoupledIO(new L1DataWriteReq)
val meta_read = DecoupledIO(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
val inflight_req_idxes = Output(Vec(3, Valid(UInt())))
val inflight_req_block_addrs = Output(Vec(3, Valid(UInt())))
val block_probe_addr = Output(Valid(UInt()))
val wb_invalidate_lrsc = Input(Valid(UInt()))
// send miss request to miss queue
val miss_req = DecoupledIO(new MissReq)
})
// LSU requests
io.lsu.req.ready := io.meta_read.ready && io.data_read.ready
io.meta_read.valid := io.lsu.req.valid
io.data_read.valid := io.lsu.req.valid
val meta_read = io.meta_read.bits
val data_read = io.data_read.bits
// Tag read for new requests
meta_read.idx := get_idx(io.lsu.req.bits.addr)
meta_read.way_en := ~0.U(nWays.W)
meta_read.tag := DontCare
// Data read for new requests
data_read.addr := io.lsu.req.bits.addr
data_read.way_en := ~0.U(nWays.W)
// only need to read the specific row
data_read.rmask := UIntToOH(get_row(io.lsu.req.bits.addr))
// Pipeline
// ---------------------------------------
// stage 0
val s0_valid = io.lsu.req.fire()
val s0_req = io.lsu.req.bits
dump_pipeline_reqs("AtomicsPipe s0", s0_valid, s0_req)
// ---------------------------------------
// stage 1
val s1_req = RegNext(s0_req)
val s1_valid = RegNext(s0_valid, init = false.B)
val s1_addr = s1_req.addr
val s1_nack = false.B
dump_pipeline_reqs("AtomicsPipe s1", s1_valid, s1_req)
// tag check
val meta_resp = io.meta_resp
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_addr))).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
val s1_tag_match = s1_tag_match_way.orR
val s1_hit_meta = Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w)))
val s1_hit_state = s1_hit_meta.coh
// replacement policy
val replacer = cacheParams.replacement
val s1_repl_way_en = UIntToOH(replacer.way)
val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
when (io.miss_req.fire()) {
replacer.miss
}
// ---------------------------------------
// stage 2
val s2_req = RegNext(s1_req)
val s2_valid = RegNext(s1_valid, init = false.B)
dump_pipeline_reqs("AtomicsPipe s2", s2_valid, s2_req)
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = s2_tag_match_way.orR
val s2_hit_meta = RegNext(s1_hit_meta)
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegNext(meta_resp(w).coh)))
val s2_has_permission = s2_hit_state.onAccess(s2_req.cmd)._1
val s2_new_hit_state = s2_hit_state.onAccess(s2_req.cmd)._3
val s2_repl_meta = RegNext(s1_repl_meta)
val s2_repl_way_en = RegNext(s1_repl_way_en)
val s2_old_meta = Mux(s2_tag_match, s2_hit_meta, s2_repl_meta)
val s2_way_en = Mux(s2_tag_match, s2_tag_match_way, s2_repl_way_en)
// we not only need permissions
// we also require that state does not change on hit
// thus we require new_hit_state === old_hit_state
//
// If state changes on hit,
// we should treat it as not hit, and let mshr deal with it,
// since we cannot write meta data in the main pipeline.
// It's possible that we had permission but state changes on hit:
// eg: write to exclusive but clean block
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state
val s2_nack = Wire(Bool())
// when req got nacked, upper levels should replay this request
// the same set is busy
val s2_nack_hit = RegNext(s1_nack)
// cannot allocate an MSHR for the miss
val s2_nack_no_mshr = io.miss_req.valid && !io.miss_req.ready
// Bank conflict on data arrays
// For now, we use DuplicatedDataArray, so no bank conflicts
val s2_nack_data = false.B
s2_nack := s2_nack_hit || s2_nack_no_mshr || s2_nack_data
// lr/sc
val debug_sc_fail_addr = RegInit(0.U)
val debug_sc_fail_cnt = RegInit(0.U(8.W))
val lrsc_count = RegInit(0.U(log2Ceil(lrscCycles).W))
val lrsc_valid = lrsc_count > lrscBackoff.U
val lrsc_addr = Reg(UInt())
val s2_lr = s2_req.cmd === M_XLR && !s2_nack
val s2_sc = s2_req.cmd === M_XSC && !s2_nack
val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === get_block_addr(s2_req.addr)
val s2_sc_fail = s2_sc && !s2_lrsc_addr_match
val s2_sc_resp = Mux(s2_sc_fail, 1.U, 0.U)
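// per the RISC-V A extension, sc writes 0 to rd on success and a non-zero
// code on failure; we report 1 for every failure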
// we have permission on this block
// but we cannot finish in this pass
// we need to go to miss queue to update meta and set dirty first
val s2_set_dirty = s2_tag_match && s2_has_permission && s2_hit_state =/= s2_new_hit_state
// this sc should succeed, but we need to set dirty first
// do not treat it as an sc failure or reset the lr/sc counter
val sc_set_dirty = s2_set_dirty && !s2_nack && s2_sc && s2_lrsc_addr_match
when (s2_valid && !sc_set_dirty) {
when (s2_hit && !s2_nack && s2_lr) {
lrsc_count := (lrscCycles - 1).U
lrsc_addr := get_block_addr(s2_req.addr)
} .otherwise {
lrsc_count := 0.U
}
} .elsewhen (lrsc_count > 0.U) {
lrsc_count := lrsc_count - 1.U
}
io.block_probe_addr.valid := lrsc_valid
io.block_probe_addr.bits := lrsc_addr
// when we release this block,
// we invalidate this reservation set
when (io.wb_invalidate_lrsc.valid) {
when (io.wb_invalidate_lrsc.bits === lrsc_addr) {
lrsc_count := 0.U
}
// when we release this block, there should be no matching lrsc inflight
assert (!(s2_valid && (s2_lr || s2_sc) && io.wb_invalidate_lrsc.bits === get_block_addr(s2_req.addr)))
}
when (s2_valid) {
when (s2_req.addr === debug_sc_fail_addr) {
when (s2_sc_fail) {
debug_sc_fail_cnt := debug_sc_fail_cnt + 1.U
} .elsewhen (s2_sc) {
debug_sc_fail_cnt := 0.U
}
} .otherwise {
when (s2_sc_fail) {
debug_sc_fail_addr := s2_req.addr
debug_sc_fail_cnt := 1.U
}
}
}
assert(debug_sc_fail_cnt < 100.U, "L1DCache failed too many SCs in a row")
// only dump these signals when they are actually valid
dump_pipeline_valids("AtomicsPipe s2", "s2_hit", s2_valid && s2_hit)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack", s2_valid && s2_nack)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_hit", s2_valid && s2_nack_hit)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_no_mshr", s2_valid && s2_nack_no_mshr)
dump_pipeline_valids("AtomicsPipe s2", "s2_nack_data", s2_valid && s2_nack_data)
when (s2_valid) {
XSDebug("lrsc_count: %d lrsc_valid: %b lrsc_addr: %x\n",
lrsc_count, lrsc_valid, lrsc_addr)
XSDebug("s2_lr: %b s2_sc: %b s2_lrsc_addr_match: %b s2_sc_fail: %b s2_sc_resp: %x\n",
s2_lr, s2_sc, s2_lrsc_addr_match, s2_sc_fail, s2_sc_resp)
XSDebug("debug_sc_fail_addr: %x debug_sc_fail_cnt: %d\n",
debug_sc_fail_addr, debug_sc_fail_cnt)
}
// load data gen
val s2_data = Wire(Vec(nWays, UInt(encRowBits.W)))
val data_resp = io.data_resp
for (w <- 0 until nWays) {
s2_data(w) := data_resp(w)(get_row(s2_req.addr))
}
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
// the index of word in a row, in case rowBits != wordBits
val s2_word_idx = if (rowWords == 1) 0.U else s2_req.addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes))
val s2_data_words = Wire(Vec(rowWords, UInt(encWordBits.W)))
for (w <- 0 until rowWords) {
s2_data_words(w) := s2_data_muxed(encWordBits * (w + 1) - 1, encWordBits * w)
}
val s2_data_word = s2_data_words(s2_word_idx)
val s2_decoded = cacheParams.dataCode.decode(s2_data_word)
val s2_data_word_decoded = s2_decoded.corrected
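// each data word is stored ECC-encoded; decode() yields the corrected word
// plus error flags, and the assertion below guarantees a non-nacked hit
// never returns uncorrectable data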
assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
// send miss req to miss queue
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_req.addr)
io.miss_req.bits.tag_match := s2_tag_match
io.miss_req.bits.way_en := s2_way_en
io.miss_req.bits.old_meta := s2_old_meta
io.miss_req.bits.client_id := s2_req.meta.id
val resp = Wire(ValidIO(new DCacheWordResp))
resp.valid := s2_valid
resp.bits.data := Mux(s2_sc, s2_sc_resp, s2_data_word)
resp.bits.meta := s2_req.meta
// reuse this field to pass lr/sc valid to commit
// NEMU uses this to check whether the lr/sc counter is still valid
resp.bits.meta.id := lrsc_valid
resp.bits.miss := !s2_hit || s2_nack
resp.bits.replay := resp.bits.miss && (!io.miss_req.fire() || s2_nack)
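// replay semantics: the req missed and either failed to enter the miss
// queue or was nacked, so the sender must retry it later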
io.lsu.resp.valid := resp.valid
io.lsu.resp.bits := resp.bits
assert(!(resp.valid && !io.lsu.resp.ready))
when (resp.valid) {
XSDebug(s"AtomicsPipe resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
}
// ---------------------------------------
// s3: do data write
// Store/amo hits
val amoalu = Module(new AMOALU(wordBits))
amoalu.io.mask := s2_req.mask
amoalu.io.cmd := s2_req.cmd
amoalu.io.lhs := s2_data_word_decoded
amoalu.io.rhs := s2_req.data
val s3_req = RegNext(s2_req)
val s3_valid = RegNext(s2_valid && s2_hit && isWrite(s2_req.cmd) && !s2_nack && !s2_sc_fail)
val s3_tag_match_way = RegNext(s2_tag_match_way)
val wdata_encoded = cacheParams.dataCode.encode(amoalu.io.out)
val s3_wdata = Reg(UInt())
s3_wdata := wdata_encoded
// write dcache if hit
// only need to write the specific row
val wmask = WireInit(VecInit((0 until blockRows) map (i => 0.U(rowWords.W))))
val wdata = WireInit(VecInit((0 until blockRows) map (i => Cat(
(0 until rowWords) map { w => s3_wdata }))))
wmask(get_row(s3_req.addr)) := ~0.U(rowWords.W)
val data_write = io.data_write.bits
io.data_write.valid := s3_valid
data_write.rmask := DontCare
data_write.way_en := s3_tag_match_way
data_write.addr := s3_req.addr
data_write.wmask := wmask
data_write.data := wdata
assert(!(io.data_write.valid && !io.data_write.ready))
dump_pipeline_reqs("AtomicsPipe s3", s3_valid, s3_req)
// -------
// wire out signals for synchronization
io.inflight_req_idxes(0).valid := io.lsu.req.valid
io.inflight_req_idxes(1).valid := s1_valid
io.inflight_req_idxes(2).valid := s2_valid
io.inflight_req_idxes(0).bits := get_idx(s0_req.addr)
io.inflight_req_idxes(1).bits := get_idx(s1_req.addr)
io.inflight_req_idxes(2).bits := get_idx(s2_req.addr)
io.inflight_req_block_addrs(0).valid := io.lsu.req.valid
io.inflight_req_block_addrs(1).valid := s1_valid
io.inflight_req_block_addrs(2).valid := s2_valid
io.inflight_req_block_addrs(0).bits := get_block_addr(s0_req.addr)
io.inflight_req_block_addrs(1).bits := get_block_addr(s1_req.addr)
io.inflight_req_block_addrs(2).bits := get_block_addr(s2_req.addr)
// -------
// Debug logging functions
def dump_pipeline_reqs(pipeline_stage_name: String, valid: Bool,
req: DCacheWordReq ) = {
when (valid) {
XSDebug(s"$pipeline_stage_name cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
req.cmd, req.addr, req.data, req.mask, req.meta.id, req.meta.replay)
}
}
def dump_pipeline_valids(pipeline_stage_name: String, signal_name: String, valid: Bool) = {
when (valid) {
XSDebug(s"$pipeline_stage_name $signal_name\n")
}
}
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
// wraps around AtomicsPipe
// when a request misses, it sends a miss req to missQueue and replays the request
class AtomicsMissQueue extends DCacheModule
{
val io = IO(new DCacheBundle {
val lsu = Flipped(new DCacheWordIO)
val replay = new DCacheWordIO
val miss_resp = Flipped(ValidIO(new MissResp))
val miss_finish = DecoupledIO(new MissFinish)
})
val s_invalid :: s_replay_req :: s_replay_resp :: s_resp :: s_miss_resp :: s_miss_finish :: Nil = Enum(6)
val state = RegInit(s_invalid)
val id = 0.U
val req = Reg(new DCacheWordReq)
val resp = Reg(new DCacheWordResp)
val req_block_addr = get_block_addr(req.addr)
val reg_miss_resp = Reg(new MissResp)
// assign default values to output signals
io.lsu.req.ready := state === s_invalid
io.lsu.resp.valid := false.B
io.lsu.resp.bits := DontCare
io.replay.req.valid := false.B
io.replay.req.bits := DontCare
io.replay.resp.ready := false.B
io.miss_finish.valid := false.B
io.miss_finish.bits := DontCare
when (state =/= s_invalid) {
XSDebug("state: %d\n", state)
}
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
when (io.lsu.req.fire()) {
assert(!io.lsu.req.bits.meta.replay)
req := io.lsu.req.bits
state := s_replay_req
}
}
// --------------------------------------------
// replay
when (state === s_replay_req) {
io.replay.req.valid := true.B
io.replay.req.bits := req
when (io.replay.req.fire()) {
state := s_replay_resp
}
}
when (state === s_replay_resp) {
io.replay.resp.ready := true.B
when (io.replay.resp.fire()) {
// req missed
when (io.replay.resp.bits.miss) {
// replayed reqs should not miss
assert(!req.meta.replay)
// the req missed and did not enter mshr
// so replay it until it hits or enters mshr
when (io.replay.resp.bits.replay) {
state := s_replay_req
} .otherwise {
// the req missed and enters mshr
// wait for miss response
state := s_miss_resp
}
} .otherwise {
// req hits, everything OK
resp := io.replay.resp.bits
when (!req.meta.replay) {
state := s_resp
} .otherwise {
// if it's a replayed request
// we need to tell mshr, we are done
state := s_miss_finish
}
}
}
}
when (state === s_miss_resp) {
when (io.miss_resp.fire()) {
reg_miss_resp := io.miss_resp.bits
// mark req as replayed req
req.meta.replay := true.B
state := s_replay_req
}
}
when (state === s_miss_finish) {
io.miss_finish.valid := true.B
io.miss_finish.bits.client_id := id
io.miss_finish.bits.entry_id := reg_miss_resp.entry_id
when (io.miss_finish.fire()) {
state := s_resp
}
}
// --------------------------------------------
when (state === s_resp) {
io.lsu.resp.valid := true.B
io.lsu.resp.bits := resp
when (io.lsu.resp.fire()) {
state := s_invalid
}
}
// debug output
when (io.lsu.req.fire()) {
XSDebug(s"io.lsu.req cmd: %x addr: %x data: %x mask: %x id: %d replayed_req: %b\n",
io.lsu.req.bits.cmd, io.lsu.req.bits.addr, io.lsu.req.bits.data, io.lsu.req.bits.mask, io.lsu.req.bits.meta.id, io.lsu.req.bits.meta.replay)
}
val replay = io.replay.req
when (replay.fire()) {
XSDebug(s"replay cmd: %x addr: %x data: %x mask: %x id: %d replayed_req: %b\n",
replay.bits.cmd, replay.bits.addr, replay.bits.data, replay.bits.mask, replay.bits.meta.id, replay.bits.meta.replay)
}
when (io.lsu.resp.fire()) {
XSDebug(s"io.lsu.resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
io.lsu.resp.bits.data, io.lsu.resp.bits.meta.id, io.lsu.resp.bits.meta.replay, io.lsu.resp.bits.miss, io.lsu.resp.bits.replay)
}
val miss_resp = io.miss_resp
XSDebug(miss_resp.fire(), "miss_resp client_id: %d entry_id: %d\n",
miss_resp.bits.client_id, miss_resp.bits.entry_id)
val miss_finish = io.miss_finish
XSDebug(miss_finish.fire(), "miss_finish client_id: %d entry_id: %d\n",
miss_finish.bits.client_id, miss_finish.bits.entry_id)
when (io.lsu.req.fire()) {
XSDebug(s"AtomicsMissEntryTransaction req 0\n")
}
when (io.lsu.resp.fire()) {
XSDebug(s"AtomicsMissEntryTransaction resp 0\n")
}
}
package xiangshan.cache
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLClientNode, TLClientParameters, TLMasterParameters, TLMasterPortParameters, TLArbiter}
// Meta data for dcache requests
// anything that should go with reqs and resps goes here
class DCacheMeta extends DCacheBundle {
val id = UInt(reqIdWidth.W)
val vaddr = UInt(VAddrBits.W) // maybe we should use VAddrBits?
val paddr = UInt(PAddrBits.W)
val uop = new MicroOp //FIXME: opt data width
val mmio = Bool()
val tlb_miss = Bool()
// dcache request id
// master uses id to correlate resps to reqs
// different masters can allocate and free ids independently
// as long as they do not share resp
val mask = UInt((DataBits/8).W)
val replay = Bool() // whether it's a replayed request?
}
// memory request in word granularity (load, mmio, lr/sc, atomics)
class DCacheWordReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val data = UInt(DataBits.W)
val mask = UInt((DataBits/8).W)
val meta = new DCacheMeta
}
// memory request in cache-line granularity (store)
class DCacheLineReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val data = UInt((cfg.blockBytes * 8).W)
val mask = UInt(cfg.blockBytes.W)
val meta = new DCacheMeta
}
class DCacheWordResp extends DCacheBundle
{
val data = UInt(DataBits.W)
val meta = new DCacheMeta
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val replay = Bool()
}
class DCacheLineResp extends DCacheBundle
{
val data = UInt((cfg.blockBytes * 8).W)
val meta = new DCacheMeta
// cache req missed, send it to miss queue
val miss = Bool()
// cache req nacked, replay it later
val replay = Bool()
}
class Refill extends DCacheBundle
{
val addr = UInt(PAddrBits.W)
val data = UInt((cfg.blockBytes * 8).W)
}
class DCacheWordIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheWordReq)
val resp = Flipped(DecoupledIO(new DCacheWordResp))
}
// used by load unit
class DCacheLoadIO extends DCacheWordIO
{
// kill previous cycle's req
val s1_kill = Output(Bool())
// cycle 0: virtual address: req.addr
// cycle 1: physical address: s1_paddr
val s1_paddr = Output(UInt(PAddrBits.W))
}
class DCacheLineIO extends DCacheBundle
{
val req = DecoupledIO(new DCacheLineReq )
val resp = Flipped(DecoupledIO(new DCacheLineResp))
}
class DCacheToLsuIO extends DCacheBundle {
val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
val lsq = ValidIO(new Refill) // refill to load queue, wake up load misses
val store = Flipped(new DCacheLineIO) // for sbuffer
val atomics = Flipped(new DCacheWordIO) // atomics reqs
}
class DCacheIO extends DCacheBundle {
val lsu = new DCacheToLsuIO
val prefetch = DecoupledIO(new MissReq)
}
class DCache()(implicit p: Parameters) extends LazyModule with HasDCacheParameters {
val clientParameters = TLMasterPortParameters.v1(
Seq(TLMasterParameters.v1(
name = "dcache",
sourceId = IdRange(0, cfg.nMissEntries+1),
supportsProbe = TransferSizes(cfg.blockBytes)
))
)
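// source id layout: 0 .. nMissEntries - 1 are MSHR ids, and the extra id
// nMissEntries is reserved for voluntary Releases (see the bus.d demux below)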
val clientNode = TLClientNode(Seq(clientParameters))
lazy val module = new DCacheImp(this)
}
class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters with HasXSLog {
val io = IO(new DCacheIO)
val (bus, edge) = outer.clientNode.out.head
require(bus.d.bits.data.getWidth == l1BusDataWidth, "DCache: tilelink width does not match")
//----------------------------------------
// core data structures
val dataArray = Module(new DuplicatedDataArray)
val metaArray = Module(new DuplicatedMetaArray)
/*
dataArray.dump()
metaArray.dump()
*/
//----------------------------------------
// core modules
val ldu = Seq.fill(LoadPipelineWidth) { Module(new LoadPipe) }
val stu = Module(new StorePipe)
val atomics = Module(new AtomicsPipe)
val storeMissQueue = Module(new StoreMissQueue)
val atomicsMissQueue = Module(new AtomicsMissQueue)
val missQueue = Module(new MissQueue(edge))
val wb = Module(new WritebackUnit(edge))
val prober = Module(new ProbeUnit(edge))
//----------------------------------------
// meta array
val MetaWritePortCount = 2
val MissQueueMetaWritePort = 0
val ProberMetaWritePort = 1
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, MetaWritePortCount))
metaWriteArb.io.in(MissQueueMetaWritePort) <> missQueue.io.meta_write
metaWriteArb.io.in(ProberMetaWritePort) <> prober.io.meta_write
metaArray.io.write <> metaWriteArb.io.out
// To simplify port arbitration
// Prober, StorePipe, LoadPipe 0 and AtomicsPipe all share read port 0
// if contention gets severe, consider load balancing across the two ports
val MetaReadPortCount = 4
val ProberMetaReadPort = 0
val StorePipeMetaReadPort = 1
val LoadPipeMetaReadPort = 2
val AtomicsPipeMetaReadPort = 3
val metaReadArb = Module(new Arbiter(new L1MetaReadReq, MetaReadPortCount))
metaReadArb.io.in(ProberMetaReadPort) <> prober.io.meta_read
metaReadArb.io.in(StorePipeMetaReadPort) <> stu.io.meta_read
metaReadArb.io.in(LoadPipeMetaReadPort) <> ldu(0).io.meta_read
metaReadArb.io.in(AtomicsPipeMetaReadPort) <> atomics.io.meta_read
metaArray.io.read(0) <> metaReadArb.io.out
prober.io.meta_resp <> metaArray.io.resp(0)
stu.io.meta_resp <> metaArray.io.resp(0)
ldu(0).io.meta_resp <> metaArray.io.resp(0)
atomics.io.meta_resp <> metaArray.io.resp(0)
for (w <- 1 until LoadPipelineWidth) {
metaArray.io.read(w) <> ldu(w).io.meta_read
ldu(w).io.meta_resp <> metaArray.io.resp(w)
}
//----------------------------------------
// data array
val DataWritePortCount = 3
val StorePipeDataWritePort = 0
val AtomicsPipeDataWritePort = 1
val MissQueueDataWritePort = 2
val dataWriteArb = Module(new Arbiter(new L1DataWriteReq, DataWritePortCount))
dataWriteArb.io.in(StorePipeDataWritePort) <> stu.io.data_write
dataWriteArb.io.in(MissQueueDataWritePort) <> missQueue.io.data_write
dataWriteArb.io.in(AtomicsPipeDataWritePort) <> atomics.io.data_write
dataArray.io.write <> dataWriteArb.io.out
// To simplify port arbitration
// WritebackUnit, StorePipe, LoadPipe 0 and AtomicsPipe all share read port 0
val DataReadPortCount = 4
val WritebackDataReadPort = 0
val StorePipeDataReadPort = 1
val LoadPipeDataReadPort = 2
val AtomicsPipeDataReadPort = 3
val dataReadArb = Module(new Arbiter(new L1DataReadReq, DataReadPortCount))
dataReadArb.io.in(WritebackDataReadPort) <> wb.io.data_req
dataReadArb.io.in(StorePipeDataReadPort) <> stu.io.data_read
dataReadArb.io.in(LoadPipeDataReadPort) <> ldu(0).io.data_read
dataReadArb.io.in(AtomicsPipeDataReadPort) <> atomics.io.data_read
dataArray.io.read(0) <> dataReadArb.io.out
dataArray.io.resp(0) <> wb.io.data_resp
dataArray.io.resp(0) <> stu.io.data_resp
dataArray.io.resp(0) <> atomics.io.data_resp
dataArray.io.resp(0) <> ldu(0).io.data_resp
for (w <- 1 until LoadPipelineWidth) {
dataArray.io.read(w) <> ldu(w).io.data_read
dataArray.io.resp(w) <> ldu(w).io.data_resp
}
//----------------------------------------
// load pipe and load miss queue
// the s1 kill signal
// only lsu uses this, replay never kills
for (w <- 0 until LoadPipelineWidth) {
val load_w_nack = nack_load(io.lsu.load(w).req.bits.addr)
ldu(w).io.lsu.req <> io.lsu.load(w).req
ldu(w).io.lsu.s1_paddr <> io.lsu.load(w).s1_paddr
ldu(w).io.nack := load_w_nack
XSDebug(load_w_nack, s"LoadUnit $w nacked\n")
ldu(w).io.lsu.resp <> io.lsu.load(w).resp
ldu(w).io.lsu.s1_kill <> io.lsu.load(w).s1_kill
assert(!(io.lsu.load(w).req.fire() && io.lsu.load(w).req.bits.meta.replay), "LSU should not replay requests")
}
for (w <- 0 until LoadPipelineWidth) {
assert(!(io.lsu.load(w).req.fire() && io.lsu.load(w).req.bits.meta.mmio), "MMIO requests should not go to cache")
assert(!(io.lsu.load(w).req.fire() && io.lsu.load(w).req.bits.meta.tlb_miss), "TLB missed requests should not go to cache")
}
//----------------------------------------
// store pipe and store miss queue
storeMissQueue.io.lsu <> io.lsu.store
/*
assert(!(storeMissQueue.io.replay.req.fire() && !storeMissQueue.io.replay.req.bits.meta.replay),
"StoreMissQueue should replay requests")
*/
assert(!(io.lsu.store.req.fire() && io.lsu.store.req.bits.meta.replay),
"Sbuffer should not should replay requests")
assert(!(io.lsu.store.req.fire() && io.lsu.store.req.bits.meta.mmio),
"MMIO requests should not go to cache")
assert(!(io.lsu.store.req.fire() && io.lsu.store.req.bits.meta.tlb_miss),
"TLB missed requests should not go to cache")
val store_block = block_store(storeMissQueue.io.replay.req.bits.addr)
block_decoupled(storeMissQueue.io.replay.req, stu.io.lsu.req, store_block && !storeMissQueue.io.replay.req.bits.meta.replay)
storeMissQueue.io.replay.resp <> stu.io.lsu.resp
XSDebug(store_block, "StorePipe blocked\n")
//----------------------------------------
// atomics pipe
atomics.io.wb_invalidate_lrsc := wb.io.inflight_addr
atomicsMissQueue.io.lsu <> io.lsu.atomics
atomicsMissQueue.io.replay <> atomics.io.lsu
val atomics_block = block_atomics(atomicsMissQueue.io.replay.req.bits.addr)
block_decoupled(atomicsMissQueue.io.replay.req, atomics.io.lsu.req, atomics_block && !atomicsMissQueue.io.replay.req.bits.meta.replay)
XSDebug(atomics_block, "AtomicsPipe blocked\n")
// when atomics are in flight, there should be no load or store in flight
// so atomics and store should not show up at the same time
val atomics_inflight = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid)).reduce(_||_)
val store_inflight = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid)).reduce(_||_)
assert(!(atomics_inflight && store_inflight))
// some other stuff
val atomicsReq = io.lsu.atomics.req
assert(!(atomicsReq.fire() && atomicsReq.bits.meta.replay),
"Atomics does not support request replay")
assert(!(atomicsReq.fire() && atomicsReq.bits.meta.mmio),
"MMIO requests should not go to cache")
assert(!(atomicsReq.fire() && atomicsReq.bits.meta.tlb_miss),
"TLB missed requests should not go to cache")
//----------------------------------------
// miss queue
require(LoadPipelineWidth == 2, "We hard code the number of load misses")
val loadMissQueueClientId_0 = 0.U(clientIdWidth.W)
val loadMissQueueClientId_1 = 1.U(clientIdWidth.W)
val storeMissQueueClientId = 2.U(clientIdWidth.W)
val atomicsMissQueueClientId = 3.U(clientIdWidth.W)
// Request
val missReqArb = Module(new Arbiter(new MissReq, nClientMissQueues))
val missReq = missQueue.io.req
val loadMissReq_0 = ldu(0).io.miss_req
val loadMissReq_1 = ldu(1).io.miss_req
val storeMissReq = stu.io.miss_req
val atomicsMissReq = atomics.io.miss_req
missReqArb.io.in(0) <> loadMissReq_0
missReqArb.io.in(0).bits.client_id := Cat(loadMissQueueClientId_0,
loadMissReq_0.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(1) <> loadMissReq_1
missReqArb.io.in(1).bits.client_id := Cat(loadMissQueueClientId_1,
loadMissReq_1.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(2).valid := storeMissReq.valid
storeMissReq.ready := missReqArb.io.in(2).ready
missReqArb.io.in(2).bits := storeMissReq.bits
missReqArb.io.in(2).bits.client_id := Cat(storeMissQueueClientId,
storeMissReq.bits.client_id(entryIdMSB, entryIdLSB))
missReqArb.io.in(3).valid := atomicsMissReq.valid
atomicsMissReq.ready := missReqArb.io.in(3).ready
missReqArb.io.in(3).bits := atomicsMissReq.bits
missReqArb.io.in(3).bits.client_id := Cat(atomicsMissQueueClientId,
atomicsMissReq.bits.client_id(entryIdMSB, entryIdLSB))
val miss_block = block_miss(missReqArb.io.out.bits.addr)
block_decoupled(missReqArb.io.out, missReq, miss_block)
XSDebug(miss_block, "MissQueue blocked\n")
// Response
// store and atomics wait for miss queue responses
val missResp = missQueue.io.resp
val storeMissResp = storeMissQueue.io.miss_resp
val atomicsMissResp = atomicsMissQueue.io.miss_resp
val clientId = missResp.bits.client_id(clientIdMSB, clientIdLSB)
val isStoreMissResp = clientId === storeMissQueueClientId
storeMissResp.valid := missResp.valid && isStoreMissResp
storeMissResp.bits := missResp.bits
storeMissResp.bits.client_id := missResp.bits.client_id(entryIdMSB, entryIdLSB)
val isAtomicsMissResp = clientId === atomicsMissQueueClientId
atomicsMissResp.valid := missResp.valid && isAtomicsMissResp
atomicsMissResp.bits := missResp.bits
atomicsMissResp.bits.client_id := missResp.bits.client_id(entryIdMSB, entryIdLSB)
// Finish
val missFinish = missQueue.io.finish
val storeMissFinish = storeMissQueue.io.miss_finish
val atomicsMissFinish = atomicsMissQueue.io.miss_finish
val missFinishArb = Module(new Arbiter(new MissFinish, 2))
missFinishArb.io.in(0).valid := storeMissFinish.valid
storeMissFinish.ready := missFinishArb.io.in(0).ready
missFinishArb.io.in(0).bits.entry_id := storeMissFinish.bits.entry_id
missFinishArb.io.in(0).bits.client_id := Cat(storeMissQueueClientId,
storeMissFinish.bits.client_id(entryIdMSB, entryIdLSB))
missFinishArb.io.in(1).valid := atomicsMissFinish.valid
atomicsMissFinish.ready := missFinishArb.io.in(1).ready
missFinishArb.io.in(1).bits.entry_id := atomicsMissFinish.bits.entry_id
missFinishArb.io.in(1).bits.client_id := Cat(atomicsMissQueueClientId,
atomicsMissFinish.bits.client_id(entryIdMSB, entryIdLSB))
missFinish <> missFinishArb.io.out
// refill to load queue
io.lsu.lsq <> missQueue.io.refill
// tilelink stuff
bus.a <> missQueue.io.mem_acquire
bus.e <> missQueue.io.mem_finish
when (bus.d.bits.source === cfg.nMissEntries.U) {
// This should be ReleaseAck
bus.d.ready := true.B
missQueue.io.mem_grant.valid := false.B
missQueue.io.mem_grant.bits := DontCare
} .otherwise {
// This should be GrantData
missQueue.io.mem_grant <> bus.d
}
// sync with prober
missQueue.io.probe_wb_req.valid := prober.io.wb_req.fire()
missQueue.io.probe_wb_req.bits := prober.io.wb_req.bits
missQueue.io.probe_active := prober.io.inflight_req_idx
//----------------------------------------
// prober
prober.io.req.valid := bus.b.valid && !block_probe(get_block_addr(bus.b.bits.address))
bus.b.ready := prober.io.req.ready && !block_probe(get_block_addr(bus.b.bits.address))
prober.io.req.bits := bus.b.bits
//----------------------------------------
// wb
// 0 goes to prober, 1 goes to missQueue evictions
val wbArb = Module(new Arbiter(new WritebackReq(edge.bundle.sourceBits), 2))
wbArb.io.in(0) <> prober.io.wb_req
wbArb.io.in(1) <> missQueue.io.wb_req
wb.io.req <> wbArb.io.out
missQueue.io.wb_resp := wb.io.resp
prober.io.wb_resp := wb.io.resp
wb.io.mem_grant := bus.d.fire() && bus.d.bits.source === cfg.nMissEntries.U
TLArbiter.lowestFromSeq(edge, bus.c, Seq(prober.io.rep, wb.io.release))
// dcache should only deal with DRAM addresses
when (bus.a.fire()) {
assert(bus.a.bits.address >= 0x80000000L.U)
}
when (bus.b.fire()) {
assert(bus.b.bits.address >= 0x80000000L.U)
}
when (bus.c.fire()) {
assert(bus.c.bits.address >= 0x80000000L.U)
}
io.prefetch.valid := missQueue.io.req.fire()
io.prefetch.bits := missQueue.io.req.bits
// synchronization stuff
def nack_load(addr: UInt) = {
val store_addr_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val store_addr_match = store_addr_matches.reduce(_||_)
val atomics_addr_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val atomics_addr_match = atomics_addr_matches.reduce(_||_)
val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
store_addr_match || atomics_addr_match || prober_idx_match || miss_idx_match
}
def block_store(addr: UInt) = {
val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
prober_idx_match || miss_idx_match
}
def block_atomics(addr: UInt) = {
val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
prober_idx_match || miss_idx_match
}
def block_miss(addr: UInt) = {
val prober_idx_match = prober.io.inflight_req_idx.valid && prober.io.inflight_req_idx.bits === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
prober_idx_match || miss_idx_match
}
def block_probe(addr: UInt) = {
val store_idx_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && get_idx(entry.bits) === get_idx(addr)))
val store_idx_match = store_idx_matches.reduce(_||_)
val atomics_idx_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && get_idx(entry.bits) === get_idx(addr)))
val atomics_idx_match = atomics_idx_matches.reduce(_||_)
val lrsc_addr_match = atomics.io.block_probe_addr.valid && atomics.io.block_probe_addr.bits === get_block_addr(addr)
val miss_idx_matches = VecInit(missQueue.io.block_probe_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
// also block probes against a miss req being fired this cycle
val miss_req_idx_match = missReq.fire() && get_idx(missReq.bits.addr) === get_idx(addr)
store_idx_match || atomics_idx_match || lrsc_addr_match || miss_idx_match || miss_req_idx_match
}
def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {
sink.valid := source.valid && !block_signal
source.ready := sink.ready && !block_signal
sink.bits := source.bits
}
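// block_decoupled gates both valid and ready, so neither side can observe a
// fire() while block_signal is high; bits still pass through, so the sink
// sees the request as soon as the block is lifted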
}
......@@ -140,10 +140,10 @@ class TlbEntry(superpage: Boolean = false) extends TlbBundle {
val insideLevel = level.getOrElse(0.U)
val a = tag(vpnnLen*3-1, vpnnLen*2) === vpn(vpnnLen*3-1, vpnnLen*2)
val b = tag(vpnnLen*2-1, vpnnLen*1) === vpn(vpnnLen*2-1, vpnnLen*1)
XSDebug(Mux(insideLevel.asBool, a&b, a), p"Hit superpage: hit:${Mux(insideLevel.asBool, a&b, a)} tag:${Hexadecimal(tag)} level:${insideLevel} data:${data} a:${a} b:${b} vpn:${Hexadecimal(vpn)}\n")("TlbEntrySuperpage")
XSDebug(Mux(insideLevel.asBool, a&b, a), p"Hit superpage: hit:${Mux(insideLevel.asBool, a&b, a)} tag:${Hexadecimal(tag)} level:${insideLevel} data:${data} a:${a} b:${b} vpn:${Hexadecimal(vpn)}\n")
Mux(insideLevel.asBool, a&b, a)
} else {
XSDebug(tag === vpn, p"Hit normalpage: hit:${tag === vpn} tag:${Hexadecimal(tag)} data:${data} vpn:${Hexadecimal(vpn)}\n")("TlbEntryNormalpage")
XSDebug(tag === vpn, p"Hit normalpage: hit:${tag === vpn} tag:${Hexadecimal(tag)} data:${data} vpn:${Hexadecimal(vpn)}\n")
tag === vpn
}
}
......
package xiangshan.cache
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink._
import utils.{HasTLDump, XSDebug, BoolStopWatch}
import chisel3.ExcitingUtils._
class MissReq extends DCacheBundle
{
val cmd = UInt(M_SZ.W)
val addr = UInt(PAddrBits.W)
val client_id = UInt(missQueueClientIdWidth.W)
val tag_match = Bool()
val way_en = Bits(nWays.W)
val old_meta = new L1Metadata
}
class MissResp extends DCacheBundle
{
val client_id = UInt(missQueueClientIdWidth.W)
val entry_id = UInt(missQueueEntryIdWidth.W)
}
class MissFinish extends DCacheBundle
{
val client_id = UInt(missQueueClientIdWidth.W)
val entry_id = UInt(missQueueEntryIdWidth.W)
}
// One miss entry deals with one missed block
class MissEntry(edge: TLEdgeOut) extends DCacheModule
{
val io = IO(new Bundle {
// MSHR ID
val id = Input(UInt())
// client requests
val req = Flipped(DecoupledIO(new MissReq))
val resp = DecoupledIO(new MissResp)
val finish = Flipped(DecoupledIO(new MissFinish))
// refill to load queue to wake up missed requests
val refill = ValidIO(new Refill)
// bus
val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
val mem_finish = DecoupledIO(new TLBundleE(edge.bundle))
// write back
val wb_req = DecoupledIO(new WritebackReq(edge.bundle.sourceBits))
val wb_resp = Input(Bool())
// write meta and data
val meta_write = DecoupledIO(new L1MetaWriteReq)
val data_write = DecoupledIO(new L1DataWriteReq)
// for synchronization
val block_idx = Output(Valid(UInt()))
val block_addr = Output(Valid(UInt()))
val block_probe_idx = Output(Valid(UInt()))
val block_probe_addr = Output(Valid(UInt()))
// watch prober's write back requests
val probe_wb_req = Flipped(ValidIO(new WritebackReq(edge.bundle.sourceBits)))
val probe_active = Flipped(ValidIO(UInt()))
})
// MSHR:
// 1. get req
// 2. refill when necessary
// 3. writeback when necessary
// 4. update meta data
// 5. send response back to client
// 6. wait for client's finish
// 7. done
val s_invalid :: s_refill_req :: s_refill_resp :: s_mem_finish :: s_wait_probe_exit :: s_wb_req :: s_wb_resp :: s_data_write_req :: s_meta_write_req :: s_send_resp :: s_client_finish :: Nil = Enum(11)
val state = RegInit(s_invalid)
val req_reg = Reg(new MissReq)
val req = Mux(io.req.fire(), io.req.bits, req_reg)
val req_idx = get_idx(req.addr)
val req_tag = get_tag(req.addr)
val req_block_addr = get_block_addr(req.addr)
// meta read results
val req_tag_match = req.tag_match
val req_old_meta = req.old_meta
val req_way_en = req.way_en
// what permission to release for the old block?
val (_, shrink_param, coh_on_clear) = req_old_meta.coh.onCacheControl(M_FLUSH)
// what permission to acquire for the new block?
val new_coh = RegInit(ClientMetadata.onReset)
val grow_param = new_coh.onAccess(req.cmd)._2
val coh_on_grant = new_coh.onGrant(req.cmd, io.mem_grant.bits.param)
val (_, _, refill_done, refill_address_inc) = edge.addr_inc(io.mem_grant)
val grantack = Reg(Valid(new TLBundleE(edge.bundle)))
val refill_ctr = Reg(UInt(log2Up(refillCycles).W))
val should_refill_data = Reg(Bool())
val needs_writeback = Reg(Bool())
// for read, we do not need to replay requests
// just refill data to the load queue, and then we can exit
// no need to walk through the send_resp and client_finish states
//
// for store and amo
// we send back a response when we have finished everything
// and inform clients to replay their requests
val no_replay = Reg(Bool())
// assign default values to output signals
io.req.ready := false.B
io.resp.valid := false.B
io.resp.bits := DontCare
io.finish.ready := false.B
io.refill.valid := false.B
io.refill.bits := DontCare
io.mem_acquire.valid := false.B
io.mem_acquire.bits := DontCare
io.mem_grant.ready := false.B
io.mem_finish.valid := false.B
io.mem_finish.bits := DontCare
io.wb_req.valid := false.B
io.wb_req.bits := DontCare
io.meta_write.valid := false.B
io.meta_write.bits := DontCare
io.data_write.valid := false.B
io.data_write.bits := DontCare
io.block_idx.valid := state =/= s_invalid
io.block_addr.valid := state =/= s_invalid
// break combinational loop
io.block_idx.bits := get_idx(req_reg.addr)
io.block_addr.bits := get_block_addr(req_reg.addr)
// to preserve forward progress, we allow probes while we are dealing with acquire/grant
io.block_probe_idx.valid := state =/= s_invalid && state =/= s_refill_req && state =/= s_refill_resp
io.block_probe_addr.valid := state =/= s_invalid && state =/= s_refill_req && state =/= s_refill_resp
io.block_probe_idx.bits := get_idx(req_reg.addr)
io.block_probe_addr.bits := get_block_addr(req_reg.addr)
when (state =/= s_invalid) {
XSDebug("entry: %d state: %d\n", io.id, state)
XSDebug("entry: %d block_idx_valid: %b block_idx: %x block_addr_valid: %b block_addr: %x\n",
io.id, io.block_idx.valid, io.block_idx.bits, io.block_addr.valid, io.block_addr.bits)
XSDebug("entry: %d block_probe_idx_valid: %b block_probe_idx: %x block_probe_addr_valid: %b block_probe_addr: %x\n",
io.id, io.block_probe_idx.valid, io.block_probe_idx.bits, io.block_probe_addr.valid, io.block_probe_addr.bits)
}
// --------------------------------------------
// s_invalid: receive requests
// decision making
def decide_next_state(): UInt = {
val new_state = WireInit(s_invalid)
val old_coh = req_old_meta.coh
val needs_wb = old_coh.onCacheControl(M_FLUSH)._1 // does the line we are evicting need to be written back
no_replay := req.cmd === M_XRD
when (req_tag_match) {
val (is_hit, _, coh_on_hit) = old_coh.onAccess(req.cmd)
when (is_hit) { // set dirty bit
// read should never go here
// we get here only when we need to set dirty bit
assert(isWrite(req.cmd))
// go update meta
new_coh := coh_on_hit
new_state := s_meta_write_req
} .otherwise { // upgrade permissions
new_coh := old_coh
new_state := s_refill_req
}
} .otherwise { // refill and writeback if necessary
new_coh := ClientMetadata.onReset
should_refill_data := true.B
needs_writeback := needs_wb
// refill first to decrease load miss penalty
new_state := s_refill_req
}
new_state
}
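// decision table sketch for decide_next_state:
// tag match, has permission -> s_meta_write_req (hit, just set the dirty bit)
// tag match, no permission  -> s_refill_req     (permission upgrade)
// no tag match              -> s_refill_req     (refill; write back the victim if dirty)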
when (state === s_invalid) {
io.req.ready := true.B
when (io.req.fire()) {
grantack.valid := false.B
refill_ctr := 0.U
should_refill_data := false.B
needs_writeback := false.B
no_replay := false.B
req_reg := io.req.bits
state := decide_next_state()
}
}
// --------------------------------------------
// refill
when (state === s_refill_req) {
io.mem_acquire.valid := true.B
// TODO: Use AcquirePerm if just doing permissions acquire
// TODO: review this
io.mem_acquire.bits := edge.AcquireBlock(
fromSource = io.id,
toAddress = (Cat(req_tag, req_idx) << blockOffBits).asUInt(),
lgSize = (log2Up(cfg.blockBytes)).U,
growPermissions = grow_param)._2
when (io.mem_acquire.fire()) {
state := s_refill_resp
}
}
// ecc-encoded data
val refill_data = Reg(Vec(blockRows, UInt(encRowBits.W)))
// raw data
val refill_data_raw = Reg(Vec(blockRows, UInt(rowBits.W)))
when (state === s_refill_resp) {
io.mem_grant.ready := true.B
when (edge.hasData(io.mem_grant.bits)) {
when (io.mem_grant.fire()) {
// for AcquireBlock BtoT, we clear should_refill_data
// and expect a response with no data (Grant, not GrantData)
// but the block inclusive cache responds with GrantData!
// so we temporarily removed this assertion
// we may consider using AcquirePerm BtoT for permission upgrade
// assert(should_refill_data)
refill_ctr := refill_ctr + 1.U
for (i <- 0 until beatRows) {
val row = io.mem_grant.bits.data(rowBits * (i + 1) - 1, rowBits * i)
refill_data((refill_ctr << log2Floor(beatRows)) + i.U) := Cat((0 until rowWords).reverse map { w =>
val word = row(wordBits * (w + 1) - 1, wordBits * w)
val word_encoded = cacheParams.dataCode.encode(word)
word_encoded
})
refill_data_raw((refill_ctr << log2Floor(beatRows)) + i.U) := row
}
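// row indexing sketch: beat b of the grant carries rows b * beatRows ..
// b * beatRows + beatRows - 1, so (refill_ctr << log2Floor(beatRows)) + i.U
// is the absolute row index within the refilled line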
when (refill_ctr === (refillCycles - 1).U) {
assert(refill_done, "refill not done!")
}
}
}
when (refill_done) {
grantack.valid := edge.isRequest(io.mem_grant.bits)
grantack.bits := edge.GrantAck(io.mem_grant.bits)
new_coh := coh_on_grant
state := s_mem_finish
}
}
// refill data to load queue
io.refill.valid := RegNext(state === s_refill_resp && refill_done &&
should_refill_data && no_replay)
io.refill.bits.addr := req_block_addr
io.refill.bits.data := refill_data_raw.asUInt
when (state === s_mem_finish) {
io.mem_finish.valid := grantack.valid
io.mem_finish.bits := grantack.bits
when (io.mem_finish.fire()) {
grantack.valid := false.B
state := s_wait_probe_exit
}
}
// --------------------------------------------
// sync with probe
when (state === s_wait_probe_exit) {
// we only wait for the prober when it is manipulating our set
val should_wait_for_probe_exit = io.probe_active.valid && io.probe_active.bits === req_idx
when (!should_wait_for_probe_exit) {
when (needs_writeback) {
// write back data
state := s_wb_req
} .otherwise {
// no need to write back
when (should_refill_data) {
// fill data into dcache
state := s_data_write_req
} .otherwise {
// permission update only
state := s_meta_write_req
}
}
}
}
// during refill, a probe may step in and release our blocks
// if it releases the block we are trying to acquire, we don't care, since we will get it back eventually
// but we need to know whether it releases the block we are trying to evict
val prober_writeback_our_block = (state === s_refill_req || state === s_refill_resp ||
state === s_mem_finish || state === s_wait_probe_exit) &&
io.probe_wb_req.valid && !io.probe_wb_req.bits.voluntary &&
io.probe_wb_req.bits.tag === req_old_meta.tag &&
io.probe_wb_req.bits.idx === req_idx &&
io.probe_wb_req.bits.way_en === req_way_en &&
needs_writeback
def onShrink(param: UInt): ClientMetadata = {
import freechips.rocketchip.tilelink.ClientStates._
import freechips.rocketchip.tilelink.TLPermissions._
val state = MuxLookup(param, Nothing, Seq(
TtoB -> Branch,
TtoN -> Nothing,
BtoN -> Nothing))
ClientMetadata(state)
}
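// e.g. a probe reporting TtoB leaves our victim line in Branch (shared,
// clean) while TtoN / BtoN drop it to Nothing, mirroring what the prober
// did to the line behind our back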
when (prober_writeback_our_block) {
req_reg.old_meta.coh := onShrink(io.probe_wb_req.bits.param)
}
// --------------------------------------------
// write back
when (state === s_wb_req) {
io.wb_req.valid := true.B
io.wb_req.bits.tag := req_old_meta.tag
io.wb_req.bits.idx := req_idx
io.wb_req.bits.param := shrink_param
io.wb_req.bits.way_en := req_way_en
io.wb_req.bits.source := io.id
io.wb_req.bits.voluntary := true.B
when (io.wb_req.fire()) {
state := s_wb_resp
}
}
when (state === s_wb_resp) {
when (io.wb_resp) {
state := s_data_write_req
}
}
// --------------------------------------------
// data write
when (state === s_data_write_req) {
io.data_write.valid := true.B
io.data_write.bits.addr := req_block_addr
io.data_write.bits.way_en := req_way_en
io.data_write.bits.wmask := VecInit((0 until blockRows) map (i => ~0.U(rowWords.W)))
io.data_write.bits.rmask := DontCare
io.data_write.bits.data := refill_data
when (io.data_write.fire()) {
state := s_meta_write_req
}
}
// --------------------------------------------
// meta write
when (state === s_meta_write_req) {
io.meta_write.valid := true.B
io.meta_write.bits.idx := req_idx
io.meta_write.bits.data.coh := new_coh
io.meta_write.bits.data.tag := req_tag
io.meta_write.bits.way_en := req_way_en
when (io.meta_write.fire()) {
when (no_replay) {
// no need to replay, exit now
state := s_invalid
} .otherwise {
state := s_send_resp
}
}
}
// --------------------------------------------
when (state === s_send_resp) {
io.resp.valid := true.B
io.resp.bits.client_id := req.client_id
io.resp.bits.entry_id := io.id
when (io.resp.fire()) {
// additional assertion
val (is_hit, _, coh_on_hit) = new_coh.onAccess(req.cmd)
assert(is_hit, "We still don't have permissions for this block")
assert(new_coh === coh_on_hit, "Incorrect coherence meta data")
state := s_client_finish
}
}
when (state === s_client_finish) {
io.finish.ready := true.B
when (io.finish.fire()) {
state := s_invalid
}
}
}
class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
{
val io = IO(new Bundle {
val req = Flipped(DecoupledIO(new MissReq))
val resp = ValidIO(new MissResp)
val finish = Flipped(DecoupledIO(new MissFinish))
val refill = ValidIO(new Refill)
val mem_acquire = Decoupled(new TLBundleA(edge.bundle))
val mem_grant = Flipped(Decoupled(new TLBundleD(edge.bundle)))
val mem_finish = Decoupled(new TLBundleE(edge.bundle))
val wb_req = Decoupled(new WritebackReq(edge.bundle.sourceBits))
val wb_resp = Input(Bool())
val meta_write = Decoupled(new L1MetaWriteReq)
val data_write = Decoupled(new L1DataWriteReq)
val probe_wb_req = Flipped(ValidIO(new WritebackReq(edge.bundle.sourceBits)))
val probe_active = Flipped(ValidIO(UInt()))
val inflight_req_idxes = Output(Vec(cfg.nMissEntries, Valid(UInt())))
val inflight_req_block_addrs = Output(Vec(cfg.nMissEntries, Valid(UInt())))
val block_probe_idxes = Output(Vec(cfg.nMissEntries, Valid(UInt())))
val block_probe_addrs = Output(Vec(cfg.nMissEntries, Valid(UInt())))
})
val resp_arb = Module(new Arbiter(new MissResp, cfg.nMissEntries))
val refill_arb = Module(new Arbiter(new Refill, cfg.nMissEntries))
val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, cfg.nMissEntries))
val data_write_arb = Module(new Arbiter(new L1DataWriteReq, cfg.nMissEntries))
val wb_req_arb = Module(new Arbiter(new WritebackReq(edge.bundle.sourceBits), cfg.nMissEntries))
// assign default values to output signals
io.finish.ready := false.B
io.mem_grant.ready := false.B
val entry_alloc_idx = Wire(UInt())
val req_ready = WireInit(false.B)
val entries = (0 until cfg.nMissEntries) map { i =>
val entry = Module(new MissEntry(edge))
entry.io.id := i.U(log2Up(cfg.nMissEntries).W)
// entry req
entry.io.req.valid := (i.U === entry_alloc_idx) && io.req.valid
entry.io.req.bits := io.req.bits
when (i.U === entry_alloc_idx) {
req_ready := entry.io.req.ready
}
// entry resp
resp_arb.io.in(i) <> entry.io.resp
refill_arb.io.in(i).valid := entry.io.refill.valid
refill_arb.io.in(i).bits := entry.io.refill.bits
// entry finish
entry.io.finish.valid := (i.U === io.finish.bits.entry_id) && io.finish.valid
entry.io.finish.bits := io.finish.bits
when (entry.io.finish.valid) {
io.finish.ready := entry.io.finish.ready
}
meta_write_arb.io.in(i) <> entry.io.meta_write
data_write_arb.io.in(i) <> entry.io.data_write
wb_req_arb.io.in(i) <> entry.io.wb_req
entry.io.wb_resp := io.wb_resp
entry.io.probe_wb_req <> io.probe_wb_req
entry.io.probe_active <> io.probe_active
entry.io.mem_grant.valid := false.B
entry.io.mem_grant.bits := DontCare
when (io.mem_grant.bits.source === i.U) {
entry.io.mem_grant <> io.mem_grant
}
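// only the entry whose id matches bits.source sees the grant: this is
// effectively a D-channel demux keyed on source id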
io.inflight_req_idxes(i) <> entry.io.block_idx
io.inflight_req_block_addrs(i) <> entry.io.block_addr
io.block_probe_idxes(i) <> entry.io.block_probe_idx
io.block_probe_addrs(i) <> entry.io.block_probe_addr
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(
BoolStopWatch(
start = entry.io.req.fire(),
stop = entry.io.resp.fire(),
startHighPriority = true),
"perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10),
Perf
)
}
entry
}
entry_alloc_idx := PriorityEncoder(entries.map(m=>m.io.req.ready))
io.req.ready := req_ready
io.resp.valid := resp_arb.io.out.valid
io.resp.bits := resp_arb.io.out.bits
resp_arb.io.out.ready := true.B
io.refill.valid := refill_arb.io.out.valid
io.refill.bits := refill_arb.io.out.bits
refill_arb.io.out.ready := true.B
// one refill at a time
val refill_vec = refill_arb.io.in.map(c => c.valid)
assert(PopCount(refill_vec) === 0.U || PopCount(refill_vec) === 1.U)
io.meta_write <> meta_write_arb.io.out
io.data_write <> data_write_arb.io.out
io.wb_req <> wb_req_arb.io.out
TLArbiter.lowestFromSeq(edge, io.mem_acquire, entries.map(_.io.mem_acquire))
TLArbiter.lowestFromSeq(edge, io.mem_finish, entries.map(_.io.mem_finish))
// print all input/output requests for debug purpose
// print req
val req = io.req
XSDebug(req.fire(), "req cmd: %x addr: %x client_id: %d\n",
req.bits.cmd, req.bits.addr, req.bits.client_id)
val resp = io.resp
XSDebug(resp.fire(), "resp client_id: %d entry_id: %d\n",
resp.bits.client_id, resp.bits.entry_id)
val finish = io.finish
XSDebug(finish.fire(), "finish client_id: %d entry_id: %d\n",
finish.bits.client_id, finish.bits.entry_id)
// print refill
XSDebug(io.refill.fire(), "refill addr %x\n", io.refill.bits.addr)
// print data_write
XSDebug(io.data_write.fire(), "data_write addr %x\n", io.data_write.bits.addr)
// print meta_write
XSDebug(io.meta_write.fire(), "meta_write idx %x way_en: %x old_tag: %x new_coh: %d new_tag: %x\n",
io.meta_write.bits.idx, io.meta_write.bits.way_en, io.meta_write.bits.tag,
io.meta_write.bits.data.coh.state, io.meta_write.bits.data.tag)
// print wb_req
XSDebug(io.wb_req.fire(), "wb_req idx %x tag: %x source: %d param: %x way_en: %x voluntary: %b\n",
io.wb_req.bits.idx, io.wb_req.bits.tag,
io.wb_req.bits.source, io.wb_req.bits.param,
io.wb_req.bits.way_en, io.wb_req.bits.voluntary)
// print tilelink messages
when (io.mem_acquire.fire()) {
XSDebug("mem_acquire ")
io.mem_acquire.bits.dump
}
when (io.mem_grant.fire()) {
XSDebug("mem_grant ")
io.mem_grant.bits.dump
}
when (io.mem_finish.fire()) {
XSDebug("mem_finish ")
io.mem_finish.bits.dump
}
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(io.req.fire(), "perfCntDCacheMiss", Perf)
}
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink._
import utils.{HasTLDump, XSDebug}
class ProbeUnit(edge: TLEdgeOut) extends DCacheModule with HasTLDump {
val io = IO(new Bundle {
val req = Flipped(Decoupled(new TLBundleB(edge.bundle)))
val rep = Decoupled(new TLBundleC(edge.bundle))
val meta_read = Decoupled(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
val meta_write = Decoupled(new L1MetaWriteReq)
val wb_req = Decoupled(new WritebackReq(edge.bundle.sourceBits))
val wb_resp = Input(Bool())
val inflight_req_idx = Output(Valid(UInt()))
val inflight_req_block_addr = Output(Valid(UInt()))
})
val s_invalid :: s_meta_read_req :: s_meta_read_resp :: s_decide_next_state :: s_release :: s_wb_req :: s_wb_resp :: s_meta_write_req :: Nil = Enum(8)
val state = RegInit(s_invalid)
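// FSM sketch:
// s_invalid -> s_meta_read_req -> s_meta_read_resp -> s_decide_next_state
// dirty hit: s_wb_req -> s_wb_resp -> s_meta_write_req -> s_invalid
// clean hit: s_release -> s_meta_write_req -> s_invalid
// miss:      s_release -> s_invalid (report NtoN, no meta update)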
val req = Reg(new TLBundleB(edge.bundle))
val req_idx = get_idx(req.address)
val req_tag = get_tag(req.address)
val req_block_addr = get_block_addr(req.address)
val req_way_en = Reg(UInt())
val tag_matches = req_way_en.orR
val old_coh = Reg(new ClientMetadata)
val miss_coh = ClientMetadata.onReset
val reply_coh = Mux(tag_matches, old_coh, miss_coh)
val (is_dirty, report_param, new_coh) = reply_coh.onProbe(req.param)
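// if no way matches, reply with the reset metadata (Nothing): onProbe
// then yields a clean NtoN report, so we ProbeAck without data and
// never touch the meta array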
// assign default values to signals
io.req.ready := false.B
io.rep.valid := false.B
io.rep.bits := DontCare
io.meta_read.valid := false.B
io.meta_read.bits := DontCare
io.meta_write.valid := false.B
io.meta_write.bits := DontCare
io.wb_req.valid := false.B
io.wb_req.bits := DontCare
io.inflight_req_idx.valid := state =/= s_invalid
io.inflight_req_idx.bits := req_idx
io.inflight_req_block_addr.valid := state =/= s_invalid
io.inflight_req_block_addr.bits := req_block_addr
when (state =/= s_invalid) {
XSDebug("state: %d\n", state)
}
when (state === s_invalid) {
io.req.ready := true.B
when (io.req.fire()) {
req := io.req.bits
state := s_meta_read_req
}
}
when (state === s_meta_read_req) {
io.meta_read.valid := true.B
val meta_read = io.meta_read.bits
meta_read.idx := req_idx
meta_read.way_en := ~0.U(nWays.W)
meta_read.tag := DontCare
when (io.meta_read.fire()) {
state := s_meta_read_resp
}
}
when (state === s_meta_read_resp) {
// tag check
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val tag_eq_way = wayMap((w: Int) => io.meta_resp(w).tag === (req_tag)).asUInt
val tag_match_way = wayMap((w: Int) => tag_eq_way(w) && io.meta_resp(w).coh.isValid()).asUInt
val hit_state = Mux1H(tag_match_way, wayMap((w: Int) => io.meta_resp(w).coh))
old_coh := hit_state
req_way_en := tag_match_way
state := s_decide_next_state
}
when (state === s_decide_next_state) {
// decide next state
state := Mux(tag_matches && is_dirty, s_wb_req, s_release)
}
// no need to write back, just release
when (state === s_release) {
io.rep.valid := true.B
io.rep.bits := edge.ProbeAck(req, report_param)
when (io.rep.fire()) {
state := Mux(tag_matches, s_meta_write_req, s_invalid)
}
}
when (state === s_wb_req) {
io.wb_req.valid := true.B
io.wb_req.bits.tag := req_tag
io.wb_req.bits.idx := req_idx
io.wb_req.bits.param := report_param
io.wb_req.bits.way_en := req_way_en
io.wb_req.bits.source := req.source
io.wb_req.bits.voluntary := false.B
when (io.wb_req.fire()) {
state := s_wb_resp
}
}
when (state === s_wb_resp) {
when (io.wb_resp) {
state := s_meta_write_req
}
}
when (state === s_meta_write_req) {
io.meta_write.valid := true.B
io.meta_write.bits.idx := req_idx
io.meta_write.bits.data.coh := new_coh
io.meta_write.bits.data.tag := req_tag
io.meta_write.bits.way_en := req_way_en
when (io.meta_write.fire()) {
state := s_invalid
}
}
// print wb_req
XSDebug(io.wb_req.fire(), "wb_req idx %x tag: %x source: %d param: %x way_en: %x voluntary: %b\n",
io.wb_req.bits.idx, io.wb_req.bits.tag,
io.wb_req.bits.source, io.wb_req.bits.param,
io.wb_req.bits.way_en, io.wb_req.bits.voluntary)
// print tilelink messages
when (io.req.fire()) {
XSDebug("mem_probe ")
io.req.bits.dump
}
when (io.rep.fire()) {
XSDebug("mem_release ")
io.rep.bits.dump
}
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
import bus.tilelink._
class StoreMissEntry extends DCacheModule
{
val io = IO(new Bundle {
val id = Input(UInt())
val lsu = Flipped(new DCacheLineIO)
val replay = new DCacheLineIO
val miss_resp = Flipped(ValidIO(new MissResp))
val miss_finish = DecoupledIO(new MissFinish)
val idx = Output(Valid(UInt()))
val tag = Output(Valid(UInt()))
})
val s_invalid :: s_replay_req :: s_replay_resp :: s_resp :: s_miss_resp :: s_miss_finish :: Nil = Enum(6)
val state = RegInit(s_invalid)
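// FSM sketch:
// s_invalid -> s_replay_req -> s_replay_resp
//   hit:                 -> s_resp (fresh req) / s_miss_finish (replayed req)
//   miss, mshr refused:  -> s_replay_req (retry until accepted)
//   miss, mshr accepted: -> s_miss_resp -> s_replay_req (marked as replay)
// s_miss_finish -> s_resp -> s_invalid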
val req = Reg(new DCacheLineReq )
val resp = Reg(new DCacheLineResp)
val req_idx = get_idx(req.addr)
val req_tag = get_tag(req.addr)
val req_block_addr = get_block_addr(req.addr)
val reg_miss_resp = Reg(new MissResp)
// assign default values to output signals
io.lsu.req.ready := state === s_invalid
io.lsu.resp.valid := false.B
io.lsu.resp.bits := DontCare
io.replay.req.valid := false.B
io.replay.req.bits := DontCare
io.replay.resp.ready := false.B
io.miss_finish.valid := false.B
io.miss_finish.bits := DontCare
io.idx.valid := state =/= s_invalid
io.tag.valid := state =/= s_invalid
io.idx.bits := req_idx
io.tag.bits := req_tag
when (state =/= s_invalid) {
XSDebug("entry: %d state: %d idx: %x tag: %x\n", io.id, state, io.idx.bits, io.tag.bits)
}
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
when (io.lsu.req.fire()) {
assert(io.lsu.req.bits.cmd === M_XWR)
assert(!io.lsu.req.bits.meta.replay)
req := io.lsu.req.bits
state := s_replay_req
}
}
// --------------------------------------------
// replay
when (state === s_replay_req) {
io.replay.req.valid := true.B
io.replay.req.bits := req
// use our own storeMissEntryId
// miss resp are routed by this id
io.replay.req.bits.meta.id := io.id
when (io.replay.req.fire()) {
state := s_replay_resp
}
}
when (state === s_replay_resp) {
io.replay.resp.ready := true.B
when (io.replay.resp.fire()) {
// req missed
when (io.replay.resp.bits.miss) {
// replayed reqs should not miss
assert(!req.meta.replay)
// the req missed and did not enter mshr
// so replay it until it hits or enters mshr
when (io.replay.resp.bits.replay) {
state := s_replay_req
} .otherwise {
// the req missed and enters mshr
// wait for miss response
state := s_miss_resp
}
} .otherwise {
// req hits, everything OK
resp := io.replay.resp.bits
when (!req.meta.replay) {
state := s_resp
} .otherwise {
// if it's a replayed request
// we need to tell mshr, we are done
state := s_miss_finish
}
}
}
}
when (state === s_miss_resp) {
when (io.miss_resp.fire()) {
reg_miss_resp := io.miss_resp.bits
// mark req as replayed req
req.meta.replay := true.B
state := s_replay_req
}
}
when (state === s_miss_finish) {
io.miss_finish.valid := true.B
io.miss_finish.bits.client_id := io.id
io.miss_finish.bits.entry_id := reg_miss_resp.entry_id
when (io.miss_finish.fire()) {
state := s_resp
}
}
// --------------------------------------------
when (state === s_resp) {
io.lsu.resp.valid := true.B
io.lsu.resp.bits := resp
// response to sbuffer should carry the original request id
io.lsu.resp.bits.meta.id := req.meta.id
when (io.lsu.resp.fire()) {
state := s_invalid
}
}
// debug output
when (io.lsu.req.fire()) {
XSDebug(s"StoreMissEntryTransaction req %d\n", io.id)
}
when (io.lsu.resp.fire()) {
XSDebug(s"StoreMissEntryTransaction resp %d\n", io.id)
}
}
class StoreMissQueue extends DCacheModule
{
val io = IO(new Bundle {
val lsu = Flipped(new DCacheLineIO)
val replay = new DCacheLineIO
val miss_resp = Flipped(ValidIO(new MissResp))
val miss_finish = DecoupledIO(new MissFinish)
})
val miss_finish_arb = Module(new Arbiter(new MissFinish, cfg.nStoreMissEntries))
val replay_arb = Module(new Arbiter(new DCacheLineReq, cfg.nStoreMissEntries))
val resp_arb = Module(new Arbiter(new DCacheLineResp, cfg.nStoreMissEntries))
val idx_matches = Wire(Vec(cfg.nStoreMissEntries, Bool()))
val tag_matches = Wire(Vec(cfg.nStoreMissEntries, Bool()))
val tag_match = Mux1H(idx_matches, tag_matches)
val idx_match = idx_matches.reduce(_||_)
when (io.lsu.req.valid) {
XSDebug("idx_match: %b tag_match: %b\n", idx_match, tag_match)
}
val req = io.lsu.req
val entry_alloc_idx = Wire(UInt())
val pri_rdy = WireInit(false.B)
val pri_val = req.valid && !idx_match
// sbuffer should not send down the same block twice;
// moreover, it should still allow writes into the sbuffer
// while the same block is being handled by the dcache
// assert(!(req.valid && tag_match))
io.replay.resp.ready := false.B
val entry_id_MSB = reqIdWidth - 1
val entry_id_LSB = reqIdWidth - storeMissQueueEntryIdWidth
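// illustrative helpers (not part of the original code) for the id scheme:
// replay requests carry Cat(entry_id, original_id) in meta.id, so replay
// responses can be demuxed back to their entry and the original id
// restored before responding to the sbuffer
def packReplayId(entry_id: UInt, orig_id: UInt): UInt =
Cat(entry_id, orig_id(entry_id_LSB - 1, 0))
def unpackEntryId(id: UInt): UInt = id(entry_id_MSB, entry_id_LSB)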
val entries = (0 until cfg.nStoreMissEntries) map { i =>
val entry = Module(new StoreMissEntry)
entry.io.id := i.U(storeMissQueueEntryIdWidth.W)
idx_matches(i) := entry.io.idx.valid && entry.io.idx.bits === get_idx(req.bits.addr)
tag_matches(i) := entry.io.tag.valid && entry.io.tag.bits === get_tag(req.bits.addr)
// lsu req and resp
val entry_lsu = entry.io.lsu
entry_lsu.req.valid := (i.U === entry_alloc_idx) && pri_val
when (i.U === entry_alloc_idx) {
pri_rdy := entry_lsu.req.ready
}
entry_lsu.req.bits := req.bits
resp_arb.io.in(i) <> entry_lsu.resp
// replay req and resp
val entry_replay = entry.io.replay
replay_arb.io.in(i) <> entry_replay.req
replay_arb.io.in(i).bits.meta.id <> Cat(entry.io.id,
entry_replay.req.bits.meta.id(entry_id_LSB - 1, 0))
val resp_entry_id = io.replay.resp.bits.meta.id(entry_id_MSB, entry_id_LSB)
entry_replay.resp.valid := (i.U === resp_entry_id) && io.replay.resp.valid
entry_replay.resp.bits := io.replay.resp.bits
entry_replay.resp.bits.meta.id := Cat(0.U(storeMissQueueEntryIdWidth.W),
io.replay.resp.bits.meta.id(entry_id_LSB - 1, 0))
when (entry_replay.resp.valid) {
io.replay.resp.ready := entry_replay.resp.ready
}
entry.io.miss_resp.valid := (i.U === io.miss_resp.bits.client_id) && io.miss_resp.valid
entry.io.miss_resp.bits := io.miss_resp.bits
miss_finish_arb.io.in(i) <> entry.io.miss_finish
entry
}
entry_alloc_idx := PriorityEncoder(entries.map(m=>m.io.lsu.req.ready))
// whenever index matches, do not let it in
req.ready := pri_rdy && !idx_match
io.lsu.resp <> resp_arb.io.out
io.replay.req <> replay_arb.io.out
io.miss_finish <> miss_finish_arb.io.out
// debug output
when (req.fire()) {
XSDebug(s"req cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask, req.bits.meta.id, req.bits.meta.replay)
}
val replay = io.replay.req
when (replay.fire()) {
XSDebug(s"replay cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
replay.bits.cmd, replay.bits.addr, replay.bits.data, replay.bits.mask, replay.bits.meta.id, replay.bits.meta.replay)
}
val resp = io.lsu.resp
when (resp.fire()) {
XSDebug(s"resp: data: %x id: %d replay: %b miss: %b replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
}
val miss_resp = io.miss_resp
XSDebug(miss_resp.fire(), "miss_resp client_id: %d entry_id: %d\n",
miss_resp.bits.client_id, miss_resp.bits.entry_id)
val miss_finish = io.miss_finish
XSDebug(miss_finish.fire(), "miss_finish client_id: %d entry_id: %d\n",
miss_finish.bits.client_id, miss_finish.bits.entry_id)
}
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.{XSDebug}
class StorePipe extends DCacheModule
{
val io = IO(new DCacheBundle{
val lsu = Flipped(new DCacheLineIO)
val data_read = DecoupledIO(new L1DataReadReq)
val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
val data_write = DecoupledIO(new L1DataWriteReq)
val meta_read = DecoupledIO(new L1MetaReadReq)
val meta_resp = Input(Vec(nWays, new L1Metadata))
val inflight_req_idxes = Output(Vec(3, Valid(UInt())))
val inflight_req_block_addrs = Output(Vec(3, Valid(UInt())))
// send miss request to miss queue
val miss_req = DecoupledIO(new MissReq)
})
// LSU requests
io.lsu.req.ready := io.meta_read.ready && io.data_read.ready
io.meta_read.valid := io.lsu.req.valid
io.data_read.valid := io.lsu.req.valid
val meta_read = io.meta_read.bits
val data_read = io.data_read.bits
// Tag read for new requests
meta_read.idx := get_idx(io.lsu.req.bits.addr)
meta_read.way_en := ~0.U(nWays.W)
meta_read.tag := DontCare
// Data read for new requests
data_read.addr := io.lsu.req.bits.addr
data_read.way_en := ~0.U(nWays.W)
data_read.rmask := ~0.U(blockRows.W)
// Pipeline
// stage 0
val s0_valid = io.lsu.req.fire()
val s0_req = io.lsu.req.bits
assert(!(s0_valid && s0_req.cmd =/= MemoryOpConstants.M_XWR), "StorePipe only accepts store req")
dump_pipeline_reqs("StorePipe s0", s0_valid, s0_req)
// stage 1
val s1_req = RegNext(s0_req)
val s1_valid = RegNext(s0_valid, init = false.B)
val s1_addr = s1_req.addr
val s1_nack = false.B
dump_pipeline_reqs("StorePipe s1", s1_valid, s1_req)
val meta_resp = io.meta_resp
// tag check
def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_addr))).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
val s1_tag_match = s1_tag_match_way.orR
val s1_hit_meta = Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w)))
val s1_hit_state = s1_hit_meta.coh
// replacement policy
val replacer = cacheParams.replacement
val s1_repl_way_en = UIntToOH(replacer.way)
val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
when (io.miss_req.fire()) {
replacer.miss
}
// stage 2
val s2_req = RegNext(s1_req)
val s2_valid = RegNext(s1_valid, init = false.B)
dump_pipeline_reqs("StorePipe s2", s2_valid, s2_req)
val s2_tag_match_way = RegNext(s1_tag_match_way)
val s2_tag_match = RegNext(s1_tag_match)
val s2_hit_meta = RegNext(s1_hit_meta)
val s2_hit_state = RegNext(s1_hit_state)
val s2_has_permission = s2_hit_state.onAccess(s2_req.cmd)._1
val s2_new_hit_state = s2_hit_state.onAccess(s2_req.cmd)._3
val s2_repl_meta = RegNext(s1_repl_meta)
val s2_repl_way_en = RegNext(s1_repl_way_en)
val s2_old_meta = Mux(s2_tag_match, s2_hit_meta, s2_repl_meta)
val s2_way_en = Mux(s2_tag_match, s2_tag_match_way, s2_repl_way_en)
// we not only need the right permissions,
// we also require that the coherence state does not change on a hit,
// i.e. new_hit_state === old_hit_state
//
// If the state changes on a hit,
// we treat it as a miss and let the mshr deal with it,
// since we cannot write meta data in this pipeline.
// It's possible to have permission while the state still changes on a hit:
// eg: a write to an exclusive but clean block
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state
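// worked example (assuming rocket-chip ClientStates): a store hitting a
// Trunk (exclusive but clean) line has permission, but onAccess(M_XWR)
// promotes it to Dirty, so s2_new_hit_state =/= s2_hit_state and the
// request takes the miss path; only a store hitting an already-Dirty
// line counts as s2_hit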
val s2_nack = Wire(Bool())
// when req got nacked, upper levels should replay this request
// the same set is busy
val s2_nack_hit = RegNext(s1_nack)
// cannot allocate an mshr for this store miss
val s2_nack_no_mshr = io.miss_req.valid && !io.miss_req.ready
// Bank conflict on data arrays
// For now, we use DuplicatedDataArray, so no bank conflicts
val s2_nack_data = false.B
s2_nack := s2_nack_hit || s2_nack_no_mshr || s2_nack_data
val s2_info = p"tag match: $s2_tag_match hasPerm: $s2_has_permission" +
p" hit state: $s2_hit_state new state: $s2_new_hit_state s2_nack: $s2_nack\n"
// deal with data
val data_resp = io.data_resp
val s2_data = Mux1H(s2_tag_match_way, data_resp)
val s2_data_decoded = (0 until blockRows) map { r =>
(0 until rowWords) map { w =>
val data = s2_data(r)(encWordBits * (w + 1) - 1, encWordBits * w)
val decoded = cacheParams.dataCode.decode(data)
assert(!(s2_valid && s2_hit && !s2_nack && decoded.uncorrectable))
decoded.corrected
}
}
val wdata_merged = Wire(Vec(blockRows, UInt(encRowBits.W)))
def mergePutData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
val full_wmask = FillInterleaved(8, wmask)
((~full_wmask & old_data) | (full_wmask & new_data))
}
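// e.g. old_data = 0x1122334455667788, new_data = 0xAAAAAAAAAAAAAAAA,
// wmask = 0x0F: full_wmask = 0x00000000FFFFFFFF, result =
// 0x11223344AAAAAAAA -- only the masked-in low four bytes are replaced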
// now, we do not deal with ECC
for (i <- 0 until blockRows) {
wdata_merged(i) := Cat((0 until rowWords).reverse map { w =>
val old_data = s2_data_decoded(i)(w)
val new_data = s2_req.data(rowBits * (i + 1) - 1, rowBits * i)(wordBits * (w + 1) - 1, wordBits * w)
val wmask = s2_req.mask(rowBytes * (i + 1) - 1, rowBytes * i)(wordBytes * (w + 1) - 1, wordBytes * w)
val wdata = mergePutData(old_data, new_data, wmask)
val wdata_encoded = cacheParams.dataCode.encode(wdata)
wdata_encoded
})
}
// write dcache if hit
val data_write = io.data_write.bits
io.data_write.valid := s2_valid && s2_hit
data_write.rmask := DontCare
data_write.way_en := s2_tag_match_way
data_write.addr := s2_req.addr
data_write.wmask := VecInit((0 until blockRows) map (i => ~0.U(rowWords.W)))
data_write.data := wdata_merged
assert(!(io.data_write.valid && !io.data_write.ready))
// only dump these signals when they are actually valid
dump_pipeline_valids("StorePipe s2", "s2_hit", s2_valid && s2_hit)
dump_pipeline_valids("StorePipe s2", "s2_nack", s2_valid && s2_nack)
dump_pipeline_valids("StorePipe s2", "s2_nack_hit", s2_valid && s2_nack_hit)
dump_pipeline_valids("StorePipe s2", "s2_nack_no_mshr", s2_valid && s2_nack_no_mshr)
dump_pipeline_valids("StorePipe s2", "s2_nack_data", s2_valid && s2_nack_data)
// send store miss to miss queue
io.miss_req.valid := s2_valid && !s2_nack_hit && !s2_nack_data && !s2_hit
io.miss_req.bits.cmd := s2_req.cmd
io.miss_req.bits.addr := get_block_addr(s2_req.addr)
io.miss_req.bits.tag_match := s2_tag_match
io.miss_req.bits.way_en := s2_way_en
io.miss_req.bits.old_meta := s2_old_meta
io.miss_req.bits.client_id := s2_req.meta.id
val resp = Wire(Valid(new DCacheLineResp))
resp.valid := s2_valid
resp.bits.data := DontCare
resp.bits.meta := s2_req.meta
resp.bits.miss := !s2_hit || s2_nack
resp.bits.replay := resp.bits.miss && (!io.miss_req.fire() || s2_nack)
io.lsu.resp.valid := resp.valid
io.lsu.resp.bits := resp.bits
assert(!(resp.valid && !io.lsu.resp.ready))
when (resp.valid) {
XSDebug(s"StorePipe resp: data: %x id: %d replayed_req: %b miss: %b need_replay: %b\n",
resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.replay)
}
io.inflight_req_idxes(0).valid := io.lsu.req.valid
io.inflight_req_idxes(1).valid := s1_valid
io.inflight_req_idxes(2).valid := s2_valid
io.inflight_req_idxes(0).bits := get_idx(s0_req.addr)
io.inflight_req_idxes(1).bits := get_idx(s1_req.addr)
io.inflight_req_idxes(2).bits := get_idx(s2_req.addr)
io.inflight_req_block_addrs(0).valid := io.lsu.req.valid
io.inflight_req_block_addrs(1).valid := s1_valid
io.inflight_req_block_addrs(2).valid := s2_valid
io.inflight_req_block_addrs(0).bits := get_block_addr(s0_req.addr)
io.inflight_req_block_addrs(1).bits := get_block_addr(s1_req.addr)
io.inflight_req_block_addrs(2).bits := get_block_addr(s2_req.addr)
// -------
// Debug logging functions
def dump_pipeline_reqs(pipeline_stage_name: String, valid: Bool, req: DCacheLineReq ) = {
when (valid) {
XSDebug(
s"$pipeline_stage_name cmd: %x addr: %x id: %d replay: %b\n",
req.cmd, req.addr, req.meta.id, req.meta.replay
)
}
}
def dump_pipeline_valids(pipeline_stage_name: String, signal_name: String, valid: Bool) = {
when (valid) {
XSDebug(p"$pipeline_stage_name $signal_name " + s2_info)
}
}
}
......@@ -106,10 +106,10 @@ class MMIOEntry(edge: TLEdgeOut) extends DCacheModule
// --------------------------------------------
when (state === s_send_resp) {
io.resp.valid := true.B
io.resp.bits.data := resp_data
// meta data should go with the response
io.resp.bits.meta := req.meta
io.resp.bits.miss := false.B
io.resp.bits.id := req.id
io.resp.bits.replay := false.B
when (io.resp.fire()) {
......
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils.XSDebug
import freechips.rocketchip.tilelink.{TLBundleC, TLEdgeOut, TLPermissions}
class WritebackReq(sourceBits: Int) extends DCacheBundle {
val tag = Bits(tagBits.W)
val idx = Bits(idxBits.W)
val source = UInt(sourceBits.W)
val param = UInt(TLPermissions.cWidth.W)
val way_en = Bits(nWays.W)
val voluntary = Bool()
override def cloneType: WritebackReq.this.type = new WritebackReq(sourceBits).asInstanceOf[this.type]
}
class WritebackUnit(edge: TLEdgeOut) extends DCacheModule {
val io = IO(new Bundle {
val req = Flipped(DecoupledIO(new WritebackReq(edge.bundle.sourceBits)))
val resp = Output(Bool())
val data_req = DecoupledIO(new L1DataReadReq)
val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
val release = DecoupledIO(new TLBundleC(edge.bundle))
val mem_grant = Input(Bool())
val inflight_addr = Output(Valid(UInt()))
})
val req = Reg(new WritebackReq(edge.bundle.sourceBits))
val s_invalid :: s_data_read_req :: s_data_read_resp :: s_active :: s_grant :: s_resp :: Nil = Enum(6)
val state = RegInit(s_invalid)
val should_writeback_data = Reg(Bool())
val data_req_cnt = RegInit(0.U(log2Up(refillCycles+1).W))
val (_, last_beat, all_beats_done, beat_count) = edge.count(io.release)
val wb_buffer = Reg(Vec(refillCycles, UInt(beatBits.W)))
val acked = RegInit(false.B)
// assign default value to signals
io.req.ready := false.B
io.resp := false.B
io.data_req.valid := false.B
io.data_req.bits := DontCare
io.release.valid := false.B
io.release.bits := DontCare
io.inflight_addr.valid := state =/= s_invalid
io.inflight_addr.bits := req.idx << blockOffBits
when (state =/= s_invalid) {
XSDebug("state: %d\n", state)
}
when (state === s_invalid) {
io.req.ready := true.B
when (io.req.fire()) {
// for report types: TtoT, BtoB, NtoN, we do nothing
import freechips.rocketchip.tilelink.TLPermissions._
def is_dirty(x: UInt) = x <= TtoN
def do_nothing(x: UInt) = x > BtoN
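// TLPermissions C-channel encodings (as defined in rocket-chip):
// shrinks TtoB = 0, TtoN = 1 leave T state and may own dirty data;
// BtoN = 2 shrinks from Branch, never dirty, released without data;
// reports TtoT = 3, BtoB = 4, NtoN = 5 keep permissions, nothing to do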
when (do_nothing(io.req.bits.param)) {
should_writeback_data := false.B
state := s_resp
} .otherwise {
when (is_dirty(io.req.bits.param)) {
state := s_data_read_req
should_writeback_data := true.B
} .otherwise {
state := s_active
should_writeback_data := false.B
}
data_req_cnt := 0.U
req := io.req.bits
acked := false.B
}
}
}
val dataArrayLatency = 2
val data_array_ctr = Reg(UInt(log2Up(dataArrayLatency).W))
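// the data array is assumed to answer a fixed dataArrayLatency cycles
// after the read fires; the counter below just waits that long before
// sampling io.data_resp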
when (state === s_data_read_req) {
// Data read for new requests
io.data_req.valid := true.B
io.data_req.bits.addr := req.idx << blockOffBits
io.data_req.bits.way_en := req.way_en
io.data_req.bits.rmask := ~0.U(blockRows.W)
when (io.data_req.fire()) {
state := s_data_read_resp
data_array_ctr := 0.U
}
}
when (state === s_data_read_resp) {
data_array_ctr := data_array_ctr + 1.U
when (data_array_ctr === (dataArrayLatency - 1).U) {
val way_idx = OHToUInt(req.way_en)
for (i <- 0 until refillCycles) {
wb_buffer(i) := Cat((0 until beatRows).reverse map { j =>
val idx = i * beatRows + j
val row = io.data_resp(way_idx)(idx)
// encode each word in this row
val row_decoded = Cat((0 until rowWords).reverse map { w =>
val data_word = row(encWordBits * (w + 1) - 1, encWordBits * w)
val decoded = cacheParams.dataCode.decode(data_word)
val data_word_decoded = decoded.corrected
assert(!decoded.uncorrectable)
data_word_decoded
})
row_decoded
})
}
state := s_active
}
}
// release
val r_address = (Cat(req.tag, req.idx) << blockOffBits).asUInt()
val id = cfg.nMissEntries
val probeResponse = edge.ProbeAck(
fromSource = req.source,
toAddress = r_address,
lgSize = log2Ceil(cfg.blockBytes).U,
reportPermissions = req.param
)
val probeResponseData = edge.ProbeAck(
fromSource = req.source,
toAddress = r_address,
lgSize = log2Ceil(cfg.blockBytes).U,
reportPermissions = req.param,
data = wb_buffer(data_req_cnt)
)
val voluntaryRelease = edge.Release(
fromSource = id.U,
toAddress = r_address,
lgSize = log2Ceil(cfg.blockBytes).U,
shrinkPermissions = req.param
)._2
val voluntaryReleaseData = edge.Release(
fromSource = id.U,
toAddress = r_address,
lgSize = log2Ceil(cfg.blockBytes).U,
shrinkPermissions = req.param,
data = wb_buffer(data_req_cnt)
)._2
when (state === s_active) {
io.release.valid := data_req_cnt < refillCycles.U
io.release.bits := Mux(req.voluntary,
Mux(should_writeback_data, voluntaryReleaseData, voluntaryRelease),
Mux(should_writeback_data, probeResponseData, probeResponse))
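// message selection as a truth table (dirty ~ should_writeback_data):
// voluntary  && dirty  -> ReleaseData  (replacement writeback)
// voluntary  && !dirty -> Release      (clean eviction, permissions only)
// !voluntary && dirty  -> ProbeAckData (probe hit a dirty line)
// !voluntary && !dirty -> ProbeAck     (probe, no data owed)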
when (io.mem_grant) {
acked := true.B
}
when (io.release.fire()) {
data_req_cnt := data_req_cnt + 1.U
val release_done = Mux(should_writeback_data, data_req_cnt === (refillCycles-1).U, true.B)
when (release_done) {
state := Mux(req.voluntary, s_grant, s_resp)
}
}
}
when (state === s_grant) {
when (io.mem_grant) {
acked := true.B
}
when (acked) {
state := s_resp
}
}
when (state === s_resp) {
io.resp := true.B
state := s_invalid
}
// print all input/output requests for debug purpose
// print req
val io_req = io.req.bits
XSDebug(io.req.fire(), "req tag: %x idx: %x source: %d param: %x way_en: %x voluntary: %b\n",
io_req.tag, io_req.idx, io_req.source, io_req.param, io_req.way_en, io_req.voluntary)
// print data req
val io_data_req = io.data_req.bits
XSDebug(io.data_req.fire(), "data_req addr: %x way_en: %x\n", io_data_req.addr, io_data_req.way_en)
// print release
// XSDebug.exec(io.release.fire(), io.release.bits.dump)
// print mem_grant
XSDebug(io.mem_grant, "mem_grant\n")
}
......@@ -557,14 +557,7 @@ class LoadQueue extends XSModule
io.uncache.req.bits.data := dataModule.io.uncache.rdata.data
io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask
io.uncache.req.bits.meta.id := DontCare
io.uncache.req.bits.meta.vaddr := DontCare
io.uncache.req.bits.meta.paddr := dataModule.io.uncache.rdata.paddr
io.uncache.req.bits.meta.uop := uop(deqPtr)
io.uncache.req.bits.meta.mmio := true.B
io.uncache.req.bits.meta.tlb_miss := false.B
io.uncache.req.bits.meta.mask := dataModule.io.uncache.rdata.mask
io.uncache.req.bits.meta.replay := false.B
io.uncache.req.bits.id := DontCare
io.uncache.resp.ready := true.B
......
......@@ -215,14 +215,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.uncache.req.bits.data := dataModule.io.rdata(0).data
io.uncache.req.bits.mask := dataModule.io.rdata(0).mask
io.uncache.req.bits.meta.id := DontCare
io.uncache.req.bits.meta.vaddr := DontCare
io.uncache.req.bits.meta.paddr := dataModule.io.rdata(0).paddr
io.uncache.req.bits.meta.uop := uop(deqPtr)
io.uncache.req.bits.meta.mmio := true.B
io.uncache.req.bits.meta.tlb_miss := false.B
io.uncache.req.bits.meta.mask := dataModule.io.rdata(0).mask
io.uncache.req.bits.meta.replay := false.B
io.uncache.req.bits.id := DontCare
when(io.uncache.req.fire()){
pending(deqPtr) := false.B
......@@ -282,11 +275,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.sbuffer(i).bits.addr := dataModuleRead(i).paddr
io.sbuffer(i).bits.data := dataModuleRead(i).data
io.sbuffer(i).bits.mask := dataModuleRead(i).mask
io.sbuffer(i).bits.meta := DontCare
io.sbuffer(i).bits.meta.tlb_miss := false.B
io.sbuffer(i).bits.meta.uop := DontCare
io.sbuffer(i).bits.meta.mmio := false.B
io.sbuffer(i).bits.meta.mask := dataModuleRead(i).mask
io.sbuffer(i).bits.id := DontCare
when (io.sbuffer(i).fire()) {
allocated(ptr) := false.B
......
......@@ -157,10 +157,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
io.dcache.req.bits.data := genWdata(in.src2, in.uop.ctrl.fuOpType(1,0))
// TODO: atomics do need mask: fix mask
io.dcache.req.bits.mask := genWmask(paddr, in.uop.ctrl.fuOpType(1,0))
io.dcache.req.bits.meta.id := DontCare
io.dcache.req.bits.meta.paddr := paddr
io.dcache.req.bits.meta.tlb_miss := false.B
io.dcache.req.bits.meta.replay := false.B
io.dcache.req.bits.id := DontCare
when(io.dcache.req.fire()){
state := s_cache_resp
......@@ -170,7 +167,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
when (state === s_cache_resp) {
io.dcache.resp.ready := true.B
when(io.dcache.resp.fire()) {
is_lrsc_valid := io.dcache.resp.bits.meta.id
is_lrsc_valid := io.dcache.resp.bits.id
val rdata = io.dcache.resp.bits.data
val rdataSel = LookupTree(paddr(2, 0), List(
"b000".U -> rdata(63, 0),
......@@ -232,4 +229,4 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
when(io.redirect.valid){
atom_override_xtval := false.B
}
}
\ No newline at end of file
}
......@@ -44,14 +44,7 @@ class LoadUnit_S0 extends XSModule {
io.dcacheReq.bits.data := DontCare
// TODO: update cache meta
io.dcacheReq.bits.meta.id := DontCare
io.dcacheReq.bits.meta.vaddr := s0_vaddr
io.dcacheReq.bits.meta.paddr := DontCare
io.dcacheReq.bits.meta.uop := s0_uop
io.dcacheReq.bits.meta.mmio := false.B
io.dcacheReq.bits.meta.tlb_miss := false.B
io.dcacheReq.bits.meta.mask := s0_mask
io.dcacheReq.bits.meta.replay := false.B
io.dcacheReq.bits.id := DontCare
val addrAligned = LookupTree(s0_uop.ctrl.fuOpType(1, 0), List(
"b00".U -> true.B, //b
......
......@@ -57,7 +57,7 @@ class FakeSbuffer extends XSModule {
dcache_req.bits.addr := block_addr(req.addr)
dcache_req.bits.data := wdataVec.asUInt
dcache_req.bits.mask := wmaskVec.asUInt
dcache_req.bits.meta := DontCare
dcache_req.bits.id := DontCare
when (dcache_req.fire()) {
state := s_resp
......
......@@ -355,9 +355,8 @@ class NewSbuffer extends XSModule with HasSbufferCst {
io.dcache.req.bits.addr := getAddr(tagRead(prepareIdx))
io.dcache.req.bits.data := bufferRead(prepareIdx).data
io.dcache.req.bits.mask := bufferRead(prepareIdx).mask
io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR
io.dcache.req.bits.meta := DontCare
io.dcache.req.bits.meta.id := prepareIdx
io.dcache.req.bits.id := prepareIdx
when(io.dcache.req.fire()){
stateVec(prepareIdx) := s_inflight
}
......@@ -368,7 +367,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
)
io.dcache.resp.ready := true.B // sbuffer always ready to recv dcache resp
val respId = io.dcache.resp.bits.meta.id
val respId = io.dcache.resp.bits.id
when(io.dcache.resp.fire()){
stateVec(respId) := s_invalid
assert(stateVec(respId) === s_inflight)
......
......@@ -368,7 +368,7 @@ class Sbuffer extends XSModule with HasSBufferConst {
io.dcache.req.bits.data := dcacheData
io.dcache.req.bits.mask := dcacheMask
io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR
io.dcache.req.bits.meta := DontCare // NOT USED
io.dcache.req.bits.id := DontCare // NOT USED
io.dcache.resp.ready := false.B
wb_arb.io.out.ready := false.B
......
......@@ -349,14 +349,7 @@ class StoreQueue(nEntries: Int) extends Queue(nEntries, "StoreQueue") {
req.bits.addr.poke(r.addr.U)
req.bits.data.poke(r.data.U)
req.bits.mask.poke(FULL_MASK)
req.bits.meta.id.poke(tId.U)
req.bits.meta.vaddr.poke(r.addr.U)
req.bits.meta.paddr.poke(r.addr.U)
// req.bits.meta.uop.poke(0.U.asTypeOf(new MicroOp))
req.bits.meta.mmio.poke(false.B)
req.bits.meta.tlb_miss.poke(false.B)
req.bits.meta.mask.poke(FULL_MASK)
req.bits.meta.replay.poke(false.B)
req.bits.id.poke(tId.U)
}
if (req.valid.peek().litToBoolean && req.ready.peek().litToBoolean) {
......@@ -369,7 +362,7 @@ class StoreQueue(nEntries: Int) extends Queue(nEntries, "StoreQueue") {
// always ready
resp.ready.poke(true.B)
if (resp.valid.peek().litToBoolean) {
val id = resp.bits.meta.id.peek().litValue.longValue.toInt
val id = resp.bits.id.peek().litValue.longValue.toInt
idPool.free(id)
retire(id)
}
......
......@@ -162,14 +162,7 @@ class L2NonInclusiveGetTest extends AnyFlatSpec with ChiselScalatestTester with
req.bits.addr.poke(addr.U)
req.bits.data.poke(0.U)
req.bits.mask.poke(FULL_MASK_64)
req.bits.meta.id.poke(0.U)
req.bits.meta.vaddr.poke(addr.U)
req.bits.meta.paddr.poke(addr.U)
// req.bits.meta.uop.poke(0.U.asTypeOf(new MicroOp))
req.bits.meta.mmio.poke(true.B)
req.bits.meta.tlb_miss.poke(false.B)
req.bits.meta.mask.poke(FULL_MASK_64)
req.bits.meta.replay.poke(false.B)
req.bits.id.poke(0.U)
while (!req.ready.peek().litToBoolean) {
c.clock.step()
......@@ -204,14 +197,7 @@ class L2NonInclusiveGetTest extends AnyFlatSpec with ChiselScalatestTester with
req.bits.addr.poke(addr.U)
req.bits.data.poke(data.U)
req.bits.mask.poke(FULL_MASK_64)
req.bits.meta.id.poke(0.U)
req.bits.meta.vaddr.poke(addr.U)
req.bits.meta.paddr.poke(addr.U)
// req.bits.meta.uop.poke(0.U.asTypeOf(new MicroOp))
req.bits.meta.mmio.poke(true.B)
req.bits.meta.tlb_miss.poke(false.B)
req.bits.meta.mask.poke(FULL_MASK_64)
req.bits.meta.replay.poke(false.B)
req.bits.id.poke(0.U)
while (!req.ready.peek().litToBoolean) {
c.clock.step()
......
......@@ -136,21 +136,21 @@ class L2TestTop()(implicit p: Parameters) extends LazyModule{
def sendStoreReq(addr: UInt, data: UInt): DCacheLineReq = {
val req = Wire(new DCacheLineReq)
req.cmd := MemoryOpConstants.M_XWR
req.addr := addr
req.data := data
req.mask := Fill(req.mask.getWidth, true.B)
req.meta := DontCare
req.id := DontCare
req
}
def sendLoadReq(addr: UInt): DCacheWordReq = {
val req = Wire(new DCacheWordReq)
req.cmd := MemoryOpConstants.M_XA_ADD
req.addr := addr
req.data := 0.U
req.mask := Fill(req.mask.getWidth, true.B)
req.meta := DontCare
req.id := DontCare
req
}
......
......@@ -257,18 +257,11 @@ class UnalignedGetTestTop()(implicit p: Parameters) extends LazyModule{
def sendFlushReq(addr: UInt): DCacheWordReq = {
val req = Wire(new DCacheWordReq)
req.cmd := MemoryOpConstants.M_XWR
req.addr := FLUSH64_ADDR.U
req.data := addr
req.mask := FULL_MASK_64
req.meta.id := 0.U
req.meta.vaddr := FLUSH64_ADDR.U
req.meta.paddr := FLUSH64_ADDR.U
req.meta.uop := DontCare
req.meta.mmio := true.B
req.meta.tlb_miss := false.B
req.meta.mask := FULL_MASK_64
req.meta.replay := false.B
req.id := 0.U
req
}
......@@ -278,7 +271,7 @@ class UnalignedGetTestTop()(implicit p: Parameters) extends LazyModule{
req.addr := addr
req.data := data
req.mask := Fill(req.mask.getWidth, true.B)
req.meta := DontCare
req.id := DontCare
req
}
......
......@@ -30,7 +30,7 @@ class SbufferWapper extends XSModule {
// fake dcache
sbuffer.io.dcache.req.ready := true.B
sbuffer.io.dcache.resp.valid := RegNext(RegNext(RegNext(RegNext(sbuffer.io.dcache.req.valid))))
sbuffer.io.dcache.resp.bits.meta.id := RegNext(RegNext(RegNext(RegNext(sbuffer.io.dcache.req.bits.meta.id))))
sbuffer.io.dcache.resp.bits.id := RegNext(RegNext(RegNext(RegNext(sbuffer.io.dcache.req.bits.id))))
}
class SbufferTest extends AnyFlatSpec
......