提交 638167ab 编写于 作者: A Allen

Added L1DCache, L1DCacheTest and some tilelink stuff.

Just compiles.
上级 61f69ef5
// See LICENSE.SiFive for license details.
package bus.tilelink
import chisel3._
import chisel3.util._
import utils.Or
/** Arbiters that funnel several Decoupled sources into one Decoupled sink.
  *
  * Once a source wins, it keeps the sink until the beat counter loaded from its
  * paired UInt has counted down to zero, so multi-beat messages are not interleaved.
  */
object TLArbiter
{
  // (width, valids, select) => readys
  type Policy = (Int, UInt, Bool) => UInt

  /** Fixed priority: ready bit i is set only when no lower-indexed valid bit is set. */
  val lowestIndexFirst: Policy = (width, valids, select) => ~(Or.leftOR(valids) << 1)(width-1, 0)

  /** Rotating priority.
    * `mask` starts as all ones and is rewritten from the granted request every time
    * `select` is asserted while at least one source is valid.
    */
  val roundRobin: Policy = (width, valids, select) => if (width == 1) 1.U(1.W) else {
    val valid = valids(width-1, 0)
    assert (valid === valids)
    val mask = RegInit(((BigInt(1) << width)-1).U(width.W))
    val filter = Cat(valid & ~mask, valid)
    val unready = (Or.rightOR(filter, width*2, width) >> 1) | (mask << width)
    val readys = ~((unready >> width) & unready(width-1, 0))
    when (select && valid.orR) {
      mask := Or.leftOR(readys & valid, width)
    }
    readys(width-1, 0)
  }

  /** Lowest-index-first arbitration over a Seq of TileLink channels. */
  def lowestFromSeq[T <: TLChannel](sink: DecoupledIO[T], sources: Seq[DecoupledIO[T]]): Unit = {
    apply(lowestIndexFirst)(sink, sources.map(s => (TLUtilities.numBeats1(s.bits), s)):_*)
  }

  /** Lowest-index-first arbitration over a varargs list of TileLink channels. */
  def lowest[T <: TLChannel](sink: DecoupledIO[T], sources: DecoupledIO[T]*): Unit = {
    apply(lowestIndexFirst)(sink, sources.toList.map(s => (TLUtilities.numBeats1(s.bits), s)):_*)
  }

  /** Round-robin arbitration over a varargs list of TileLink channels. */
  def robin[T <: TLChannel](sink: DecoupledIO[T], sources: DecoupledIO[T]*): Unit = {
    apply(roundRobin)(sink, sources.toList.map(s => (TLUtilities.numBeats1(s.bits), s)):_*)
  }

  /** Connect `sources` to `sink` under the given arbitration `policy`.
    *
    * @param policy  maps (number of sources, valid bits, select) to ready bits
    * @param sink    the single consumer
    * @param sources pairs of (value loaded into the remaining-beat counter when this
    *                source wins, the source itself)
    */
  def apply[T <: Data](policy: Policy)(sink: DecoupledIO[T], sources: (UInt, DecoupledIO[T])*): Unit = {
    if (sources.isEmpty) {
      sink.valid := false.B
      // Nothing can ever be sent, but the payload must still be driven:
      // Chisel3 rejects wires and ports that are left partially unconnected.
      sink.bits := DontCare
    } else if (sources.size == 1) {
      sink <> sources.head._2
    } else {
      val pairs = sources.toList
      val beatsIn = pairs.map(_._1)
      val sourcesIn = pairs.map(_._2)

      // The number of beats which remain to be sent
      val beatsLeft = RegInit(0.U)
      val idle = beatsLeft === 0.U
      val latch = idle && sink.ready // winner (if any) claims sink

      // Who wants access to the sink?
      val valids = sourcesIn.map(_.valid)
      // Arbitrate amongst the requests
      val readys = VecInit(policy(valids.size, Cat(valids.reverse), latch).asBools)
      // Which request wins arbitration?
      val winner = VecInit((readys zip valids) map { case (r,v) => r&&v })

      // Confirm the policy works properly
      require (readys.size == valids.size)
      // Never two winners
      val prefixOR = winner.scanLeft(false.B)(_||_).init
      assert((prefixOR zip winner) map { case (p,w) => !p || !w } reduce {_ && _})
      // If there was any request, there is a winner
      assert (!valids.reduce(_||_) || winner.reduce(_||_))

      // Track remaining beats
      val maskedBeats = (winner zip beatsIn) map { case (w,b) => Mux(w, b, 0.U) }
      val initBeats = maskedBeats.reduce(_ | _) // no winner => 0 beats
      beatsLeft := Mux(latch, initBeats, beatsLeft - sink.fire())

      // The one-hot source granted access in the previous cycle
      val state = RegInit(VecInit(Seq.fill(sources.size)(false.B)))
      val muxState = Mux(idle, winner, state)
      state := muxState

      // While a burst is in flight only the previously granted source may proceed
      val allowed = Mux(idle, readys, state)
      (sourcesIn zip allowed) foreach { case (s, r) =>
        s.ready := sink.ready && r
      }
      sink.valid := Mux(idle, valids.reduce(_||_), Mux1H(state, valids))
      sink.bits := Mux1H(muxState, sourcesIn.map(_.bits))
    }
  }
}
/** Synthesizable unit tests */
/*
import freechips.rocketchip.unittest._
class TestRobin(txns: Int = 128, timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) {
val sources = Wire(Vec(6, DecoupledIO(UInt(width=3))))
val sink = Wire(DecoupledIO(UInt(width=3)))
val count = RegInit(0.U(8.W))
val lfsr = LFSR(16, Bool(true))
val valid = lfsr(0)
val ready = lfsr(15)
sources.zipWithIndex.map { case (z, i) => z.bits := i.U }
sources(0).valid := valid
sources(1).valid := false.B
sources(2).valid := valid
sources(3).valid := valid
sources(4).valid := false.B
sources(5).valid := valid
sink.ready := ready
TLArbiter(TLArbiter.roundRobin)(sink, sources.zipWithIndex.map { case (z, i) => (i.U, z) }:_*)
when (sink.fire()) { printf("TestRobin: %d\n", sink.bits) }
when (!sink.fire()) { printf("TestRobin: idle (%d %d)\n", valid, ready) }
count := count + 1.U
io.finished := count >= txns.U
}
*/
// See LICENSE.SiFive for license details.
// See LICENSE.Berkeley for license details.
package bus.tilelink
import chisel3._
import chisel3.util._
import xiangshan.mem.MemoryOpConstants
import utils.MuxTLookup
/** Encodings of the per-block permission state held by a client cache. */
object ClientStates {
  val width = 2

  // Author's note (translated): this is presumably a MESI-like protocol; it is
  // unclear whether these states come from the TileLink specification or are
  // defined locally. What is certain: Nothing means invalid and Dirty means dirty.
  // Judging from hasWritePermission below, Branch is presumably the shared state
  // and Trunk the exclusive one.
  def Nothing: UInt = 0.U(width.W)
  def Branch: UInt = 1.U(width.W)
  def Trunk: UInt = 2.U(width.W)
  def Dirty: UInt = 3.U(width.W)

  /** True for every state above Nothing. */
  def hasReadPermission(state: UInt): Bool = state > Nothing

  /** True for every state above Branch, i.e. Trunk and Dirty. */
  def hasWritePermission(state: UInt): Bool = state > Branch
}
/** Two-bit classification of a memory command: {isWrite, isWriteIntent}. */
object MemoryOpCategories extends MemoryOpConstants {
  def wr: UInt = Cat(true.B, true.B) // Op actually writes
  def wi: UInt = Cat(false.B, true.B) // Future op will write
  def rd: UInt = Cat(false.B, false.B) // Op only reads

  /** Classify `cmd`; the result is meant to be compared against wr / wi / rd. */
  def categorize(cmd: UInt): UInt =
    //assert(cat.isOneOf(wr,wi,rd), "Could not categorize command.")
    Cat(isWrite(cmd), isWriteIntent(cmd))
}
/** Stores the client-side coherence information,
  * such as permissions on the data and whether the data is dirty.
  * Its API can be used to make TileLink messages in response to
  * memory operations, cache control operations, or Probe messages.
  */
class ClientMetadata extends Bundle {
  /** Actual state information stored in this bundle */
  val state = UInt(width = ClientStates.width.W)

  /** Metadata equality */
  def ===(rhs: UInt): Bool = state === rhs
  def ===(rhs: ClientMetadata): Bool = state === rhs.state
  def =/=(rhs: ClientMetadata): Bool = !this.===(rhs)

  /** Is the block's data present in this cache */
  def isValid(dummy: Int = 0): Bool = state > ClientStates.Nothing

  /** Determine whether this cmd misses, and the new state (on hit) or param to be sent (on miss) */
  private def growStarter(cmd: UInt): (Bool, UInt) = {
    import MemoryOpCategories._
    import TLPermissions._
    import ClientStates._
    val c = categorize(cmd)
    // Key is {category, current state}; unlisted keys fall back to (miss, 0).
    MuxTLookup(Cat(c, state), (false.B, 0.U), Seq(
      //(effect, am now) -> (was a hit, next)
      Cat(rd, Dirty) -> (true.B, Dirty),
      Cat(rd, Trunk) -> (true.B, Trunk),
      Cat(rd, Branch) -> (true.B, Branch),
      Cat(wi, Dirty) -> (true.B, Dirty),
      Cat(wi, Trunk) -> (true.B, Trunk),
      Cat(wr, Dirty) -> (true.B, Dirty),
      Cat(wr, Trunk) -> (true.B, Dirty),
      //(effect, am now) -> (was a miss, param)
      Cat(rd, Nothing) -> (false.B, NtoB),
      Cat(wi, Branch) -> (false.B, BtoT),
      Cat(wi, Nothing) -> (false.B, NtoT),
      Cat(wr, Branch) -> (false.B, BtoT),
      Cat(wr, Nothing) -> (false.B, NtoT)))
  }

  /** Determine what state to go to after miss based on Grant param
    * For now, doesn't depend on state (which may have been Probed).
    */
  private def growFinisher(cmd: UInt, param: UInt): UInt = {
    import MemoryOpCategories._
    import TLPermissions._
    import ClientStates._
    val c = categorize(cmd)
    //assert(c === rd || param === toT, "Client was expecting trunk permissions.")
    // Any (category, param) pair not listed below yields Nothing.
    MuxLookup(Cat(c, param), Nothing, Seq(
      //(effect param) -> (next)
      Cat(rd, toB) -> Branch,
      Cat(rd, toT) -> Trunk,
      Cat(wi, toT) -> Trunk,
      Cat(wr, toT) -> Dirty))
  }

  /** Does this cache have permissions on this block sufficient to perform op,
    * and what to do next (Acquire message param or updated metadata).
    *
    * @return (hit, next state on hit / Acquire param on miss, metadata wrapping
    *         that same second value)
    */
  def onAccess(cmd: UInt): (Bool, UInt, ClientMetadata) = {
    val r = growStarter(cmd)
    (r._1, r._2, ClientMetadata(r._2))
  }

  /** Does a secondary miss on the block require another Acquire message
    *
    * @return (needs a second Acquire, both commands hit, selected grow param,
    *         metadata wrapping that param, the command it was selected from)
    */
  def onSecondaryAccess(first_cmd: UInt, second_cmd: UInt): (Bool, Bool, UInt, ClientMetadata, UInt) = {
    import MemoryOpCategories._
    val r1 = growStarter(first_cmd)
    val r2 = growStarter(second_cmd)
    val needs_second_acq = isWriteIntent(second_cmd) && !isWriteIntent(first_cmd)
    val hit_again = r1._1 && r2._1
    // The second command's result is used only when it actually writes.
    val dirties = categorize(second_cmd) === wr
    val biggest_grow_param = Mux(dirties, r2._2, r1._2)
    val dirtiest_state = ClientMetadata(biggest_grow_param)
    val dirtiest_cmd = Mux(dirties, second_cmd, first_cmd)
    (needs_second_acq, hit_again, biggest_grow_param, dirtiest_state, dirtiest_cmd)
  }

  /** Metadata change on a returned Grant */
  def onGrant(cmd: UInt, param: UInt): ClientMetadata = ClientMetadata(growFinisher(cmd, param))

  /** Determine what state to go to based on Probe param */
  // Author's note (translated): in effect this looks up the concrete action from
  // the current state together with the requested target state.
  // Returns (has dirty data, response param, next state); unlisted keys give (false, 0, 0).
  private def shrinkHelper(param: UInt): (Bool, UInt, UInt) = {
    import ClientStates._
    import TLPermissions._
    MuxTLookup(Cat(param, state), (false.B, 0.U, 0.U), Seq(
      //(wanted, am now) -> (hasDirtyData resp, next)
      Cat(toT, Dirty) -> (true.B, TtoT, Trunk),
      Cat(toT, Trunk) -> (false.B, TtoT, Trunk),
      Cat(toT, Branch) -> (false.B, BtoB, Branch),
      Cat(toT, Nothing) -> (false.B, NtoN, Nothing),
      Cat(toB, Dirty) -> (true.B, TtoB, Branch),
      Cat(toB, Trunk) -> (false.B, TtoB, Branch), // Policy: Don't notify on clean downgrade
      Cat(toB, Branch) -> (false.B, BtoB, Branch),
      Cat(toB, Nothing) -> (false.B, NtoN, Nothing),
      Cat(toN, Dirty) -> (true.B, TtoN, Nothing),
      Cat(toN, Trunk) -> (false.B, TtoN, Nothing), // Policy: Don't notify on clean downgrade
      Cat(toN, Branch) -> (false.B, BtoN, Nothing), // Policy: Don't notify on clean downgrade
      Cat(toN, Nothing) -> (false.B, NtoN, Nothing)))
  }

  /** Translate cache control cmds into Probe param */
  // Author's note (translated): which state transition each cache-control command
  // should perform. Commands other than the three listed fall back to toN.
  private def cmdToPermCap(cmd: UInt): UInt = {
    import MemoryOpCategories._
    import TLPermissions._
    MuxLookup(cmd, toN, Seq(
      M_FLUSH -> toN,
      M_PRODUCE -> toB,
      M_CLEAN -> toT))
  }

  /** Result of a cache-control command: (has dirty data, response param, next metadata). */
  def onCacheControl(cmd: UInt): (Bool, UInt, ClientMetadata) = {
    val r = shrinkHelper(cmdToPermCap(cmd))
    (r._1, r._2, ClientMetadata(r._3))
  }

  /** Result of a Probe with the given cap param: (has dirty data, response param, next metadata). */
  def onProbe(param: UInt): (Bool, UInt, ClientMetadata) = {
    val r = shrinkHelper(param)
    (r._1, r._2, ClientMetadata(r._3))
  }
}
/** Factories for ClientMetadata, including on reset */
object ClientMetadata {
  /** Build a metadata wire whose state is driven by `perm`. */
  def apply(perm: UInt): ClientMetadata = {
    val wired = Wire(new ClientMetadata)
    wired.state := perm
    wired
  }

  /** Metadata in the Nothing state. */
  def onReset: ClientMetadata = apply(ClientStates.Nothing)

  /** Metadata in the Dirty state, the highest encoded value. */
  def maximum: ClientMetadata = apply(ClientStates.Dirty)
}
// See LICENSE.SiFive for license details.
package bus.tilelink
import chisel3._
import chisel3.util._
import bus.axi4.AXI4
import bus.axi4.AXI4Parameters
import utils.GTimer
// A simple TileLink to AXI4 converter.
// Only TileLink Get and PutFullData requests are handled; an accepted request with
// any other opcode is ignored by the state machine.
//
// State flow:
//   read : s_idle -> s_wait_arready -> s_mem_read -> s_read_resp -> s_idle
//   write: s_idle -> [s_gather_write_data] -> s_write_resp -> s_wait_awready
//                 -> s_mem_write -> s_wait_bresp -> s_idle
// Both directions always move a whole block (outerDataBeats beats) on the AXI4 side.
class NaiveTLToAXI4(params: TLParameters) extends Module
{
  val io = IO(new Bundle{
    val in = Flipped(new TLCached(params))
    val out = new AXI4
  })
  val debug = true
  val in = io.in
  val out = io.out

  /* parameters */
  val Y = true.B
  val N = false.B
  val blockSize = 64 * 8
  val blockBytes = blockSize / 8
  val innerBeatSize = in.d.bits.data.getWidth
  val innerBeatBytes = innerBeatSize / 8
  val innerDataBeats = blockSize / innerBeatSize
  val innerBeatBits = log2Ceil(innerBeatBytes)
  val innerBeatIndexBits = log2Ceil(innerDataBeats)
  val innerBeatLSB = innerBeatBits
  val innerBeatMSB = innerBeatLSB + innerBeatIndexBits - 1
  val outerBeatSize = out.w.bits.data.getWidth
  val outerBeatBytes = outerBeatSize / 8
  val outerDataBeats = blockSize / outerBeatSize
  val outerBeatLen = log2Ceil(outerBeatBytes)
  val outerBurstLen = outerBeatLen + log2Ceil(outerDataBeats)
  val addrWidth = in.a.bits.address.getWidth
  val innerIdWidth = in.a.bits.source.getWidth
  val outerIdWidth = out.aw.bits.id.getWidth

  // Elaboration-time checks: address and beat widths must match on both sides.
  assert(in.a.bits.address.getWidth == out.aw.bits.addr.getWidth)
  assert(innerBeatSize == outerBeatSize)
  val split = innerBeatSize / outerBeatSize
  val splitBits = log2Ceil(split)
  require(isPow2(split))

  val s_idle :: s_gather_write_data :: s_wait_awready :: s_mem_write :: s_wait_bresp :: s_wait_arready :: s_mem_read :: s_read_resp :: s_write_resp :: Nil = Enum(9)
  val state = RegInit(s_idle)

  val timer = GTimer()
  val log_prefix = "cycle: %d [L2Cache] state %x "
  def log_raw(prefix: String, fmt: String, tail: String, args: Bits*) = {
    if (debug) {
      printf(prefix + fmt + tail, args:_*)
    }
  }
  /** Single log */
  def log(fmt: String, args: Bits*) = log_raw(log_prefix, fmt, "\n", timer +: state +: args:_*)
  /** Log with line continued */
  def log_part(fmt: String, args: Bits*) = log_raw(log_prefix, fmt, "", timer +: state +: args:_*)
  /** Log with nothing added */
  def log_plain(fmt: String, args: Bits*) = log_raw("", fmt, "", args:_*)

  when (in.a.fire()) {
    // One format specifier per argument: the A channel here has no dsid field,
    // so the format string must not contain one either.
    log("in.a opcode %x, param %x, size %x, source %x, address %x, mask %x, data %x",
      in.a.bits.opcode,
      in.a.bits.param,
      in.a.bits.size,
      in.a.bits.source,
      in.a.bits.address,
      in.a.bits.mask,
      in.a.bits.data)
  }
  /*
  when (out.a.fire()) {
  log("out.a.opcode %x, dsid %x, param %x, size %x, source %x, address %x, mask %x, data %x",
  out.a.bits.opcode,
  out.a.bits.dsid,
  out.a.bits.param,
  out.a.bits.size,
  out.a.bits.source,
  out.a.bits.address,
  out.a.bits.mask,
  out.a.bits.data)
  }
  */

  val in_opcode = in.a.bits.opcode
  val in_addr = in.a.bits.address
  val in_id = in.a.bits.source
  val in_len_shift = in.a.bits.size >= innerBeatBits.U
  val in_len = Mux(in_len_shift, ((1.U << in.a.bits.size) >> innerBeatBits) - 1.U, 0.U) // #word, i.e., arlen in AXI
  val in_data = in.a.bits.data
  val in_data_mask = in.a.bits.mask
  val in_recv_fire = in.a.fire()
  val in_read_req = in_recv_fire && (in_opcode === TLMessages.Get)
  val in_write_req = in_recv_fire && (in_opcode === TLMessages.PutFullData)

  // Request fields captured when a request is accepted in s_idle
  val addr = Reg(UInt(addrWidth.W))
  val id = Reg(UInt(innerIdWidth.W))
  val opcode = Reg(UInt(3.W))
  val size_reg = Reg(UInt(in.a.bits.size.getWidth.W))
  val ren = RegInit(N)
  val wen = RegInit(N)

  val start_beat = in_addr(innerBeatMSB, innerBeatLSB)
  val inner_end_beat_reg = Reg(UInt(4.W))
  // In s_idle the register is not loaded yet, so use the value computed from the live request.
  val inner_end_beat = Mux(state === s_idle, start_beat + in_len, inner_end_beat_reg)

  // gather write data beat count
  val gather_curr_beat_reg = RegInit(0.asUInt(log2Ceil(innerDataBeats).W))
  val gather_curr_beat = Mux(state === s_idle, start_beat, gather_curr_beat_reg)
  val gather_last_beat = gather_curr_beat === inner_end_beat

  // read response beat count
  val resp_curr_beat = RegInit(0.asUInt(log2Ceil(innerDataBeats).W))
  val resp_last_beat = resp_curr_beat === inner_end_beat

  // state transitions:
  // s_idle: idle state
  // capture requests
  // --------------------------------------------------------------------------------
  when (state === s_idle) {
    when (in_read_req) {
      ren := Y
      wen := N
      addr := in_addr
      id := in_id
      opcode := in_opcode
      size_reg := in.a.bits.size
      resp_curr_beat := start_beat
      inner_end_beat_reg := start_beat + in_len
      state := s_wait_arready
    } .elsewhen (in_write_req) {
      ren := N
      wen := Y
      addr := in_addr
      id := in_id
      opcode := in_opcode
      size_reg := in.a.bits.size
      resp_curr_beat := start_beat
      inner_end_beat_reg := start_beat + in_len
      state := s_gather_write_data
    } .elsewhen (in.b.fire() || in.c.fire() || in.e.fire()) {
      assert(N, "Inner tilelink Unexpected handshake")
    }
  }

  // s_gather_write_data:
  // gather write data
  // --------------------------------------------------------------------------------
  val data_buf = Reg(Vec(outerDataBeats, UInt(outerBeatSize.W)))
  val data_mask = Reg(Vec(outerDataBeats, UInt(outerBeatBytes.W)))

  // tilelink receives the first data beat when address handshake
  // which is different from axi
  val first_data_beat = state === s_idle && in_write_req
  val following_data_beat = state === s_gather_write_data && in_recv_fire
  val gather_data_beat = first_data_beat || following_data_beat

  when (first_data_beat) {
    gather_curr_beat_reg := start_beat + 1.U
  }
  when (following_data_beat) {
    gather_curr_beat_reg := gather_curr_beat_reg + 1.U
  }
  when (first_data_beat || following_data_beat) {
    for (i <- 0 until split) {
      data_buf((gather_curr_beat << splitBits) + i.U) := in_data(outerBeatSize * (i + 1) - 1, outerBeatSize * i)
      data_mask((gather_curr_beat << splitBits) + i.U) := in_data_mask(outerBeatBytes * (i + 1) - 1, outerBeatBytes * i)
    }
    // This assignment comes after the s_idle block, so a single-beat write goes
    // straight from s_idle to s_write_resp.
    when (gather_last_beat) {
      state := s_write_resp
    }
  }

  // The TileLink write acknowledgement is sent before the AXI4 write is issued.
  when (state === s_write_resp && in.d.fire()) {
    state := s_wait_awready
  }

  // s_wait_arready, s_mem_read, s_read_resp
  // deal with read
  // --------------------------------------------------------------------------------
  when (state === s_wait_arready && out.ar.fire()) {
    state := s_mem_read
  }

  val out_rdata_fire = out.r.fire()
  val (refill_cnt, refill_done) = Counter(out_rdata_fire && state === s_mem_read, outerDataBeats)
  when (state === s_mem_read && out_rdata_fire) {
    data_buf(refill_cnt) := out.r.bits.data
    when (refill_done) {
      state := s_read_resp
    }
  }

  when (state === s_read_resp && in.d.fire()) {
    resp_curr_beat := resp_curr_beat + 1.U
    when (resp_last_beat) {
      state := s_idle
    }
  }

  // The `split` outer beats that make up the inner beat currently being returned
  val resp_data = Wire(Vec(split, UInt(outerBeatSize.W)))
  for (i <- 0 until split) {
    resp_data(i) := data_buf((resp_curr_beat << splitBits) + i.U)
  }

  // deal with write
  // s_wait_awready & s_mem_write
  // --------------------------------------------------------------------------------
  when (state === s_wait_awready && out.aw.fire()) {
    state := s_mem_write
  }

  val (wb_cnt, wb_done) = Counter(out.w.fire() && state === s_mem_write, outerDataBeats)
  when (state === s_mem_write && wb_done) {
    state := s_wait_bresp
  }

  when (state === s_wait_bresp && out.b.fire()) {
    state := s_idle
  }

  // IO ports
  // Input tilelink channels
  // --------------------------------------------------------------------------------
  // channel A
  in.a.ready := state === s_idle || state === s_gather_write_data
  // channel B
  in.b.valid := N
  // channel C
  in.c.ready := N
  // channel D
  val in_read_resp = state === s_read_resp
  val in_write_resp = state === s_write_resp
  in.d.valid := in_write_resp || in_read_resp
  in.d.bits.opcode := Mux(in_read_resp, TLMessages.AccessAckData, TLMessages.AccessAck)
  in.d.bits.param := 0.U
  in.d.bits.size := size_reg
  in.d.bits.source := id
  in.d.bits.sink := 0.U
  in.d.bits.denied := N
  // resp_data already selects the current beat; it holds only `split` elements,
  // so it is concatenated (element 0 in the low bits) rather than indexed by the
  // beat counter, which would be out of range for every beat but the first.
  in.d.bits.data := resp_data.asUInt
  in.d.bits.corrupt := N
  // channel E
  in.e.ready := N

  // Output AXI4 channels
  // --------------------------------------------------------------------------------
  val axi4_size = log2Up(outerBeatBytes).U

  // AW channel
  // write address channel signals
  out.aw.valid := state === s_wait_awready
  out.aw.bits.id := 0.U
  out.aw.bits.addr := addr
  out.aw.bits.len := (outerDataBeats - 1).asUInt(8.W)
  out.aw.bits.size := axi4_size
  out.aw.bits.burst := AXI4Parameters.BURST_INCR // normal sequential memory
  out.aw.bits.lock := 0.asUInt(1.W)
  out.aw.bits.cache := AXI4Parameters.CACHE_RALLOCATE | AXI4Parameters.CACHE_WALLOCATE | AXI4Parameters.CACHE_MODIFIABLE | AXI4Parameters.CACHE_BUFFERABLE
  out.aw.bits.prot := 0.asUInt(3.W)
  out.aw.bits.qos := 0.asUInt(4.W)

  // W channel
  // write data channel signals
  out.w.valid := state === s_mem_write
  out.w.bits.data := data_buf(wb_cnt)
  // NOTE(review): every byte lane is always enabled; the gathered data_mask is not used here.
  out.w.bits.strb := Fill(outerBeatBytes, 1.asUInt(1.W))
  out.w.bits.last := wb_cnt === (outerDataBeats - 1).U

  // B channel
  // write response channel signals
  out.b.ready := state === s_wait_bresp

  // AR channel
  // read address channel signals
  out.ar.valid := state === s_wait_arready
  out.ar.bits.id := 0.asUInt(outerIdWidth.W)
  out.ar.bits.addr := addr
  out.ar.bits.len := (outerDataBeats - 1).asUInt(8.W)
  out.ar.bits.size := axi4_size
  out.ar.bits.burst := AXI4Parameters.BURST_INCR
  out.ar.bits.lock := 0.asUInt(1.W)
  out.ar.bits.cache := AXI4Parameters.CACHE_RALLOCATE | AXI4Parameters.CACHE_WALLOCATE | AXI4Parameters.CACHE_MODIFIABLE | AXI4Parameters.CACHE_BUFFERABLE
  out.ar.bits.prot := 0.asUInt(3.W)
  out.ar.bits.qos := 0.asUInt(4.W)

  // R channel
  // read data channel signals
  out.r.ready := state === s_mem_read
}
/** Factory for [[NaiveTLToAXI4]].
  * It only constructs the class; it does not wrap the result in `Module(...)`.
  */
object NaiveTLToAXI4
{
  def apply(params: TLParameters): NaiveTLToAXI4 =
    new NaiveTLToAXI4(params)
}
此差异已折叠。
// See LICENSE.SiFive for license details.
package bus.tilelink
import chisel3._
import chisel3.util._
import xiangshan.HasXSParameter
/** Width configuration shared by all TileLink channel bundles in this package.
  *
  * @param addressBits width of the address field
  * @param dataBits    width of the data field; the mask has one bit per data byte
  * @param sourceBits  width of the source field
  * @param sinkBits    width of the sink field
  * @param sizeBits    width of the size field
  * @param maxTransfer value whose log2 (rounded up) is exposed as `maxLgSize`
  */
case class TLParameters(
  addressBits: Int = 64,
  dataBits: Int = 64,
  sourceBits: Int = 1,
  sinkBits: Int = 1,
  sizeBits: Int = 3,
  maxTransfer: Int = 64
) {
  /** Number of bytes carried by one data beat. */
  def beatBytes: Int = dataBits / 8

  /** log2 of `maxTransfer`, rounded up. */
  val maxLgSize: Int = log2Ceil(maxTransfer)
}
/** TileLink opcode encodings, per-channel range checks and printable name tables.
  * Opcode values are reused across channels, so an opcode is only meaningful
  * together with the channel that carries it.
  */
object TLMessages
{
  // opcode width
  val width = 3

  //                                           channels   => expected response
  def PutFullData: UInt = 0.U(width.W)      // A B        => AccessAck
  def PutPartialData: UInt = 1.U(width.W)   // A B        => AccessAck
  def ArithmeticData: UInt = 2.U(width.W)   // A B        => AccessAckData
  def LogicalData: UInt = 3.U(width.W)      // A B        => AccessAckData
  def Get: UInt = 4.U(width.W)              // A B        => AccessAckData
  def Hint: UInt = 5.U(width.W)             // A B        => HintAck
  def AcquireBlock: UInt = 6.U(width.W)     // A          => Grant[Data]
  def AcquirePerm: UInt = 7.U(width.W)      // A          => Grant[Data]
  def Probe: UInt = 6.U(width.W)            // B          => ProbeAck[Data]
  def AccessAck: UInt = 0.U(width.W)        // C D
  def AccessAckData: UInt = 1.U(width.W)    // C D
  def HintAck: UInt = 2.U(width.W)          // C D
  def ProbeAck: UInt = 4.U(width.W)         // C
  def ProbeAckData: UInt = 5.U(width.W)     // C
  def Release: UInt = 6.U(width.W)          // C          => ReleaseAck
  def ReleaseData: UInt = 7.U(width.W)      // C          => ReleaseAck
  def Grant: UInt = 4.U(width.W)            // D          => GrantAck
  def GrantData: UInt = 5.U(width.W)        // D          => GrantAck
  def ReleaseAck: UInt = 6.U(width.W)       // D
  def GrantAck: UInt = 0.U(width.W)         // E

  // Range checks: true when x does not exceed the highest opcode of the channel.
  def isA(x: UInt): Bool = x <= AcquirePerm
  def isB(x: UInt): Bool = x <= Probe
  def isC(x: UInt): Bool = x <= ReleaseData
  def isD(x: UInt): Bool = x <= ReleaseAck

  // Response opcode for each request opcode, indexed by the request opcode value.
  def adResponse: Vec[UInt] =
    VecInit(AccessAck, AccessAck, AccessAckData, AccessAckData, AccessAckData, HintAck, Grant, Grant)
  def bcResponse: Vec[UInt] =
    VecInit(AccessAck, AccessAck, AccessAckData, AccessAckData, AccessAckData, HintAck, ProbeAck, ProbeAck)

  // Per-channel tables of (opcode name, names of that opcode's param values),
  // listed in opcode order.
  def a: Seq[(String, Seq[String])] = Seq(
    ("PutFullData", TLPermissions.PermMsgReserved),
    ("PutPartialData", TLPermissions.PermMsgReserved),
    ("ArithmeticData", TLAtomics.ArithMsg),
    ("LogicalData", TLAtomics.LogicMsg),
    ("Get", TLPermissions.PermMsgReserved),
    ("Hint", TLHints.HintsMsg),
    ("AcquireBlock", TLPermissions.PermMsgGrow),
    ("AcquirePerm", TLPermissions.PermMsgGrow))

  def b: Seq[(String, Seq[String])] = Seq(
    ("PutFullData", TLPermissions.PermMsgReserved),
    ("PutPartialData", TLPermissions.PermMsgReserved),
    ("ArithmeticData", TLAtomics.ArithMsg),
    ("LogicalData", TLAtomics.LogicMsg),
    ("Get", TLPermissions.PermMsgReserved),
    ("Hint", TLHints.HintsMsg),
    ("Probe", TLPermissions.PermMsgCap))

  def c: Seq[(String, Seq[String])] = Seq(
    ("AccessAck", TLPermissions.PermMsgReserved),
    ("AccessAckData", TLPermissions.PermMsgReserved),
    ("HintAck", TLPermissions.PermMsgReserved),
    ("Invalid Opcode", TLPermissions.PermMsgReserved),
    ("ProbeAck", TLPermissions.PermMsgReport),
    ("ProbeAckData", TLPermissions.PermMsgReport),
    ("Release", TLPermissions.PermMsgReport),
    ("ReleaseData", TLPermissions.PermMsgReport))

  def d: Seq[(String, Seq[String])] = Seq(
    ("AccessAck", TLPermissions.PermMsgReserved),
    ("AccessAckData", TLPermissions.PermMsgReserved),
    ("HintAck", TLPermissions.PermMsgReserved),
    ("Invalid Opcode", TLPermissions.PermMsgReserved),
    ("Grant", TLPermissions.PermMsgCap),
    ("GrantData", TLPermissions.PermMsgCap),
    ("ReleaseAck", TLPermissions.PermMsgReserved))
}
/**
  * The three primary TileLink permissions are:
  * (T)runk: the agent is (or is on inwards path to) the global point of serialization.
  * (B)ranch: the agent is on an outwards path to the trunk (the original comment is cut off here).
  * (N)one: presumably the agent holds no permissions (the original comment is empty here).
  * These permissions are permuted by transfer operations in various ways.
  * Operations can cap permissions, request for them to be grown or shrunk,
  * or for a report on their current status.
  */
object TLPermissions
{
  val aWidth = 2
  val bdWidth = 2
  val cWidth = 3

  // Every literal below is built with `n.U(w.W)`. Writing `n.U(w)` with a plain
  // Int does NOT set a width: it extracts bit `w` of the literal and yields a Bool,
  // which made all of these constants equal to false.B.

  // Cap types (Grant = new permissions, Probe = permissions <= target)
  def toT: UInt = 0.U(bdWidth.W)
  def toB: UInt = 1.U(bdWidth.W)
  def toN: UInt = 2.U(bdWidth.W)
  def isCap(x: UInt): Bool = x <= toN

  // Grow types (Acquire = permissions >= target)
  def NtoB: UInt = 0.U(aWidth.W)
  def NtoT: UInt = 1.U(aWidth.W)
  def BtoT: UInt = 2.U(aWidth.W)
  def isGrow(x: UInt): Bool = x <= BtoT

  // Shrink types (ProbeAck, Release)
  def TtoB: UInt = 0.U(cWidth.W)
  def TtoN: UInt = 1.U(cWidth.W)
  def BtoN: UInt = 2.U(cWidth.W)
  def isShrink(x: UInt): Bool = x <= BtoN

  // Report types (ProbeAck, Release)
  def TtoT: UInt = 3.U(cWidth.W)
  def BtoB: UInt = 4.U(cWidth.W)
  def NtoN: UInt = 5.U(cWidth.W)
  def isReport(x: UInt): Bool = x <= NtoN

  // Printable names, indexed by the encodings above.
  def PermMsgGrow:Seq[String] = Seq("Grow NtoB", "Grow NtoT", "Grow BtoT")
  def PermMsgCap:Seq[String] = Seq("Cap toT", "Cap toB", "Cap toN")
  // "Report TtoT" was previously misspelled "Report TotT".
  def PermMsgReport:Seq[String] = Seq("Shrink TtoB", "Shrink TtoN", "Shrink BtoN", "Report TtoT", "Report BtoB", "Report NtoN")
  def PermMsgReserved:Seq[String] = Seq("Reserved")
}
/** Param encodings for the ArithmeticData and LogicalData messages. */
object TLAtomics
{
  val width = 3

  // Literals use `n.U(width.W)`; `n.U(width)` with a plain Int would extract bit
  // `width` of the literal and turn every constant into false.B.

  // Arithmetic types
  def MIN: UInt = 0.U(width.W)
  def MAX: UInt = 1.U(width.W)
  def MINU: UInt = 2.U(width.W)
  def MAXU: UInt = 3.U(width.W)
  def ADD: UInt = 4.U(width.W)
  def isArithmetic(x: UInt): Bool = x <= ADD

  // Logical types
  def XOR: UInt = 0.U(width.W)
  def OR: UInt = 1.U(width.W)
  def AND: UInt = 2.U(width.W)
  def SWAP: UInt = 3.U(width.W)
  def isLogical(x: UInt): Bool = x <= SWAP

  // Printable names, indexed by the encodings above.
  def ArithMsg:Seq[String] = Seq("MIN", "MAX", "MINU", "MAXU", "ADD")
  def LogicMsg:Seq[String] = Seq("XOR", "OR", "AND", "SWAP")
}
/** Param encodings for the Hint message. */
object TLHints
{
  val width = 1

  // Literals use `n.U(width.W)`; `n.U(width)` with a plain Int would extract bit
  // `width` of the literal, making both hints false.B and therefore identical.
  def PREFETCH_READ: UInt = 0.U(width.W)
  def PREFETCH_WRITE: UInt = 1.U(width.W)
  def isHints(x: UInt): Bool = x <= PREFETCH_WRITE

  // Printable names, indexed by the encodings above.
  def HintsMsg:Seq[String] = Seq("PrefetchRead", "PrefetchWrite")
}
/** Common base of every TileLink channel payload bundle. */
sealed trait TLChannel extends Bundle {
  // Human-readable channel name
  val channelName: String
  // Width configuration the bundle was built from
  val params: TLParameters
}
/** Marker for channels that carry a data field (A, B, C and D below). */
sealed trait TLDataChannel extends TLChannel
/** Marker for data channels that also carry an address field (A, B and C below). */
sealed trait TLAddrChannel extends TLDataChannel
/** Payload of the TileLink 'A' channel. Field widths other than opcode/param come from `params`. */
class TLBundleA(override val params: TLParameters) extends TLAddrChannel
{
  val channelName = "'A' channel"
  val opcode = UInt(3.W)
  val param = UInt(3.W)
  val size = UInt(params.sizeBits.W)
  val source = UInt(params.sourceBits.W)
  val address = UInt(params.addressBits.W)
  // One mask bit per data byte
  val mask = UInt((params.dataBits/8).W)
  val data = UInt(params.dataBits.W)
  val corrupt = Bool()
}
/** Payload of the TileLink 'B' channel; it has the same fields as the 'A' payload. */
class TLBundleB(override val params: TLParameters) extends TLAddrChannel
{
  val channelName = "'B' channel"
  val opcode = UInt(3.W)
  val param = UInt(3.W)
  val size = UInt(params.sizeBits.W)
  val source = UInt(params.sourceBits.W)
  val address = UInt(params.addressBits.W)
  // One mask bit per data byte
  val mask = UInt((params.dataBits/8).W)
  val data = UInt(params.dataBits.W)
  val corrupt = Bool()
}
/** Payload of the TileLink 'C' channel; unlike 'A' and 'B' it has no mask field. */
class TLBundleC(override val params: TLParameters) extends TLAddrChannel
{
  val channelName = "'C' channel"
  val opcode = UInt(3.W)
  val param = UInt(3.W)
  val size = UInt(params.sizeBits.W)
  val source = UInt(params.sourceBits.W)
  val address = UInt(params.addressBits.W)
  val data = UInt(params.dataBits.W)
  val corrupt = Bool()
}
/** Payload of the TileLink 'D' channel; it carries no address but adds sink and denied. */
class TLBundleD(override val params: TLParameters) extends TLDataChannel
{
  val channelName = "'D' channel"
  val opcode = UInt(3.W)
  // Two bits wide here, whereas the A/B/C payloads use three
  val param = UInt(2.W)
  val size = UInt(params.sizeBits.W)
  val source = UInt(params.sourceBits.W)
  val sink = UInt(params.sinkBits.W)
  val denied = Bool()
  val data = UInt(params.dataBits.W)
  val corrupt = Bool()
}
/** Payload of the TileLink 'E' channel: only a sink identifier. */
class TLBundleE(override val params: TLParameters) extends TLChannel
{
  val channelName = "'E' channel"
  val sink = UInt(params.sinkBits.W)
}
// Uncached TileLink port: only the A (request) and D (response) channels.
// The original note labelled this "TL-UL and TL-UC"; presumably the uncached
// conformance levels of the TileLink specification are meant — verify the naming.
class TLUnCached(val params: TLParameters) extends Bundle {
  // A is driven by the side that instantiates this bundle un-flipped; D flows back to it.
  val a = Decoupled(new TLBundleA(params))
  val d = Flipped(Decoupled(new TLBundleD(params)))
}
// TL-C
// Cached TileLink port: the A/D pair from TLUnCached plus the B, C and E channels.
class TLCached(override val params: TLParameters) extends TLUnCached(params) {
  // B flows in the same direction as D; C and E flow in the same direction as A.
  val b = Flipped(Decoupled(new TLBundleB(params)))
  val c = Decoupled(new TLBundleC(params))
  val e = Decoupled(new TLBundleE(params))
}
/** Factory so an uncached port can be written as `TLUnCached(params)`. */
object TLUnCached
{
  def apply(params: TLParameters): TLUnCached =
    new TLUnCached(params)
}
/** Factory so a cached port can be written as `TLCached(params)`. */
object TLCached
{
  def apply(params: TLParameters): TLCached =
    new TLCached(params)
}
......@@ -2,6 +2,7 @@ package utils
import chisel3._
import chisel3.util._
import scala.math.min
object WordShift {
def apply(data: UInt, wordIndex: UInt, step: Int) = (data << (wordIndex * step.U))
......@@ -31,3 +32,23 @@ object ZeroExt {
if (aLen == len) a else Cat(0.U((len - aLen).W), a)
}
}
/** Bit-smearing helpers built from a doubling sequence of shift-and-OR steps. */
object Or {
  // Fill 1s from low bits to high bits
  def leftOR(x: UInt): UInt = leftOR(x, x.getWidth, x.getWidth)

  /** Smear set bits of `x` toward the MSB.
    *
    * @param width number of result bits kept
    * @param cap   upper bound on the smear distance; shift steps stop once the
    *              distance reaches min(width, cap)
    */
  def leftOR(x: UInt, width: Integer, cap: Integer = 999999): UInt = {
    val limit = min(width, cap)
    // Shift distances 1, 2, 4, ... strictly below `limit`
    val smeared = Iterator.iterate(1)(d => d + d).takeWhile(_ < limit)
      .foldLeft(x) { (acc, d) => acc | (acc << d)(width-1, 0) }
    smeared(width-1, 0)
  }

  // Fill 1s from high bits to low bits
  def rightOR(x: UInt): UInt = rightOR(x, x.getWidth, x.getWidth)

  /** Smear set bits of `x` toward the LSB; parameters as for `leftOR`. */
  def rightOR(x: UInt, width: Integer, cap: Integer = 999999): UInt = {
    val limit = min(width, cap)
    // Shift distances 1, 2, 4, ... strictly below `limit`
    val smeared = Iterator.iterate(1)(d => d + d).takeWhile(_ < limit)
      .foldLeft(x) { (acc, d) => acc | (acc >> d) }
    smeared(width-1, 0)
  }
}
// See LICENSE.Berkeley for license details.
package utils
import chisel3._
import chisel3.util._
import chisel3.util.random.LFSR
/** Result of decoding one codeword. */
abstract class Decoding
{
  /** Data bits taken from the codeword as received. */
  def uncorrected: UInt
  /** Data bits after any correction the code can apply. */
  def corrected: UInt
  /** An error was seen that the code reports as correctable. */
  def correctable: Bool
  /** An error was seen that cannot be corrected. If true, correctable should be ignored. */
  def uncorrectable: Bool
  /** Any error at all, correctable or not. */
  def error: Bool = correctable || uncorrectable
}
/** Interface implemented by every error detecting / correcting code in this file. */
abstract class Code
{
  // Whether the code can detect errors
  def canDetect: Boolean
  // Whether the code can correct errors
  def canCorrect: Boolean

  // Width of the codeword produced for w0 data bits
  def width(w0: Int): Int

  /** Encode x to a codeword suitable for decode.
   *  If poison is true, the decoded value will report uncorrectable
   *  error despite uncorrected == corrected == x.
   */
  def encode(x: UInt, poison: Bool = false.B): UInt
  // Decode a codeword into its data bits and error flags
  def decode(x: UInt): Decoding

  /** Copy the bits in x to the right bit positions in an encoded word,
   *  so that x === decode(swizzle(x)).uncorrected; but don't generate
   *  the other code bits, so decode(swizzle(x)).error might be true.
   *  For codes for which this operation is not trivial, throw an
   *  UnsupportedOperationException. */
  def swizzle(x: UInt): UInt
}
/** The trivial code: the codeword is the data itself and no error is ever reported. */
class IdentityCode extends Code
{
  def canDetect: Boolean = false
  def canCorrect: Boolean = false

  /** No check bits are added. */
  def width(w0: Int): Int = w0

  /** Returns `x` unchanged; `poison` must be the literal false because there is
    * no redundancy in which to record it.
    */
  def encode(x: UInt, poison: Bool = false.B): UInt = {
    require (poison.isLit && poison.litValue == 0, "IdentityCode can not be poisoned")
    x
  }

  def swizzle(x: UInt): UInt = x

  /** Both data views are the input itself and both error flags are constant false. */
  def decode(y: UInt) = new Decoding {
    def uncorrected = y
    def corrected = y
    def correctable = false.B
    def uncorrectable = false.B
  }
}
/** Single parity bit placed above the data: detects errors but cannot correct them. */
class ParityCode extends Code
{
  def canDetect: Boolean = true
  def canCorrect: Boolean = false

  /** One check bit is added to the data. */
  def width(w0: Int): Int = w0 + 1

  /** Codeword is {parity, x}; asserting `poison` flips the parity bit. */
  def encode(x: UInt, poison: Bool = false.B): UInt = {
    val parityBit = x.xorR ^ poison
    Cat(parityBit, x)
  }

  /** Data in place with a zero where the parity bit would go. */
  def swizzle(x: UInt): UInt = Cat(false.B, x)

  /** The data is every bit but the top one; a non-zero XOR over the whole
    * codeword is reported as an uncorrectable error.
    */
  def decode(y: UInt) = new Decoding {
    val uncorrected = y(y.getWidth-2,0)
    val corrected = uncorrected
    val correctable = false.B
    val uncorrectable = y.xorR
  }
}
/** Single-error-correcting Hamming code stored in systematic form
  * (data in the low bits, check bits above it).
  */
class SECCode extends Code
{
  def canDetect = true
  def canCorrect = true

  // SEC codes may or may not be poisonous depending on the length
  // If the code is perfect, every non-codeword is correctable
  def poisonous(n: Int) = !isPow2(n+1)

  // Codeword width for k data bits: k plus m check bits, plus one more check bit
  // when 2^m is smaller than m+k+1.
  def width(k: Int) = {
    val m = log2Floor(k) + 1
    k + m + (if((1 << m) < m+k+1) 1 else 0)
  }

  // Data in the low bits, all check bits zero.
  def swizzle(x: UInt) = {
    val k = x.getWidth
    val n = width(k)
    Cat(0.U((n-k).W), x)
  }

  // An (n=16, k=11) Hamming code is naturally encoded as:
  // PPxPxxxPxxxxxxxP where P are parity bits and x are data
  // Indexes typically start at 1, because then the P are on powers of two
  // In systematic coding, you put all the data in the front:
  // xxxxxxxxxxxPPPPP
  // Indexes typically start at 0, because Computer Science
  // For sanity when reading SRAMs, you want systematic form.
  //
  // Returns (hamm2sys, sys2hamm, syndrome):
  //   hamm2sys - 1-based Hamming position -> systematic bit index (entry 0 is a placeholder)
  //   sys2hamm - inverse of hamm2sys, obtained by sorting on the systematic index
  //   syndrome - j => n-bit literal marking the systematic bits whose Hamming
  //              position has bit j set
  private def impl(n: Int, k: Int) = {
    require (n >= 3 && k >= 1 && !isPow2(n))
    val hamm2sys = IndexedSeq.tabulate(n+1) { i =>
      if (i == 0) {
        n /* undefined */
      } else if (isPow2(i)) {
        k + log2Ceil(i)
      } else {
        i - 1 - log2Ceil(i)
      }
    }
    val sys2hamm = hamm2sys.zipWithIndex.sortBy(_._1).map(_._2).toIndexedSeq
    def syndrome(j: Int) = {
      val bit = 1 << j
      ("b" + Seq.tabulate(n) { i =>
        if ((sys2hamm(i) & bit) != 0) "1" else "0"
      }.reverse.mkString).U
    }
    (hamm2sys, sys2hamm, syndrome _)
  }

  // Codeword is {check bits, x}. Poisoning is allowed only when poison is the
  // literal false or the code length is poisonous.
  def encode(x: UInt, poison: Bool = false.B) = {
    val k = x.getWidth
    val n = width(k)
    val (_, _, syndrome) = impl(n, k)
    require ((poison.isLit && poison.litValue == 0) || poisonous(n), s"SEC code of length ${n} cannot be poisoned")
    /* By setting the entire syndrome on poison, the corrected bit falls off the end of the code */
    val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j)(k-1, 0) & x).xorR ^ poison }.asUInt
    Cat(syndromeUInt, x)
  }

  // n is taken from the codeword width and k is derived from it as n - log2Ceil(n).
  def decode(y: UInt) = new Decoding {
    val n = y.getWidth
    val k = n - log2Ceil(n)
    val (_, sys2hamm, syndrome) = impl(n, k)
    val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j) & y).xorR }.asUInt
    // One-hot bad-bit position in Hamming numbering, then remapped to data-bit order
    val hammBadBitOH = UIntToOH(syndromeUInt, n+1)
    val sysBadBitOH = VecInit.tabulate(k) { i => hammBadBitOH(sys2hamm(i)) }.asUInt
    val uncorrected = y(k-1, 0)
    val corrected = uncorrected ^ sysBadBitOH
    val correctable = syndromeUInt.orR
    // A syndrome above n names no bit of the codeword
    val uncorrectable = if (poisonous(n)) { syndromeUInt > n.U } else { false.B }
  }
}
/** SEC code wrapped in one overall parity bit (single-error-correct, double-error-detect). */
class SECDEDCode extends Code
{
  def canDetect = true
  def canCorrect = true

  private val sec = new SECCode
  private val par = new ParityCode

  // SEC width plus the parity bit
  def width(k: Int) = sec.width(k)+1

  def encode(x: UInt, poison: Bool = false.B) = {
    // toggling two bits ensures the error is uncorrectable
    // to ensure corrected == uncorrected, we pick one redundant
    // bit from SEC (the highest); correcting it does not affect
    // corrected == uncorrected. the second toggled bit is the
    // parity bit, which also does not appear in the decoding
    val toggle_lo = Cat(poison.asUInt, poison.asUInt)
    val toggle_hi = toggle_lo << (sec.width(x.getWidth)-1)
    par.encode(sec.encode(x)) ^ toggle_hi
  }

  def swizzle(x: UInt) = par.swizzle(sec.swizzle(x))

  def decode(x: UInt) = new Decoding {
    // The SEC decoder sees everything but the top (parity) bit; the parity
    // decoder sees the whole word.
    val secdec = sec.decode(x(x.getWidth-2,0))
    val pardec = par.decode(x)
    val uncorrected = secdec.uncorrected
    val corrected = secdec.corrected
    // Overall parity wrong: reported as a correctable error
    val correctable = pardec.uncorrectable
    // Overall parity right but the SEC syndrome is non-zero: reported as uncorrectable
    val uncorrectable = !pardec.uncorrectable && secdec.correctable
  }
}
object ErrGen
{
  /** Produces a `width`-bit vector with at most one bit set.
    *
    * generate a 1-bit error with approximate probability 2^-f
    * The bit is chosen by one-hot decoding the low log2Up(width)+f bits of a 16-bit LFSR
    * and keeping only the low `width` bits of the result.
    */
  def apply(width: Int, f: Int): UInt = {
    // Evaluated lazily so the argument checks run in the same order as before.
    def selectBits = log2Up(width) + f
    require(width > 0 && f >= 0 && selectBits <= 16)
    val decoded = UIntToOH(LFSR(16)(selectBits-1,0))
    decoded(width-1,0)
  }

  /** Returns `x` with the generated error vector XOR-ed into it. */
  def apply(x: UInt, f: Int): UInt = {
    val flipped = x ^ apply(x.getWidth, f)
    flipped
  }
}
// Mix-in for bundles that may carry error reports; each report is optional and,
// when present, is a ValidIO wrapping a UInt payload.
trait CanHaveErrors extends Bundle {
// Report for errors that were corrected (payload meaning is defined by the implementer)
val correctable: Option[ValidIO[UInt]]
// Report for errors that could not be corrected
val uncorrectable: Option[ValidIO[UInt]]
}
// ECC configuration: protection granularity in bytes, the code to use
// (IdentityCode by default) and whether errors should be reported.
case class ECCParams(
bytes: Int = 1,
code: Code = new IdentityCode,
notifyErrors: Boolean = false
)
object Code {
  /** Resolves an optional code name; a missing name is treated as "none". */
  def fromString(s: Option[String]): Code = {
    val name = s.getOrElse("none")
    fromString(name)
  }

  /** Resolves a code by case-insensitive name.
    *
    * Accepted names: "none"/"identity", "parity", "sec", "secded".
    * Any other name raises IllegalArgumentException.
    */
  def fromString(s: String): Code = {
    val key = s.toLowerCase
    key match {
      case "none" | "identity" => new IdentityCode
      case "parity" => new ParityCode
      case "sec" => new SECCode
      case "secded" => new SECDEDCode
      case _ => throw new IllegalArgumentException("Unknown ECC type")
    }
  }
}
......@@ -12,3 +12,32 @@ object LookupTreeDefault {
def apply[T <: Data](key: UInt, default: T, mapping: Iterable[(UInt, T)]): T =
MuxLookup(key, default, mapping.toSeq)
}
/** Element-wise Mux over tuples: each component of the result is
  * Mux(cond, <component of con>, <component of alt>).
  */
object MuxT {
  def apply[T <: Data, U <: Data](cond: Bool, con: (T, U), alt: (T, U)): (T, U) = {
    val (c1, c2) = con
    val (a1, a2) = alt
    (Mux(cond, c1, a1), Mux(cond, c2, a2))
  }

  def apply[T <: Data, U <: Data, W <: Data](cond: Bool, con: (T, U, W), alt: (T, U, W)): (T, U, W) = {
    val (c1, c2, c3) = con
    val (a1, a2, a3) = alt
    (Mux(cond, c1, a1), Mux(cond, c2, a2), Mux(cond, c3, a3))
  }

  def apply[T <: Data, U <: Data, W <: Data, X <: Data](cond: Bool, con: (T, U, W, X), alt: (T, U, W, X)): (T, U, W, X) = {
    val (c1, c2, c3, c4) = con
    val (a1, a2, a3, a4) = alt
    (Mux(cond, c1, a1), Mux(cond, c2, a2), Mux(cond, c3, a3), Mux(cond, c4, a4))
  }
}
/** Creates a cascade of n MuxTs to search for a key value.
  *
  * The mapping is folded from its last entry to its first, so when several keys match,
  * the earliest entry in `mapping` ends up outermost and wins. `default` is returned
  * when no key matches.
  */
object MuxTLookup {
  def apply[S <: UInt, T <: Data, U <: Data](key: S, default: (T, U), mapping: Seq[(S, (T, U))]): (T, U) =
    mapping.foldRight(default) { case ((candidate, value), fallback) =>
      MuxT(candidate === key, value, fallback)
    }

  def apply[S <: UInt, T <: Data, U <: Data, W <: Data](key: S, default: (T, U, W), mapping: Seq[(S, (T, U, W))]): (T, U, W) =
    mapping.foldRight(default) { case ((candidate, value), fallback) =>
      MuxT(candidate === key, value, fallback)
    }
}
package utils
import chisel3._
import chisel3.util._
// This gets used everywhere, so make the smallest circuit possible ...
// Given an address and size, create a mask of beatBytes size
// eg: (0x3, 0, 4) => 0001, (0x3, 1, 4) => 0011, (0x3, 2, 4) => 1111
// groupBy applies an interleaved OR reduction; groupBy=2 take 0010 => 01
// NOTE(review): the first two examples above look inconsistent with the code below, which
// for addr_lo = 0x3 selects the upper lanes (they match addr_lo = 0x0) -- verify.
object MaskGen {
// addr_lo:   low address bits (at least log2(beatBytes) of them are indexed)
// lgSize:    log2 of the access size in bytes
// beatBytes: bytes per beat; must be a power of two
// groupBy:   number of adjacent bytes collapsed into one mask bit; power of two, <= beatBytes
def apply(addr_lo: UInt, lgSize: UInt, beatBytes: Int, groupBy: Int = 1): UInt = {
require (groupBy >= 1 && beatBytes >= groupBy)
require (isPow2(beatBytes) && isPow2(groupBy))
val lgBytes = log2Ceil(beatBytes)
// One-hot of lgSize, with the low bits forced on so sizes below the group size behave like the group size
val sizeOH = UIntToOH(lgSize | 0.U(log2Up(beatBytes).W), log2Up(beatBytes)) | (groupBy*2 - 1).U
// helper(i) returns, for each of the 2^i equal slices of the beat, a pair (acc, eq):
//   eq  - the top i address bits select this slice
//   acc - this slice is covered by the access
def helper(i: Int): Seq[(Bool, Bool)] = {
if (i == 0) {
// Whole beat: covered only when the access is at least beat-sized
Seq((lgSize >= lgBytes.U, true.B))
} else {
val sub = helper(i-1)
val size = sizeOH(lgBytes - i)
val bit = addr_lo(lgBytes - i)
val nbit = !bit
Seq.tabulate (1 << i) { j =>
// Each slice inherits from its parent (j/2) and refines by one more address bit
val (sub_acc, sub_eq) = sub(j/2)
val eq = sub_eq && (if (j % 2 == 1) bit else nbit)
val acc = sub_acc || (size && eq)
(acc, eq)
}
}
}
// A single group covering the whole beat is always active
if (groupBy == beatBytes) 1.U else
Cat(helper(lgBytes-log2Ceil(groupBy)).map(_._1).reverse)
}
}
/** Helpers that turn a random bit vector into a value in [0, mod) or a one-hot selection. */
object Random
{
  /** Reduces `random` to a value in [0, mod).
    *
    * Powers of two simply take the low bits. Other moduli first take a wider power-of-two
    * sample and then pick the slice of that range the sample falls into.
    * `mod == 1` returns the constant 0: a zero-width extract `random(-1, 0)` is not a legal
    * Chisel bit range and would fail at elaboration.
    */
  def apply(mod: Int, random: UInt): UInt = {
    if (mod == 1) 0.U
    else if (isPow2(mod)) random(log2Ceil(mod)-1,0)
    else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod))
  }

  /** Same as above, drawing from a fresh internal 16-bit LFSR. */
  def apply(mod: Int): UInt = apply(mod, randomizer)

  /** One-hot variant of `apply`.
    * NOTE(review): for mod == 1 the power-of-two branch decodes a 1-bit value into 2 bits,
    * so the result may select index 1 -- confirm whether single-way callers exist.
    */
  def oneHot(mod: Int, random: UInt): UInt = {
    if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0))
    else VecInit(PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod))).asUInt
  }

  /** Same as above, drawing from a fresh internal 16-bit LFSR. */
  def oneHot(mod: Int): UInt = oneHot(mod, randomizer)

  private def randomizer = LFSR16()

  // Splits the range of `value` into `slices` equal parts; element i is true when value lies
  // below the upper bound of slice i. The bound is computed in BigInt so wide values cannot
  // overflow Int arithmetic.
  private def partition(value: UInt, slices: Int) =
    Seq.tabulate(slices)(i => value < ((BigInt(i + 1) << value.getWidth) / slices).U)
}
/**
 * Transpose a matrix of Chisel Vecs.
 *
 * The number of output rows is taken from the length of the first input row;
 * output row i collects element i of every input row.
 */
object Transpose
{
  def apply[T <: chisel3.core.Data](in: Vec[Vec[T]]) = {
    val columnCount = in(0).size
    val columns = for (col <- 0 until columnCount) yield VecInit(in.map(_(col)))
    VecInit(columns)
  }
}
// See LICENSE.Berkeley for license details.
// See LICENSE.SiFive for license details.
package utils
import chisel3._
import chisel3.util._
import chisel3.util.random.LFSR
// Interface of a replacement policy that keeps a single piece of state.
abstract class ReplacementPolicy {
// The way currently chosen for replacement
def way: UInt
// Notify the policy of a miss
def miss: Unit
// Notify the policy of a hit
def hit: Unit
}
// Replacement policy that picks the victim way from a 16-bit LFSR.
class RandomReplacement(ways: Int) extends ReplacementPolicy {
// Defaults to false; `miss` overrides it with a later connection
private val replace = Wire(Bool())
replace := false.B
// `replace` is passed as the LFSR's second argument (its increment enable in chisel3.util.random.LFSR)
val lfsr = LFSR(16, replace)
// Reduce the LFSR value to a way index in [0, ways)
def way = Random(ways, lfsr)
def miss = replace := true.B
// Hits do not affect this policy
def hit = {}
}
// Interface of a replacement policy that is accessed per set:
// `access` announces the set being looked up, `update` reports the outcome.
abstract class SeqReplacementPolicy {
def access(set: UInt): Unit
def update(valid: Bool, hit: Bool, set: UInt, way: UInt): Unit
// The way currently chosen for replacement
def way: UInt
}
// Adapter exposing RandomReplacement through the SeqReplacementPolicy interface.
class SeqRandom(n_ways: Int) extends SeqReplacementPolicy {
val logic = new RandomReplacement(n_ways)
// Random replacement keeps no per-set state, so a set access does nothing
def access(set: UInt) = { }
// Only a valid miss is forwarded to the underlying policy; set and way are ignored
def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = {
when (valid && !hit) { logic.miss }
}
def way = logic.way
}
// Pseudo-LRU state for n ways, held in an (n-1)-bit register.
// The state bits are addressed with a 1-based index that grows by one bit per level
// (idx = Cat(idx, bit)), i.e. node idx has children 2*idx and 2*idx+1.
class PseudoLRU(n: Int)
{
private val state_reg = Reg(UInt((n-1).W))
// Record a single access to `way`
def access(way: UInt) {
state_reg := get_next_state(state_reg,way)
}
// Record several accesses in one cycle; entries whose valid is low leave the state unchanged,
// and later entries are applied on top of earlier ones
def access(ways: Seq[ValidIO[UInt]]) {
state_reg := ways.foldLeft(state_reg)((prev, way) => Mux(way.valid, get_next_state(prev, way.bits), prev))
}
// Returns the state after touching `way`, without updating the register
def get_next_state(state: UInt, way: UInt) = {
// Shift left by one so that the 1-based node index can be used directly as a bit position
var next_state = state << 1
var idx = 1.U(1.W)
for (i <- log2Up(n)-1 to 0 by -1) {
val bit = way(i)
// At each node on the path to `way`, store the inverse of the direction just taken
next_state = next_state.bitSet(idx, !bit)
idx = Cat(idx, bit)
}
// Drop the padding bit introduced by the shift
next_state(n-1, 1)
}
// The way the current register state selects for replacement
def replace = get_replace_way(state_reg)
// Walks from the root following the stored direction bits and returns the way reached
def get_replace_way(state: UInt) = {
val shifted_state = state << 1
var idx = 1.U(1.W)
for (i <- log2Up(n)-1 to 0 by -1) {
// Only follow the "1" direction when the resulting way index stays below n
val in_bounds = Cat(idx, (BigInt(1) << i).U)(log2Up(n)-1, 0) < n.U
idx = Cat(idx, in_bounds && shifted_state(idx))
}
// Strip the leading 1 of the node index to obtain the way number
idx(log2Up(n)-1,0)
}
}
// Per-set pseudo-LRU: one (n_ways-1)-bit state word per set, stored in a SyncReadMem.
class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy {
val state = SyncReadMem(n_sets, UInt((n_ways-1).W))
// Used only for its pure get_replace_way / get_next_state functions
val logic = new PseudoLRU(n_ways)
// Driven by `access`; carries the state word read from memory
val current_state = Wire(UInt())
val plru_way = logic.get_replace_way(current_state)
// Driven by `update`
val next_state = Wire(UInt())
// Start the read of the state word for `set`
def access(set: UInt) = {
current_state := state.read(set)
}
// On a hit the touched way is `way`; otherwise the way chosen for replacement is touched.
// The new state is written back only when `valid` is set.
def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = {
val update_way = Mux(hit, way, plru_way)
next_state := logic.get_next_state(current_state, update_way)
when (valid) { state.write(set, next_state) }
}
def way = plru_way
}
......@@ -9,7 +9,10 @@ import xiangshan.backend.dispatch.DP1Parameters
import xiangshan.backend.exu.ExuParameters
import xiangshan.frontend.Frontend
import xiangshan.mem._
import xiangshan.mem.cache.ICacheParameters
import xiangshan.mem.cache.DCacheParameters
import xiangshan.utils._
import bus.tilelink.TLParameters
trait HasXSParameter {
val XLEN = 64
......@@ -60,6 +63,24 @@ trait HasXSParameter {
LduCnt = 0,
StuCnt = 1
)
val l1BusDataWidth = 64
val l1BusParams = TLParameters(
addressBits = PAddrBits,
dataBits = l1BusDataWidth,
sourceBits = 3,
sinkBits = 3
)
val icacheParameters = ICacheParameters(
)
// the width of LSU to DCache IO
val memWidth = 2
val LRSCCycles = 16
val dcacheParameters = DCacheParameters(
busParams = l1BusParams
)
}
trait HasXSLog { this: Module =>
......
// See LICENSE.Berkeley for license details.
package xiangshan.mem
import chisel3._
import chisel3.util._
import xiangshan.XSBundle
// Encodings of the memory commands (M_*) and predicates that classify them.
trait MemoryOpConstants {
// Width in bits of the `meta` field carried by cache requests/responses
val META_SZ = 64
val NUM_XA_OPS = 9
// Width in bits of a memory command
val M_SZ = 5
def M_X = BitPat("b?????")
def M_XRD = "b00000".U // int load
def M_XWR = "b00001".U // int store
def M_PFR = "b00010".U // prefetch with intent to read
def M_PFW = "b00011".U // prefetch with intent to write
def M_XA_SWAP = "b00100".U
def M_FLUSH_ALL = "b00101".U // flush all lines
def M_XLR = "b00110".U
def M_XSC = "b00111".U
def M_XA_ADD = "b01000".U
def M_XA_XOR = "b01001".U
def M_XA_OR = "b01010".U
def M_XA_AND = "b01011".U
def M_XA_MIN = "b01100".U
def M_XA_MAX = "b01101".U
def M_XA_MINU = "b01110".U
def M_XA_MAXU = "b01111".U
def M_FLUSH = "b10000".U // write back dirty data and cede R/W permissions
def M_PWR = "b10001".U // partial (masked) store
def M_PRODUCE = "b10010".U // write back dirty data and cede W permissions
def M_CLEAN = "b10011".U // write back dirty data and retain R/W permissions
def M_SFENCE = "b10100".U // flush TLB
def M_WOK = "b10111".U // check write permissions but don't perform a write
// Classification helpers; each returns a Bool built from equality checks against the codes above
def isAMOLogical(cmd: UInt) = cmd === M_XA_SWAP || cmd === M_XA_XOR || cmd === M_XA_OR || cmd === M_XA_AND
def isAMOArithmetic(cmd: UInt) = cmd === M_XA_ADD || cmd === M_XA_MIN || cmd === M_XA_MAX || cmd === M_XA_MINU || cmd === M_XA_MAXU
def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd)
def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
// M_XSC and all AMOs count as both a read and a write
def isRead(cmd: UInt) = cmd === M_XRD || cmd === M_XLR || cmd === M_XSC || isAMO(cmd)
def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC || isAMO(cmd)
// Writes, plus write-intent prefetches and M_XLR
def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR
}
// Companion object so the constants can be referenced without mixing in the trait.
object MemoryOpConstants extends MemoryOpConstants {
}
// Base bundle for memory-subsystem interfaces: an XSBundle with the memory op constants in scope.
class MemBundle extends XSBundle
with MemoryOpConstants
// A single request to the data cache.
class DCacheReq extends MemBundle
{
// Memory command, M_SZ bits wide
val cmd = UInt(M_SZ.W)
// Address, PAddrBits wide
val addr = UInt(PAddrBits.W)
// Data payload, DataBits wide
val data = UInt(DataBits.W)
// One mask bit per data byte
val mask = UInt((DataBits/8).W)
// Opaque tag, META_SZ bits wide
val meta = UInt(META_SZ.W)
}
// A single response from the data cache.
class DCacheResp extends MemBundle
{
// Data payload, DataBits wide
val data = UInt(DataBits.W)
// Opaque tag, META_SZ bits wide (presumably echoes the request's meta -- confirm against the cache)
val meta = UInt(META_SZ.W)
// Negative-acknowledge flag
val nack = Bool()
}
// Interface between the LSU and the data cache, memWidth channels wide.
class LSUDMemIO extends MemBundle
{
// One ready/valid handshake for the whole group; each channel additionally carries its own valid
val req = new DecoupledIO(Vec(memWidth, Valid(new DCacheReq)))
// Per-channel responses in the opposite direction, without back-pressure
val resp = Flipped(Vec(memWidth, new ValidIO(new DCacheResp)))
}
......@@ -25,7 +25,7 @@ class MemPipeline(implicit val p: XSConfig) extends XSModule with NeedImpl{
})
val lsu = Module(new Lsu)
val dcache = Module(new Dcache)
val dcache = Module(new DCache)
val mshq = Module(new MSHQ)
val dtlb = Module(new Dtlb)
val lsroq = Module(new LsRoq)
......@@ -38,4 +38,4 @@ class MemPipeline(implicit val p: XSConfig) extends XSModule with NeedImpl{
lsroq.io := DontCare
sbuffer.io := DontCare
}
\ No newline at end of file
}
// See LICENSE.SiFive for license details.
// See LICENSE.Berkeley for license details.
package xiangshan.mem.cache
import chisel3._
import chisel3.util._
import xiangshan.mem.MemoryOpConstants
// Derives store size, alignment check, byte mask and replicated data for a store.
//   typ:     encodes the access size in its low bits (log2 of the byte count)
//   addr:    store address (only the low log2Up(maxSize) bits are examined)
//   dat:     store data
//   maxSize: widest access in bytes
class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) {
// Low bits of typ, just wide enough to hold values 0..log2Up(maxSize)
val size = typ(log2Up(log2Up(maxSize)+1)-1,0)
// True when any address bit below the access size is set
def misaligned =
(addr & ((1.U << size) - 1.U)(log2Up(maxSize)-1,0)).orR
// Byte mask: starts from a single byte and doubles in width each iteration,
// placing the narrower mask in the half selected by addr(i) and filling the
// lower part with ones when the access is larger than 2^i bytes
def mask = {
var res = 1.U
for (i <- 0 until log2Up(maxSize)) {
val upper = Mux(addr(i), res, 0.U) | Mux(size >= (i+1).U, ((BigInt(1) << (1 << i))-1).U, 0.U)
val lower = Mux(addr(i), 0.U, res)
res = Cat(upper, lower)
}
res
}
// For size == i, replicate the low 2^i bytes of dat across the full width;
// sizes at or above log2Up(maxSize) pass dat through unchanged.
// Sizes below the starting i are not considered.
protected def genData(i: Int): UInt =
if (i >= log2Up(maxSize)) dat
else Mux(size === i.U, Fill(1 << (log2Up(maxSize)-i), dat((8 << i)-1,0)), genData(i+1))
def data = genData(0)
// Replication starting at 4-byte granularity
def wordData = genData(2)
}
// Extracts and extends the loaded value from a full-width data word.
//   typ/addr/maxSize: as for StoreGen (size is obtained from a StoreGen built on the same arguments)
//   signed: sign-extend when set, otherwise zero-extend
//   dat:    raw data word
//   zero:   when set, the lowest byte is forced to zero at the final (byte) step
class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) {
private val size = new StoreGen(typ, addr, dat, maxSize).size
// Narrows from the widest granularity down to 2^logMinSize bytes.
// At each step the half selected by addr(i) is moved to the bottom; if the access
// size equals this step (or zeroing applies) the upper bits are replaced by the extension.
private def genData(logMinSize: Int): UInt = {
var res = dat
for (i <- log2Up(maxSize)-1 to logMinSize by -1) {
val pos = 8 << i
val shifted = Mux(addr(i), res(2*pos-1,pos), res(pos-1,0))
// Zeroing only takes effect on the i == 0 iteration
val doZero = (i == 0).B && zero
val zeroed = Mux(doZero, 0.U, shifted)
res = Cat(Mux(size === i.U || doZero, Fill(8*maxSize-pos, signed && zeroed(pos-1)), res(8*maxSize-1,pos)), zeroed)
}
res
}
// Stops narrowing at 4-byte granularity
def wordData = genData(2)
def data = genData(0)
}
/** Combinational ALU for atomic memory operations.
  *
  * Computes `lhs <op> rhs` where the operation is selected by `cmd` (one of the M_XA_* codes).
  * `out_unmasked` is the raw result; `out` merges it into `lhs` under the byte mask.
  * Any command that is not add/and/or/xor falls through to the min/max path.
  *
  * @param operandBits operand width in bits; sub-widths from 32 bits up to operandBits are supported
  */
class AMOALU(operandBits: Int) extends Module
  with MemoryOpConstants {
  val minXLen = 32
  // Supported operand widths: 32, 64, ... up to operandBits
  val widths = (0 to log2Ceil(operandBits / minXLen)).map(minXLen << _)

  // Ports must be wrapped in IO(...) when the file uses `import chisel3._`;
  // a bare Bundle is not bound as a port and the module cannot be elaborated.
  val io = IO(new Bundle {
    val mask = Input(UInt((operandBits/8).W))
    val cmd = Input(Bits(M_SZ.W))
    val lhs = Input(Bits(operandBits.W))
    val rhs = Input(Bits(operandBits.W))
    val out = Output(Bits(operandBits.W))
    val out_unmasked = Output(Bits(operandBits.W))
  })

  val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU
  val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU
  val add = io.cmd === M_XA_ADD
  // OR is produced as (lhs & rhs) | (lhs ^ rhs), so it enables both terms
  val logic_and = io.cmd === M_XA_OR || io.cmd === M_XA_AND
  val logic_xor = io.cmd === M_XA_XOR || io.cmd === M_XA_OR

  val adder_out = {
    // partition the carry chain to support sub-xLen addition
    val mask = ~(0.U(operandBits.W) +: widths.init.map(w => !io.mask(w/8-1) << (w-1))).reduce(_|_)
    (io.lhs & mask) + (io.rhs & mask)
  }

  val less = {
    // break up the comparator so the lower parts will be CSE'd
    def isLessUnsigned(x: UInt, y: UInt, n: Int): Bool = {
      if (n == minXLen) x(n-1, 0) < y(n-1, 0)
      else x(n-1, n/2) < y(n-1, n/2) || x(n-1, n/2) === y(n-1, n/2) && isLessUnsigned(x, y, n/2)
    }

    def isLess(x: UInt, y: UInt, n: Int): Bool = {
      // Signedness is decided by the single command bit that distinguishes MIN from MINU
      val signed = {
        val mask = M_XA_MIN ^ M_XA_MINU
        (io.cmd & mask) === (M_XA_MIN & mask)
      }
      // Equal top bits: plain unsigned compare. Otherwise the top bits decide,
      // with the roles swapped between signed and unsigned.
      Mux(x(n-1) === y(n-1), isLessUnsigned(x, y, n), Mux(signed, x(n-1), y(n-1)))
    }

    // Pick the comparison for the widest operand whose upper half is enabled in the mask
    PriorityMux(widths.reverse.map(w => (io.mask(w/8/2), isLess(io.lhs, io.rhs, w))))
  }

  val minmax = Mux(Mux(less, min, max), io.lhs, io.rhs)
  val logic =
    Mux(logic_and, io.lhs & io.rhs, 0.U) |
    Mux(logic_xor, io.lhs ^ io.rhs, 0.U)
  val out =
    Mux(add, adder_out,
    Mux(logic_and || logic_xor, logic,
    minmax))

  // Expand the byte mask to a bit mask; unselected bytes keep the lhs value
  val wmask = FillInterleaved(8, io.mask)
  io.out := wmask & out | ~wmask & io.lhs
  io.out_unmasked := out
}
// See LICENSE.SiFive for license details.
package xiangshan.mem.cache
import chisel3._
import chisel3.util._
import xiangshan.HasXSParameter
import xiangshan.mem.MemoryOpConstants
// this file contains common building blocks that can be shared by ICache and DCache
// this is the common parameter base for L1 ICache and L1 DCache
trait L1CacheParameters {
// Number of sets
def nSets: Int
// Number of ways per set
def nWays: Int
// Width of one data row in bits
def rowBits: Int
def nTLBEntries: Int
// Cache block (line) size in bytes
def blockBytes: Int
}
// Derived geometry shared by the L1 caches; everything is computed from `cacheParams`
// plus the core-wide parameters in HasXSParameter.
trait HasL1CacheParameters extends HasXSParameter
with MemoryOpConstants {
val cacheParams: L1CacheParameters
def nSets = cacheParams.nSets
// Address bits that select a byte within a block
def blockOffBits = log2Up(cacheParams.blockBytes)
// Address bits that select a set
def idxBits = log2Up(cacheParams.nSets)
// Address bits below the tag
def untagBits = blockOffBits + idxBits
// 4K page
def pgIdxBits = 12
// Untagged bits, capped at the page offset width
def pgUntagBits = untagBits min pgIdxBits
// L1 cache are all physically tagged cache
def tagBits = PAddrBits - pgUntagBits
def nWays = cacheParams.nWays
def wayBits = log2Up(nWays)
def rowBits = cacheParams.rowBits
def rowBytes = rowBits/8
def rowOffBits = log2Up(rowBytes)
def nTLBEntries = cacheParams.nTLBEntries
// Bus-side data width comes from the L1 bus configuration
def cacheDataBits = l1BusDataWidth
def cacheDataBytes = cacheDataBits / 8
// Number of bus beats needed to transfer one block
def cacheDataBeats = (cacheParams.blockBytes * 8) / cacheDataBits
def refillCycles = cacheDataBeats
}
// Base classes for L1 cache modules and bundles; both only mix in the shared L1 parameters.
abstract class L1CacheModule extends Module
with HasL1CacheParameters
abstract class L1CacheBundle extends Bundle
with HasL1CacheParameters
package xiangshan.mem.cache
import chisel3._
import chisel3.util._
import xiangshan._
import xiangshan.utils._
import chisel3.util.experimental.BoringUtils
import xiangshan.backend.decode.XSTrap
import xiangshan.mem._
import bus.tilelink.TLParameters
import bus.tilelink.TLPermissions
import bus.tilelink.ClientMetadata
import _root_.utils.{Code, RandomReplacement}
// ICache specific parameters
// With the defaults below the L1 ICache is 64set, 8way-associative, with 64byte block, a total of 32KB
// It's a virtually indexed, physically tagged cache.
// NOTE(review): this header originally read "DCache"; the MSHR/SDQ/RPQ/MMIO fields below
// also look carried over from the DCache parameters -- confirm which of them the ICache needs.
case class ICacheParameters(
nSets: Int = 64,
nWays: Int = 8,
rowBits: Int = 64,
nTLBEntries: Int = 32,
// Names of the ECC codes, resolved through Code.fromString (None selects the identity code)
tagECC: Option[String] = None,
dataECC: Option[String] = None,
dataECCBytes: Int = 1,
nMSHRs: Int = 1,
nSDQ: Int = 17,
nRPQ: Int = 16,
nMMIOs: Int = 1,
blockBytes: Int = 64) extends L1CacheParameters {
def tagCode: Code = Code.fromString(tagECC)
def dataCode: Code = Code.fromString(dataECC)
// A new policy instance is created on every call
def replacement = new RandomReplacement(nWays)
}
// Derived ICache geometry and ECC widths.
trait HasICacheParameters extends HasL1CacheParameters {
// NOTE(review): this binds the ICache trait to `dcacheParameters`, not `icacheParameters`;
// it looks like a copy-paste slip, but users of `cfg` may rely on DCache-only fields -- confirm before changing.
val cacheParams = dcacheParameters
val cfg = cacheParams
// the width of inner CPU data interface
def wordBits = DataBits
def wordBytes = DataBytes
def wordOffBits = log2Up(wordBytes)
// Bytes transferred per bus beat, and how many CPU words that is
def beatBytes = cfg.blockBytes / cacheDataBeats
def beatWords = beatBytes / wordBytes
def beatOffBits = log2Up(beatBytes)
// Bit range of the set index within an address
def idxMSB = untagBits-1
def idxLSB = blockOffBits
// Bit range of the word offset within a block
def offsetmsb = idxLSB-1
def offsetlsb = wordOffBits
def rowWords = rowBits/wordBits
def doNarrowRead = DataBits * nWays % rowBits == 0
// ECC granularity and the resulting encoded widths
def eccBytes = cacheParams.dataECCBytes
val eccBits = cacheParams.dataECCBytes * 8
val encBits = cacheParams.dataCode.width(eccBits)
val encWordBits = encBits * (wordBits / eccBits)
def encDataBits = cacheParams.dataCode.width(wordBits) // NBDCache only
def encRowBits = encDataBits*rowWords
require(isPow2(nSets), s"nSets($nSets) must be pow2")
// To make things easier, now we assume:
// core_data_width(wordBits) == L1_basic_storage_unit_width(rowBits) ==
// outer_tilelink_interface_width(cacheDataBits)
require(rowBits == wordBits, s"rowBits($rowBits) != wordBits($wordBits)")
require(rowBits == cacheDataBits, s"rowBits($rowBits) != cacheDataBits($cacheDataBits)")
}
// Base classes for ICache modules and bundles; both only mix in the ICache parameters.
abstract class ICacheModule extends Module
with HasICacheParameters
abstract class ICacheBundle extends Bundle
with HasICacheParameters
// Group of memWidth metadata read requests, one per channel.
class ICacheMetaReadReq extends ICacheBundle {
val req = Vec(memWidth, new L1MetaReadReq)
}
// Group of memWidth data read requests with a separate valid bit per channel.
class ICacheDataReadReq extends ICacheBundle {
val req = Vec(memWidth, new L1DataReadReq)
val valid = Vec(memWidth, Bool())
}
此差异已折叠。
package xiangshan.mem.cache
import chisel3._
import chisel3.util._
import chisel3.util.experimental.BoringUtils
import bus.tilelink._
// Request describing one cache line to be written back.
class WritebackReq extends DCacheBundle {
// Tag and set index of the line; together they form the block address
val tag = Bits(tagBits.W)
val idx = Bits(idxBits.W)
// TODO: make it configurable
// Open question (author): is this source simply the MSHR id? If so, how is a
// request that answers a probe handled?
val source = UInt(cfg.busParams.sourceBits.W)
// TileLink permission parameter sent with the release
val param = UInt(TLPermissions.cWidth.W)
// Way-enable vector, nWays bits wide, selecting the way to read
val way_en = Bits(nWays.W)
// Open question (author): requests coming down from the WBU are presumably voluntary?
val voluntary = Bool()
}
/** Writes one cache line back over TileLink channel C.
  *
  * Sequence: accept a request (s_invalid), read all rows of the line from the data
  * array into `wb_buffer` (s_fill_buffer), send the buffered beats as a Release or
  * ProbeAck (s_active), and for voluntary releases wait until a grant has been seen
  * (s_grant). Only one request is handled at a time.
  */
class WritebackUnit extends DCacheModule {
  // Ports must be wrapped in IO(...) when the file uses `import chisel3._`;
  // a bare Bundle is not bound as a port and the module cannot be elaborated.
  val io = IO(new Bundle {
    val req = Flipped(Decoupled(new WritebackReq()))
    // Pulsed for one cycle when the whole line has been read into the buffer.
    // (Author's open question: what is this used for?)
    val resp = Output(Bool())
    // (Author's note: purpose unclear; unrelated ports were removed for now to keep things simple.)
    val data_req = Decoupled(new L1DataReadReq)
    val data_resp = Input(UInt(encRowBits.W))
    val release = Decoupled(new TLBundleC(cfg.busParams))
    // NOTE(review): only fire() is observed on this port and it is declared in the same
    // direction as `release`; a grant arriving from the bus would normally be Flipped and
    // have its ready driven here -- confirm the intended direction with the parent module.
    val mem_grant = Decoupled(new TLBundleD(cfg.busParams))
  })

  // Only one request can be in flight at a time
  val req = Reg(new WritebackReq())
  val s_invalid :: s_fill_buffer :: s_active :: s_grant :: Nil = Enum(4)
  val state = RegInit(s_invalid)

  // r1/r2 delay the "read issued" flag and its beat counter by one and two cycles:
  // the row is captured from data_resp two cycles after the read request fires.
  // (Author's open question: why two cycles? Possibly because of bank-conflict logic.)
  val r1_data_req_fired = RegInit(false.B)
  val r2_data_req_fired = RegInit(false.B)
  val r1_data_req_cnt = Reg(UInt(log2Up(refillCycles+1).W))
  val r2_data_req_cnt = Reg(UInt(log2Up(refillCycles+1).W))
  val data_req_cnt = RegInit(0.U(log2Up(refillCycles+1).W))
  val (_, last_beat, all_beats_done, beat_count) = TLUtilities.count(io.release)

  // Why a write-back buffer? (author's notes) Buffering instead of driving the bus
  // directly makes synchronisation easier: the dcache data output has no ready/valid,
  // and replaying reads whenever the bus is not ready would be more complicated.
  val wb_buffer = Reg(Vec(refillCycles, UInt(encRowBits.W)))
  val acked = RegInit(false.B)

  // Defaults; the state machine below overrides them with later connections
  io.release.valid := false.B
  io.release.bits := DontCare
  io.req.ready := false.B
  io.data_req.valid := false.B
  io.data_req.bits := DontCare
  io.resp := false.B

  // Block-aligned address of the line being written back
  val r_address = Cat(req.tag, req.idx) << blockOffBits
  // Source id used for the messages below is the constant nMSHRs
  val id = cfg.nMSHRs

  // (Author's open question: probes are answered here as well?)
  val probeResponse = TLMasterUtilities.ProbeAck(
    params = cfg.busParams,
    fromSource = id.U,
    toAddress = r_address,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = wb_buffer(data_req_cnt))

  val voluntaryRelease = TLMasterUtilities.Release(
    params = cfg.busParams,
    fromSource = id.U,
    toAddress = r_address,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = wb_buffer(data_req_cnt))._2

  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire()) {
      state := s_fill_buffer
      data_req_cnt := 0.U
      req := io.req.bits
      acked := false.B
    }
  }

  // Timing of the fill phase (author's notes):
  //   data_req_cnt:    the read request is issued
  //   r1_data_req_cnt: becomes valid the cycle after the request is issued
  //   r2_data_req_cnt: becomes valid two cycles after the request; data is captured then
  // (Author's remark: suspects the meta write here exists only to enforce ordering.)
  when (state === s_fill_buffer) {
    io.data_req.valid := data_req_cnt < refillCycles.U
    io.data_req.bits.way_en := req.way_en
    // Row address: set index followed by the beat number, scaled to bytes
    io.data_req.bits.addr := (if(refillCycles > 1)
                                Cat(req.idx, data_req_cnt(log2Up(refillCycles)-1,0))
                              else req.idx) << rowOffBits

    r1_data_req_fired := false.B
    r1_data_req_cnt := 0.U
    r2_data_req_fired := r1_data_req_fired
    r2_data_req_cnt := r1_data_req_cnt
    when (io.data_req.fire()) {
      // A fired read marks r1 for the next cycle
      r1_data_req_fired := true.B
      r1_data_req_cnt := data_req_cnt
      data_req_cnt := data_req_cnt + 1.U
    }
    when (r2_data_req_fired) {
      wb_buffer(r2_data_req_cnt) := io.data_resp
      when (r2_data_req_cnt === (refillCycles-1).U) {
        // (Author's open question: why is resp raised as soon as the last row has been read?)
        io.resp := true.B
        state := s_active
        data_req_cnt := 0.U
      }
    }
  } .elsewhen (state === s_active) {
    io.release.valid := data_req_cnt < refillCycles.U
    // (Author's remark: the two messages presumably differ only in a few fields.)
    io.release.bits := Mux(req.voluntary, voluntaryRelease, probeResponse)

    // A grant may already arrive while beats are still being sent
    // (author's open question: why is this handled here?)
    when (io.mem_grant.fire()) {
      acked := true.B
    }
    when (io.release.fire()) {
      data_req_cnt := data_req_cnt + 1.U
    }
    when ((data_req_cnt === (refillCycles-1).U) && io.release.fire()) {
      // Voluntary releases wait for the grant; probe responses finish immediately
      state := Mux(req.voluntary, s_grant, s_invalid)
    }
  } .elsewhen (state === s_grant) {
    when (io.mem_grant.fire()) {
      acked := true.B
    }
    when (acked) {
      state := s_invalid
    }
  }
}
......@@ -91,7 +91,7 @@ class LsuIO extends XSBundle with HasMEMConst {
val stin = Vec(2, Flipped(Decoupled(new StuReq)))
val out = Vec(2, Decoupled(new ExuOutput))
val redirect = Flipped(ValidIO(new Redirect))
val dcache = Flipped(new DcacheIO)
val dcache = Flipped(new LSUDMemIO)
val dtlb = Flipped(new DtlbIO)
// lsroq
// sbuffer
......@@ -381,4 +381,4 @@ class Lsu(implicit val p: XSConfig) extends XSModule with HasMEMConst with NeedI
// update store buffer according to store fill buffer
}
\ No newline at end of file
}
package xiangshan.backend.exu
import org.scalatest._
import scala.collection.mutable.{Map, Queue}
import chisel3._
import chisel3.experimental.BundleLiterals._
import chiseltest._
import xiangshan.XSModule
import xiangshan.mem.{LSUDMemIO, MemoryOpConstants}
import xiangshan.mem.cache.DCache
import bus.tilelink.NaiveTLToAXI4
import device.AXI4RAM
// Test harness: a DCache connected through a TileLink-to-AXI4 bridge to a 128 MiB AXI4 RAM.
class DCacheDut extends XSModule {
val io = IO(new Bundle() {
// LSU-side interface, driven by the test bench
val in = Flipped(new LSUDMemIO)
})
val dcache = Module(new DCache)
val mem = Module(new AXI4RAM(memByte = 128 * 1024 * 1024, useBlackBox = true))
val tlToAXI = Module(new NaiveTLToAXI4(l1BusParams))
dcache.io.lsu <> io.in
dcache.io.bus <> tlToAXI.io.in
tlToAXI.io.out <> mem.in
}
// Software-side description of one cache request; the fields mirror DCacheReq.
// `meta` is used by the test as the request id.
case class Req(
cmd: UInt,
addr: Long,
data: Long,
mask: Long,
meta: Long
)
// Software-side description of one cache response (data and the echoed meta/id).
case class Resp(
data: Long,
meta: Long
)
/** Writes a random 64-bit value to every word of the backing memory through the DCache,
  * then reads every word back and checks it against a software golden copy.
  * Requests that come back with `nack` set are re-queued and replayed.
  */
class DCacheTest extends FlatSpec with ChiselScalatestTester with Matchers {
  behavior of "DCache"

  it should "do load store correctly" in {
    test(new DCacheDut) { c =>
      val CMD_READ = MemoryOpConstants.M_XRD
      val CMD_WRITE = MemoryOpConstants.M_XWR
      val FULL_MASK = 0xff

      val BASE_ADDR = 0x80000000L
      val MEM_SIZE = 128 * 1024 * 1024

      // for now, we only support load/store of 64bit integers
      val INTEGER_SIZE = 8
      val num_integers = MEM_SIZE / INTEGER_SIZE

      // data structures
      // our golden version cache, indexed by word number (not by address)
      val mem = new Array[Long](num_integers)
      var num_retired_reqs = 0

      // at each clock, we try to issue the request bundle at the head
      val issue_queue = Queue[Array[Req]]()
      // map that store all requests, map req id to req
      // whenever you want to replay a req, you can get the req with its id
      var all_requests: Map[Long, Req] = Map()

      // is a previously driven request still waiting for req.ready?
      var req_waiting: Boolean = false

      // UInt literals must be non-negative, so reinterpret the 64-bit pattern as unsigned
      def toUnsigned(v: Long): BigInt = BigInt(v) & ((BigInt(1) << 64) - 1)

      // golden-array index of the word stored at `addr`
      def wordIndex(addr: Long): Int = ((addr - BASE_ADDR) / INTEGER_SIZE).toInt

      def init_test = {
        req_waiting = false
        num_retired_reqs = 0
        issue_queue.clear
        all_requests.clear
      }

      // drive a request onto one specific channel
      def send_req_channel(req: Req, channel: Int) = {
        val r = c.io.in.req.bits(channel)
        r.bits.cmd.poke(req.cmd)
        r.bits.addr.poke(req.addr.U)
        r.bits.data.poke(toUnsigned(req.data).U)
        r.bits.mask.poke(req.mask.U)
        r.bits.meta.poke(req.meta.U)
        r.valid.poke(true.B)
      }

      // send a bundle of reqs in the same cycle
      def send_req_bundle(reqs: Array[Req]) = {
        val channels = c.io.in.req.bits
        for (i <- 0 until channels.length) {
          if (i < reqs.length) send_req_channel(reqs(i), i)
          // channels not used by this bundle must not replay a stale request
          else channels(i).valid.poke(false.B)
        }
        c.io.in.req.valid.poke(true.B)
      }

      def send_req: Unit = {
        if (issue_queue.isEmpty) {
          // no more requests to issue: drop valid, otherwise the last poked request
          // would keep firing every cycle and be counted as retired again
          c.io.in.req.valid.poke(false.B)
        } else {
          // there are no requests waiting for handshake
          // we may send a new request during this clock
          if (!req_waiting) {
            req_waiting = true
            send_req_bundle(issue_queue.front)
          }

          // reqs can be fired
          if (c.io.in.req.ready.peek().litToBoolean) {
            req_waiting = false
            issue_queue.dequeue()
          }
        }
      }

      def handle_resp = {
        for (i <- 0 until c.io.in.resp.length) {
          val resp = c.io.in.resp(i)
          if (resp.valid.peek().litToBoolean) {
            val original_req = all_requests(resp.bits.meta.peek().litValue.longValue)
            // needs to be replayed
            if (resp.bits.nack.peek().litToBoolean) {
              issue_queue.enqueue(Array[Req](original_req))
            } else {
              num_retired_reqs += 1
              if (original_req.cmd.litValue == CMD_READ.litValue) {
                // look the expected value up by word index; the raw address does not fit an Int
                resp.bits.data.expect(toUnsigned(mem(wordIndex(original_req.addr))).U)
              }
            }
          }
        }
      }

      val r = scala.util.Random

      // ----------------------------------------
      // store test
      init_test

      // first, initialize every memory cell with random numbers
      for (i <- 0 to num_integers - 1) {
        val randomNumber = r.nextLong
        val req = Req(CMD_WRITE, BASE_ADDR + i * INTEGER_SIZE, randomNumber, FULL_MASK, i)
        issue_queue.enqueue(Array[Req](req))
        all_requests += (i.toLong -> req)
        mem(i) = randomNumber
      }

      while (num_retired_reqs < num_integers) {
        send_req
        handle_resp
        c.clock.step()
      }

      // ----------------------------------------
      // read test
      init_test

      // read out every integer and compare it with the golden copy
      for (i <- 0 to num_integers - 1) {
        val req = Req(CMD_READ, BASE_ADDR + i * INTEGER_SIZE, 0, FULL_MASK, i)
        issue_queue.enqueue(Array[Req](req))
        all_requests += (i.toLong -> req)
      }

      while (num_retired_reqs < num_integers) {
        send_req
        handle_resp
        c.clock.step()
      }
    }
  }
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册