DCache.scala 13.7 KB
Newer Older
Y
Yinan Xu 已提交
1
package xiangshan.cache
2 3 4

import chisel3._
import chisel3.util._
L
linjiawei 已提交
5
import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
6
import utils.{Code, RandomReplacement, XSDebug, SRAMTemplate, ParallelOR}
7

8
import scala.math.max
A
Allen 已提交
9

10

11 12 13
// DCache-specific parameters.
// By default the L1 DCache has 64 sets, 8 ways and 64-byte blocks, for a total of 32 KB.
// It is a virtually indexed, physically tagged (VIPT) cache.
L
linjiawei 已提交
14 15
// Configuration of the L1 data cache. With the default values the capacity is
// nSets * nWays * blockBytes = 64 * 8 * 64 bytes = 32 KB.
case class DCacheParameters(
  nSets: Int = 64,
  nWays: Int = 8,
  rowBits: Int = 64,
  nTLBEntries: Int = 32,
  tagECC: Option[String] = None,
  dataECC: Option[String] = None,
  nMissEntries: Int = 1,
  nProbeEntries: Int = 1,
  nReleaseEntries: Int = 1,
  nStoreReplayEntries: Int = 1,
  nMMIOEntries: Int = 1,
  nMMIOs: Int = 1,
  blockBytes: Int = 64
) extends L1CacheParameters {

  // code applied to the tags, looked up from the tagECC setting
  def tagCode: Code = Code.fromString(tagECC)

  // code applied to the data, looked up from the dataECC setting
  def dataCode: Code = Code.fromString(dataECC)

  // every call creates a new RandomReplacement instance sized for nWays
  def replacement = new RandomReplacement(nWays)
}

// Mixes the DCache configuration, the quantities derived from it and the
// configuration sanity checks into DCache modules and bundles.
trait HasDCacheParameters extends HasL1CacheParameters {
  val cacheParams = dcacheParameters
  val cfg = cacheParams

  // widths of one word / one row after the data code has been applied
  def encWordBits = cacheParams.dataCode.width(wordBits)
  def encRowBits = encWordBits * rowWords

  // LR/SC and probe timing
  def lrscCycles = LRSCCycles // ISA requires 16-insn LRSC sequences to succeed
  def lrscBackoff: Int = 3 // disallow LRSC reacquisition briefly
  def blockProbeAfterGrantCycles: Int = 8 // give the processor some time to issue a request after a grant

  // both are taken from the configured number of MMIOs
  def nIOMSHRs = cacheParams.nMMIOs
  def maxUncachedInFlight = cacheParams.nMMIOs

  // encoding of the request source
  def nSourceType: Int = 3
  def sourceTypeWidth: Int = log2Up(nSourceType)
  def LOAD_SOURCE: Int = 0
  def STORE_SOURCE: Int = 1
  def AMO_SOURCE: Int = 2

  // each source uses an id to distinguish its multiple requests
  def reqIdWidth: Int = 64

  // configuration sanity checks, evaluated when the trait is initialised
  require(isPow2(nSets), s"nSets($nSets) must be pow2")
  require(isPow2(nWays), s"nWays($nWays) must be pow2")
  require(full_divide(rowBits, wordBits), s"rowBits($rowBits) must be multiple of wordBits($wordBits)")
  require(full_divide(beatBits, rowBits), s"beatBits($beatBits) must be multiple of rowBits($rowBits)")
  // this is a VIPT L1 cache
  require(pgIdxBits >= untagBits, s"page aliasing problem: pgIdxBits($pgIdxBits) < untagBits($untagBits)")
  require(rowWords == 1, "Our DCache Implementation assumes rowWords == 1")
}

A
Allen 已提交
66
// Base class of the DCache hardware modules: an L1CacheModule with the DCache parameters mixed in.
abstract class DCacheModule extends L1CacheModule with HasDCacheParameters

A
Allen 已提交
69
// Base class of the DCache bundles: an L1CacheBundle with the DCache parameters mixed in.
abstract class DCacheBundle extends L1CacheBundle with HasDCacheParameters

// basic building blocks for L1 DCache
// Metadata kept for one cache line: a coherence state and a tag.
// The declaration order of the fields is part of the bundle layout; do not reorder.
class L1Metadata extends DCacheBundle {
  // coherence state of the line
  val coh = new ClientMetadata
  // tag of the line, tagBits wide
  val tag = UInt(tagBits.W)
}

// Companion factory for L1Metadata.
object L1Metadata {
  // Creates a new L1Metadata wire, drives it from the given tag and
  // coherence state and returns the wire.
  def apply(tag: Bits, coh: ClientMetadata) = {
    val meta = Wire(new L1Metadata)
    meta.coh := coh
    meta.tag := tag
    meta
  }
}

// Read request to the metadata array.
class L1MetaReadReq extends DCacheBundle {
  // set index to access, idxBits wide
  val idx    = UInt(idxBits.W)
  // way-enable mask, one bit per way
  val way_en = UInt(nWays.W)
  // tag carried along with the request; in the code visible here it is only
  // printed by the dump helpers
  val tag    = UInt(tagBits.W)
}

// Write request to the metadata array: the read-request fields (idx, way_en, tag)
// plus the metadata to store.
class L1MetaWriteReq extends L1MetaReadReq {
  // new metadata (tag and coherence state) written to the ways selected by way_en
  val data = new L1Metadata
}

// Read request to the data array.
class L1DataReadReq extends DCacheBundle {
  // row read mask, one bit per row of a block; reading only the rows that
  // are needed saves power
  val rmask = Bits(blockRows.W)
  // way-enable mask, one bit per way
  val way_en = Bits(nWays.W)
  // untagged part of the address, untagBits wide
  val addr = Bits(untagBits.W)
}

A
Allen 已提交
104
// Now, we can write a cache-block in a single cycle
105
// Write request to the data array. It carries data and a write mask for every
// row of the block, so a whole cache block can be written with one request.
class L1DataWriteReq extends L1DataReadReq {
  // per row: one write-enable bit per word of the row
  val wmask = Vec(blockRows, Bits(rowWords.W))
  // per row: the encoded row data, encRowBits wide
  val data = Vec(blockRows, Bits(encRowBits.W))
}
109

110 111
// Interface and debug-print helpers shared by data arrays whose response
// contains the rows of every way.
abstract class AbstractDataArray extends DCacheModule {
  val io = IO(new DCacheBundle {
    // one read port per load pipeline
    val read  = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1DataReadReq)))
    // single write port
    val write = Flipped(DecoupledIO(new L1DataWriteReq))
    // read data, indexed as resp(read port)(way)(row)
    val resp  = Output(Vec(LoadPipelineWidth, Vec(nWays, Vec(blockRows, Bits(encRowBits.W)))))
    // one nack flag per read port
    val nacks = Output(Vec(LoadPipelineWidth, Bool()))
  })

  // Builds a Vec with one element per load pipeline; element i is f(i).
  def pipeMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(LoadPipelineWidth)(f))

  // Prints way_en and addr of every read port that has a valid request.
  def dumpRead() = {
    for (w <- 0 until LoadPipelineWidth) yield {
      when (io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
  }

  // Prints the write request (way_en, addr, then data and mask of every row) while it is valid.
  def dumpWrite() = {
    when (io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      for (r <- 0 until blockRows) {
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  // Prints every row of every way of every read port's response.
  def dumpResp() = {
    for (w <- 0 until LoadPipelineWidth) yield {
      XSDebug(s"DataArray ReadResp channel: $w\n")
      for (i <- 0 until nWays) yield {
        for (r <- 0 until blockRows) yield {
          XSDebug(s"way: $i cycle: $r data: %x\n", io.resp(w)(i)(r))
        }
      }
    }
  }

  // Prints a line for every read port whose nack flag is set.
  def dumpNack() = {
    for (w <- 0 until LoadPipelineWidth) yield {
      when (io.nacks(w)) {
        XSDebug(s"DataArray NACK channel: $w\n")
      }
    }
  }

  // Emits all debug prints: reads, write, nacks, then responses.
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpNack()
    dumpResp()
  }
}

168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
// Interface and debug-print helpers shared by data arrays whose response
// contains the rows of a single block per read port (no way dimension).
abstract class TransposeAbstractDataArray extends DCacheModule {
  val io = IO(new DCacheBundle {
    // one read port per load pipeline
    val read  = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1DataReadReq)))
    // single write port
    val write = Flipped(DecoupledIO(new L1DataWriteReq))
    // read data, indexed as resp(read port)(row)
    val resp = Output(Vec(LoadPipelineWidth, Vec(blockRows, Bits(encRowBits.W))))
    // one nack flag per read port
    val nacks = Output(Vec(LoadPipelineWidth, Bool()))
  })

  // Builds a Vec with one element per load pipeline; element i is f(i).
  def pipeMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(LoadPipelineWidth)(f))

  // Prints way_en and addr of every read port that has a valid request.
  def dumpRead() = {
    for (w <- 0 until LoadPipelineWidth) yield {
      when (io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
  }

  // Prints the write request (way_en, addr, then data and mask of every row) while it is valid.
  def dumpWrite() = {
    when (io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      for (r <- 0 until blockRows) {
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  // Prints every row of every read port's response.
  def dumpResp() = {
    for (w <- 0 until LoadPipelineWidth) yield {
      XSDebug(s"DataArray ReadResp channel: $w\n")
      for (r <- 0 until blockRows) yield {
        XSDebug(s"cycle: $r data: %x\n", io.resp(w)(r))
      }
    }
  }

  // Prints a line for every read port whose nack flag is set.
  def dumpNack() = {
    for (w <- 0 until LoadPipelineWidth) yield {
      when (io.nacks(w)) {
        XSDebug(s"DataArray NACK channel: $w\n")
      }
    }
  }

  // Emits all debug prints: reads, write, nacks, then responses.
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpNack()
    dumpResp()
  }
}

224 225
// Data array that keeps a private copy of the storage for every read port:
// one SRAM per (read port, way, row, word). A write goes to all copies, and
// the response returns the selected rows of every enabled way.
class DuplicatedDataArray extends AbstractDataArray {
  val singlePort = true

  // write is always ready
  io.write.ready := true.B

  // set index of the write: the address without its block-offset bits
  val waddr = (io.write.bits.addr >> blockOffBits).asUInt

  for (j <- 0 until LoadPipelineWidth) {
    // set index of this port's read
    val raddr = (io.read(j).bits.addr >> blockOffBits).asUInt

    // for single port SRAM, do not allow read and write in the same cycle
    // for dual port SRAM, raddr === waddr is undefined behavior
    val rwhazard =
      if (singlePort) io.write.valid
      else io.write.valid && waddr === raddr
    io.read(j).ready := !rwhazard

    // ways outermost, rows inside, exactly as two nested loops would iterate
    for (w <- 0 until nWays; r <- 0 until blockRows) {
      val resp = Seq.fill(rowWords)(Wire(Bits(encWordBits.W)))
      // word 0 ends up in the least significant bits of the row
      io.resp(j)(w)(r) := Cat(resp.reverse)

      for (k <- 0 until rowWords) {
        val array = Module(new SRAMTemplate(
          Bits(encWordBits.W),
          set = nSets,
          way = 1,
          shouldReset = false,
          holdRead = false,
          singlePort = singlePort
        ))

        // data write: way, row and word must all be selected
        val wen = io.write.valid && io.write.bits.way_en(w) && io.write.bits.wmask(r)(k)
        array.io.w.req.valid := wen
        array.io.w.req.bits.apply(
          setIdx = waddr,
          data = io.write.bits.data(r)(encWordBits * (k + 1) - 1, encWordBits * k),
          waymask = 1.U
        )

        // data read: only the enabled ways and the requested rows are read
        val ren = io.read(j).valid && io.read(j).bits.way_en(w) && io.read(j).bits.rmask(r)
        array.io.r.req.valid := ren
        array.io.r.req.bits.apply(setIdx = raddr)
        resp(k) := array.io.r.resp.data(0)
      }
    }

    // this implementation never nacks a read
    io.nacks(j) := false.B
  }
}

273 274
// Data array that keeps a private copy of the storage for every read port:
// one SRAM per (read port, row, word, way). A read accesses all ways and the
// way is selected afterwards with the registered way_en, so the response has
// no way dimension.
//
// Arbitration between the read ports and the write port is chosen at
// elaboration time:
//   - readHighPriority = true : reads are always accepted; the write is
//     stalled while it conflicts with a read.
//   - readHighPriority = false: the write is always accepted; a read is
//     stalled while it conflicts with the write.
// "Conflict" means any simultaneous access for single-port SRAMs, and an
// access to the same set otherwise.
class TransposeDuplicatedDataArray extends TransposeAbstractDataArray {
  val singlePort = true
  val readHighPriority = true

  // set indices: the addresses without their block-offset bits
  val waddr = (io.write.bits.addr >> blockOffBits).asUInt()
  val raddrs = io.read.map(r => (r.bits.addr >> blockOffBits).asUInt)

  io.write.ready := (if (readHighPriority) {
    if (singlePort) {
      // any valid read blocks the write
      !VecInit(io.read.map(_.valid)).asUInt.orR
    } else {
      // only reads of the set being written block the write
      !(Cat(io.read.zipWithIndex.map { case (r, i) => r.valid && raddrs(i) === waddr }).orR)
    }
  } else {
    true.B
  })

  for (j <- 0 until LoadPipelineWidth) {
    val raddr = raddrs(j)

    // for single port SRAM, do not allow read and write in the same cycle
    // for dual port SRAM, raddr === waddr is undefined behavior
    val rwhazard = if(singlePort) io.write.valid else io.write.valid && waddr === raddr
    io.read(j).ready := (if (readHighPriority) true.B else !rwhazard)

    // use way_en to select a way after data read out; the selection below is
    // one-hot, so a read must not enable more than one way
    assert(!(RegNext(io.read(j).fire() && PopCount(io.read(j).bits.way_en) > 1.U)))
    // way_en is delayed by one cycle before it selects the read data
    // (presumably to line up with the SRAM read latency -- confirm against SRAMTemplate)
    val way_en = RegNext(io.read(j).bits.way_en)

    for (r <- 0 until blockRows) {
      val resp = Wire(Vec(rowWords, Vec(nWays, Bits(encWordBits.W))))
      val resp_chosen = Wire(Vec(rowWords, Bits(encWordBits.W)))

      for (k <- 0 until rowWords) {
        for (w <- 0 until nWays) {
          val array = Module(new SRAMTemplate(
            Bits(encWordBits.W),
            set = nSets,
            way = 1,
            shouldReset = false,
            holdRead = false,
            singlePort = singlePort
          ))

          // data write
          // The strobe is qualified with the handshake (fire), not just valid:
          // when reads have priority a stalled write must not be presented to
          // the SRAM in the cycle in which the read was accepted. The write is
          // performed once, in the cycle it is actually accepted.
          val wen = io.write.fire() && io.write.bits.way_en(w) && io.write.bits.wmask(r)(k)
          array.io.w.req.valid := wen
          array.io.w.req.bits.apply(
            setIdx = waddr,
            data = io.write.bits.data(r)(encWordBits*(k+1)-1, encWordBits*k),
            waymask = 1.U
          )

          // data read
          // read all ways and choose one after resp
          val ren = io.read(j).valid/* && io.read(j).bits.way_en(w)*/ && io.read(j).bits.rmask(r)
          array.io.r.req.valid := ren
          array.io.r.req.bits.apply(setIdx = raddr)
          resp(k)(w) := array.io.r.resp.data(0)
        }
        resp_chosen(k) := Mux1H(way_en, resp(k))
      }
      // Cat places its first argument in the most significant bits, so the
      // words are reversed to put word 0 in the least significant bits. This
      // matches the slicing of the write data above; with rowWords == 1 the
      // result is unchanged.
      io.resp(j)(r) := Cat(resp_chosen.reverse)
    }

    // this implementation never nacks a read
    io.nacks(j) := false.B
  }
}

339
// Metadata (tag + coherence state) array with one read port and one write
// port on top of a single-port SRAM.
//
// onReset supplies the metadata value every entry is initialised with: after
// reset the module writes that value to all ways of one set per cycle until
// rst_cnt has walked over all nSets sets. While this sweep is running neither
// reads nor writes are accepted. Afterwards a read has priority over a write
// in the same cycle.
//
// Metadata is stored encoded with cacheParams.tagCode; the response is the
// decoded, corrected value of every way of the set that was read.
class L1MetadataArray(onReset: () => L1Metadata) extends DCacheModule {
  val rstVal = onReset()
  val io = IO(new Bundle {
    val read = Flipped(Decoupled(new L1MetaReadReq))
    val write = Flipped(Decoupled(new L1MetaWriteReq))
    val resp = Output(Vec(nWays, new L1Metadata))
  })

  // reset sweep: rst_cnt counts 0 .. nSets and rst is high while it is below nSets
  val rst_cnt = RegInit(0.U(log2Up(nSets+1).W))
  val rst = rst_cnt < nSets.U
  // during the sweep the write port is driven with (rst_cnt, rstVal, all ways)
  val waddr = Mux(rst, rst_cnt, io.write.bits.idx)
  val wdata = Mux(rst, rstVal, io.write.bits.data).asUInt
  val wmask = Mux(rst || (nWays == 1).B, (-1).asSInt, io.write.bits.way_en.asSInt).asBools
  // NOTE(review): rmask is not consumed anywhere in this module
  val rmask = Mux(rst || (nWays == 1).B, (-1).asSInt, io.read.bits.way_en.asSInt).asBools
  when (rst) { rst_cnt := rst_cnt + 1.U }

  val metaBits = rstVal.getWidth
  val encMetaBits = cacheParams.tagCode.width(metaBits)

  val tag_array = Module(new SRAMTemplate(UInt(encMetaBits.W), set=nSets, way=nWays,
    shouldReset=false, holdRead=false, singlePort=true))

  // tag write
  val wen = rst || io.write.fire()
  tag_array.io.w.req.valid := wen
  tag_array.io.w.req.bits.apply(
    setIdx=waddr,
    data=cacheParams.tagCode.encode(wdata),
    waymask=VecInit(wmask).asUInt)

  // tag read
  val ren = io.read.fire()
  tag_array.io.r.req.valid := ren
  tag_array.io.r.req.bits.apply(setIdx=io.read.bits.idx)
  io.resp := tag_array.io.r.resp.data.map(rdata =>
      cacheParams.tagCode.decode(rdata).corrected.asTypeOf(rstVal))

  // A write is accepted only when the reset sweep is over and no read fires in
  // this cycle. The !rst term matters: during the sweep the write port is
  // driven with the reset values (see waddr/wdata/wmask), so a request
  // acknowledged then would be dropped without being stored.
  io.write.ready := !rst && !ren
  io.read.ready := !rst

  // Prints the read request in the cycle it is accepted.
  def dumpRead() = {
    when (io.read.fire()) {
      XSDebug("MetaArray Read: idx: %d way_en: %x tag: %x\n",
        io.read.bits.idx, io.read.bits.way_en, io.read.bits.tag)
    }
  }

  // Prints the write request in the cycle it is accepted.
  def dumpWrite() = {
    when (io.write.fire()) {
      XSDebug("MetaArray Write: idx: %d way_en: %x tag: %x new_tag: %x new_coh: %x\n",
        io.write.bits.idx, io.write.bits.way_en, io.write.bits.tag, io.write.bits.data.tag, io.write.bits.data.coh.state)
    }
  }

  // Prints tag and coherence state of every way of the response.
  def dumpResp() = {
    (0 until nWays) map { i =>
      XSDebug(s"MetaArray Resp: way: $i tag: %x coh: %x\n",
        io.resp(i).tag, io.resp(i).coh.state)
    }
  }

  // Emits all debug prints: read, write, then response.
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}

// Metadata array duplicated once per load pipeline: every pipeline reads its
// own L1MetadataArray copy, while a write is broadcast to all copies so that
// they always hold the same contents.
class DuplicatedMetaArray extends DCacheModule {
  val io = IO(new DCacheBundle {
    val read  = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1MetaReadReq)))
    val write = Flipped(DecoupledIO(new L1MetaWriteReq))
    // read data, indexed as resp(read port)(way)
    val resp  = Output(Vec(LoadPipelineWidth, Vec(nWays, new L1Metadata)))
  })

  // value every metadata entry is initialised with
  def onReset = L1Metadata(0.U, ClientMetadata.onReset)
  val meta = Seq.fill(LoadPipelineWidth) { Module(new L1MetadataArray(onReset _)) }

  // A write must be taken by every copy in the same cycle or by none of them;
  // otherwise a copy that is busy with a read would miss the update and the
  // copies would diverge. So the write is ready only when all copies are
  // ready, and each copy sees the request as valid only in that case.
  // (L1MetadataArray derives write.ready from its read port and reset state
  // only, not from write.valid, so this gating creates no combinational loop.)
  val allWriteReady = meta.map(_.io.write.ready).reduce(_ && _)
  io.write.ready := allWriteReady

  for (w <- 0 until LoadPipelineWidth) {
    meta(w).io.write.valid := io.write.valid && allWriteReady
    meta(w).io.write.bits  := io.write.bits
    meta(w).io.read  <> io.read(w)
    io.resp(w) <> meta(w).io.resp
  }

  // Prints every read request in the cycle it is accepted.
  def dumpRead() = {
    (0 until LoadPipelineWidth) map { w =>
      when (io.read(w).fire()) {
        XSDebug(s"MetaArray Read channel: $w idx: %d way_en: %x tag: %x\n",
          io.read(w).bits.idx, io.read(w).bits.way_en, io.read(w).bits.tag)
      }
    }
  }

  // Prints the write request in the cycle it is accepted.
  def dumpWrite() = {
    when (io.write.fire()) {
      XSDebug("MetaArray Write: idx: %d way_en: %x tag: %x new_tag: %x new_coh: %x\n",
        io.write.bits.idx, io.write.bits.way_en, io.write.bits.tag, io.write.bits.data.tag, io.write.bits.data.coh.state)
    }
  }

  // Prints tag and coherence state of every way of every read port's response.
  def dumpResp() = {
    (0 until LoadPipelineWidth) map { w =>
      (0 until nWays) map { i =>
        XSDebug(s"MetaArray Resp: channel: $w way: $i tag: %x coh: %x\n",
          io.resp(w)(i).tag, io.resp(w)(i).coh.state)
      }
    }
  }

  // Emits all debug prints: reads, write, then responses.
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}