Skip to content
6 changes: 3 additions & 3 deletions src/main/scala/huancun/Common.scala
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class SourceDReq(implicit p: Parameters) extends InnerTask with HasChannelBits {
val sinkId = UInt(mshrBits.W)
val bypassPut = Bool()
val dirty = Bool()
val isHit = Bool()
val hitLevelL3toL2 = UInt(2.W)
}

class SourceAReq(implicit p: Parameters) extends HuanCunBundle {
Expand Down Expand Up @@ -116,7 +116,6 @@ class SinkDResp(implicit p: Parameters) extends HuanCunBundle {
val last = Bool() // last beat
val denied = Bool()
val dirty = Bool()
// val isHit = Bool()
val bufIdx = UInt(bufIdxBits.W)
}
class SinkEResp(implicit p: Parameters) extends HuanCunBundle {
Expand Down Expand Up @@ -155,11 +154,12 @@ class MSHRRequest(implicit p: Parameters) extends HuanCunBundle with HasChannelB
val alias = aliasBitsOpt.map(_ => UInt(aliasBitsOpt.get.W))
val preferCache = Bool()
val dirty = Bool()
val isHit = Bool()
val fromProbeHelper = Bool()
val fromCmoHelper = Bool()
val needProbeAckData = if (cacheParams.inclusive) None else Some(Bool())
val reqSource = UInt(MemReqSource.reqSourceBits.W)
val hitLevelL3toL2 = UInt(2.W)

}

class MSHRStatus(implicit p: Parameters) extends HuanCunBundle with HasChannelBits {
Expand Down
15 changes: 9 additions & 6 deletions src/main/scala/huancun/HCCacheParameters.scala
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,14 @@ case object PreferCacheKey extends ControlKey[Bool](name = "preferCache")

case class PreferCacheField() extends BundleField[Bool](PreferCacheKey, Output(Bool()), _ := false.B)

// Indicates whether this block is granted from L3 or not (only used when granting data to L2).
// Currently it only works for the non-inclusive cache (ignored in the inclusive cache).
case object IsHitKey extends ControlKey[Bool](name = "isHitInL3")

case class IsHitField() extends BundleField[Bool](IsHitKey, Output(Bool()), _ := true.B)
// Indicates where the granted block came from (only used when handling Grant/GrantData).
// Currently it only works for the non-inclusive cache (ignored in the inclusive cache).
// Encoding:
//   0: hit in memory, or default
//   1: hit in L3
//   2: hit in another core
//   3: hit in cork
case object HitLevelKey extends ControlKey[UInt](name = "hitlevel")
// User field carrying the hit level; `width` is the field width in bits and the default value is 0.
case class HitLevelField(width: Int) extends BundleField[UInt](HitLevelKey, Output(UInt(width.W)), _ := 0.U(width.W))

// Indicates whether this block is dirty or not (only used when handling Release/ReleaseData).
// Currently it only works for the non-inclusive cache (ignored in the inclusive cache).
Expand Down Expand Up @@ -108,7 +111,7 @@ case class HCCacheParameters
reqField: Seq[BundleFieldBase] = Nil, // master
respKey: Seq[BundleKeyBase] = Nil,
reqKey: Seq[BundleKeyBase] = Seq(PrefetchKey, PreferCacheKey, AliasKey, ReqSourceKey), // slave
respField: Seq[BundleFieldBase] = Nil,
respField: Seq[BundleFieldBase] = Seq(HitLevelField(2)),
ctrl: Option[CacheCtrl] = None,
sramClkDivBy2: Boolean = false,
sramDepthDiv: Int = 1,
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/SinkA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class SinkA(implicit p: Parameters) extends HuanCunModule {
allocInfo.preferCache := Mux((a.bits.opcode === TLMessages.Get || a.bits.opcode(2,1) === 0.U), true.B, a.bits.user.lift(PreferCacheKey).getOrElse(true.B))
}
allocInfo.dirty := false.B // ignored
allocInfo.isHit := true.B // ignored
allocInfo.hitLevelL3toL2 := 0.U // ignored
allocInfo.fromProbeHelper := false.B
allocInfo.fromCmoHelper := false.B
allocInfo.needProbeAckData.foreach(_ := false.B)
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/SinkB.scala
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class SinkB(edge: TLEdgeOut)(implicit p: Parameters) extends HuanCunModule {
io.alloc.bits.alias.foreach(_ := 0.U)
io.alloc.bits.preferCache := true.B
io.alloc.bits.dirty := false.B // ignored
io.alloc.bits.isHit := true.B // ignored
io.alloc.bits.hitLevelL3toL2 := 0.U // ignored
io.alloc.bits.fromProbeHelper := false.B
io.alloc.bits.fromCmoHelper := false.B
io.alloc.bits.needProbeAckData.foreach(_ := io.b.bits.data(0))
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/Slice.scala
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ class Slice()(implicit p: Parameters) extends HuanCunModule {
mshrReq.bits.fromCmoHelper := false.B
mshrReq.bits.bufIdx := DontCare
mshrReq.bits.dirty := false.B
mshrReq.bits.isHit := true.B
mshrReq.bits.hitLevelL3toL2 := 0.U
mshrReq.bits.needProbeAckData.foreach(_ := false.B)
mshrReq.bits.reqSource := pftReq.bits.pfSource
pftReq.ready := mshrReq.ready
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/SourceC.scala
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class SourceC(edge: TLEdgeOut)(implicit p: Parameters) extends HuanCunModule {
queue.io.enq.bits.user.lift(PreferCacheKey).foreach(_ := true.B)
queue.io.enq.bits.user.lift(ReqSourceKey).foreach(_ := MemReqSource.NoWhere.id.U)
queue.io.enq.bits.echo.lift(DirtyKey).foreach(_ := pipeOut.bits.task.dirty)
queue.io.enq.bits.user.lift(IsHitKey).foreach(_ := true.B)
queue.io.enq.bits.user.lift(HitLevelKey).foreach(_ := 0.U)

io.c <> queue.io.deq
}
4 changes: 2 additions & 2 deletions src/main/scala/huancun/SourceD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
s2_d.bits.data := s1_queue.io.deq.bits.data
s2_d.bits.corrupt := s2_d.bits.denied
s2_d.bits.echo.lift(DirtyKey).foreach(_ := s2_req.dirty)
s2_d.bits.user.lift(IsHitKey).foreach(_ := s2_req.isHit)
s2_d.bits.user.lift(HitLevelKey).foreach(_ := s2_req.hitLevelL3toL2)
dontTouch(s2_d.bits.user)

val s2_can_go = Mux(s2_d.valid, s2_d.ready, s3_ready && (!s2_valid_pb || pb_ready))
Expand Down Expand Up @@ -225,7 +225,7 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
s3_d.bits.corrupt := s3_req.denied ||
(s3_req.opcode =/= TLMessages.AccessAck && s3_req.opcode =/= TLMessages.Grant && s3_queue.io.deq.bits.corrupt)
s3_d.bits.echo.lift(DirtyKey).foreach(_ := s3_req.dirty)
s3_d.bits.user.lift(IsHitKey).foreach(_ := s3_req.isHit)
s3_d.bits.user.lift(HitLevelKey).foreach(_ := s3_req.hitLevelL3toL2)
dontTouch(s3_d.bits.user)

s3_queue.io.enq.valid := RegNextN(
Expand Down
32 changes: 16 additions & 16 deletions src/main/scala/huancun/noninclusive/Directory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -317,33 +317,33 @@ class Directory(implicit p: Parameters)
assert(dirReadPorts == 1)
val req_r = RegEnable(req.bits, req.fire)
XSPerfAccumulate(cacheParams, "selfdir_A_req", req_r.replacerInfo.channel(0) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_A_hit", req_r.replacerInfo.channel(0) && resp.valid && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_A_hit", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_B_req", req_r.replacerInfo.channel(1) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_B_hit", req_r.replacerInfo.channel(1) && resp.valid && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_B_hit", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_C_req", req_r.replacerInfo.channel(2) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_C_hit", req_r.replacerInfo.channel(2) && resp.valid && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_C_hit", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit)

XSPerfAccumulate(cacheParams, "selfdir_dirty", resp.valid && resp.bits.self.dirty)
XSPerfAccumulate(cacheParams, "selfdir_TIP", resp.valid && resp.bits.self.state === TIP)
XSPerfAccumulate(cacheParams, "selfdir_BRANCH", resp.valid && resp.bits.self.state === BRANCH)
XSPerfAccumulate(cacheParams, "selfdir_TRUNK", resp.valid && resp.bits.self.state === TRUNK)
XSPerfAccumulate(cacheParams, "selfdir_INVALID", resp.valid && resp.bits.self.state === INVALID)
XSPerfAccumulate(cacheParams, "selfdir_dirty", RegNext(resp.valid) && resp.bits.self.dirty)
XSPerfAccumulate(cacheParams, "selfdir_TIP", RegNext(resp.valid) && resp.bits.self.state === TIP)
XSPerfAccumulate(cacheParams, "selfdir_BRANCH", RegNext(resp.valid) && resp.bits.self.state === BRANCH)
XSPerfAccumulate(cacheParams, "selfdir_TRUNK", RegNext(resp.valid) && resp.bits.self.state === TRUNK)
XSPerfAccumulate(cacheParams, "selfdir_INVALID", RegNext(resp.valid) && resp.bits.self.state === INVALID)
//val perfinfo = IO(new Bundle(){
// val perfEvents = Output(new PerfEventsBundle(numPCntHcDir))
//})
val perfinfo = IO(Output(Vec(numPCntHcDir, (UInt(6.W)))))
val perfEvents = Seq(
("selfdir_A_req ", req_r.replacerInfo.channel(0) && resp.valid ),
("selfdir_A_hit ", req_r.replacerInfo.channel(0) && resp.valid && resp.bits.self.hit),
("selfdir_A_hit ", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit),
("selfdir_B_req ", req_r.replacerInfo.channel(1) && resp.valid ),
("selfdir_B_hit ", req_r.replacerInfo.channel(1) && resp.valid && resp.bits.self.hit),
("selfdir_B_hit ", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit),
("selfdir_C_req ", req_r.replacerInfo.channel(2) && resp.valid ),
("selfdir_C_hit ", req_r.replacerInfo.channel(2) && resp.valid && resp.bits.self.hit),
("selfdir_dirty ", resp.valid && resp.bits.self.dirty ),
("selfdir_TIP ", resp.valid && resp.bits.self.state === TIP ),
("selfdir_BRANCH ", resp.valid && resp.bits.self.state === BRANCH ),
("selfdir_TRUNK ", resp.valid && resp.bits.self.state === TRUNK ),
("selfdir_INVALID ", resp.valid && resp.bits.self.state === INVALID ),
("selfdir_C_hit ", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit),
("selfdir_dirty ", RegNext(resp.valid) && resp.bits.self.dirty ),
("selfdir_TIP ", RegNext(resp.valid) && resp.bits.self.state === TIP ),
("selfdir_BRANCH ", RegNext(resp.valid) && resp.bits.self.state === BRANCH ),
("selfdir_TRUNK ", RegNext(resp.valid) && resp.bits.self.state === TRUNK ),
("selfdir_INVALID ", RegNext(resp.valid) && resp.bits.self.state === INVALID ),
)

for (((perf_out,(perf_name,perf)),i) <- perfinfo.zip(perfEvents).zipWithIndex) {
Expand Down
43 changes: 42 additions & 1 deletion src/main/scala/huancun/noninclusive/MSHR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,16 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
val w_sinkcack = RegInit(true.B)

val acquire_flag = RegInit(false.B)
// Held copy of the "acquire was needed" part of s_acquire:
// records that an acquire is sent to memory while serving an acquire from L2
// (set when `!s_acquire && req_acquire`), and is held until the Directory is read next time.
val s_acquire_hold_for_acquire = RegInit(false.B)
// Held copy of the "probe was needed" part of s_probe:
// records that a probe is sent to another L2 while serving an acquire from L2
// (set when `!s_probe && req_acquire`), and is held until the Directory is read next time.
val s_probe_hold_for_acquire = RegInit(false.B)

// val sourceD_valid_hold = RegInit(false.B)

def reset_all_flags(): Unit = {
// Default value
Expand Down Expand Up @@ -595,10 +605,16 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
nested_c_hit_reg := false.B
gotDirty := false.B
acquire_flag := false.B
s_acquire_hold_for_acquire := false.B
s_probe_hold_for_acquire := false.B
// sourceD_valid_hold := false.B
a_do_release := false.B
a_do_probe := false.B
}
// Sticky flags: each one latches high the first time its condition is seen and
// keeps that value until it is explicitly driven low again
// (presumably by the `:= false.B` defaults in reset_all_flags() — confirm).
when(!s_acquire) { acquire_flag := true.B }

// Same latching, but only while the request being served is an acquire.
when(req_acquire && !s_acquire) { s_acquire_hold_for_acquire := true.B }
when(req_acquire && !s_probe) { s_probe_hold_for_acquire := true.B }

def x_schedule(): Unit = { // TODO
// Do probe to maintain coherence
Expand Down Expand Up @@ -960,6 +976,10 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
io.tasks.prefetch_train.foreach(_.valid := !s_triggerprefetch.get)
io.tasks.prefetch_resp.foreach(_.valid := !s_prefetchack.get && w_grantfirst)

// when(io.tasks.source_d.valid){
// sourceD_valid_hold := sourceD_valid_hold | true.B
// }

val oa = io.tasks.source_a.bits
val ob = io.tasks.source_b.bits
val oc = io.tasks.source_c.bits
Expand Down Expand Up @@ -990,6 +1010,9 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
// full overwrite, we can always acquire perm, no need to acquire block
val acquire_perm_NtoT = req.opcode === AcquirePerm && req.param === NtoT

// will hit in cork if the acquire from L2 needs to acquire downwards from L3 and the state transfer is BtoT
val acquire_BtoT = req.fromA && req_acquire && req.param === BtoT

val acquire_opcode = if (cacheParams.name == "L2") {
Mux(req.opcode === AcquirePerm && req.param === BtoT, AcquirePerm, Mux(req.opcode === Hint, AcquireBlock, req.opcode))
} else {
Expand Down Expand Up @@ -1174,10 +1197,28 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S
),
false.B
)
od.isHit := self_meta.hit
od.bufIdx := req.bufIdx
od.bypassPut := bypassPut_latch

// hitLevelL3toL2 is only meaningful while sourceD_valid is high.
// Encoding: 0 = memory (also the default), 1 = L3, 2 = another core, 3 = cork.
val hitFromCork = req.fromA && s_acquire_hold_for_acquire && acquire_BtoT
val hitFromMem = req.fromA && s_acquire_hold_for_acquire && !hitFromCork
val hitFromAnotherCore = req.fromA && !s_acquire_hold_for_acquire && s_probe_hold_for_acquire
val hitFromL3 = req.fromA && req_acquire && !s_acquire_hold_for_acquire && !s_probe_hold_for_acquire && self_meta.hit

// Priority select, first match wins; anything unmatched falls back to 0.
od.hitLevelL3toL2 := Mux(hitFromCork, 3.U,
  Mux(hitFromMem, 0.U,
    Mux(hitFromAnotherCore, 2.U,
      Mux(hitFromL3, 1.U, 0.U))))


oe.sink := sink

ia.sourceId := req.source
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/noninclusive/ProbeHelper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class ProbeHelper(entries: Int = 5, enqDelay: Int = 1)(implicit p: Parameters)
req.alias.foreach(_ := 0.U)
req.preferCache := true.B
req.dirty := false.B // ignored
req.isHit := true.B // ignored
req.hitLevelL3toL2 := 0.U // ignored
req.needProbeAckData.foreach(_ := false.B)
req.fromCmoHelper := false.B
req.reqSource := MemReqSource.NoWhere.id.U
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/noninclusive/SinkC.scala
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class SinkC(implicit p: Parameters) extends BaseSinkC {
io.alloc.bits.isBop.foreach(_ := false.B)
io.alloc.bits.alias.foreach(_ := 0.U)
io.alloc.bits.preferCache := true.B
io.alloc.bits.isHit := true.B
io.alloc.bits.hitLevelL3toL2 := 0.U
io.alloc.bits.dirty := c.bits.echo.lift(DirtyKey).getOrElse(true.B)
io.alloc.bits.fromProbeHelper := false.B
io.alloc.bits.fromCmoHelper := false.B
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/noninclusive/SliceCtrl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ class SliceCtrl()(implicit p: Parameters) extends HuanCunModule {
io.cmo_req.bits.alias.foreach(_ := false.B)
io.cmo_req.bits.preferCache := false.B
io.cmo_req.bits.dirty := false.B
io.cmo_req.bits.isHit := true.B
io.cmo_req.bits.hitLevelL3toL2 := 0.U
io.cmo_req.bits.fromProbeHelper := false.B
io.cmo_req.bits.fromCmoHelper := true.B
io.cmo_req.bits.needProbeAckData.foreach(_ := false.B)
Expand Down
1 change: 1 addition & 0 deletions src/test/scala/huancun/FakeClient.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ abstract class BaseFakeClient(name: String, nBanks: Int, probe: Boolean = true)(
echoFields = cacheParams.echoField,
requestFields = Seq(PrefetchField(), PreferCacheField(), DirtyField()),
responseKeys = cacheParams.respKey
// responseKeys = Seq(HitLevelKey)
)
})
}
Expand Down
8 changes: 4 additions & 4 deletions src/test/scala/huancun/TestTop.scala
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ class TestTop_L2L3()(implicit p: Parameters) extends LazyModule {
minLatency = 1,
echoFields = Seq(DirtyField()),
requestFields = Seq(PrefetchField(), PreferCacheField(), DirtyField(), AliasField(2)),
responseKeys = Seq(IsHitKey)
responseKeys = Seq(HitLevelKey)
)
))
masterNode
Expand All @@ -227,7 +227,7 @@ class TestTop_L2L3()(implicit p: Parameters) extends LazyModule {
prefetch = Some(huancun.prefetch.BOPParameters()),
reqField = Seq(PreferCacheField()),
echoField = Seq(DirtyField()),
respKey = Seq(IsHitKey)
respKey = Seq(HitLevelKey)
)
}))).node)

Expand All @@ -238,7 +238,7 @@ class TestTop_L2L3()(implicit p: Parameters) extends LazyModule {
inclusive = false,
clientCaches = Seq(CacheParameters(sets = 32, ways = 8, blockGranularity = 5, name = "L3")),
echoField = Seq(DirtyField()),
respField = Seq(IsHitField()),
respField = Seq(HitLevelField(2)),
simulation = true
)
})))
Expand Down Expand Up @@ -346,7 +346,7 @@ class TestTop_FullSys()(implicit p: Parameters) extends LazyModule {
clientCaches = Seq(CacheParameters("dcache", sets = 32, ways = 8, blockGranularity = 5)),
reqField = Seq(PreferCacheField()),
echoField = Seq(DirtyField()),
respKey = Seq(IsHitKey),
respKey = Seq(HitLevelKey),
prefetch = Some(huancun.prefetch.BOPParameters()),
sramDepthDiv = 2,
simulation = true
Expand Down