Skip to content

Commit 8a511bb

Browse files
author
Ruohan Yan
committed
Merge branch 'main' of https://github.com/ucb-bar/radiance
2 parents 6cb8a9c + f208541 commit 8a511bb

File tree

21 files changed

+1043
-139
lines changed

21 files changed

+1043
-139
lines changed

.github/workflows/ci.yml

Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,15 @@ jobs:
152152
source env.sh
153153
source /ecad/tools/vlsi.bashrc
154154
cd generators/radiance
155-
python3 test/run_isa_tests.py -j "$(nproc)"
155+
python3 test/run_isa_tests.py -j "$(nproc)" \
156+
--json-out test/isa-test-logs/soc/results.json
157+
- name: Render ISA test report
158+
if: always()
159+
run: |
160+
cd "${CHIPYARD_DIR}/generators/radiance"
161+
python3 test/render_isa_test_report.py \
162+
test/isa-test-logs/soc/results.json \
163+
--github-step-summary
156164
157165
run-core-isa-tests:
158166
name: Run Core-standalone ISA tests
@@ -165,7 +173,15 @@ jobs:
165173
source env.sh
166174
source /ecad/tools/vlsi.bashrc
167175
cd generators/radiance
168-
python3 test/run_isa_tests.py -j "$(nproc)" -c core
176+
python3 test/run_isa_tests.py -j "$(nproc)" -c core \
177+
--json-out test/isa-test-logs/core/results.json
178+
- name: Render ISA test report
179+
if: always()
180+
run: |
181+
cd "${CHIPYARD_DIR}/generators/radiance"
182+
python3 test/render_isa_test_report.py \
183+
test/isa-test-logs/core/results.json \
184+
--github-step-summary
169185
170186
run-cosim-isa-tests:
171187
name: Run Cyclotron-as-a-Tile Co-sim ISA tests
@@ -178,25 +194,41 @@ jobs:
178194
source env.sh
179195
source /ecad/tools/vlsi.bashrc
180196
cd generators/radiance
181-
python3 test/run_isa_tests.py -j "$(nproc)" -c cosim
182-
183-
run-backend-cosim-isa-tests:
184-
name: Run Backend Co-sim ISA tests
185-
runs-on: as4
186-
needs: build
187-
steps:
188-
- name: Run ISA tests on backend co-sim config
197+
python3 test/run_isa_tests.py -j "$(nproc)" -c cosim \
198+
--json-out test/isa-test-logs/cosim/results.json
199+
- name: Render ISA test report
200+
if: always()
189201
run: |
190-
cd "${CHIPYARD_DIR}"
191-
source env.sh
192-
source /ecad/tools/vlsi.bashrc
193-
cd generators/radiance
194-
python3 test/run_isa_tests.py -j "$(nproc)" -c backend
202+
cd "${CHIPYARD_DIR}/generators/radiance"
203+
python3 test/render_isa_test_report.py \
204+
test/isa-test-logs/cosim/results.json \
205+
--github-step-summary
206+
207+
# run-backend-cosim-isa-tests:
208+
# name: Run Backend Co-sim ISA tests
209+
# runs-on: as4
210+
# needs: build
211+
# steps:
212+
# - name: Run ISA tests on backend co-sim config
213+
# run: |
214+
# cd "${CHIPYARD_DIR}"
215+
# source env.sh
216+
# source /ecad/tools/vlsi.bashrc
217+
# cd generators/radiance
218+
# python3 test/run_isa_tests.py -j "$(nproc)" -c backend \
219+
# --json-out test/isa-test-logs/backend/results.json
220+
# - name: Render ISA test report
221+
# if: always()
222+
# run: |
223+
# cd "${CHIPYARD_DIR}/generators/radiance"
224+
# python3 test/render_isa_test_report.py \
225+
# test/isa-test-logs/backend/results.json \
226+
# --github-step-summary
195227

196228
clean:
197229
name: Clean
198230
runs-on: as4
199-
needs: [build, run-unit-tests, run-soc-isa-tests, run-core-isa-tests, run-cosim-isa-tests, run-backend-cosim-isa-tests]
231+
needs: [build, run-unit-tests, run-soc-isa-tests, run-core-isa-tests, run-cosim-isa-tests]
200232
if: always()
201233
steps:
202234
- name: Cleanup workspace, chipyard and conda

chipyard/RadianceConfigs.scala

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,23 @@ object TapeoutSmemConfig extends RadianceSharedMemKey(
6363
)
6464

6565
object L0iCacheConfig extends DCacheParams(
66-
nSets = 128,
66+
nSets = 512,
6767
nWays = 1,
6868
rowBits = 32 * 8,
6969
blockBytes = 32,
7070
nMSHRs = 2,
7171
)
7272

73+
object L0iCacheHugeConfig extends DCacheParams(
74+
nSets = 8192,
75+
nWays = 1,
76+
rowBits = 32 * 8,
77+
blockBytes = 32,
78+
nMSHRs = 4,
79+
)
80+
7381
object L0dCacheConfig extends DCacheParams(
74-
nSets = 512,
82+
nSets = 64,
7583
nWays = 1,
7684
rowBits = 64 * 8,
7785
blockBytes = 64,
@@ -202,6 +210,16 @@ class RadianceSingleClusterConfig extends Config(
202210
new RadianceBaseConfig
203211
)
204212

213+
class RadianceSingleClusterLargeICacheConfig extends Config(
214+
new WithRadianceMxGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 8, 8)) ++
215+
new WithMuonCores(2, location = InCluster(0), noILP = false, l0i = Some(L0iCacheHugeConfig), l0d = Some(L0dCacheConfig), trace = true) ++
216+
new WithRadianceCluster(0, smemConfig = TapeoutSmemConfig, l1Config = L1CacheConfig) ++
217+
new WithExtGPUMem() ++
218+
new WithRadianceRocket ++
219+
new WithGPUResetAggregator(defaultReset = false) ++
220+
new RadianceBaseConfig
221+
)
222+
205223
class RadianceSingleClusterDiffTestConfig extends Config(
206224
new WithRadianceMxGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 8, 8)) ++
207225
new WithMuonCores(2, location = InCluster(0), noILP = false, l0i = Some(L0iCacheConfig), l0d = Some(L0dCacheConfig), trace = true, difftest = true) ++
@@ -234,6 +252,19 @@ class RadianceLeanTapeoutSimConfig extends Config(
234252
new RadianceBaseConfig
235253
)
236254

255+
class RadianceTapeoutSimTraceConfig extends Config(
256+
new WithRadianceMxGemmini(location = InCluster(1), dim = 16, accSizeInKB = 32, tileSize = (8, 8, 8)) ++
257+
new WithMuonCores(2, location = InCluster(1), noILP = false, l0i = Some(L0iCacheConfig), l0d = Some(L0dCacheConfig), trace = true) ++
258+
new WithRadianceCluster(1, smemConfig = TapeoutSmemConfig, l1Config = L1CacheConfig) ++
259+
new WithRadianceMxGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 8, 8)) ++
260+
new WithMuonCores(2, location = InCluster(0), noILP = false, l0i = Some(L0iCacheConfig), l0d = Some(L0dCacheConfig), trace = true) ++
261+
new WithRadianceCluster(0, smemConfig = TapeoutSmemConfig, l1Config = L1CacheConfig) ++
262+
new WithExtGPUMem() ++
263+
new WithRadianceRocket ++
264+
new WithGPUResetAggregator(defaultReset = false) ++
265+
new RadianceBaseConfig
266+
)
267+
237268
class RadianceTapeoutSimConfig extends Config(
238269
new WithRadianceMxGemmini(location = InCluster(1), dim = 16, accSizeInKB = 32, tileSize = (8, 8, 8)) ++
239270
new WithMuonCores(2, location = InCluster(1), noILP = false, l0i = Some(L0iCacheConfig), l0d = Some(L0dCacheConfig)) ++

cyclotron

Submodule cyclotron updated 189 files

docs/isa.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,17 @@ The values of `rs1` and `rs2` are taken from the "leader" lane.
259259
Note: `vx_wspawn` will eventually be superseded by the command processor's scheduling
260260
capabilities.
261261

262+
#### `vx_bar`
263+
264+
```
265+
vx_bar rs1, rs2
266+
```
267+
268+
Waits for `rs2` warps in a single cluster to reach the barrier whose id is given by `rs1`.
269+
The values of `rs1` and `rs2` are taken from the "leader" lane. TODO: how many barriers? 16?
270+
271+
Note: `vx_bar` will eventually be superseded by the neutrino / command processor barrier mechanism.
272+
262273
## New Registers
263274

264275
At 128 registers, we have 96 additional registers to allocate.

src/main/scala/radiance/cluster/GemminiTile.scala

Lines changed: 4 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -225,22 +225,7 @@ class GemminiTile private (
225225
}
226226

227227
// width is max output width & num output lanes
228-
val requantizerSmemClient = gemminiParams.requantizer.map { q =>
229-
TLClientNode(Seq(TLMasterPortParameters.v1(
230-
clients = Seq(TLMasterParameters.v2(
231-
name = "requantizer_out",
232-
sourceId = IdRange(0, 1 << q.outputIdBits),
233-
emits = TLMasterToSlaveTransferSizes(
234-
putFull = TransferSizes(q.numOutputLanes * q.minOutputBits / 8,
235-
q.numOutputLanes * q.maxOutputBits / 8),
236-
putPartial = TransferSizes(q.numOutputLanes * q.minOutputBits / 8,
237-
q.numOutputLanes * q.maxOutputBits / 8)
238-
)
239-
))
240-
)))
241-
}
242-
243-
// TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLMasterParameters.v1("")))))
228+
val requantizerSmemClient = None
244229

245230
override lazy val module = new GemminiTileModuleImp(this)
246231
}
@@ -311,7 +296,7 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
311296
// requantizer
312297
outer.gemminiParams.requantizer.foreach { q =>
313298
val in = Wire(Decoupled(new RequantizerInBundle(q.numGPUInputLanes, q.inputBits)))
314-
val out = Wire(Decoupled(new RequantizerOutBundle(q.numOutputLanes, q.maxOutputBits)))
299+
// val out = Wire(Decoupled(new RequantizerOutBundle(q.numOutputLanes, q.maxOutputBits)))
315300

316301
{ // input
317302
val (node, edge) = outer.requantizerMuonManager.get.in.head
@@ -331,40 +316,7 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
331316
assert(!node.a.valid || ((node.a.bits.address & q.baseAddr.U) === q.baseAddr.U))
332317
}
333318

334-
{ // output
335-
val (node, edge) = outer.requantizerSmemClient.get.out.head
336-
337-
// data
338-
val isFP4 = out.bits.dataType === RequantizerDataType.FP4
339-
val fullWidth = q.numOutputLanes
340-
val halfWidth = q.numOutputLanes / 2
341-
node.a.bits := edge.Put(
342-
fromSource = 0.U, // gets overridden
343-
toAddress = out.bits.address,
344-
lgSize = Mux(isFP4,
345-
log2Ceil(halfWidth).U, // half byte per lane
346-
log2Ceil(q.numOutputLanes).U // fp6, fp8: 1 byte per lane
347-
),
348-
data = Mux(isFP4,
349-
Mux(
350-
out.bits.address(log2Ceil(halfWidth)), // not aligned to full line
351-
(out.bits.data(halfWidth - 1, 0) << halfWidth).asTypeOf(UInt(fullWidth.W)),
352-
out.bits.data
353-
),
354-
out.bits.data
355-
)
356-
)._2
357-
358-
// source
359-
val (sourceReady, _) = SourceGenerator(node)
360-
out.ready := node.a.ready && sourceReady
361-
node.a.valid := out.valid && sourceReady
362-
assert(out.fire === node.a.fire)
363-
node.d.ready := true.B
364-
}
365-
366319
outer.gemmini.module.mx_io.get.requant_in_gpu <> in
367-
outer.gemmini.module.mx_io.get.requant_out <> out
368320
}
369321

370322
// lut
@@ -420,14 +372,15 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
420372

421373
val gemminiBaseMMIO = Seq(
422374
0x00 -> Seq(RegField.w(32, gemminiCommandReg(_, _))),
375+
0x08 -> Seq(RegField.r(32, gemminiIO.ready)),
423376
0x10 -> Seq(
424377
RegField.w(32, gemminiRs1RegLSB),
425378
RegField.w(32, gemminiRs1RegMSB)),
426379
0x18 -> Seq(
427380
RegField.w(32, gemminiRs2RegLSB),
428381
RegField.w(32, gemminiRs2RegMSB)),
429382
0x20 -> Seq(RegField.r(32, gemminiBusyReg(_))),
430-
0x28 -> Seq(RegField.r(32, gemminiRunningLoopsReg(_)))
383+
0x28 -> Seq(RegField.r(32, gemminiRunningLoopsReg(_))),
431384
)
432385

433386
val gemminiCiscMMIO = Option.when(!outer.gemminiParams.hasAccSlave) {

src/main/scala/radiance/cluster/RadianceCluster.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ class RadianceCluster (
131131
cache = thisClusterParams.l1Config,
132132
cacheTagBits = muonTiles.head.muonParams.core.l1ReqTagBits,
133133
overrideDChannelSize = Some(log2Ceil(thisClusterParams.l1Config.blockBytes)),
134+
makeLandingPads = true
134135
))(
135136
p.alterMap(Map(
136137
TileVisibilityNodeKey -> visibilityNode,

src/main/scala/radiance/cluster/RadianceSharedMemComponents.scala

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package radiance.cluster
22

33
import chisel3._
44
import chisel3.util._
5-
import freechips.rocketchip.diplomacy.{AddressSet, BufferParams}
5+
import freechips.rocketchip.diplomacy.BufferParams
66
import freechips.rocketchip.tilelink._
77
import gemmini._
88
import org.chipsalliance.cde.config.Parameters
@@ -162,13 +162,8 @@ class RadianceSharedMemComponents(
162162
.map(connectOne(_, () => RWSplitterNode(f"muon_aligned_splitter")))
163163
val muonAligned = Seq.fill(2)(muonSplitterNodes.map(connectXbarName(_, Some("muon_aligned_fanout"))))
164164

165-
val quantOutputWidth = gemminiTiles.flatMap(_.gemminiParams.requantizer
166-
.map(q => q.numOutputLanes * q.maxOutputBits / 8))
167-
val quantOutputNodesSingleBank = distAndDuplicate(
168-
gemminiTiles.flatMap(_.requantizerSmemClient).map(x =>
169-
(connectOne(x, () => AddressOrNode(clusterParams.baseAddr)), quantOutputWidth.head)
170-
), "quant_w")
171-
val quantOutputNodes = Seq.fill(smemBanks)(quantOutputNodesSingleBank)
165+
val quantOutputNodes = Seq.fill(smemBanks)(Seq.fill(smemSubbanks)(Seq[TLNexusNode]()))
166+
// val quantOutputNodes = Seq.fill(smemBanks)(quantOutputNodesSingleBank)
172167

173168
// connect requantizer managers directly here TODO: move outside, make smemNodes xbars
174169
gemminiTiles.flatMap(_.requantizerMuonManager).foreach { qm =>

src/main/scala/radiance/memory/FlitMergeNode.scala

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,21 +69,36 @@ class FlitMergeNode(from: Int, to: Int, alwaysMerge: Boolean = true)
6969
val finalOut = WireInit(mergedReq)
7070
finalOut.data := filledData
7171
finalOut.mask := filledMask
72+
finalOut.source := in.a.bits.source // last beat response
7273

7374
out.a.valid := isLastReq
7475
out.a.bits := finalOut
75-
in.a.ready := Mux(isLastReq, out.a.ready, true.B)
76+
// we acknowledge all A subrequests except for the last one immediately
77+
in.a.ready := Mux(isLastReq, out.a.ready, in.d.ready && !out.d.valid)
7678
}.otherwise {
7779
out.a <> in.a
7880
}
7981

8082
// restore size on D channel if merged on A
81-
val wasMerged = VecInit.fill(1 << out.a.bits.params.sourceBits)(false.B)
83+
val wasMerged = RegInit(VecInit.fill(1 << out.a.bits.params.sourceBits)(false.B))
8284
when (in.a.fire) {
8385
wasMerged(in.a.bits.source) := shouldMerge
8486
}
85-
in.d <> out.d
86-
in.d.bits.size := Mux(wasMerged(out.d.bits.source), log2Ceil(from).U, out.d.bits.size)
87+
88+
// if D channel has a response, we prioritize passthru; otherwise, we try to
89+
// acknowledge all but the last mergeable subrequest from A
90+
in.d.valid := out.d.valid || (shouldMerge && in.a.valid && !isLastReq)
91+
in.d.bits := Mux(out.d.valid, out.d.bits, ie.AccessAck(in.a.bits))
92+
in.d.bits.size := Mux(
93+
out.d.valid,
94+
Mux(
95+
wasMerged(out.d.bits.source),
96+
log2Ceil(from).U,
97+
out.d.bits.size
98+
),
99+
log2Ceil(from).U
100+
)
101+
out.d.ready := in.d.ready
87102
}
88103
}
89104
}

0 commit comments

Comments
 (0)