7 changes: 5 additions & 2 deletions dace/codegen/targets/cpp.py
@@ -807,9 +807,12 @@ def unparse_cr(sdfg, wcr_ast, dtype):
def connected_to_gpu_memory(node: nodes.Node, state: SDFGState, sdfg: SDFG):
for e in state.all_edges(node):
path = state.memlet_path(e)
- if ((isinstance(path[0].src, nodes.AccessNode)
- and path[0].src.desc(sdfg).storage is dtypes.StorageType.GPU_Global)):
+ if (((isinstance(path[0].src, nodes.AccessNode)
+ and path[0].src.desc(sdfg).storage is dtypes.StorageType.GPU_Global))
+ or ((isinstance(path[-1].dst, nodes.AccessNode)
+ and path[-1].dst.desc(sdfg).storage is dtypes.StorageType.GPU_Global))):
return True

return False


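For context, the broadened condition matters when the source of a memlet path is not an access node (e.g. a tasklet) but the path ends at GPU global memory; the old check inspected only path[0].src and missed that case. A minimal sketch of such a graph, using the standard DaCe SDFG-building API (array name and shape are illustrative):

import dace

# A tasklet writes directly into a GPU_Global array; the memlet path for this
# edge starts at the tasklet and ends at the GPU-global access node.
sdfg = dace.SDFG('gpu_write_sketch')
sdfg.add_array('out', [32], dace.float64, storage=dace.StorageType.GPU_Global)
state = sdfg.add_state()

tasklet = state.add_tasklet('write_one', {}, {'o'}, 'o = 1.0')
out_node = state.add_write('out')
state.add_edge(tasklet, 'o', out_node, None, dace.Memlet('out[0]'))

# Here path[0].src is the tasklet, so the source-only check reports no GPU
# memory connection; the added path[-1].dst check sees the GPU_Global destination.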
18 changes: 16 additions & 2 deletions dace/dtypes.py
@@ -71,6 +71,7 @@ class ScheduleType(AutoNumberEnum):
GPU_ThreadBlock = () #: Thread-block code
GPU_ThreadBlock_Dynamic = () #: Allows rescheduling work within a block
GPU_Persistent = ()
GPU_Warp = ()
Contributor: I believe we need this, but is it actually used anywhere in this PR?

Collaborator (author): It is not used for the copies, but it is part of the new GPU codegen; I may have introduced an error when porting dtypes from the GPU codegen branch to this branch.


Snitch = ()
Snitch_Multicore = ()
@@ -84,6 +85,11 @@ class ScheduleType(AutoNumberEnum):
ScheduleType.GPU_Persistent,
]

# A subset of GPU schedule types for ExperimentalCUDACodeGen
EXPERIMENTAL_GPU_SCHEDULES = [
ScheduleType.GPU_Warp,
]

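A possible consumer of the new list (the helper below is hypothetical and not part of this PR): a dispatcher can route maps that carry one of these schedules to the experimental CUDA code generator.

from dace import dtypes

def needs_experimental_codegen(map_entry) -> bool:
    # Hypothetical dispatch helper: GPU_Warp (and any future experimental
    # schedule) would only be handled by ExperimentalCUDACodeGen.
    return map_entry.map.schedule in dtypes.EXPERIMENTAL_GPU_SCHEDULES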
# A subset of CPU schedule types
CPU_SCHEDULES = [
ScheduleType.CPU_Multicore,
@@ -95,6 +101,8 @@ class ScheduleType(AutoNumberEnum):
StorageType.GPU_Shared,
]

GPU_KERNEL_ACCESSIBLE_STORAGES = [StorageType.GPU_Global, StorageType.GPU_Shared, StorageType.Register]

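For reference, the new constant simply enumerates the storages addressable from inside a kernel body; a quick illustrative check (not from this PR):

from dace import dtypes

# Global memory, shared memory, and registers are kernel-accessible;
# pinned host memory is not in this list.
assert dtypes.StorageType.Register in dtypes.GPU_KERNEL_ACCESSIBLE_STORAGES
assert dtypes.StorageType.CPU_Pinned not in dtypes.GPU_KERNEL_ACCESSIBLE_STORAGES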

@undefined_safe_enum
class ReductionType(AutoNumberEnum):
@@ -192,7 +200,8 @@ class TilingType(AutoNumberEnum):
ScheduleType.GPU_ThreadBlock: StorageType.Register,
ScheduleType.GPU_ThreadBlock_Dynamic: StorageType.Register,
ScheduleType.SVE_Map: StorageType.CPU_Heap,
- ScheduleType.Snitch: StorageType.Snitch_TCDM
+ ScheduleType.Snitch: StorageType.Snitch_TCDM,
+ ScheduleType.GPU_Warp: StorageType.Register,
}

# Maps from ScheduleType to default ScheduleType for sub-scopes
@@ -207,9 +216,10 @@ class TilingType(AutoNumberEnum):
ScheduleType.GPU_Device: ScheduleType.GPU_ThreadBlock,
ScheduleType.GPU_ThreadBlock: ScheduleType.Sequential,
ScheduleType.GPU_ThreadBlock_Dynamic: ScheduleType.Sequential,
+ ScheduleType.GPU_Warp: ScheduleType.Sequential,
ScheduleType.SVE_Map: ScheduleType.Sequential,
ScheduleType.Snitch: ScheduleType.Snitch,
- ScheduleType.Snitch_Multicore: ScheduleType.Snitch_Multicore
+ ScheduleType.Snitch_Multicore: ScheduleType.Snitch_Multicore,
}

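Together, the two default tables above determine what a GPU_Warp scope falls back to when nothing is specified explicitly; assuming the enclosing dictionaries are the usual SCOPEDEFAULT_STORAGE and SCOPEDEFAULT_SCHEDULE tables (their names lie outside the shown hunks), the effect is:

from dace import dtypes

# Transients created inside a GPU_Warp scope default to registers, and nested
# scopes without an explicit schedule default to sequential execution.
assert dtypes.SCOPEDEFAULT_STORAGE[dtypes.ScheduleType.GPU_Warp] is dtypes.StorageType.Register
assert dtypes.SCOPEDEFAULT_SCHEDULE[dtypes.ScheduleType.GPU_Warp] is dtypes.ScheduleType.Sequential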
# Maps from StorageType to a preferred ScheduleType for helping determine schedules.
@@ -1240,6 +1250,7 @@ class string(_DaCeArray, npt.NDArray[numpy.str_]): ...
class vector(_DaCeArray, npt.NDArray[numpy.void]): ...
class MPI_Request(_DaCeArray, npt.NDArray[numpy.void]): ...
class float32sr(_DaCeArray, npt.NDArray[numpy.float32]): ...
class gpuStream_t(_DaCeArray, npt.NDArray[numpy.void]): ...
# yapf: enable
else:
# Runtime definitions
@@ -1260,6 +1271,7 @@ class float32sr(_DaCeArray, npt.NDArray[numpy.float32]): ...
complex128 = typeclass(numpy.complex128)
string = stringtype()
MPI_Request = opaque('MPI_Request')
gpuStream_t = opaque('gpuStream_t')
float32sr = Float32sr()


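gpuStream_t follows the same opaque-type pattern as MPI_Request: generated code passes the handle through without DaCe interpreting its contents. A hedged sketch of how such an opaque type is typically attached to a container (the container name and shape are illustrative, by analogy with MPI_Request usage):

import dace
from dace import dtypes

# A transient holding a single GPU stream handle; codegen never inspects
# the layout of an opaque type, it only forwards it.
sdfg = dace.SDFG('stream_handle_sketch')
sdfg.add_array('stream', [1], dtypes.gpuStream_t, transient=True)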
@@ -1281,6 +1293,7 @@ class Typeclasses(AutoNumberEnum):
float64 = float64
complex64 = complex64
complex128 = complex128
gpuStream_t = gpuStream_t


_bool = bool
@@ -1508,6 +1521,7 @@ def can_access(schedule: ScheduleType, storage: StorageType):
ScheduleType.GPU_Persistent,
ScheduleType.GPU_ThreadBlock,
ScheduleType.GPU_ThreadBlock_Dynamic,
ScheduleType.GPU_Warp,
]:
return storage in [StorageType.GPU_Global, StorageType.GPU_Shared, StorageType.CPU_Pinned]
elif schedule in [ScheduleType.Default, ScheduleType.CPU_Multicore, ScheduleType.CPU_Persistent]:
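With the addition above, GPU_Warp behaves like the other GPU scope schedules for the purposes of can_access; a short illustrative check:

from dace import dtypes

# A warp-scheduled scope may touch global, shared, and pinned memory,
# matching the other GPU schedule types in this branch of can_access.
assert dtypes.can_access(dtypes.ScheduleType.GPU_Warp, dtypes.StorageType.GPU_Shared)
assert dtypes.can_access(dtypes.ScheduleType.GPU_Warp, dtypes.StorageType.CPU_Pinned)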