Skip to content

Commit e909f7e

Browse files
committed
ExLlamaV3: Respect device split when loading draft and vision models
1 parent 6aa842a commit e909f7e

1 file changed

Lines changed: 2 additions & 0 deletions

File tree

backends/exllamav3/model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ def load_model_sync(self, progress_callback=None):
500 500
if self.use_vision:
501 501
for value in self.vision_model.load_gen(
502 502
reserve_per_device=self.autosplit_reserve,
503 +
use_per_device=self.gpu_split,
503 504
callback=progress_callback,
504 505
):
505 506
if value:
@@ -508,6 +509,7 @@ def load_model_sync(self, progress_callback=None):
508 509
if self.use_draft_model:
509 510
for value in self.draft_model.load_gen(
510 511
reserve_per_device=self.autosplit_reserve,
512 +
use_per_device=self.gpu_split,
511 513
callback=progress_callback,
512 514
):
513 515
if value:

0 commit comments

Comments
 (0)