We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6aa842a commit e909f7e (Copy full SHA for e909f7e)
1 file changed
backends/exllamav3/model.py
@@ -500,6 +500,7 @@ def load_model_sync(self, progress_callback=None):
500
if self.use_vision:
501
for value in self.vision_model.load_gen(
502
reserve_per_device=self.autosplit_reserve,
503
+ use_per_device=self.gpu_split,
504
callback=progress_callback,
505
):
506
if value:
@@ -508,6 +509,7 @@ def load_model_sync(self, progress_callback=None):
508
509
if self.use_draft_model:
510
for value in self.draft_model.load_gen(
511
512
513
514
515
0 commit comments