Skip to content

Multi-GPU and AttributeError: Can't pickle local object 'add_hook_to_module.<locals>.new_forward' #164

@pepper725

Description

@pepper725

I encountered the following error when running the code below. After reading the previous issues, I understand that it is probably caused by the code not being able to run on multiple GPUs, but a single one of my GPUs cannot run the model. How can I change the code so that it works? Details are as follows:
Here's my code:

from llm2vec import LLM2Vec
import torch
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel

# Reproduction script: loads a local Llama-3 base model plus the LLM2Vec MNTP
# adapter, encodes two queries and two documents, and prints their cosine
# similarity matrix.

# Local directory of the MNTP checkpoint; used below for the tokenizer, the
# config, and the second PeftModel load.
model_path = "/mnt/data1/hja/McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp"


# Tokenizer and config are both read from the MNTP checkpoint directory with
# local_files_only=True.
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, local_files_only=True)
# Base weights come from a separate local directory, combined with the config
# read from the MNTP checkpoint above.
# NOTE(review): the reported error names `add_hook_to_module.<locals>.new_forward`;
# presumably that hook is installed because a device_map is passed here, and it
# is what cannot be pickled when encode() hands work to a process pool — confirm.
model = LLM2Vec.from_pretrained(
   "/mnt/data1/hja/meta-llama/Meta-Llama-3-8B-Instruct",
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="cuda",  # automatically assign devices
    use_safetensors=True,  # use the safetensors format
    local_files_only=True  # load local files only, do not access Hugging Face
)
# NOTE(review): this first adapter load uses the Hub repository id rather than
# the local path, unlike every other load in this script — confirm it is
# intended, since the other calls pass local_files_only=True.
model = PeftModel.from_pretrained(
    model,
    "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp",
)
# Loading MNTP (Masked Next Token Prediction) model.
# NOTE(review): `model` was already wrapped by PeftModel.from_pretrained just
# above, so the MNTP adapter appears to be applied twice — confirm whether one
# of the two calls should be removed.
model = PeftModel.from_pretrained(
    model,
    "/mnt/data1/hja/McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp",
)

# Wrapper for encoding and pooling operations
l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)


# Encoding queries using instructions
instruction = (
    "Given a web search query, retrieve relevant passages that answer the query:"
)
# Each query is an [instruction, text] pair.
queries = [
    [instruction, "how much protein should a female eat"],
    [instruction, "summit define"],
]
# This is the call that raises the reported AttributeError.
# NOTE(review): convert_to_numpy=True presumably makes q_reps a NumPy array,
# while d_reps below is encoded with default arguments; the torch normalize/mm
# calls further down would then receive mixed types — confirm.
q_reps = l2v.encode(queries, batch_size=8, show_progress_bar=True, convert_to_numpy=True, device='cpu')

# Encoding documents. Instruction are not required for documents
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
]
d_reps = l2v.encode(documents)

# Compute cosine similarity
# L2-normalize each row, then multiply queries by transposed documents so that
# entry [i, j] is the cosine similarity of query i and document j.
q_reps_norm = torch.nn.functional.normalize(q_reps, p=2, dim=1)
d_reps_norm = torch.nn.functional.normalize(d_reps, p=2, dim=1)
cos_sim = torch.mm(q_reps_norm, d_reps_norm.transpose(0, 1))

print(cos_sim)
# The string below records the output the script is expected to print.
"""
tensor([[0.7740, 0.5580],
        [0.4845, 0.4993]])
"""

The following is the error message:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Input In [5], in <cell line: 9>()
      2 instruction = (
      3     "Given a web search query, retrieve relevant passages that answer the query:"
      4 )
      5 queries = [
      6     [instruction, "how much protein should a female eat"],
      7     [instruction, "summit define"],
      8 ]
----> 9 q_reps = l2v.encode(queries, batch_size=8, show_progress_bar=True, convert_to_numpy=True, device='cpu')
     11 # Encoding documents. Instruction are not required for documents
     12 documents = [
     13     "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
     14     "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
     15 ]

File ~/anaconda3/envs/llm2vec/lib/python3.10/site-packages/llm2vec/llm2vec.py:402, in LLM2Vec.encode(self, sentences, batch_size, show_progress_bar, convert_to_numpy, convert_to_tensor, device)
    393         for batch in sentences_batches:
    394             results.append(
    395                 p.apply_async(
    396                     self._encode,
   (...)
    399                 )
    400             )
--> 402         all_embeddings = [result.get() for result in results]
    403         progress_bar.close()
    405 all_embeddings = torch.cat(all_embeddings, dim=0)

File ~/anaconda3/envs/llm2vec/lib/python3.10/site-packages/llm2vec/llm2vec.py:402, in <listcomp>(.0)
    393         for batch in sentences_batches:
    394             results.append(
    395                 p.apply_async(
    396                     self._encode,
   (...)
    399                 )
    400             )
--> 402         all_embeddings = [result.get() for result in results]
    403         progress_bar.close()
    405 all_embeddings = torch.cat(all_embeddings, dim=0)

File ~/anaconda3/envs/llm2vec/lib/python3.10/multiprocessing/pool.py:771, in ApplyResult.get(self, timeout)
    769     return self._value
    770 else:
--> 771     raise self._value

File ~/anaconda3/envs/llm2vec/lib/python3.10/multiprocessing/pool.py:537, in Pool._handle_tasks(taskqueue, put, outqueue, pool, cache)
    535     break
    536 try:
--> 537     put(task)
    538 except Exception as e:
    539     job, idx = task[:2]

File ~/anaconda3/envs/llm2vec/lib/python3.10/multiprocessing/connection.py:211, in _ConnectionBase.send(self, obj)
    209 self._check_closed()
    210 self._check_writable()
--> 211 self._send_bytes(_ForkingPickler.dumps(obj))

File ~/anaconda3/envs/llm2vec/lib/python3.10/multiprocessing/reduction.py:51, in ForkingPickler.dumps(cls, obj, protocol)
     48 @classmethod
     49 def dumps(cls, obj, protocol=None):
     50     buf = io.BytesIO()
---> 51     cls(buf, protocol).dump(obj)
     52     return buf.getbuffer()

AttributeError: Can't pickle local object 'add_hook_to_module.<locals>.new_forward'

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions