It seems there is a bug in your pruning-related function #1890

Open
yedaotian9 opened this issue Jul 27, 2024 · 0 comments

The function is imported as follows:
from paddleslim.nas.ofa.utils import nlp_utils
The source of the function:
def compute_neuron_head_importance(task_name,
                                   model,
                                   data_loader,
                                   num_layers,
                                   num_heads,
                                   loss_fct=paddle.nn.loss.CrossEntropyLoss(),
                                   intermediate_name='linear1',
                                   output_name='linear2'):
    """
    Compute the importance of multi-head attention and feed-forward neuron in each transformer layer.

    Args:
        task_name(str): task name.
        model(paddle.nn.Layer): the instance of transformer model.
        data_loader(DataLoader): An iterable data loader is used for evaluate. An instance of `paddle.io.Dataloader`.
        num_layers(int): number of transformer layers.
        num_heads(int): number of heads in each multi-head attention.
        loss_fct(Loss|optional): loss function can be a `paddle.nn.Layer` instance. Default: `nn.loss.CrossEntropyLoss()`.
        intermediate_name(str|optional): the name of intermediate `Linear` layer in feed-forward. Default: `linear1`.
        output_name(str|optional): the name of output `Linear` layer in feed-forward. Default: `linear2`.
    """
    head_importance = paddle.zeros(
        shape=[num_layers, num_heads], dtype='float32')
    head_mask = paddle.ones(shape=[num_layers, num_heads], dtype='float32')
    head_mask.stop_gradient = False

    intermediate_weight = []
    intermediate_bias = []
    output_weight = []

    for name, w in model.named_parameters():
        if intermediate_name in name:
            if len(w.shape) > 1:
                intermediate_weight.append(w)
            else:
                intermediate_bias.append(w)

        if output_name in name:
            if len(w.shape) > 1:
                output_weight.append(w)

    neuron_importance = []
    for w in intermediate_weight:
        neuron_importance.append(np.zeros(shape=[w.shape[1]], dtype='float32'))

    if task_name.lower() != 'mnli':
        data_loader = (data_loader, )
    for data in data_loader:
        for batch in data:
            if isinstance(batch, dict):
                input_ids, segment_ids, labels = batch['input_ids'], batch[
                    'token_type_ids'], batch['labels']
            else:
                input_ids, segment_ids, labels = batch
            logits = model(
                input_ids, segment_ids, attention_mask=[None, head_mask])
            loss = loss_fct(logits, labels)
            loss.backward()
            head_importance += paddle.abs(
                paddle.to_tensor(head_mask.gradient()))

            for w1, b1, w2, current_importance in zip(
                    intermediate_weight, intermediate_bias, output_weight,
                    neuron_importance):
                current_importance += np.abs(
                    (np.sum(w1.numpy() * w1.gradient(), axis=0) + b1.numpy() *
                     b1.gradient()))
                current_importance += np.abs(
                    np.sum(w2.numpy() * w2.gradient(), axis=1))

    return head_importance, neuron_importance
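
For reference, a call that follows the docstring might look like the sketch below. The model and dataset here are placeholders (not taken from this issue), and the model's forward would have to accept the list-style attention_mask used above, which is exactly what fails for PPMiniLM later in this report:

import paddle
from paddle.io import DataLoader
from paddleslim.nas.ofa.utils import nlp_utils

# Hypothetical usage sketch; `my_model` and `dev_ds` are placeholders for a
# transformer model (paddle.nn.Layer) and an evaluation dataset.
dev_loader = DataLoader(dataset=dev_ds, batch_size=4, shuffle=False)

head_importance, neuron_importance = nlp_utils.compute_neuron_head_importance(
    task_name='sst-2',
    model=my_model,
    data_loader=dev_loader,
    num_layers=12,              # number of transformer layers in my_model
    num_heads=12,               # number of attention heads per layer
    loss_fct=paddle.nn.CrossEntropyLoss())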

When I use this function, I get the following error:
AttributeError Traceback (most recent call last)
Cell In[46], line 180
172 dev_batch_sampler = paddle.io.BatchSampler(
173 dev_ds, batch_size=4, shuffle=False)
174 dev_data_loader = DataLoader(
175 dataset=dev_ds,
176 #batch_sampler=dev_batch_sampler,
177 #collate_fn=batchify_fn
178 )
--> 180 head_importance, neuron_importance = nlp_utils.compute_neuron_head_importance(
181 task_name='cluewsc2020',
182 model=ofa_model.model,
183 data_loader=dev_ds,
184 loss_fct=paddle.nn.loss.CrossEntropyLoss(
185 ) if [True,False] else paddle.nn.loss.MSELoss(),
186 num_layers=model.ppminilm.config['num_hidden_layers'],
187 num_heads=model.ppminilm.config['num_attention_heads'])
189 # Reorder the parameters according to their importance
190 reorder_neuron_head(ofa_model.model, head_importance, neuron_importance)

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleslim/nas/ofa/utils/nlp_utils.py:76, in compute_neuron_head_importance(task_name, model, data_loader, num_layers, num_heads, loss_fct, intermediate_name, output_name)
74 else:
75 input_ids, segment_ids, labels = batch
---> 76 logits = model(
77 input_ids, segment_ids, attention_mask=[None, head_mask])
78 loss = loss_fct(logits, labels)
79 loss.backward()

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddle/nn/layer/layers.py:1426, in Layer.__call__(self, *inputs, **kwargs)
1417 if (
1418 (not in_to_static_mode())
1419 and (not self._forward_pre_hooks)
(...)
1423 and (not in_profiler_mode())
1424 ):
1425 self._build_once(*inputs, **kwargs)
-> 1426 return self.forward(*inputs, **kwargs)
1427 else:
1428 return self._dygraph_call_func(*inputs, **kwargs)

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddlenlp/transformers/ppminilm/modeling.py:300, in PPMiniLMForSequenceClassification.forward(self, input_ids, token_type_ids, position_ids, attention_mask)
270 def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None):
271 r"""
272 Args:
273 input_ids (Tensor):
(...)
298
299 """
--> 300 _, pooled_output = self.ppminilm(
301 input_ids, token_type_ids=token_type_ids, position_ids=position_ids, attention_mask=attention_mask
302 )
304 pooled_output = self.dropout(pooled_output)
305 logits = self.classifier(pooled_output)

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddle/nn/layer/layers.py:1426, in Layer.__call__(self, *inputs, **kwargs)
1417 if (
1418 (not in_to_static_mode())
1419 and (not self._forward_pre_hooks)
(...)
1423 and (not in_profiler_mode())
1424 ):
1425 self._build_once(*inputs, **kwargs)
-> 1426 return self.forward(*inputs, **kwargs)
1427 else:
1428 return self._dygraph_call_func(*inputs, **kwargs)

File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddlenlp/transformers/ppminilm/modeling.py:230, in PPMiniLMModel.forward(self, input_ids, token_type_ids, position_ids, attention_mask)
226 attention_mask = paddle.unsqueeze(
227 (input_ids == self.pad_token_id).astype(self.pooler.dense.weight.dtype) * -1e4, axis=[1, 2]
228 )
229 else:
--> 230 if attention_mask.ndim == 2:
231 # attention_mask [batch_size, sequence_length] -> [batch_size, 1, 1, sequence_length]
232 attention_mask = attention_mask.unsqueeze(axis=[1, 2]).astype(paddle.get_default_dtype())
233 attention_mask = (1.0 - attention_mask) * -1e4

AttributeError: 'list' object has no attribute 'ndim'

After investigating, I believe the attention_mask handling in this function is the problem:

            logits = model(
                input_ids, segment_ids, attention_mask=[None, head_mask])

On this line, attention_mask is passed as the Python list [None, head_mask]. PPMiniLMModel.forward then evaluates attention_mask.ndim on that list, which raises the AttributeError shown above.
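
The mismatch can be seen in isolation with the standalone sketch below (an illustration, not code from PaddleSlim or PaddleNLP): a Python list has no ndim attribute, so the check in PPMiniLMModel.forward fails as soon as it receives the list:

import paddle

# Standalone sketch of the type mismatch: nlp_utils passes a list as
# attention_mask, while PPMiniLMModel.forward calls `attention_mask.ndim`.
head_mask = paddle.ones(shape=[12, 12], dtype='float32')
attention_mask = [None, head_mask]        # what compute_neuron_head_importance passes

print(hasattr(attention_mask, 'ndim'))    # False
attention_mask.ndim                       # AttributeError: 'list' object has no attribute 'ndim'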
