Blogs
Transformers’ pipeline refinement
2021 Mar. 14
Transformers’ original Pipeline neither batches nor truncates its input, but you can fix this by subclassing it, as in the example below:
import math

import more_itertools
from tqdm import tqdm
from transformers import TextClassificationPipeline
from transformers.tokenization_utils_base import TruncationStrategy

class RefinedTextClassificationPipeline(TextClassificationPipeline):
    def __init__(self, truncation=TruncationStrategy.DO_NOT_TRUNCATE, max_length=512, batch_size=128, **kwargs):
        super().__init__(**kwargs)
        self.truncation = truncation    # whether to truncate the input sequences
        self.max_length = max_length    # maximum sequence length when truncating
        self.batch_size = batch_size    # how many examples to run through the model at once

    def _parse_and_tokenize(self, inputs, padding=True, add_special_tokens=True, **kwargs):
        """
        Parse arguments and tokenize, honouring the truncation settings.
        """
        return self.tokenizer(
            inputs,
            add_special_tokens=add_special_tokens,
            return_tensors=self.framework,
            padding=padding,
            truncation=self.truncation,
            max_length=self.max_length,
        )

    def __call__(self, inputs, *args, **kwargs):
        # Do whatever customized unwrapping of the input you need here;
        # from here on, assume you have a flat input_list of strings.
        input_list = [inputs] if isinstance(inputs, str) else list(inputs)
        batch_outputs = [None] * math.ceil(len(input_list) / self.batch_size)
        for i, batch in tqdm(enumerate(more_itertools.chunked(input_list, self.batch_size))):
            model_inputs = self._parse_and_tokenize(batch, *args, **kwargs)
            batch_outputs[i] = self._forward(model_inputs)
        # Wrap batch_outputs back up into whatever dictionary form you need,
        # e.g. concatenate the per-batch logits and build label/score dicts.
        return batch_outputs
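For reference, here is a minimal usage sketch. The checkpoint name, batch size, and input texts are placeholders, not part of the original recipe; the pipeline returns raw per-batch model outputs that you still post-process yourself.

from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "distilbert-base-uncased-finetuned-sst-2-english"  # placeholder checkpoint
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

pipe = RefinedTextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    truncation=TruncationStrategy.LONGEST_FIRST,  # long inputs are now cut to max_length
    max_length=512,
    batch_size=32,
)
raw_outputs = pipe(["a very long document ...", "another document ..."])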
Do Not Use the Transformers Custom Loss Example
2021 Mar. 10
from transformers import Trainer

class MyTrainer(Trainer):
    def compute_loss(self, model, inputs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        return my_custom_loss(logits, labels)
will fail during evaluation with this error:
File "/home/xlova/anaconda3/lib/python3.8/site-packages/transformers/trainer.py", line 989, in train
self._maybe_log_save_evaluate(tr_loss, model, trial, epoch)
File "/home/xlova/anaconda3/lib/python3.8/site-packages/transformers/trainer.py", line 1058, in _maybe_log_save_evaluate
metrics = self.evaluate()
File "/home/xlova/anaconda3/lib/python3.8/site-packages/transformers/trainer.py", line 1506, in evaluate
output = self.prediction_loop(
File "/home/xlova/anaconda3/lib/python3.8/site-packages/transformers/trainer.py", line 1630, in prediction_loop
loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
File "/home/xlova/anaconda3/lib/python3.8/site-packages/transformers/trainer.py", line 1762, in prediction_step
labels = nested_detach(tuple(inputs.get(name) for name in self.label_names))
File "/home/xlova/anaconda3/lib/python3.8/site-packages/transformers/trainer_pt_utils.py", line 111, in nested_detach
return type(tensors)(nested_detach(t) for t in tensors)
File "/home/xlova/anaconda3/lib/python3.8/site-packages/transformers/trainer_pt_utils.py", line 111, in <genexpr>
return type(tensors)(nested_detach(t) for t in tensors)
File "/home/xlova/anaconda3/lib/python3.8/site-packages/transformers/trainer_pt_utils.py", line 112, in nested_detach
return tensors.detach()
AttributeError: 'NoneType' object has no attribute 'detach'
The cause: during evaluation, Trainer.prediction_step still looks up the labels in inputs via self.label_names, but pop has already removed the key, so nested_detach receives None and crashes. The solution is to use get instead of pop, so the labels stay in inputs:
from transformers import Trainer

class MyTrainer(Trainer):
    def compute_loss(self, model, inputs):
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        return my_custom_loss(logits, labels)
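For completeness, here is a sketch of what my_custom_loss and the surrounding setup could look like. The class-weighted cross-entropy, the weights, and the model/dataset variables are purely illustrative assumptions, not part of the original example.

import torch
from transformers import TrainingArguments

class_weights = torch.tensor([1.0, 3.0])  # illustrative weights for a 2-class model

def my_custom_loss(logits, labels):
    # Weighted cross-entropy as one possible custom loss.
    loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights.to(logits.device))
    return loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))

trainer = MyTrainer(
    model=model,                    # any *ForSequenceClassification model
    args=TrainingArguments(output_dir="out", evaluation_strategy="epoch"),
    train_dataset=train_dataset,    # tokenized datasets with a "labels" column
    eval_dataset=eval_dataset,
)
trainer.train()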