# torch-mlir/examples/ltc_backend_bert.py

# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Also available under a BSD-style license. See LICENSE.
"""
Runs a training of the Bert model using the Lazy Tensor Core with the
example Torch MLIR backend.
Most of the code in this example was copied from the wonderful tutorial
https://huggingface.co/transformers/training.html#fine-tuning-in-native-pytorch
Based on LTC code samples by ramiro050
https://github.com/ramiro050/lazy-tensor-samples
"""
import argparse
import torch
from datasets import load_dataset
from datasets.dataset_dict import DatasetDict
from torch.utils.data import DataLoader
from transformers import BertForSequenceClassification, \
    BertConfig, BertTokenizer, AdamW, get_scheduler
from typing import List
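

# Tokenize the raw IMDb text with the `bert-base-cased` tokenizer and put the
# dataset into torch tensor format, renaming 'label' to the 'labels' key the
# model expects.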
def tokenize_dataset(dataset: DatasetDict) -> DatasetDict:
    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length",
                         truncation=True)

    tokenized_datasets = dataset.map(tokenize_function, batched=True)
    tokenized_datasets = tokenized_datasets.remove_columns(['text'])
    tokenized_datasets = tokenized_datasets.rename_column('label', 'labels')
    tokenized_datasets.set_format('torch')

    return tokenized_datasets


def train(model: BertForSequenceClassification,
          num_epochs: int,
          num_training_steps: int,
          train_dataloader: DataLoader,
          device: torch.device,
          do_mark_step: bool) -> List[torch.Tensor]:
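    # Standard fine-tuning loop (AdamW with a linear, no-warmup LR schedule),
    # following the HuggingFace tutorial referenced in the module docstring.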
    optimizer = AdamW(model.parameters(), lr=5e-5)
    lr_scheduler = get_scheduler('linear', optimizer=optimizer,
                                 num_warmup_steps=0,
                                 num_training_steps=num_training_steps)

    model.train()
    losses = []
    for _ in range(num_epochs):
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            loss.backward()
            losses.append(loss)

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

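            # On a lazy device, the ops above only record IR; mark_step()
            # ends the trace so the accumulated graph is compiled and
            # executed by the backend once per training step.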
            if do_mark_step and 'lazy' in str(model.device):
                print("Calling Mark Step")
                torch._lazy.mark_step()

    return losses


def main(device, lower_only, full_size):
    if device in ("TS", "MLIR_EXAMPLE"):
        import torch._lazy

        if device == "TS":
            import torch._lazy.ts_backend
            torch._lazy.ts_backend.init()
        elif device == "MLIR_EXAMPLE":
            import ltc_backend.ltc_backend._EXAMPLE_MLIR_BACKEND as ltc_backend
            ltc_backend._initialize()

        device = "lazy"
        print("Initialized backend")
    else:
        device = device.lower()

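    # Select a tiny 2-example subset of IMDb so this example runs quickly.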
    tokenized_datasets = tokenize_dataset(load_dataset('imdb'))
    small_train_dataset = tokenized_datasets['train'].shuffle(seed=42) \
        .select(range(2))
    train_dataloader = DataLoader(small_train_dataset, shuffle=True,
                                  batch_size=8)

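    # --full_size loads pretrained bert-base-cased; by default a much smaller,
    # randomly initialized BERT configuration is used so lowering stays fast.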
    if full_size:
        model = BertForSequenceClassification.from_pretrained('bert-base-cased',
                                                              num_labels=2)
    else:
        configuration = BertConfig(
            vocab_size=28996,
            hidden_size=32,
            num_hidden_layers=1,
            num_attention_heads=2,
            intermediate_size=32,
            hidden_act='gelu',
            hidden_dropout_prob=0.0,
            attention_probs_dropout_prob=0.0,
            max_position_embeddings=512,
            layer_norm_eps=1.0e-05,
        )
        model = BertForSequenceClassification(configuration)
    model.to(device)

    num_epochs = 3
    num_training_steps = num_epochs * len(train_dataloader)
    losses = train(model, num_epochs,
                   num_training_steps, train_dataloader, device, not lower_only)

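    # Get debug information from LTC: with --lower_only, mark_step is never
    # called, so the losses stay as unexecuted lazy tensors and we print the
    # backend's view of the traced graph instead of running it.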
    if lower_only:
        print('\nJIT Graph:')
        import torch._C
        graph_str = torch._C._lazy._get_tensors_backend([losses[0]])
        print(graph_str)
    else:
        # Execute computation
        print('Loss: ', losses)


if __name__ == "__main__":
    torch.manual_seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--device",
        type=str.upper,
        choices=["CPU", "TS", "MLIR_EXAMPLE"],
        default="MLIR_EXAMPLE",
        help="The device type",
    )
    parser.add_argument(
        "-l",
        "--lower_only",
        action='store_true',
        default=False,
        help="Only get backend printout -- do not execute computation",
    )
    parser.add_argument(
        "-f",
        "--full_size",
        action='store_true',
        default=False,
        help="Use the full-size BERT model instead of a smaller parameterization",
    )
    args = parser.parse_args()

    main(args.device, args.lower_only, args.full_size)