mirror of https://github.com/llvm/torch-mlir
Bert example and relevant shape inference functions (#831)
parent 406d1e7538
commit de5b380143
@@ -0,0 +1,135 @@
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Also available under a BSD-style license. See LICENSE.
"""
Runs a training of the Bert model using the Lazy Tensor Core with the
example Torch MLIR backend.

Most of the code in this example was copied from the wonderful tutorial
https://huggingface.co/transformers/training.html#fine-tuning-in-native-pytorch

Based on LTC code samples by ramiro050
https://github.com/ramiro050/lazy-tensor-samples
"""

import argparse
import torch
from datasets import load_dataset
from datasets.dataset_dict import DatasetDict
from torch.utils.data import DataLoader
from transformers import BertForSequenceClassification, \
    BertTokenizer, AdamW, get_scheduler
from typing import List
def tokenize_dataset(dataset: DatasetDict) -> DatasetDict:
    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length",
                         truncation=True)

    tokenized_datasets = dataset.map(tokenize_function, batched=True)
    tokenized_datasets = tokenized_datasets.remove_columns(['text'])
    tokenized_datasets = tokenized_datasets.rename_column('label', 'labels')
    tokenized_datasets.set_format('torch')

    return tokenized_datasets
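For reference (not part of the diff), a hedged sketch of what tokenize_dataset is expected to produce: the IMDb splits keep the tokenizer outputs as torch tensor columns plus the renamed labels column, with the raw text column removed.

# Sketch only -- assumes the 'imdb' dataset and 'bert-base-cased' tokenizer
# used above are available locally or can be downloaded.
from datasets import load_dataset

tokenized = tokenize_dataset(load_dataset('imdb'))
print(tokenized['train'].column_names)
# Expected to include 'labels', 'input_ids', 'token_type_ids' and
# 'attention_mask', all formatted as torch tensors.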
def train(model: BertForSequenceClassification,
          num_epochs: int,
          num_training_steps: int,
          train_dataloader: DataLoader,
          device: torch.device,
          do_mark_step: bool) -> List[torch.Tensor]:
    optimizer = AdamW(model.parameters(), lr=5e-5)
    lr_scheduler = get_scheduler('linear', optimizer=optimizer,
                                 num_warmup_steps=0,
                                 num_training_steps=num_training_steps)

    model.train()
    losses = []
    for _ in range(num_epochs):
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            loss.backward()
            losses.append(loss)

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

            if do_mark_step and 'lazy' in str(model.device):
                print("Calling Mark Step")
                torch._lazy.mark_step()

    return losses
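The do_mark_step branch in train is what actually flushes work to the lazy backend: operations on 'lazy' tensors are only traced until torch._lazy.mark_step() is called. A minimal sketch of that pattern with the TorchScript reference backend (not part of the diff; assumes a PyTorch build that ships torch._lazy):

import torch
import torch._lazy
import torch._lazy.ts_backend

torch._lazy.ts_backend.init()

x = torch.ones(2, 2, device='lazy')
y = x + x                  # traced, not yet executed
torch._lazy.mark_step()    # cut the trace and run the accumulated graph
print(y.cpu())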
def main(device, lower_only):
    if device in ("TS", "MLIR_EXAMPLE"):
        import torch._lazy

        if device == "TS":
            import torch._lazy.ts_backend

            torch._lazy.ts_backend.init()

        elif device == "MLIR_EXAMPLE":
            import ltc_backend.ltc_backend._EXAMPLE_MLIR_BACKEND as ltc_backend

            ltc_backend._initialize()

        device = "lazy"
        print("Initialized backend")
    else:
        device = device.lower()

    tokenized_datasets = tokenize_dataset(load_dataset('imdb'))
    small_train_dataset = tokenized_datasets['train'].shuffle(seed=42) \
        .select(range(2))

    train_dataloader = DataLoader(small_train_dataset, shuffle=True,
                                  batch_size=8)
    model = BertForSequenceClassification.from_pretrained('bert-base-cased',
                                                          num_labels=2)
    model.to(device)

    num_epochs = 3
    num_training_steps = num_epochs * len(train_dataloader)
    losses = train(model, num_epochs,
                   num_training_steps, train_dataloader, device, not lower_only)

    if lower_only:
        print('\nJIT Graph:')
        import torch._C
        graph_str = torch._C._lazy._get_tensors_backend([losses[0]])
        print(graph_str)
    else:
        # Execute computation
        print('Loss: ', losses)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--device",
        type=str.upper,
        choices=["CPU", "TS", "MLIR_EXAMPLE"],
        default="MLIR_EXAMPLE",
        help="The device type",
    )
    parser.add_argument(
        "-l",
        "--lower_only",
        action='store_true',
        default=False,
        help="Only get backend printout -- do not execute computation",
    )
    args = parser.parse_args()
    main(args.device, args.lower_only)
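To exercise the example end to end, the entry point can be driven through the CLI flags declared above (-d/--device, -l/--lower_only) or by calling main directly; a hedged sketch, assuming the file has been imported as a module:

# Equivalent to: python <this script> --device TS --lower_only
main("TS", lower_only=True)     # prints the lowered JIT graph without executing

# Equivalent to: python <this script> --device MLIR_EXAMPLE
main("MLIR_EXAMPLE", lower_only=False)   # runs training and prints the losses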
@@ -9,10 +9,127 @@

#include "LazyShapeInference.h"
#include "../utils/exception.h"
#include <cmath>

namespace torch {
namespace lazy {

// TODO(henrytu): Upstream these shape inference functions to PyTorch in the future.

// Turns any negative index positive (assuming it's valid)
int64_t normalize_index(int64_t index, unsigned dims) {
  return index < 0 ? (int64_t)dims + index : index;
}

std::vector<Shape>
compute_shape_dropout(const at::Tensor& input, double p, bool train) {
  return {Shape(input.scalar_type(), input.sizes().vec())};
}

std::vector<Shape> compute_shape_layer_norm(
    const at::Tensor& input, at::IntArrayRef normalized_shape,
    const c10::optional<at::Tensor>& weight,
    const c10::optional<at::Tensor>& bias, double eps, bool cudnn_enable) {
  return {Shape(input.scalar_type(), input.sizes().vec())};
}
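Both compute_shape_dropout and compute_shape_layer_norm simply report the input shape, which matches eager PyTorch behaviour; a quick check (not part of the diff):

import torch
import torch.nn.functional as F

x = torch.zeros(2, 5, 8)
# Both ops are shape-preserving, which is all the backend needs to know here.
assert F.dropout(x, p=0.5, training=True).shape == x.shape
assert F.layer_norm(x, normalized_shape=(8,)).shape == x.shape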
std::vector<Shape>
compute_shape_matmul(const at::Tensor& self, const at::Tensor& other) {
  std::vector<int64_t> sizes;

  auto self_sizes = self.sizes().vec();
  auto other_sizes = other.sizes().vec();

  // For tensors with dimensions >2, the leading dimensions are for batch info.
  // The last 2 (or 1 in the case of a single dim tensor) dimensions are the
  // matrix dimensions themselves, which is checked to ensure the matmul op
  // is legal.
  //
  // Example:
  // [1, 2, 3, 4] -> [1, 2] batch dims and [3, 4] matrix
  // [1, 4, 5]    -> [1] batch dims and [4, 5] matrix
  // [4, 5]       -> [] batch dims and [4, 5] matrix
  // [5]          -> [] batch dims and [5] matrix
  //
  // We'll start by splitting the shapes as described above.
  auto partition_shape = [](at::ArrayRef<int64_t> sizes) {
    if (sizes.size() <= 2) {
      return std::make_pair(
          std::vector<int64_t>(),
          std::vector<int64_t>(sizes.begin(), sizes.end()));
    } else {
      std::size_t partition_idx = sizes.size() - 2;
      return std::make_pair(
          std::vector<int64_t>(sizes.begin(), sizes.begin() + partition_idx),
          std::vector<int64_t>(sizes.begin() + partition_idx, sizes.end()));
    }
  };
  auto [self_batch_sizes, self_matrix_sizes] = partition_shape(self_sizes);
  auto [other_batch_sizes, other_matrix_sizes] = partition_shape(other_sizes);

  // Insert batch dimensions.
  // The final list of sizes will be based on the tensor w/ more dims.
  // Individual dimension sizes are "right justified" as we iterate thru
  // to pick the larger dimension between them.
  //  0 1 1 3 4
  //      5 1 2
  //  ---------
  //  0 1 5 3 4 <- Result
  int64_t self_size, other_size;
  std::size_t num_batch_dim =
      std::max(self_batch_sizes.size(), other_batch_sizes.size());
  auto get_batch_dim = [&](std::vector<int64_t> batch_sizes, std::size_t dim) {
    long idx = dim - num_batch_dim + batch_sizes.size();
    // Negative index means out of bounds, which defaults to a dim size of 1.
    return idx < 0 ? 1 : batch_sizes[idx];
  };
  for (std::size_t i = 0; i < num_batch_dim; i++) {
    self_size = get_batch_dim(self_batch_sizes, i);
    other_size = get_batch_dim(other_batch_sizes, i);

    TORCH_CHECK(
        self_size == 1 || other_size == 1 || self_size == other_size,
        "At trailing dimension ", i, ", expected for dimensions ",
        "to either match or have one of them equal one, but got ", self_size,
        " and ", other_size, " instead!");

    sizes.push_back(std::max(self_size, other_size));
  }

  // Keep track of the inner dimensions of matmul to validate op is valid.
  std::pair<int64_t, int64_t> inner_sizes;
  if (self_matrix_sizes.size() == 1 && other_matrix_sizes.size() == 1) {
    // Dot-Product -- scalar output, so no dimensions inserted
    inner_sizes = std::make_pair(self_matrix_sizes[0], other_matrix_sizes[0]);
  } else if (self_matrix_sizes.size() == 1 && other_matrix_sizes.size() == 2) {
    // Vector-Matrix product (m) @ (m, n) -> (n)
    inner_sizes = std::make_pair(self_matrix_sizes[0], other_matrix_sizes[0]);

    sizes.push_back(other_matrix_sizes[1]);
  } else if (self_matrix_sizes.size() == 2 && other_matrix_sizes.size() == 1) {
    // Matrix-Vector product (m, n) @ (n) -> (m)
    inner_sizes = std::make_pair(self_matrix_sizes[1], other_matrix_sizes[0]);

    sizes.push_back(self_matrix_sizes[0]);
  } else if (self_matrix_sizes.size() == 2 && other_matrix_sizes.size() == 2) {
    // Matrix-Matrix product (m, n) @ (n, o) -> (m, o)
    inner_sizes = std::make_pair(self_matrix_sizes[1], other_matrix_sizes[0]);

    sizes.push_back(self_matrix_sizes[0]);
    sizes.push_back(other_matrix_sizes[1]);
  } else {
    // By this time, self_matrix_sizes and other_matrix_sizes should have at
    // most 2 dims, so if this is executed something has gone wrong...
    TORCH_CHECK(false, "Invalid matmul shape combination!");
  }

  TORCH_CHECK(
      inner_sizes.first == inner_sizes.second, "Inner dimension of matrix (",
      inner_sizes.first, ") does not ", "match (", inner_sizes.second, ")!");

  return {Shape(self.scalar_type(), sizes)};
}
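The batch-broadcasting and matrix rules implemented in compute_shape_matmul mirror what torch.matmul does in eager mode; a small sanity sketch (not part of the diff):

import torch

# Batch dims broadcast right-aligned; the trailing two dims multiply as matrices.
a = torch.zeros(1, 2, 3, 4)   # batch [1, 2], matrix [3, 4]
b = torch.zeros(2, 4, 5)      # batch [2],    matrix [4, 5]
assert torch.matmul(a, b).shape == (1, 2, 3, 5)

# Vector-matrix (m) @ (m, n) -> (n), and dot product (m) @ (m) -> scalar.
assert torch.matmul(torch.zeros(4), torch.zeros(4, 5)).shape == (5,)
assert torch.matmul(torch.zeros(4), torch.zeros(4)).shape == ()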
std::vector<Shape> compute_shape_native_batch_norm(
    const at::Tensor& input, const c10::optional<at::Tensor>& weight,
    const c10::optional<at::Tensor>& bias,

@@ -33,5 +150,129 @@ std::vector<Shape> compute_shape_native_batch_norm(
  return shapes;
}
std::vector<Shape>
compute_shape_reshape(const at::Tensor& self, at::IntArrayRef shape) {
  // Make a copy of the desired output shape.
  std::vector<int64_t> sizes(shape.begin(), shape.end());

  // Product of all sizes in input shape is the number of entries in tensor.
  int64_t num_entries = 1;
  for (int64_t i : self.sizes().vec()) {
    num_entries *= i;
  }

  // Validate the number of entries in the desired shape. If there is a wildcard
  // dimension, we need to find it now in order to populate it.
  long wildcard_idx = -1;
  int64_t num_concrete_entries = 1;
  for (std::size_t idx = 0; idx < sizes.size(); idx++) {
    if (sizes[idx] != -1) {
      num_concrete_entries *= sizes[idx];
    } else {
      TORCH_CHECK(wildcard_idx == -1, "only one dimension can be inferred");
      wildcard_idx = idx;
    }
  }

  if (wildcard_idx == -1) {
    // No wildcard, the shape should already be known.
    TORCH_CHECK(
        num_entries == num_concrete_entries, "shape `[", sizes,
        "]` is invalid for input of size ", num_concrete_entries);
  } else {
    // There is one dimension which is not explicitly declared -- we need to
    // infer.
    TORCH_CHECK(
        num_entries % num_concrete_entries == 0, "shape `[", sizes,
        "]` is invalid for input of size ", num_concrete_entries);

    sizes[wildcard_idx] = num_entries / num_concrete_entries;
  }

  return {Shape(self.scalar_type(), sizes)};
}
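compute_shape_reshape reproduces the -1 wildcard inference of torch.reshape; for example (not part of the diff):

import torch

x = torch.zeros(2, 3, 4)                   # 24 entries in total
assert x.reshape(4, -1).shape == (4, 6)    # wildcard dim inferred as 24 / 4
assert x.reshape(2, 12).shape == (2, 12)   # no wildcard, sizes must match exactly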
std::vector<Shape> compute_shape_rsub(
    const at::Tensor& self, const at::Scalar& other, const at::Scalar& alpha) {
  // Since other is scalar, the result will match tensor shape.
  return {Shape(self.scalar_type(), self.sizes().vec())};
}

std::vector<Shape>
compute_shape_select(const at::Tensor& self, int64_t dim, int64_t index) {
  auto original_shape = self.sizes().vec();
  std::vector<int64_t> sizes(original_shape.begin(), original_shape.end());

  TORCH_CHECK(
      dim < (int64_t)sizes.size(), "Dimension ", dim,
      " is out of bounds for tensor with ", sizes.size(), " dimensions!");
  TORCH_CHECK(
      index < sizes[dim], "Index ", index,
      " is out of bounds for dimension of size ", sizes[dim]);
  sizes.erase(sizes.begin() + dim);

  return {Shape(self.scalar_type(), sizes)};
}
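compute_shape_select drops the indexed dimension from the shape, matching Tensor.select; e.g. (not part of the diff):

import torch

x = torch.zeros(2, 3, 4)
assert x.select(1, 0).shape == (2, 4)   # dim 1 is removed from the shape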
std::vector<Shape> compute_shape_slice(
    const at::Tensor& self, int64_t dim, c10::optional<int64_t> start,
    c10::optional<int64_t> end, int64_t step) {
  auto original_shape = self.sizes().vec();
  std::vector<int64_t> sizes(original_shape.begin(), original_shape.end());

  int64_t dim_size = sizes[dim];

  // Index may be negative, so we must normalize it.
  int64_t start_norm = normalize_index(start.value(), dim_size);
  int64_t end_norm = normalize_index(end.value(), dim_size);

  if (start_norm >= end_norm || start_norm >= dim_size || end_norm <= 0) {
    // Slice is out of bounds, nothing in range.
    sizes[dim] = 0;
  } else {
    // Clamp upper and lower bound to valid indices.
    start_norm = std::max((int64_t)0, start_norm);
    end_norm = std::min(dim_size, end_norm);

    // Final size is determined by step and interval size.
    sizes[dim] = std::ceil((double)(end_norm - start_norm) / (double)step);
  }

  return {Shape(self.scalar_type(), sizes)};
}
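The size formula in compute_shape_slice (clamp, then ceiling of the interval length divided by the step) matches Python slicing on tensors; for instance (not part of the diff):

import torch

x = torch.zeros(10)
assert x[1:8:3].shape == (3,)   # ceil((8 - 1) / 3) = 3 -> indices 1, 4, 7
assert x[-3:].shape == (3,)     # negative start normalized to 7
assert x[8:2].shape == (0,)     # empty range collapses the dim to 0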
std::vector<Shape> compute_shape_softmax(
    const at::Tensor& self, int64_t dim, c10::optional<at::ScalarType> dtype) {
  if (dtype.has_value()) {
    return {Shape(dtype.value(), self.sizes().vec())};
  }
  return {Shape(self.scalar_type(), self.sizes().vec())};
}
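compute_shape_softmax keeps the input shape and only switches the element type when an explicit dtype is passed, as in eager mode (not part of the diff):

import torch

x = torch.zeros(2, 3)
assert torch.softmax(x, dim=-1).shape == (2, 3)
assert torch.softmax(x, dim=-1, dtype=torch.float64).dtype == torch.float64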
std::vector<Shape>
compute_shape_transpose(const at::Tensor& self, int64_t dim0, int64_t dim1) {
  auto original_shape = self.sizes().vec();
  std::vector<int64_t> sizes{original_shape.begin(), original_shape.end()};

  // Index may be negative, so we must normalize it. We create new variables
  // instead of replacing the existing ones so that in the case of an error,
  // the original values can be printed out.
  int64_t dim0_norm = normalize_index(dim0, sizes.size());
  int64_t dim1_norm = normalize_index(dim1, sizes.size());

  // Verify dimensions are valid.
  TORCH_CHECK(
      0 <= dim0_norm && dim0_norm < (int64_t)sizes.size(), "dim0 has value ",
      dim0, ", but there are only ", sizes.size(), " tensor dimensions");
  TORCH_CHECK(
      0 <= dim1_norm && dim1_norm < (int64_t)sizes.size(), "dim1 has value ",
      dim1, ", but there are only ", sizes.size(), " tensor dimensions");

  // Swap shapes at dimensions.
  std::swap(sizes[dim0_norm], sizes[dim1_norm]);

  return {Shape(self.scalar_type(), sizes)};
}
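compute_shape_transpose normalizes negative dimensions and then swaps the two sizes, in line with Tensor.transpose (not part of the diff):

import torch

x = torch.zeros(2, 3, 4)
assert x.transpose(0, 2).shape == (4, 3, 2)
assert x.transpose(-1, 0).shape == (4, 3, 2)   # -1 normalizes to dim 2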
} // namespace lazy
} // namespace torch