Commit 9457b8c

Merge pull request #570 from jaybdub/linear_functional_converter
Linear functional converter
2 parents e01279c + ddb3558 commit 9457b8c

File tree: 7 files changed (+219, -12 lines)


CHANGELOG.md

Lines changed: 5 additions & 0 deletions
```diff
@@ -2,6 +2,11 @@
 
 ## [Master]
 
+- Added converter for ``torch.nn.functional.layer_norm``
+- Added converter for ``torch.nn.functional.gelu``
+- Added converter for ``torch.nn.functional.linear``
+- Added converter for ``torch.nn.functional.silu``
+
 ## [0.2.0] - 03/02/2021
 
 - Added converter for ``torch.Tensor.flatten``
```
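For context, a minimal usage sketch of these functional converters, assuming torch2trt with this commit, TensorRT, and a CUDA device are available; the `TinyBlock` module and shapes are illustrative, not part of the commit:

```python
import torch
from torch2trt import torch2trt


class TinyBlock(torch.nn.Module):
    """Hypothetical module whose forward dispatches through the new converters."""
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(10, 5)      # calls torch.nn.functional.linear
        self.norm = torch.nn.LayerNorm(5)     # calls torch.nn.functional.layer_norm

    def forward(self, x):
        return torch.nn.functional.gelu(self.norm(self.fc(x)))


model = TinyBlock().cuda().eval()
x = torch.randn(1, 10).cuda()

# Build a TensorRT engine and compare against the PyTorch output
model_trt = torch2trt(model, [x])
print(torch.max(torch.abs(model(x) - model_trt(x))))
```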

torch2trt/converters/Linear.py

Lines changed: 12 additions & 11 deletions
```diff
@@ -2,34 +2,35 @@
 from torch2trt.module_test import add_module_test
 
 
-@tensorrt_converter('torch.nn.Linear.forward')
+@tensorrt_converter('torch.nn.functional.linear')
 def convert_Linear(ctx):
-    module = ctx.method_args[0]
-    input = ctx.method_args[1]
+    input = ctx.method_args[0]
+    weight = get_arg(ctx, 'weight', 1, None)
+    bias = get_arg(ctx, 'bias', 2, None)
     input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
     output = ctx.method_return
 
     # reshape to ...xNx1x1
     layer = ctx.network.add_shuffle(input_trt)
     layer.reshape_dims = tuple(input_trt.shape) + (1, 1)
 
-    bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
-    if module.bias is not None:
-        bias = module.bias.detach().cpu().numpy()
+    bias_trt = trt.Weights(torch_dtype_to_trt(weight.dtype))
+    if bias is not None:
+        bias_trt = bias.detach().cpu().numpy()
 
     # add fully connected
     layer = ctx.network.add_fully_connected(
         input=layer.get_output(0),
-        num_outputs=module.out_features,
-        kernel=module.weight.detach().cpu().numpy(),
-        bias=bias)
+        num_outputs=int(weight.shape[0]),
+        kernel=weight.detach().cpu().numpy(),
+        bias=bias_trt)
 
     # reshape back to N
     layer = ctx.network.add_shuffle(layer.get_output(0))
     layer.reshape_dims = tuple(output.shape[1:])
 
     output._trt = layer.get_output(0)
-
+
 
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 10)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)])
@@ -42,4 +43,4 @@ def test_Linear_basic():
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)])
 def test_Linear_no_bias():
-    return torch.nn.Linear(10, 5, bias=False)
+    return torch.nn.Linear(10, 5, bias=False)
```
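The converter now targets `torch.nn.functional.linear` directly and maps it onto a TensorRT fully-connected layer with `kernel = weight` and `num_outputs = weight.shape[0]`, after reshaping the input to `...xNx1x1`. A minimal pure-PyTorch sketch of the identity this relies on (shapes are illustrative, no TensorRT required):

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 10)   # (..., in_features)
w = torch.randn(5, 10)      # (out_features, in_features)
b = torch.randn(5)

# F.linear(x, w, b) computes x @ w.T + b, which is what the fully-connected
# layer expresses once the input is reshaped to (..., in_features, 1, 1)
print(torch.allclose(F.linear(x, w, b), x.matmul(w.t()) + b, atol=1e-5))
```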

torch2trt/converters/__init__.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -28,11 +28,13 @@
 from .div import *
 from .expand import *
 from .floordiv import *
+from .gelu import *
 from .getitem import *
+from .group_norm import *
 from .identity import *
 from .instance_norm import *
 from .interpolate import *
-from .group_norm import *
+from .layer_norm import *
 from .max import *
 from .max_pool2d import *
 from .mean import *
@@ -50,6 +52,7 @@
 from .relu import *
 from .relu6 import *
 from .sigmoid import *
+from .silu import *
 from .softmax import *
 from .split import *
 from .stack import *
```

torch2trt/converters/gelu.py

Lines changed: 63 additions & 0 deletions
```python
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test
import math


@tensorrt_converter('torch.nn.functional.gelu')
def convert_gelu_v1(ctx):
    # approximate equation 1 from paper
    input = get_arg(ctx, 'input', 0, None)
    output = ctx.method_return

    x, c05, c1, cs2pi, c044, c3 = add_missing_trt_tensors(
        ctx.network,
        [input, 0.5, 1.0, math.sqrt(2.0 / math.pi), 0.044715, 3.0]
    )

    x, c05, c1, cs2pi, c044, c3 = broadcast_trt_tensors(
        ctx.network,
        [x, c05, c1, cs2pi, c044, c3],
        len(output.shape) - 1
    )

    y = ctx.network.add_elementwise(x, c3, trt.ElementWiseOperation.POW).get_output(0)
    y = ctx.network.add_elementwise(y, c044, trt.ElementWiseOperation.PROD).get_output(0)
    y = ctx.network.add_elementwise(x, y, trt.ElementWiseOperation.SUM).get_output(0)
    y = ctx.network.add_elementwise(y, cs2pi, trt.ElementWiseOperation.PROD).get_output(0)
    y = ctx.network.add_activation(y, trt.ActivationType.TANH).get_output(0)
    y = ctx.network.add_elementwise(y, c1, trt.ElementWiseOperation.SUM).get_output(0)
    y = ctx.network.add_elementwise(x, y, trt.ElementWiseOperation.PROD).get_output(0)
    y = ctx.network.add_elementwise(y, c05, trt.ElementWiseOperation.PROD).get_output(0)

    output._trt = y


# @tensorrt_converter('torch.nn.functional.gelu')
# def convert_gelu_v2(ctx):
#     # sigmoid approximation of GELU
#     input = get_arg(ctx, 'input', 0, None)
#     output = ctx.method_return

#     x, c1702 = add_missing_trt_tensors(
#         ctx.network,
#         [input, 1.702]
#     )

#     x, c1702 = broadcast_trt_tensors(
#         ctx.network,
#         [x, c1702],
#         len(output.shape) - 1
#     )

#     y = ctx.network.add_elementwise(x, c1702, trt.ElementWiseOperation.PROD).get_output(0)
#     y = ctx.network.add_activation(y, trt.ActivationType.SIGMOID).get_output(0)
#     y = ctx.network.add_elementwise(x, y, trt.ElementWiseOperation.PROD).get_output(0)

#     output._trt = y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)])
def test_gelu():
    return torch.nn.GELU()
```
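The active converter builds the tanh approximation of GELU out of elementwise POW/PROD/SUM and a TANH activation. A minimal PyTorch sketch of that formula (the `gelu_tanh_reference` helper is illustrative, not part of the commit), checked against the exact erf-based `F.gelu` with a loose tolerance since the tanh form is only an approximation:

```python
import math
import torch
import torch.nn.functional as F


def gelu_tanh_reference(x):
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))),
    # the same expression the converter assembles from TensorRT layers
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))


x = torch.randn(1, 5, 3)
print(torch.allclose(gelu_tanh_reference(x), F.gelu(x), atol=1e-2))
```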

torch2trt/converters/layer_norm.py

Lines changed: 103 additions & 0 deletions
```python
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.functional.layer_norm')
def convert_layernorm(ctx):
    input = get_arg(ctx, 'input', 0, None)
    shape = get_arg(ctx, 'normalized_shape', 1, None)
    weight = get_arg(ctx, 'weight', 2, None)
    bias = get_arg(ctx, 'bias', 3, None)
    eps = get_arg(ctx, 'eps', 4, 1e-05)
    output = ctx.method_return

    input_trt, eps_trt = add_missing_trt_tensors(
        ctx.network,
        [input, eps]
    )

    input_trt, eps_trt = broadcast_trt_tensors(
        ctx.network,
        [input_trt, eps_trt],
        len(output.shape) - 1
    )

    if weight is not None:
        _, weight_trt = add_missing_trt_tensors(
            ctx.network,
            [input, weight]
        )
        _, weight_trt = broadcast_trt_tensors(
            ctx.network,
            [input_trt, weight_trt],
            len(output.shape) - 1
        )

    if bias is not None:
        _, bias_trt = add_missing_trt_tensors(
            ctx.network,
            [input, bias]
        )
        _, bias_trt = broadcast_trt_tensors(
            ctx.network,
            [input_trt, bias_trt],
            len(output.shape) - 1
        )

    if isinstance(shape, int):
        shape = (shape,)
    dim = tuple([-i - 1 for i in range(len(shape))])
    dim = torch_dim_resolve_negative(dim, len(input.shape))
    axes = torch_dim_to_trt_axes(dim)

    ux = ctx.network.add_reduce(input_trt, trt.ReduceOperation.AVG, axes, keep_dims=True).get_output(0)
    numerator = ctx.network.add_elementwise(input_trt, ux, trt.ElementWiseOperation.SUB).get_output(0)
    varx = ctx.network.add_elementwise(numerator, numerator, trt.ElementWiseOperation.PROD).get_output(0)
    varx = ctx.network.add_reduce(varx, trt.ReduceOperation.AVG, axes, keep_dims=True).get_output(0)
    denom = ctx.network.add_elementwise(varx, eps_trt, trt.ElementWiseOperation.SUM).get_output(0)
    denom = ctx.network.add_unary(denom, trt.UnaryOperation.SQRT).get_output(0)
    y = ctx.network.add_elementwise(numerator, denom, trt.ElementWiseOperation.DIV).get_output(0)

    if weight is not None:
        y = ctx.network.add_elementwise(y, weight_trt, trt.ElementWiseOperation.PROD).get_output(0)

    if bias is not None:
        y = ctx.network.add_elementwise(y, bias_trt, trt.ElementWiseOperation.SUM).get_output(0)

    output._trt = y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)])
def test_layer_norm_1d():
    return torch.nn.LayerNorm(3)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)])
def test_layer_norm_2d():
    return torch.nn.LayerNorm((5, 3))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)])
def test_layer_norm_3d():
    return torch.nn.LayerNorm((5, 5, 3))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)])
def test_layer_norm_1d_nonaffine():
    return torch.nn.LayerNorm(3, elementwise_affine=False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)])
def test_layer_norm_2d_nonaffine():
    return torch.nn.LayerNorm((5, 3), elementwise_affine=False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)])
def test_layer_norm_3d_nonaffine():
    return torch.nn.LayerNorm((5, 5, 3), elementwise_affine=False)
```
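The converter normalizes over the trailing `normalized_shape` dimensions using a biased variance and adds `eps` inside the square root. A minimal PyTorch sketch of that computation (the `layer_norm_reference` helper is illustrative, not part of the commit), checked against `F.layer_norm`:

```python
import torch
import torch.nn.functional as F


def layer_norm_reference(x, normalized_shape, weight=None, bias=None, eps=1e-5):
    # mean and biased variance over the trailing normalized dims,
    # y = (x - mean) / sqrt(var + eps) * weight + bias
    dims = tuple(range(-len(normalized_shape), 0))
    mean = x.mean(dims, keepdim=True)
    var = ((x - mean) ** 2).mean(dims, keepdim=True)
    y = (x - mean) / torch.sqrt(var + eps)
    if weight is not None:
        y = y * weight
    if bias is not None:
        y = y + bias
    return y


x = torch.randn(1, 5, 5, 3)
print(torch.allclose(layer_norm_reference(x, (5, 3)), F.layer_norm(x, (5, 3)), atol=1e-5))
```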

torch2trt/converters/silu.py

Lines changed: 21 additions & 0 deletions
```python
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test


@tensorrt_converter('torch.nn.functional.silu')
def convert_silu(ctx):
    input = get_arg(ctx, 'input', pos=0, default=None)
    output = ctx.method_return
    input_trt = add_missing_trt_tensors(ctx.network, [input])[0]

    layer = ctx.network.add_activation(input_trt, trt.ActivationType.SIGMOID)
    layer = ctx.network.add_elementwise(input_trt, layer.get_output(0), trt.ElementWiseOperation.PROD)

    output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)])
def test_silu():
    return torch.nn.SiLU()
```
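For reference, the identity the SIGMOID + PROD layers express, as a short PyTorch check (purely illustrative):

```python
import torch

x = torch.randn(1, 5, 3)
# SiLU(x) = x * sigmoid(x)
print(torch.allclose(x * torch.sigmoid(x), torch.nn.functional.silu(x)))
```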

torch2trt/torch2trt.py

Lines changed: 11 additions & 0 deletions
```diff
@@ -87,6 +87,17 @@ def trt_num_outputs(engine):
     return count
 
 
+def torch_dim_resolve_negative(dim, ndim):
+    if not isinstance(dim, tuple):
+        dim = (dim,)
+    pos = []
+    for d in dim:
+        if d < 0:
+            d = ndim + d
+        pos.append(d)
+    return tuple(pos)
+
+
 def torch_dim_to_trt_axes(dim):
     """Converts torch dim, or tuple of dims to a tensorrt axes bitmask"""
     if not isinstance(dim, tuple):
```
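A quick sketch of how the new helper is used by the layer_norm converter to turn negative trailing dims into positive indices before building the reduce-axes bitmask (assumes torch2trt with this commit and TensorRT are installed, since importing the module pulls in tensorrt):

```python
from torch2trt.torch2trt import torch_dim_resolve_negative

# layer_norm over the last two dims of a 4-D tensor: (-1, -2) -> (3, 2)
print(torch_dim_resolve_negative((-1, -2), 4))   # (3, 2)

# a single int is wrapped into a tuple: -1 in a 3-D tensor -> (2,)
print(torch_dim_resolve_negative(-1, 3))         # (2,)
```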
