# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""kernel decorator and related util functions"""
import ast
import json
from functools import wraps
from itertools import product
import numpy
from mindspore import context, log
def _allocate(shape, dtype='float32', scope='global'):
    """Allocate a zero-filled buffer with the given shape.

    Parameters
    ----------
    shape: Tuple
        The shape of the tensor to be allocated
    dtype: string
        The data type of the tensor
    scope: string
        The storage scope of the tensor. It is accepted for signature
        compatibility only and is ignored by this Python runtime stub.

    Returns
    -------
    tensor: numpy.array
        The tensor allocated
    """
    del scope
    # Allocate directly in the requested dtype instead of creating a float64
    # buffer first and then copying it through astype().
    return numpy.zeros(shape, dtype=dtype)
def _rsqrt(x):
    """
    Element-wise reciprocal of the square root of x.

    Parameters
    ----------
    x: Tensor

    Returns
    -------
    res: Tensor
        The value of 1 / sqrt(x), computed element by element
    """
    numerator = numpy.ones_like(x)
    denominator = numpy.sqrt(x)
    return numerator / denominator
def _erf(x):
    """
    Erf function of x, aka erf(x) = 2 / sqrt(pi) * integral(exp(-t*t), t = 0..x).
    The algorithm comes from Handbook of Mathematical Functions, formula 7.1.26.

    Parameters
    ----------
    x: a real number, or an array of real numbers (evaluated element-wise)

    Returns
    -------
    res: a real number, or an array with the same shape as x
        The result of erf function
    """
    # The approximation is evaluated on |x|; the sign is restored at the end
    # through the identity erf(-x) = -erf(x).
    magnitude = numpy.abs(x)

    # constants
    a1 = 0.254829592
    a2 = -0.284496736
    a3 = 1.421413741
    a4 = -1.453152027
    a5 = 1.061405429
    p = 0.3275911

    # A&S formula 7.1.26
    t = 1.0 / (1.0 + p * magnitude)
    y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * numpy.exp(-magnitude * magnitude)

    # numpy.where selects the sign per element, so array inputs work where a
    # plain `if x >= 0` would be ambiguous. Indexing with `[()]` turns the 0-d
    # result produced for scalar inputs back into a numpy scalar and leaves
    # real arrays untouched.
    return numpy.where(numpy.asarray(x) >= 0, y, -y)[()]
def _grid(extents):
    """
    Iterate over every index tuple of a multi-dimensional grid.

    Parameters
    ----------
    extents: iterable of int
        The size of each dimension

    Returns
    -------
    res: iterator of tuples
        The Cartesian product of range(ext) for each ext in extents
    """
    return product(*(range(extent) for extent in extents))
class WithStub:
    """
    Runtime support for with scope intrin in Hybrid DSL.

    An instance is callable and returns itself, and it is also a context
    manager, so ``with stub(args):`` simply runs its body when the DSL
    function is executed as ordinary Python.
    """

    def __init__(self):
        pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        del exc_type, exc_value, exc_traceback
        # Return False so that an exception raised inside the with body keeps
        # propagating; a truthy return value would silently swallow it.
        return False

    def __del__(self):
        # Nothing to release; the return value of a finalizer is ignored.
        pass

    def __call__(self, *arg, **kwargs):
        # Arguments are ignored; the same stub is reused as the context manager.
        return self
class VariableUsage(ast.NodeVisitor):
    """
    The ast visitor to perform static check for the source code,
    and determine the index of inplace assign outputs.

    The class attributes map the names of the Hybrid DSL intrinsics to the
    Python callables used when a DSL function runs as a plain Python function.
    The visitor methods raise ValueError or TypeError as soon as the source
    uses a construct that the checks below reject.
    """

    # Intrinsics that declare a tensor; the checker requires exactly two arguments.
    intrin_buffer = {
        'allocate': _allocate,
        'output_tensor': _allocate
    }

    # Loop intrinsics.
    intrin_loop = {
        'range': range,
        'serial': range,
        'vectorize': range,
        'parallel': range,
        'reduce': range,
        'grid': _grid,
    }

    # Intrinsics accepted as the context expression of a with statement.
    intrin_with_scope = {
        'attr': WithStub(),
        'block_realize': WithStub(),
    }

    # Intrinsics that the checker requires to be called with exactly one argument.
    intrin_unary_op = {
        'sqrt': numpy.sqrt,
        'sign': numpy.sign,
        'log': numpy.log,
        'tanh': numpy.tanh,
        'exp': numpy.exp,
        'abs': numpy.abs,
        'int32': numpy.int32,
        'float16': numpy.float16,
        'float32': numpy.float32,
    }

    # Intrinsics that the checker requires to be called with exactly two arguments.
    intrin_bin_op = {
        'power': numpy.power,
    }

    intrin_globals = {
        **intrin_buffer,
        **intrin_loop,
        **intrin_with_scope,
        **intrin_unary_op,
        **intrin_bin_op,
    }

    # One-argument intrinsics belonging to the "general" group, which
    # visit_Call rejects when the device target is Ascend.
    intrin_general_unary_op = {
        'rsqrt': _rsqrt,
        'erf': _erf,
        'isnan': numpy.isnan,
        'int8': numpy.int8,
        'int16': numpy.int16,
        'int64': numpy.int64,
        'float64': numpy.float64,
        'sin': numpy.sin,
        'cos': numpy.cos,
        'isinf': numpy.isinf,
        'isfinite': numpy.isfinite,
        'atan': numpy.arctan,
        'expm1': numpy.expm1,
        'floor': numpy.floor,
        'ceil': numpy.ceil,
        'trunc': numpy.trunc,
        'round': numpy.round,
    }

    # Intrinsics rejected by visit_Call when the device target is CPU.
    intrin_cpu_not_support = ["atan2", "expm1", "float16"]

    # Two-argument intrinsics belonging to the "general" group.
    # numpy.arctan2 takes two operands, so 'atan2' is listed here rather than
    # with the unary intrinsics, where the arity check would demand one argument.
    intrin_general_bin_op = {
        'ceil_div': lambda a, b: (a + b - 1) // b,
        'atan2': numpy.arctan2,
    }

    intrin_general = {
        **intrin_general_unary_op,
        **intrin_general_bin_op
    }

    # Every name injected into the globals of a DSL function at run time.
    intrin_runtime = {
        **intrin_globals,
        **intrin_general
    }

    def __init__(self, func_name):
        self.func_name = func_name
        # Stack of the enclosing FunctionDef / For nodes of the node being visited.
        self.scope_level = []
        # Pairs [output index, input index] computed by visit_Return.
        self.inplace_assign_output = []
        # Argument name -> position in the function signature.
        self.args_index = {}
        # Variable name -> (first node seen, scope of declaration, set of ctx types seen later).
        self.status = {}
        # Names assigned from output_tensor(...) and allocate(...) respectively.
        self.output_tensor = []
        self.temp_tensor = []
        self.device = context.get_context('device_target')

    @staticmethod
    def _literal_value(node):
        """
        Return ``(True, value)`` when node is a literal constant, else ``(False, None)``.

        Literals are parsed into ast.Constant since Python 3.8. The dedicated
        Str/Num/NameConstant classes produced by older parsers are matched by
        class name, so the deprecated attributes of the ast module are never touched.
        """
        if isinstance(node, ast.Constant):
            return True, node.value
        legacy_field = {'Str': 's', 'Num': 'n', 'NameConstant': 'value'}.get(type(node).__name__)
        if legacy_field is not None:
            return True, getattr(node, legacy_field)
        return False, None

    @classmethod
    def _is_str_literal(cls, node):
        """Return True when node is a string literal."""
        is_literal, value = cls._literal_value(node)
        return is_literal and isinstance(value, str)

    def visit_FunctionDef(self, node):
        """
        Ast visitor for FunctionDef
        collect all input tensors
        """
        self.scope_level.append(node)
        for idx, arg in enumerate(node.args.args):
            self.args_index[arg.arg] = idx
        for elem in node.body:
            self.visit(elem)

    def visit_For(self, node):
        """
        Ast visitor for For loop
        append and pop Ast.For node as scope
        """
        # The iterable is checked in the enclosing scope; the loop target itself is not visited.
        self.visit(node.iter)
        self.scope_level.append(node)
        for i in node.body:
            self.visit(i)
        self.scope_level.pop()

    def visit_Name(self, node):
        """
        Ast visitor for Name
        Check the use of variables, including
        - whether it is defined
        - whether it is used inside its scope
        """
        # If it is from the argument list or loop variable, we do not worry about it!
        if node.id in self.args_index:
            return
        # NOTE: this reads `target.id`, i.e. it assumes that every enclosing loop has a single-name target.
        fors = [loop.target.id for loop in self.scope_level if isinstance(loop, ast.For)]
        if node.id in fors:
            # The loop variable cannot be overwritten when iteration
            if isinstance(node.ctx, ast.Store):
                raise ValueError(
                    "In the function {} written in the Hybrid DSL, "
                    "iter var cannot be overwritten: {}".format(self.func_name, node.id))
            return

        if node.id not in self.status:
            # First occurrence: it must be a declaration (a store).
            if not isinstance(node.ctx, ast.Store):
                raise ValueError(
                    "In the function {} written in the Hybrid DSL, there is "
                    "a undeclared variable: {}".format(self.func_name, node.id))
            self.status[node.id] = (node, self.scope_level[-1], set())
        else:
            decl, loop, usage = self.status[node.id]
            usage.add(type(node.ctx))
            # The scope of the declaration must still be on the scope stack.
            if loop not in self.scope_level:
                raise ValueError(
                    "In the function {} written in the Hybrid DSL, there is "
                    "a variable used out of the scope it is defined: {}".format(self.func_name, node.id))
            self.status[node.id] = (decl, loop, usage)

    def visit_Call(self, node):
        """
        Ast visitor for Call
        Check the func call used in the DSL. Only those in intrin_runtime are supported for now.

        Raises ValueError for a callee that is not a plain name, is not a known
        intrinsic, or is not available on the current device, and TypeError
        when the number of positional arguments does not match the intrinsic.
        """
        # Calls such as `a.foo()` have no `func.id`; report them as a DSL error
        # rather than failing with an AttributeError.
        if not isinstance(node.func, ast.Name):
            raise ValueError(
                "In the function {} written in the Hybrid DSL, only calls to intrinsic functions by name "
                "are supported, but get a call through {}".format(self.func_name, type(node.func)))
        func_id = node.func.id
        tables = VariableUsage

        if func_id not in tables.intrin_runtime and \
                func_id not in ('max', 'min', 'len', 'kernel', 'ms_kernel'):
            raise ValueError(
                "In the function {} written in the Hybrid DSL, function call id {} "
                "not in intrinsics' list".format(self.func_name, func_id))

        if (self.device == "Ascend" and func_id in tables.intrin_general) or \
                (self.device == "CPU" and func_id in tables.intrin_cpu_not_support):
            raise ValueError(
                "In the function {} written in the Hybrid DSL, function {} is not available on the "
                "device {}".format(self.func_name, func_id, self.device))

        expects_one = func_id in tables.intrin_unary_op or func_id in tables.intrin_general_unary_op
        if expects_one and len(node.args) != 1:
            raise TypeError(
                "In the function {} written in the Hybrid DSL, function {} "
                "expects one input, but get {}".format(self.func_name, func_id, len(node.args)))

        expects_two = func_id in tables.intrin_bin_op or func_id in tables.intrin_general_bin_op or \
            func_id in tables.intrin_buffer
        if expects_two and len(node.args) != 2:
            raise TypeError(
                "In the function {} written in the Hybrid DSL, function {} "
                "expects two inputs, but get {}".format(self.func_name, func_id, len(node.args)))

        for elem in node.args:
            self.visit(elem)

    def visit_With(self, node):
        """
        Ast visitor for With
        Check the func used in the with scope. Only attr and block_realize are supported for now.
        """
        # Only the first item of the with statement is inspected.
        context_expr = node.items[0].context_expr
        if not (isinstance(context_expr, ast.Call) and isinstance(context_expr.func, ast.Name)):
            raise ValueError(
                "Unsupported expression in With scope in the function {} written in the Hybrid DSL: "
                "{}".format(self.func_name, type(context_expr)))

        scope_name = context_expr.func.id
        scope_args = context_expr.args
        if scope_name == "attr":
            if len(scope_args) != 2:
                raise ValueError(
                    "In the function {} written in the Hybrid DSL, two inputs are expected by 'attr', "
                    "but get {}".format(self.func_name, len(scope_args)))
            if not self._is_str_literal(scope_args[0]):
                raise ValueError(
                    "In the function {} written in the Hybrid DSL, the first input of 'attr' should be a string, "
                    "but get {}".format(self.func_name, type(scope_args[0])))
            # bool is a subclass of int, so True/False are accepted; None, bytes and Ellipsis are not.
            is_literal, value = self._literal_value(scope_args[1])
            if not (is_literal and isinstance(value, (str, int, float, complex))):
                raise ValueError(
                    "In the function {} written in the Hybrid DSL, the second input of 'attr' should be a string, "
                    "number or bool value, but get {}".format(self.func_name, type(scope_args[1])))
        elif scope_name == "block_realize":
            if len(scope_args) != 1:
                raise ValueError(
                    "In the function {} written in the Hybrid DSL, only one input is accepted by 'block_realize', "
                    "but get {}".format(self.func_name, len(scope_args)))
            if not isinstance(scope_args[0], ast.Name):
                raise ValueError(
                    "In the function {} written in the Hybrid DSL, the input of 'block_realize' should be "
                    "a tensor name, but get {}".format(self.func_name, type(scope_args[0])))
        else:
            raise ValueError(
                "Unsupported function in With scope in the function {} written in the Hybrid DSL: "
                "{} ".format(self.func_name, scope_name))

        for stmt in node.body:
            # A bare string literal used as a statement (a docstring-like comment) is skipped.
            if isinstance(stmt, ast.Expr) and self._is_str_literal(stmt.value):
                continue
            self.visit(stmt)

    def visit_Assign(self, node):
        """
        Ast visitor for Assign
        Collect all tensor declared by allocate and output_tensor
        """
        if len(node.targets) > 1:
            raise ValueError(
                "One statement with multiple assignments is not allowed in the function {} "
                "written in the Hybrid DSL.".format(self.func_name))
        if isinstance(node.targets[0], ast.Name) and isinstance(node.value, ast.Call) and \
                isinstance(node.value.func, ast.Name):
            assign_id = node.targets[0].id
            func_name = node.value.func.id
            if assign_id in self.output_tensor or assign_id in self.temp_tensor:
                raise ValueError(
                    "In the function {} written in the Hybrid DSL, the tensor is "
                    "redefined: {}".format(self.func_name, assign_id))
            if func_name == "allocate":
                self.temp_tensor.append(assign_id)
            if func_name == "output_tensor":
                self.output_tensor.append(assign_id)
        # Continue into the targets and the value so that names and calls are checked as well.
        return self.generic_visit(node)

    def visit_Break(self, node):
        """
        Ast visitor for Break
        Throw an error if the key word break in the DSL
        """
        del node
        raise TypeError(
            "Keyword 'break' not accepted in the function {} written in the Hybrid DSL!".format(self.func_name))

    def visit_Continue(self, node):
        """
        Ast visitor for Continue
        Throw an error if the key word continue in the DSL
        """
        del node
        raise TypeError(
            "Keyword 'continue' not accepted in the function {} written in the Hybrid DSL!".format(self.func_name))

    def visit_While(self, node):
        """
        Ast visitor for While
        Throw an error if the key word while in the DSL
        """
        del node
        raise TypeError(
            "Keyword 'while' not accepted in the function {} written in the Hybrid DSL!".format(self.func_name))

    def visit_Attribute(self, node):
        """
        Ast visitor for Attribute
        Throw an error if the attribute is neither shape nor dtype.
        """
        if not isinstance(node.value, ast.Name):
            raise ValueError(
                "In the function {} written in the Hybrid DSL, getattr is only supported for a tensor object, "
                "not for the object with type: {}".format(self.func_name, type(node.value)))

        owner = node.value.id
        if owner not in self.output_tensor and owner not in self.temp_tensor and owner not in self.args_index:
            raise ValueError(
                "In the function {} written in the Hybrid DSL, getattr is only supported for a tensor variable "
                "after its declaration, not for: {}".format(self.func_name, owner))

        if node.attr not in ('shape', 'dtype'):
            raise ValueError(
                "In the function {} written in the Hybrid DSL, a tensor object "
                "has no attribute called {}".format(self.func_name, node.attr))

    def visit_Return(self, node):
        """
        Ast visitor for Return
        Calculate all inplace_assign index, namely which output is in fact an input
        """
        if isinstance(node.value, ast.Name):
            symbols = [node.value.id]
        else:
            if not isinstance(node.value, ast.Tuple):
                raise TypeError(
                    "In the function {} written in the Hybrid DSL, the return value should be "
                    "either a single tensor or a tuple, but get a {}.".format(self.func_name, type(node.value)))
            for i in node.value.elts:
                if not isinstance(i, ast.Name):
                    raise TypeError("In the function {} written in the Hybrid DSL, the element in the return value "
                                    "should be the name of a tensor, but get a {}.".format(self.func_name, type(i)))
            symbols = [i.id for i in node.value.elts]

        # Every returned name must be an input or a tensor declared by output_tensor ...
        for sy in symbols:
            if sy not in self.args_index and sy not in self.output_tensor:
                raise TypeError("In the function {} written in the Hybrid DSL, the element in the return value "
                                "should be either an input tensor or a tensor allocated by output_tensor, "
                                "but get name: {}".format(self.func_name, sy))
        # ... and every tensor declared by output_tensor must be returned.
        for sy in self.output_tensor:
            if sy not in symbols:
                raise TypeError("In the function {} written in the Hybrid DSL, the tensor is allocated as an output "
                                "tensor but not in the return value: {}".format(self.func_name, sy))

        # A returned input is an inplace-assigned output: record [output position, input position].
        self.inplace_assign_output = [[idx, self.args_index[val]]
                                      for idx, val in enumerate(symbols)
                                      if val in self.args_index]
def determine_variable_usage(root, func_name):
    """
    Run the static check of VariableUsage over the source code,
    and report the index of inplace assign outputs

    Parameters
    ----------
    root: an ast tree root
    func_name: the name of the function, used in the error messages of the check

    Returns
    -------
    inplace_assign_output: a list
        The list of index about inplace assign outputs
    """
    checker = VariableUsage(func_name)
    checker.visit(root)
    return checker.inplace_assign_output
def kernel(fn=None, reg_info=None, compile_attrs=None):
    """
    The decorator of the Hybrid DSL function for the Custom Op.
    When a function written by the Hybrid DSL is decorated by kernel,
    it can be run as a usual Python function.
    Also, this function can be used in the api Custom and to create :class:`mindspore.ops.Custom`, with func_type
    "hybrid" or "pyfunc". Creating :class:`mindspore.ops.Custom` with mode "hybrid" by the Hybrid DSL function
    will enjoy the automatic dtype/shape infer for free.

    Args:
        fn (Function): The Python function that will be run as a custom operator. Default: None.
        reg_info (Union[str, dict, tuple]): Each item represents registration information in json format.
            Default: None.
        compile_attrs (Dict): The Python object is used to distinguish the compiled function. Its keys must be
            strings. The dict passed in is not modified. Default: None.

    Returns:
        Function, if `fn` is not None, returns a callable function that will execute the Hybrid DSL function;
        If `fn` is None, returns a decorator and when this decorator invokes with a single `fn` argument, the
        callable function is equal to the case when `fn` is not None.

    Raises:
        TypeError: If `compile_attrs` is not a dict, if one of its keys is not a str,
            or if `reg_info` is neither None nor a str, dict or tuple.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import ops, Tensor
        >>> from mindspore.ops import kernel, DataType, CustomRegOp
        ...
        >>> # Create a dict for the compile flags.
        >>> attrs = {
        ...     "test1": True,
        ...     "test2": "good",
        ...     "test3": 12,
        ... }
        >>> # Create the reg info json string.
        >>> op_gpu_info = CustomRegOp() \\
        ...     .input(0, "a") \\
        ...     .input(0, "b") \\
        ...     .output(0, "y") \\
        ...     .dtype_format(DataType.F32_None, DataType.F32_None, DataType.F32_None) \\
        ...     .target("GPU") \\
        ...     .get_op_info()
        >>>
        >>> # Create inputs for the custom op.
        >>> input_x = np.ones([4, 4]).astype(np.float32)
        >>> input_y = np.ones([4, 4]).astype(np.float32)
        ...
        >>> # Write a Hybrid DSL function through the decorator @kernel.
        >>> # We can also pass the compile attrs and the reg info through the decorator.
        >>> @kernel(reg_info=op_gpu_info, compile_attrs=attrs)
        ... def outer_product(a, b):
        ...     c = output_tensor(a.shape, a.dtype)
        ...
        ...     with block_realize(c):
        ...         for i0 in range(a.shape[0]):
        ...             for i1 in range(b.shape[1]):
        ...                 c[i0, i1] = 0.0
        ...                 for i2 in range(a.shape[1]):
        ...                     c[i0, i1] = c[i0, i1] + (a[i0, i2] * b[i2, i1])
        ...     return c
        ...
        >>> # We can use the function directly as a python function.
        >>> # In this case, the inputs should be numpy arrays.
        >>> result = outer_product(input_x, input_y)
        ...
        >>> # Create a custom op with mode "hybrid" (default value) by the Hybrid DSL function.
        >>> # In this case, we will enjoy the automatic dtype/shape infer for free.
        >>> # The inputs should be mindspore tensors.
        >>> test_op_hybrid = ops.Custom(outer_product)
        >>> output = test_op_hybrid(Tensor(input_x), Tensor(input_y))
    """
    if compile_attrs is None:
        compile_attrs = {}

    if not isinstance(compile_attrs, dict):
        raise TypeError("The input 'compile_attrs' of @kernel must be a dict, "
                        "but get a {}".format(type(compile_attrs)))

    for key in compile_attrs:
        if not isinstance(key, str):
            raise TypeError("The key of 'compile_attrs' of @kernel must be a str, "
                            "but get a {}".format(type(key)))

    if reg_info is not None and not isinstance(reg_info, (str, dict, tuple)):
        raise TypeError(
            "The input 'reg_info' of @kernel should be one of "
            "str, dict and tuple, but get a {}".format(type(reg_info)))

    def wrap_ms_kernel(func):
        setattr(func, "ms_kernel_flag", True)

        # Work on a copy so that neither the caller's dict nor later uses of
        # this decorator see the flag added below.
        attrs = dict(compile_attrs)
        # we enable ml scheduler automatically for kernel function
        if context.get_context('device_target') == "Ascend":
            attrs["enable_polytops"] = "always"

        # The attributes are stored on the function as a json string.
        setattr(func, "compile_attrs", json.dumps(attrs))
        if reg_info is not None:
            setattr(func, "reg_info", reg_info)

        @wraps(func)
        def _patch_intrins_to_runtime(*args):
            # Make the intrinsic names resolvable by injecting their Python
            # implementations into the globals of the DSL function before calling it.
            func.__globals__.update(VariableUsage.intrin_runtime)
            return func(*args)

        return _patch_intrins_to_runtime

    if fn is not None:
        return wrap_ms_kernel(fn)
    return wrap_ms_kernel
def ms_kernel(fn=None, reg_info=None, compile_attrs=None):
    """
    Same as decorator kernel. ms_kernel will be deprecated in the future.
    Please use kernel instead.

    A deprecation warning is logged on every call, then all the arguments
    are forwarded to kernel unchanged and its result is returned.

    Supported Platforms:
        Deprecated
    """
    deprecation_notice = ("'ms_kernel' is deprecated from version 1.8 and "
                          "will be removed in a future version, use 'kernel' instead.")
    log.warning(deprecation_notice)
    return kernel(fn=fn, reg_info=reg_info, compile_attrs=compile_attrs)