diff --git a/imperative/python/megengine/__init__.py b/imperative/python/megengine/__init__.py index ef9b67d604df795a4a7580fe76b1f31eccebb419..8ab657e47d403d4a16e7af0d2961625cecd2f890 100644 --- a/imperative/python/megengine/__init__.py +++ b/imperative/python/megengine/__init__.py @@ -74,8 +74,7 @@ from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func from .device import * from .logger import enable_debug_log, get_logger, set_log_file, set_log_level from .serialization import load, save -from .tensor import Tensor, tensor -from .tensor_nn import Buffer, Parameter +from .tensor import Parameter, Tensor, tensor from .version import __version__ _set_fork_exec_path_for_timed_func( diff --git a/imperative/python/megengine/autodiff/grad_manager.py b/imperative/python/megengine/autodiff/grad_manager.py index f1790c3f55246a92934eadf4bfd7e4f7dd6a4101..be806cab078ae11358d70d1edc363c233d7d45c9 100644 --- a/imperative/python/megengine/autodiff/grad_manager.py +++ b/imperative/python/megengine/autodiff/grad_manager.py @@ -22,7 +22,7 @@ class GradManager: self._after_backward_callback = [] self._gradients = dict() - def register(self, params, callbacks=None): + def attach(self, params, callbacks=None): if callbacks is None: callbacks = [] if isinstance(callbacks, Callable): @@ -62,7 +62,7 @@ class GradManager: if isinstance(grad, Future): grad = grad.get() param = self._param_dict[p] - if getattr(param, "grad", None) is None: + if param.grad is None: param.grad = grad else: param.grad += grad @@ -70,9 +70,9 @@ class GradManager: self._stop_record() backwarding_grad_manager = cache - def __enter__(self): + def record(self): if self._recording: - return self + raise RuntimeError("already recording") grad = Grad() self._recording = True self._grad = grad @@ -88,16 +88,22 @@ class GradManager: grad.wrt(param_wrapper, callback=callback) grad.__enter__() - return self - def __exit__(self, exc_type, exc_val, exc_tb): + def release(self): + if not self._recording: + raise RuntimeError("not recording") self._stop_record() - record = __enter__ - def _stop_record(self): if self._grad is not None: self._grad.__exit__(None, None, None) self._recording = False self._grad = None self._gradients = dict() + + def __enter__(self): + self.record() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._stop_record() diff --git a/imperative/python/megengine/core/ops/_internal/misc_ops.py b/imperative/python/megengine/core/ops/_internal/misc_ops.py index e02ddee95c8a693df7f39cbc492f1152ebd27bcd..a257efc769a7a29e4aff9ca671f1fb42741a4120 100644 --- a/imperative/python/megengine/core/ops/_internal/misc_ops.py +++ b/imperative/python/megengine/core/ops/_internal/misc_ops.py @@ -70,7 +70,7 @@ class Dimshuffle(PodOpVisitor): return bytes(ctypes.c_uint32(0)) + bytes(self) def __init__(self, pattern, ndim=0): - assert isinstance(pattern, collections.Iterable) + assert isinstance(pattern, collections.abc.Iterable) assert len(pattern) <= TensorShape.MAX_NDIM pattern_array = Dimshuffle.Pattern.Pattern_Array() for idx, v in enumerate(pattern): diff --git a/imperative/python/megengine/core/tensor/megbrain_graph.py b/imperative/python/megengine/core/tensor/megbrain_graph.py index af96ec2d17d182d6a86e9a7eec43a639ec7cdbe9..da2c6f6c4fc347c79abaa8007466ed24388683b7 100644 --- a/imperative/python/megengine/core/tensor/megbrain_graph.py +++ b/imperative/python/megengine/core/tensor/megbrain_graph.py @@ -231,13 +231,13 @@ class OpNode: def _wrap(x): - if isinstance(x, collections.Sequence): + if 
isinstance(x, collections.abc.Sequence): return type(x)(map(_wrap, x)) return x.graph._wrap(x) def _unwrap(x): - if isinstance(x, collections.Sequence): + if isinstance(x, collections.abc.Sequence): return type(x)(map(_unwrap, x)) return x._node diff --git a/imperative/python/megengine/core/tensor/tensor_wrapper.py b/imperative/python/megengine/core/tensor/tensor_wrapper.py index 722d5dc9033958413daecbde5a63ea9fe9fbdbb9..946c6f9612364248e5ec63c62c1ac3a48c01ff74 100644 --- a/imperative/python/megengine/core/tensor/tensor_wrapper.py +++ b/imperative/python/megengine/core/tensor/tensor_wrapper.py @@ -166,7 +166,7 @@ def _reduce(mode): op = builtin.Reduce(mode=mode, axis=0) (result,) = apply(op, data) - elif isinstance(axis, collections.Iterable): + elif isinstance(axis, collections.abc.Iterable): axis = list(axis) axis.sort(reverse=True) @@ -204,7 +204,9 @@ def _todo(*_): def _expand_args(args): if len(args) == 1: - if isinstance(args[0], (collections.Sequence, TensorBase, TensorWrapperBase)): + if isinstance( + args[0], (collections.abc.Sequence, TensorBase, TensorWrapperBase) + ): args = args[0] return args diff --git a/imperative/python/megengine/core/tensor/utils.py b/imperative/python/megengine/core/tensor/utils.py index b700c1cdb9b78b1bea784d1f2c89cd8d680cdaa9..9c795fbe56c521c9bd3c303d741a66e413a15e58 100644 --- a/imperative/python/megengine/core/tensor/utils.py +++ b/imperative/python/megengine/core/tensor/utils.py @@ -143,7 +143,7 @@ def astensor1d(x, *reference, dtype=None, device=None): (x,) = Const(x, dtype=dtype, device=device)(*reference) return x - if not isinstance(x, collections.Sequence): + if not isinstance(x, collections.abc.Sequence): raise TypeError if any(isinstance(i, (TensorBase, TensorWrapperBase)) for i in x): diff --git a/imperative/python/megengine/functional/math.py b/imperative/python/megengine/functional/math.py index 1bdf308069c3793a5154d7256a8e636240c5586b..bc7d9dd88dc80967e1d60df7ea1924b2a248b67f 100644 --- a/imperative/python/megengine/functional/math.py +++ b/imperative/python/megengine/functional/math.py @@ -432,7 +432,7 @@ def argmin( [0] """ - if isinstance(axis, collections.Iterable): + if isinstance(axis, collections.abc.Iterable): axis = list(axis) axis.sort(reverse=True) @@ -486,7 +486,7 @@ def argmax( [5] """ - if isinstance(axis, collections.Iterable): + if isinstance(axis, collections.abc.Iterable): axis = list(axis) axis.sort(reverse=True) diff --git a/imperative/python/megengine/functional/types.py b/imperative/python/megengine/functional/types.py index 465ca03ce68f02d3944ddb87f5b0d4abde5ef9f9..03f0709eee10c05df800147998831b71d2db7e5c 100644 --- a/imperative/python/megengine/functional/types.py +++ b/imperative/python/megengine/functional/types.py @@ -15,7 +15,7 @@ def get_ndtuple(value, *, n, allow_zero=True): :type allow_zero: bool :param allow_zero: whether to allow zero tuple value""" - if not isinstance(value, collections.Iterable): + if not isinstance(value, collections.abc.Iterable): value = int(value) value = tuple([value for i in range(n)]) else: diff --git a/imperative/python/megengine/jit/tracing.py b/imperative/python/megengine/jit/tracing.py index 2d272dbaabb41927962307017c33c98c1e6234aa..c660a5bb6b426826318e4f20bbb4104a95a45bdc 100644 --- a/imperative/python/megengine/jit/tracing.py +++ b/imperative/python/megengine/jit/tracing.py @@ -502,7 +502,7 @@ class trace: raise TypeError( "cannot specify output_names when output is already in dict format" ) - if output_names and not isinstance(output_names, collections.Sequence): + if 
output_names and not isinstance(output_names, collections.abc.Sequence): output_names = (output_names,) if output_names and len(output_names) != len(self._output_bindings): raise ValueError( @@ -510,7 +510,7 @@ class trace: len(self._output_bindings) ) ) - if arg_names and not isinstance(arg_names, collections.Sequence): + if arg_names and not isinstance(arg_names, collections.abc.Sequence): arg_names = (arg_names,) if arg_names and len(arg_names) != len(self._arg_bindings): raise ValueError( @@ -646,9 +646,9 @@ class trace: def _process_outputs(self, outputs): output_names = None - if isinstance(outputs, collections.Mapping): + if isinstance(outputs, collections.abc.Mapping): output_names, outputs = zip(*sorted(outputs.items())) - elif not isinstance(outputs, collections.Sequence): + elif not isinstance(outputs, collections.abc.Sequence): outputs = (outputs,) if not self._untraced: diff --git a/imperative/python/megengine/module/__init__.py b/imperative/python/megengine/module/__init__.py index a10228f20eae774f3a09ab0288d39787963d581f..916000d08cbcacba0176cf28ac9b4a48072f757f 100644 --- a/imperative/python/megengine/module/__init__.py +++ b/imperative/python/megengine/module/__init__.py @@ -18,7 +18,6 @@ from .embedding import Embedding from .identity import Identity from .linear import Linear from .module import Module -from .parampack import ParamPack from .pooling import AvgPool2d, MaxPool2d from .quant_dequant import DequantStub, QuantStub from .sequential import Sequential diff --git a/imperative/python/megengine/module/activation.py b/imperative/python/megengine/module/activation.py index 025844ed03ac6803d438f3686537e5f88e98641e..52533a45c927375dd62d66c19e07e6e8bd75aae0 100644 --- a/imperative/python/megengine/module/activation.py +++ b/imperative/python/megengine/module/activation.py @@ -9,7 +9,7 @@ import numpy as np from ..functional import leaky_relu, prelu, relu, sigmoid, softmax -from ..tensor_nn import Parameter +from ..tensor import Parameter from .module import Module diff --git a/imperative/python/megengine/module/batchnorm.py b/imperative/python/megengine/module/batchnorm.py index 9c1fa824a7b0e57290f5d8f41069b4f982631716..16436af9cb710c3af7fb6ab57c5376c92c46fe7a 100644 --- a/imperative/python/megengine/module/batchnorm.py +++ b/imperative/python/megengine/module/batchnorm.py @@ -12,7 +12,7 @@ import numpy as np from ..distributed.group import WORLD, Group from ..functional import batch_norm2d, sync_batch_norm -from ..tensor_nn import Buffer, Parameter, Tensor +from ..tensor import Parameter, Tensor from . 
import init from .module import Module @@ -45,8 +45,8 @@ class _BatchNorm(Module): tshape = (1, self.num_features, 1, 1) if self.track_running_stats: - self.running_mean = Buffer(np.zeros(tshape, dtype=np.float32)) - self.running_var = Buffer(np.ones(tshape, dtype=np.float32)) + self.running_mean = Tensor(np.zeros(tshape, dtype=np.float32)) + self.running_var = Tensor(np.ones(tshape, dtype=np.float32)) else: self.running_mean = None self.running_var = None diff --git a/imperative/python/megengine/module/conv.py b/imperative/python/megengine/module/conv.py index 74699818d91828021638a22435f78f109bc810bb..db4d1cb12f3d86a0415624ccc7dd806b028f90fb 100644 --- a/imperative/python/megengine/module/conv.py +++ b/imperative/python/megengine/module/conv.py @@ -13,7 +13,7 @@ import numpy as np from ..core.ops._internal import param_defs as P from ..functional import conv2d, conv_transpose2d, local_conv2d, relu from ..functional.types import _pair, _pair_nonzero -from ..tensor_nn import Parameter +from ..tensor import Parameter from . import init from .module import Module diff --git a/imperative/python/megengine/module/embedding.py b/imperative/python/megengine/module/embedding.py index 15c196517cefaecb0e146707affde9affdccf098..4a281be2c4c4effe8d161ef5ff2ab608fc272654 100644 --- a/imperative/python/megengine/module/embedding.py +++ b/imperative/python/megengine/module/embedding.py @@ -11,7 +11,7 @@ from typing import Optional import numpy as np from ..functional import embedding as embedding_func -from ..tensor_nn import Parameter +from ..tensor import Parameter from . import init from .module import Module @@ -72,6 +72,7 @@ class Embedding(Module): max_norm: Optional[float] = None, norm_type: Optional[float] = None, initial_weight: Parameter = None, + freeze: bool = False, ): super().__init__() if padding_idx is not None: @@ -83,6 +84,7 @@ class Embedding(Module): self.norm_type = norm_type self.num_embeddings = num_embeddings self.embedding_dim = embedding_dim + self.freeze = freeze if initial_weight is None: self.weight = Parameter( np.random.uniform( @@ -101,7 +103,11 @@ class Embedding(Module): init.normal_(self.weight) def forward(self, inputs): - return embedding_func(inputs, self.weight) + if self.freeze: + weight = self.weight.detach() + else: + weight = self.weight + return embedding_func(inputs, weight) @classmethod def from_pretrained( @@ -166,6 +172,6 @@ class Embedding(Module): padding_idx=padding_idx, max_norm=max_norm, norm_type=norm_type, + freeze=freeze, ) - embedding.weight.requires_grad = not freeze return embedding diff --git a/imperative/python/megengine/module/init.py b/imperative/python/megengine/module/init.py index 7d8e06f86f265cb9c6ae9e7c52566e74fee03558..4c8151a4f167d1c1559e15268ab297eb4781ea2e 100644 --- a/imperative/python/megengine/module/init.py +++ b/imperative/python/megengine/module/init.py @@ -23,7 +23,7 @@ def fill_(tensor: Tensor, val: Union[float, int]) -> None: :param tensor: An n-dimentional tensor to be initialized :param val: The value to be filled throughout the tensor """ - tensor.set_value(full(shape=tensor.shape, value=val, dtype=tensor.dtype)) + tensor._reset(full(shape=tensor.shape, value=val, dtype=tensor.dtype)) def zeros_(tensor: Tensor) -> None: @@ -50,7 +50,7 @@ def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: :param a: Lower bound of the sampling interval :param b: Upper bound of the sampling interval """ - tensor.set_value(uniform(tensor.shape, low=a, high=b).astype(tensor.dtype)) + tensor._reset(uniform(tensor.shape, low=a, 
high=b).astype(tensor.dtype)) def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: @@ -61,7 +61,7 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: :param mean: The mean of the normal distribution :param std: The standard deviation of the normal distribution """ - tensor.set_value(gaussian(tensor.shape, mean=mean, std=std).astype(tensor.dtype)) + tensor._reset(gaussian(tensor.shape, mean=mean, std=std).astype(tensor.dtype)) def calculate_gain( diff --git a/imperative/python/megengine/module/linear.py b/imperative/python/megengine/module/linear.py index 34900a2e43b09ce7dcf0ab426d85942342cf72a4..f04c3717c935aace5d2676e951afc304d56ab76d 100644 --- a/imperative/python/megengine/module/linear.py +++ b/imperative/python/megengine/module/linear.py @@ -8,7 +8,7 @@ import numpy as np from ..functional import linear -from ..tensor_nn import Parameter +from ..tensor import Parameter from . import init from .module import Module diff --git a/imperative/python/megengine/module/module.py b/imperative/python/megengine/module/module.py index a0c23dfcacf1711216776b8c91201fa18f28c8a9..9b1dbc284cd81b360b3a1e961bd2ebdd93304108 100644 --- a/imperative/python/megengine/module/module.py +++ b/imperative/python/megengine/module/module.py @@ -5,6 +5,7 @@ # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import warnings from abc import ABCMeta, abstractmethod from collections import OrderedDict from typing import Any, Callable, Iterable, Optional, Set, Tuple, Union @@ -14,8 +15,8 @@ import numpy as np from ..core.tensor.dtype import is_quantize from ..core.tensor.utils import make_shape_tuple from ..logger import get_logger -from ..tensor import Tensor -from ..tensor_nn import Buffer, Parameter +from ..tensor import Parameter, Tensor +from ..utils.deprecation import deprecated from ..utils.hook import HookHandler logger = get_logger(__name__) @@ -48,7 +49,7 @@ def _is_parameter(obj): def _is_buffer(obj): - return isinstance(obj, Buffer) + return isinstance(obj, Tensor) and not isinstance(obj, Parameter) def _is_module(obj): @@ -163,49 +164,43 @@ class Module(metaclass=ABCMeta): seen=seen, ) - def parameters( - self, requires_grad: Optional[bool] = None, recursive: bool = True, **kwargs - ) -> Iterable[Parameter]: + def parameters(self, recursive: bool = True, **kwargs) -> Iterable[Parameter]: r"""Returns an iterable for the :class:`~.Parameter` of the module. - :param requires_grad: Limitation over the :attr:`~.Parameter.requires_grad` - attribute of returned :class:`.Parameter`. ``None`` for no limitation. :param recursive: If ``True``, returns all :class:`~.Parameter` within this module, else only returns :class:`~.Parameter` that are direct attributes of this module. 
""" + if "requires_grad" in kwargs: + del kwargs["requires_grad"] + warnings.warn("passing requires_grad has no effect currently") + def predicate(obj) -> bool: - return _is_parameter(obj) and ( - requires_grad is None or obj.requires_grad == requires_grad - ) + return _is_parameter(obj) yield from self._flatten( with_key=False, predicate=predicate, recursive=recursive, **kwargs ) def named_parameters( - self, - requires_grad: Optional[bool] = None, - prefix: Optional[str] = None, - recursive: bool = True, - **kwargs + self, prefix: Optional[str] = None, recursive: bool = True, **kwargs ) -> Iterable[Tuple[str, Parameter]]: """Returns an iterable for key :class:`~.Parameter` pairs of the module, where ``key`` is the dotted path from this module to the :class:`~.Parameter` . - :param requires_grad: Limitation over the :attr:`~.Parameter.requires_grad` - attribute of returned :class:`~.Parameter` . ``None`` for no limitation. :param prefix: The prefix prepended to the keys. :param recursive: If ``True``, returns all :class:`~.Parameter` within this module, else only returns :class:`~.Parameter` that are direct attributes of this module. """ + if "requires_grad" in kwargs: + del kwargs["requires_grad"] + warnings.warn("passing requires_grad has no effect currently") + def predicate(obj) -> bool: - return _is_parameter(obj) and ( - requires_grad is None or obj.requires_grad == requires_grad - ) + return _is_parameter(obj) yield from self._flatten( with_key=True, @@ -215,11 +210,13 @@ class Module(metaclass=ABCMeta): **kwargs, ) - def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Buffer]: - """Returns an iterable for the :class:`~.Buffer` of the module. + def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Tensor]: + """Returns an iterable for the buffers of the module. - :param recursive: If ``True``, returns all :class:`~.Buffer` within this - module, else only returns :class:`~.Buffer` that are direct attributes + Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. + + :param recursive: If ``True``, returns all buffers within this + module, else only returns buffers that are direct attributes of this module. """ yield from self._flatten( @@ -228,13 +225,15 @@ class Module(metaclass=ABCMeta): def named_buffers( self, prefix: Optional[str] = None, recursive: bool = True, **kwargs - ) -> Iterable[Tuple[str, Buffer]]: - """Returns an iterable for key :class:`~.Buffer` pairs of the module, where - ``key`` is the dotted path from this module to the :class:`~.Buffer` . + ) -> Iterable[Tuple[str, Tensor]]: + """Returns an iterable for key buffer pairs of the module, where + ``key`` is the dotted path from this module to the buffer. + + Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. :param prefix: The prefix prepended to the keys. - :param recursive: If ``True``, returns all :class:`~.Buffer` within this - module, else only returns :class:`~.Buffer` that are direct attributes + :param recursive: If ``True``, returns all buffers within this + module, else only returns buffers that are direct attributes of this module. """ yield from self._flatten( @@ -297,6 +296,7 @@ class Module(metaclass=ABCMeta): for it in self.modules(): fn(it) + @deprecated(version="1.0") def zero_grad(self) -> None: """Set all parameters' grads to zero """ @@ -505,7 +505,7 @@ class Module(metaclass=ABCMeta): # scale/zero_points maybe invalid, use pretrained dtype instead. 
if is_quantize(to_be_load.dtype) and is_quantize(var.dtype): var = var.astype(to_be_load.dtype) - var.set_value(to_be_load) + var._reset(to_be_load) loaded.append(k) return set(loaded), set(skipped) diff --git a/imperative/python/megengine/module/parampack.py b/imperative/python/megengine/module/parampack.py deleted file mode 100644 index feb14c613aa5198c1f1d9bc627302fb3b868f914..0000000000000000000000000000000000000000 --- a/imperative/python/megengine/module/parampack.py +++ /dev/null @@ -1,156 +0,0 @@ -# -*- coding: utf-8 -*- -# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") -# -# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -import collections -from typing import Callable, Iterable, Optional, Tuple - -import numpy as np - -from ..tensor_nn import Parameter, Tensor -from .module import Module - - -class ParamPack(Module): - r"""Pack module's parameters by gathering their memory to continuous address. - Using (device, dtype, requires_grad) as key, for example ('gpu0', float32, True), - parameters with same key will be packed togather. - It helps a lot for multimachine training by speeding up allreduce gradients. - - :param model: the module you want to pack parameters. - :param nr_ignore_first: how many parameters will be unpacked at first. - :param max_size_per_group: upper bound of packed parameters' size in MB. - :param max_nr_params_per_group: upper bound of the number of parameters of each group. - - """ - - def __init__( - self, - model: Module, - nr_ignore_first: int = 8, - max_size_per_group: int = 10, - max_nr_params_per_group: int = 100, - group_func: Callable = lambda name, param: 0, - ): - super().__init__() - self._model = model - self._nr_ignore_first = nr_ignore_first - self._max_size_per_group = max_size_per_group - self._max_nr_params_per_group = max_nr_params_per_group - self._group_func = group_func - self._grouped_params = [] - self._packed_params = [] - - params = model.named_parameters() - self._pack_params(params) - - def parameters(self, requires_grad: Optional[bool] = None) -> Iterable[Parameter]: - for param in self._packed_params: - if requires_grad is None or param.requires_grad == requires_grad: - yield param - - def named_parameters( - self, requires_grad: Optional[bool] = None - ) -> Iterable[Tuple[str, Parameter]]: - for idx, param in enumerate(self._packed_params): - if requires_grad is None or param.requires_grad == requires_grad: - yield "packed_param_" + str(idx), param - - def _pack_params(self, params: Iterable[Tuple[str, Parameter]]): - groups = collections.defaultdict(list) - ignored = 0 - param_id = 0 - for name, param in params: - if self._nr_ignore_first > ignored: - ignored += 1 - self._grouped_params.append([{"shape": param.shape, "id": param_id}]) - param.pack_group_key = self._group_func(name, param) - self._packed_params.append(param) - else: - key = ( - param.dtype, - param.device, - param.requires_grad, - self._group_func(name, param), - ) - groups[key].append({"tensor": param, "id": param_id}) - param_id += 1 - for (dtype, device, requires_grad, group_key) in groups.keys(): - dtype_sz = np.dtype(dtype).itemsize - align = device.mem_align - if align < dtype_sz: - align = 1 - else: - assert align % dtype_sz == 0 - align //= dtype_sz - - group = groups[(dtype, device, requires_grad, 
group_key)] - while group: - aligned_pos = [] - offset = 0 - params = [] - idx = 0 - while idx < len(group): - param = group[idx] - assert param["tensor"].device == device - padding = (align - (offset & (align - 1))) & (align - 1) - offset += padding - aligned_pos.append(offset) - params.append(param) - offset += int(np.prod(param["tensor"].shape)) - idx += 1 - - if ( - offset * dtype_sz >= self._max_size_per_group * 1024 * 1024 - or idx >= self._max_nr_params_per_group - ): - break - group = group[idx:] - if idx == 1: - # ignore param packs with only one item - params[0]["tensor"].pack_group_key = group_key - self._packed_params.append(params[0]["tensor"]) - self._grouped_params.append( - [{"shape": params[0]["tensor"].shape, "id": params[0]["id"]}] - ) - continue - - packed_value = np.zeros((offset,), dtype=dtype) - for param, pos in zip(params, aligned_pos): - val = param["tensor"].numpy() - packed_value[pos : pos + val.size] = val.flatten() - new_param = Parameter( - value=packed_value, - device=device, - dtype=dtype, - requires_grad=requires_grad, - ) - new_param.pack_group_key = group_key - self._packed_params.append(new_param) - self._grouped_params.append( - [{"shape": i["tensor"].shape, "id": i["id"]} for i in params] - ) - - def forward(self, *args, **kwargs): - replace_param = dict() - for i in range(len(self._packed_params)): - packed_param = self._packed_params[i] - grouped_params = self._grouped_params[i] - if len(grouped_params) == 1: - continue - split = param_pack_split( - packed_param._symvar, [i["shape"] for i in grouped_params] - ) - split = [ - Parameter(Tensor(i, requires_grad=packed_param.requires_grad)) - for i in split - ] - for j in range(len(split)): - replace_param[grouped_params[j]["id"]] = split[j] - self._model.replace_param(replace_param, 0) - - return self._model.forward(*args, **kwargs) diff --git a/imperative/python/megengine/module/quantized/conv.py b/imperative/python/megengine/module/quantized/conv.py index 696e4f63ec62577c726cc43934c43aa30b27e995..0710a2b03fdacf049cdf095f4a686bea54b47b01 100644 --- a/imperative/python/megengine/module/quantized/conv.py +++ b/imperative/python/megengine/module/quantized/conv.py @@ -12,7 +12,7 @@ import numpy as np from ... import module as Float from ...core.tensor import dtype from ...functional import conv_bias_activation -from ...tensor_nn import Parameter +from ...tensor import Parameter from ..qat import conv as QAT from .module import QuantizedModule diff --git a/imperative/python/megengine/module/quantized/conv_bn.py b/imperative/python/megengine/module/quantized/conv_bn.py index e7c1de08aec800101d613a16637c89cf215da70d..529c7932d7e5c8160b9d0cf9ae44af4241802915 100644 --- a/imperative/python/megengine/module/quantized/conv_bn.py +++ b/imperative/python/megengine/module/quantized/conv_bn.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-from ...tensor_nn import Parameter +from ...tensor import Parameter from ..qat import conv_bn as QAT from .conv import Conv2d diff --git a/imperative/python/megengine/module/quantized/linear.py b/imperative/python/megengine/module/quantized/linear.py index e42fe266b9e051923a20c62ab22e2a5b07ebb18e..2f26d430052e3c370529e75ab59a04d9beab96fe 100644 --- a/imperative/python/megengine/module/quantized/linear.py +++ b/imperative/python/megengine/module/quantized/linear.py @@ -9,7 +9,7 @@ import numpy as np from ... import functional as F from ...core.tensor import dtype -from ...tensor_nn import Parameter +from ...tensor import Parameter from ..qat import linear as QAT from .module import QuantizedModule diff --git a/imperative/python/megengine/optimizer/adadelta.py b/imperative/python/megengine/optimizer/adadelta.py index 0491cff9225efddbf3f61c1e20c72202c4f65e62..1a9558ea21a0cb9f02c1076adf129c959f04b14b 100644 --- a/imperative/python/megengine/optimizer/adadelta.py +++ b/imperative/python/megengine/optimizer/adadelta.py @@ -11,7 +11,7 @@ from typing import Iterable, Union import numpy as np from ..functional import sqrt -from ..tensor_nn import Parameter +from ..tensor import Parameter from .optimizer import Optimizer @@ -63,7 +63,7 @@ class Adadelta(Optimizer): for param in param_group["params"]: - if not param.requires_grad or "grad" not in param.__dict__: + if param.grad is None: continue states = self._state[param] diff --git a/imperative/python/megengine/optimizer/adagrad.py b/imperative/python/megengine/optimizer/adagrad.py index d0fe8728ae49c622306026bc256aeaeff6bc8652..7a229747912d5fefb92aa7131142690ea910a55b 100644 --- a/imperative/python/megengine/optimizer/adagrad.py +++ b/imperative/python/megengine/optimizer/adagrad.py @@ -11,7 +11,7 @@ from typing import Iterable, Union import numpy as np from ..functional import sqrt -from ..tensor_nn import Parameter +from ..tensor import Parameter from .optimizer import Optimizer @@ -62,7 +62,7 @@ class Adagrad(Optimizer): for param in param_group["params"]: - if not param.requires_grad or "grad" not in param.__dict__: + if param.grad is None: continue states = self._state[param] diff --git a/imperative/python/megengine/optimizer/adam.py b/imperative/python/megengine/optimizer/adam.py index d411945e2e53953553a94be13b10ac5396bc6dcc..dee2b68c0cf469ebf5bb5e47090d0b9d7f5cf06b 100644 --- a/imperative/python/megengine/optimizer/adam.py +++ b/imperative/python/megengine/optimizer/adam.py @@ -8,7 +8,7 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. from typing import Iterable, Tuple, Union -from ..tensor_nn import Parameter +from ..tensor import Parameter from .optimizer import Optimizer @@ -59,7 +59,7 @@ class Adam(Optimizer): for param in param_group["params"]: - if not param.requires_grad or "grad" not in param.__dict__: + if param.grad is None: continue grad = param.grad diff --git a/imperative/python/megengine/optimizer/optimizer.py b/imperative/python/megengine/optimizer/optimizer.py index 2063a5851fd73f39e084babc3f0ecbbe09742ca0..90a51eeb4a7c838ace0322e2b00dbdb1e79b10f9 100644 --- a/imperative/python/megengine/optimizer/optimizer.py +++ b/imperative/python/megengine/optimizer/optimizer.py @@ -7,7 +7,7 @@ # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
from abc import ABCMeta, abstractmethod -from collections import Iterable +from collections.abc import Iterable from contextlib import contextmanager from typing import Dict from typing import Iterable as Iter @@ -15,8 +15,7 @@ from typing import Union import numpy as np -from ..tensor import Tensor, TensorDict -from ..tensor_nn import Buffer, Parameter +from ..tensor import Parameter, Tensor class _RequiredParameter: @@ -37,7 +36,7 @@ class Optimizer(metaclass=ABCMeta): def __init__( # pylint: disable=too-many-branches self, params: Union[Iter[Parameter], dict], defaults: dict, ): - self._state = TensorDict() + self._state = dict() self._defaults = defaults if isinstance(params, (Parameter, dict)): @@ -93,10 +92,6 @@ class Optimizer(metaclass=ABCMeta): "optimizer can only optimize Parameters, but one of the params is " + type(param) ) - if not param.requires_grad: - raise ValueError( - "optimizer can only optimize Parameters with requires_grad=True" - ) for name, default in self._defaults.items(): if default is required and name not in param_group: @@ -122,7 +117,7 @@ class Optimizer(metaclass=ABCMeta): initializer = np.zeros(param.shape, dtype=np.float32) state_dict = self._state.setdefault(param, {}) assert state_name not in state_dict - state = Buffer(initializer) + state = Tensor(initializer) state_dict[state_name] = state @abstractmethod @@ -140,7 +135,7 @@ class Optimizer(metaclass=ABCMeta): params.append(param) return params - def step(self, clear_grad=False): + def step(self): r"""Performs a single optimization step. """ @@ -152,8 +147,7 @@ class Optimizer(metaclass=ABCMeta): "Please use a list instead." ) self._updates(group) - if clear_grad: - self.clear_grad() + return self def clear_grad(self): r"""Clear the grad buffer. @@ -161,8 +155,7 @@ class Optimizer(metaclass=ABCMeta): """ for param_group in self.param_groups: for param in param_group["params"]: - if getattr(param, "grad", None) is not None: - param.grad = None + param.grad = None def state_dict(self) -> Dict: r"""Export the optimizer state. @@ -171,7 +164,7 @@ class Optimizer(metaclass=ABCMeta): """ param_groups = [] state = dict() - param2id = TensorDict() + param2id = dict() cur_id = 0 for group in self.param_groups: @@ -213,8 +206,9 @@ class Optimizer(metaclass=ABCMeta): p = param_new self._state[p] = state["state"][param_saved].copy() for k, v in self._state[p].items(): - if isinstance(v, Buffer): - self._state[p][k] = Buffer(v.numpy()) + if isinstance(v, Tensor): + # TODO: maybe a more efficient way? + self._state[p][k] = Tensor(v.numpy()) if set(group_new.keys()) != set(group_saved.keys()): raise ValueError( diff --git a/imperative/python/megengine/optimizer/sgd.py b/imperative/python/megengine/optimizer/sgd.py index 9215ef48c2a849ed7223963e8372c92f147c068c..fce5b72bb34d0d18f517742cc8e56084916c7591 100644 --- a/imperative/python/megengine/optimizer/sgd.py +++ b/imperative/python/megengine/optimizer/sgd.py @@ -8,7 +8,7 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
from typing import Iterable, Union -from ..tensor_nn import Parameter +from ..tensor import Parameter from .optimizer import Optimizer @@ -52,7 +52,7 @@ class SGD(Optimizer): momentum = param_group["momentum"] for param in param_group["params"]: - if not param.requires_grad or "grad" not in param.__dict__: + if param.grad is None: continue grad = param.grad diff --git a/imperative/python/megengine/quantization/fake_quant.py b/imperative/python/megengine/quantization/fake_quant.py index 941445c310eefe9f5a650578db3a79a75c8cc1b9..06f0cff8deba6c49e27333a6e132a995dd3ccb6f 100644 --- a/imperative/python/megengine/quantization/fake_quant.py +++ b/imperative/python/megengine/quantization/fake_quant.py @@ -14,8 +14,7 @@ from .. import functional as F from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype from ..core.tensor.function import Function from ..module import Module -from ..tensor import Tensor -from ..tensor_nn import Parameter +from ..tensor import Parameter, Tensor from .utils import QuantMode, fake_quant_tensor, get_qparam_dict diff --git a/imperative/python/megengine/quantization/observer.py b/imperative/python/megengine/quantization/observer.py index 65f883842c28e16a954090e76c3f6c92914a8eba..c3ffa77cf1e456b01cc15ea170bdf02b1108374d 100644 --- a/imperative/python/megengine/quantization/observer.py +++ b/imperative/python/megengine/quantization/observer.py @@ -13,7 +13,7 @@ import numpy as np from .. import functional as F from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype from ..module import Module -from ..tensor_nn import Buffer +from ..tensor import Tensor from .utils import QuantMode, Round, get_qparam_dict @@ -82,8 +82,8 @@ class MinMaxObserver(Observer): ): super().__init__(dtype, narrow_range) self.mode = mode - self.min_val = Buffer(np.finfo(np.float32).max, dtype=np.float32) - self.max_val = Buffer(np.finfo(np.float32).min, dtype=np.float32) + self.min_val = Tensor(np.finfo(np.float32).max, dtype=np.float32) + self.max_val = Tensor(np.finfo(np.float32).min, dtype=np.float32) self.scale_limit = eps def _calculate_qparams(self, inp_min_val, inp_max_val): @@ -118,8 +118,8 @@ class MinMaxObserver(Observer): # stop gradient x = x_orig.detach() # find max and min - self.min_val.set_value(F.minimum(self.min_val, x.min())) - self.max_val.set_value(F.maximum(self.max_val, x.max())) + self.min_val._reset(F.minimum(self.min_val, x.min())) + self.max_val._reset(F.maximum(self.max_val, x.max())) return x_orig @@ -133,22 +133,22 @@ class ExponentialMovingAverageObserver(MinMaxObserver): narrow_range: bool = False, ): super().__init__(mode, eps, dtype, narrow_range) - self.momentum = Buffer(momentum) - self.runtime_momentum = Buffer(0.0) + self.momentum = Tensor(momentum) + self.runtime_momentum = Tensor(0.0) def set_momentum(self, momentum): - self.momentum.set_value(momentum) + self.momentum._reset(momentum) def forward(self, x_orig): if self.enabled: # stop gradient x = x_orig.detach() # Exponential Moving Average - self.min_val.set_value( + self.min_val._reset( self.min_val * self.runtime_momentum + (1 - self.runtime_momentum) * x.min() ) - self.max_val.set_value( + self.max_val._reset( self.max_val * self.runtime_momentum + (1 - self.runtime_momentum) * x.max() ) @@ -171,7 +171,7 @@ class HistogramObserver(MinMaxObserver): self.bins = bins self.upsample_rate = upsample_rate self.dst_nbins = _metadata_dict[dtype].qmax - _metadata_dict[dtype].qmin + 1 - self.histogram = Buffer([-1] + [0.0] * (bins - 1)) + self.histogram = Tensor([-1] + [0.0] * (bins - 1)) 
def _non_linear_param_search(self): r"""Non-linear parameter search. @@ -395,9 +395,9 @@ class HistogramObserver(MinMaxObserver): self.bins, ) - self.histogram.set_value(new_histogram) - self.min_val.set_value(new_min) - self.max_val.set_value(new_max) + self.histogram._reset(new_histogram) + self.min_val._reset(new_min) + self.max_val._reset(new_max) def forward(self, x_orig): self.sideeffect_forward(x_orig) diff --git a/imperative/python/megengine/tensor.py b/imperative/python/megengine/tensor.py index 89436323226ea8e10451b7a7ace36e90cef12277..726236e31de7ef4be9a1e91dbd912bdea41d34fb 100644 --- a/imperative/python/megengine/tensor.py +++ b/imperative/python/megengine/tensor.py @@ -14,10 +14,11 @@ from .core import Tensor as _Tensor from .core.ops.builtin import Copy from .core.tensor.core import apply from .device import get_default_device +from .utils.deprecation import deprecated class Tensor(_Tensor): - requires_grad = False + grad = None dmap_callback = None def __init__(self, data, dtype=None, device=None): @@ -26,15 +27,32 @@ class Tensor(_Tensor): self.q_dict = {"mode": None, "scale": None, "zero_point": None} super().__init__(data, dtype=dtype, device=device) + @deprecated(version="1.0", reason="no need to reuse an existing tensor since 1.0") def set_value(self, value): self._reset(value) + @deprecated(version="1.0", reason="use *= 0 instead") def reset_zero(self): self *= 0 def to(self, cn): return apply(Copy(comp_node=cn), self)[0] + @property + def requires_grad(self): + raise AttributeError("requires_grad is reserved for future use") + + @requires_grad.setter + def requires_grad(self, value): + raise AttributeError("requires_grad is reserved for future use") + + @requires_grad.deleter + def requires_grad(self): + raise AttributeError("requires_grad is reserved for future use") + + def __hash__(self): + return id(self) + def __getstate__(self): r""" __getstate__ will be called for pickle serialization or deep copy """ @@ -73,53 +91,6 @@ class Tensor(_Tensor): tensor = Tensor -class Dict(collections.MutableMapping): - def __init__(self, *args, key=None, **kwargs): - self.data = {} - if key: - self.keyfn = key - for i in args: - self.update(i) - self.update(**kwargs) - - @staticmethod - def keyfn(key): # pylint: disable=method-hidden - return key - - def __getitem__(self, key): - _, v = self.data[self.keyfn(key)] - return v - - def __setitem__(self, key, value): - self.data[self.keyfn(key)] = key, value - - def __delitem__(self, key): - del self.data[self.keyfn(key)] - - def __iter__(self): - for _, (k, _) in self.data.items(): - yield k - - def __len__(self): - return len(self.data) - - -class TensorDict(Dict): # pylint: disable=too-many-ancestors - class keyfn: - def __new__(cls, x: Tensor): - if not isinstance(x, Tensor): - return x - return super().__new__(cls) - - def __init__(self, x: Tensor): - self._data = x # do not save id directly to make pickle work - - def __hash__(self): - return id(self._data) - - def __eq__(self, other): - # pylint: disable=undefined-variable - return isinstance(other, __class__) and id(self._data) == id(other._data) - - def __init__(self, *args): - super().__init__(*args) +class Parameter(Tensor): + r"""A kind of Tensor that is to be considered a module parameter. 
+ """ diff --git a/imperative/python/megengine/tensor_nn.py b/imperative/python/megengine/tensor_nn.py deleted file mode 100644 index 0c4916fb4a8e14b8bc4bf7f4464202603143b4ef..0000000000000000000000000000000000000000 --- a/imperative/python/megengine/tensor_nn.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") -# -# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -from . import Tensor, tensor - - -class Buffer(Tensor): - r"""A kind of Tensor with ``requires_grad=False``. - """ - - -class Parameter(Tensor): - r"""A kind of Tensor that is to be considered a module parameter. - """ - requires_grad = True diff --git a/imperative/python/megengine/utils/deprecation.py b/imperative/python/megengine/utils/deprecation.py new file mode 100644 index 0000000000000000000000000000000000000000..e47f073c7d4944056541683cc7d7807762850f72 --- /dev/null +++ b/imperative/python/megengine/utils/deprecation.py @@ -0,0 +1 @@ +from deprecated.sphinx import deprecated diff --git a/imperative/python/megengine/utils/types.py b/imperative/python/megengine/utils/types.py index 465ca03ce68f02d3944ddb87f5b0d4abde5ef9f9..03f0709eee10c05df800147998831b71d2db7e5c 100644 --- a/imperative/python/megengine/utils/types.py +++ b/imperative/python/megengine/utils/types.py @@ -15,7 +15,7 @@ def get_ndtuple(value, *, n, allow_zero=True): :type allow_zero: bool :param allow_zero: whether to allow zero tuple value""" - if not isinstance(value, collections.Iterable): + if not isinstance(value, collections.abc.Iterable): value = int(value) value = tuple([value for i in range(n)]) else: diff --git a/imperative/python/requires.txt b/imperative/python/requires.txt index 3e9b44c98ba3afb32c32b636b0bfca2f56a742b1..abcbb2453661e1daa05bdff658775823ca7fc681 100644 --- a/imperative/python/requires.txt +++ b/imperative/python/requires.txt @@ -5,3 +5,4 @@ requests tabulate tqdm redispy +deprecated diff --git a/imperative/python/test/integration/test_advance_indexing.py b/imperative/python/test/integration/test_advance_indexing.py index 6267a3d0031d63ac72e137feb8085e1a8fdac005..2785292afc1a63172306034c036ec470507ca5c2 100644 --- a/imperative/python/test/integration/test_advance_indexing.py +++ b/imperative/python/test/integration/test_advance_indexing.py @@ -38,7 +38,7 @@ class Simple2(Module): def test_advance_indexing(): net = Simple() - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) optim = optimizer.SGD(net.parameters(), lr=1.0) optim.clear_grad() @@ -48,7 +48,7 @@ def test_advance_indexing(): data = tensor(raw_data) mask = tensor(raw_mask) answer = 1.0 - raw_data[raw_mask].sum() - with gm.record(): + with gm: loss = net(data, mask).sum() gm.backward(loss) optim.step() @@ -58,7 +58,7 @@ def test_advance_indexing(): def test_advance_indexing_with_subtensor(): net = Simple2() - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) optim = optimizer.SGD(net.parameters(), lr=1.0) optim.clear_grad() @@ -66,7 +66,7 @@ def test_advance_indexing_with_subtensor(): raw_data = np.arange(576).reshape(dshape).astype(np.float32) data = tensor(raw_data) answer = 1.0 - raw_data[1, ..., :, 0:4:2, 0:2].sum() - with gm.record(): + with gm: loss = net(data).sum() 
gm.backward(loss) optim.step() diff --git a/imperative/python/test/integration/test_ai.py b/imperative/python/test/integration/test_ai.py index fdf54fa9798b80ea74172c3b42736b10def30f39..89c6e86ef849895c65ce0d91182b1b12fd515d44 100644 --- a/imperative/python/test/integration/test_ai.py +++ b/imperative/python/test/integration/test_ai.py @@ -28,13 +28,13 @@ class Simple(Module): def test_ai(): net = Simple() - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) optim = optimizer.SGD(net.parameters(), lr=1.0) optim.clear_grad() dshape = (10, 10) data = tensor(np.ones(dshape).astype(np.float32)) - with gm.record(): + with gm: loss = net(data).sum() gm.backward(loss) optim.step() diff --git a/imperative/python/test/integration/test_bn.py b/imperative/python/test/integration/test_bn.py index 2795523280931643c61fbf541293eb98ab51803a..8767af309a48fb33f8f0015e229ec4a86190a797 100644 --- a/imperative/python/test/integration/test_bn.py +++ b/imperative/python/test/integration/test_bn.py @@ -25,12 +25,12 @@ def test_frozen_bn(): saved_wt = m.weight.numpy() saved_bias = m.bias.numpy() - gm = ad.GradManager().register(m.parameters()) + gm = ad.GradManager().attach(m.parameters()) optim = optimizer.SGD(m.parameters(), lr=1.0) optim.clear_grad() data = np.random.random((6, nchannel, 2, 2)).astype("float32") - with gm.record(): + with gm: loss = m(data).mean() gm.backward(loss) optim.step() @@ -46,12 +46,12 @@ def test_bn_no_track_stat(): nchannel = 3 m = BatchNorm2d(nchannel, track_running_stats=False) - gm = ad.GradManager().register(m.parameters()) + gm = ad.GradManager().attach(m.parameters()) optim = optimizer.SGD(m.parameters(), lr=1.0) optim.clear_grad() data = np.random.random((6, nchannel, 2, 2)).astype("float32") - with gm.record(): + with gm: loss = m(data).sum() gm.backward(loss) optim.step() @@ -68,12 +68,12 @@ def test_bn_no_track_stat2(): saved_mean = m.running_mean.numpy() assert saved_mean is not None - gm = ad.GradManager().register(m.parameters()) + gm = ad.GradManager().attach(m.parameters()) optim = optimizer.SGD(m.parameters(), lr=1.0) optim.clear_grad() data = np.random.random((6, nchannel, 2, 2)).astype("float32") - with gm.record(): + with gm: loss = m(data).sum() gm.backward(loss) optim.step() diff --git a/imperative/python/test/integration/test_converge.py b/imperative/python/test/integration/test_converge.py index 42267e2c49312882e3a970920051d6e76f73b6b7..1beded2184037d605fd648f6c499bb46fe9096ac 100644 --- a/imperative/python/test/integration/test_converge.py +++ b/imperative/python/test/integration/test_converge.py @@ -74,13 +74,11 @@ class XORNet(Module): def test_training_converge(): net = XORNet() - opt = SGD( - net.parameters(requires_grad=True), lr=0.01, momentum=0.9, weight_decay=5e-4 - ) - gm = ad.GradManager().register(net.parameters()) + opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) + gm = ad.GradManager().attach(net.parameters()) def train(data, label): - with gm.record(): + with gm: pred = net(data) loss = F.cross_entropy_with_softmax(pred, label) gm.backward(loss) diff --git a/imperative/python/test/integration/test_correctness.py b/imperative/python/test/integration/test_correctness.py index 8a6726075e611d3c5580a687e54c0552be001e96..db77e374d28fc4d917e75b572fcb04223d5c9432 100644 --- a/imperative/python/test/integration/test_correctness.py +++ b/imperative/python/test/integration/test_correctness.py @@ -91,7 +91,7 @@ class MnistNet(Module): def train(data, label, net, opt, gm): - with 
gm.record(): + with gm: pred = net(data) loss = F.cross_entropy_with_softmax(pred, label) gm.backward(loss) @@ -117,7 +117,7 @@ def update_model(model_path): net.load_state_dict(checkpoint["net_init"]) lr = checkpoint["sgd_lr"] opt = SGD(net.parameters(), lr=lr) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) data = Tensor(checkpoint["data"], dtype=np.float32) label = Tensor(checkpoint["label"], dtype=np.int32) @@ -152,7 +152,7 @@ def run_train( net.load_state_dict(checkpoint["net_init"]) lr = checkpoint["sgd_lr"] opt = SGD(net.parameters(), lr=lr) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) data = Tensor(checkpoint["data"], dtype=np.float32) label = Tensor(checkpoint["label"], dtype=np.int32) diff --git a/imperative/python/test/integration/test_detach.py b/imperative/python/test/integration/test_detach.py index 6a2f70376f1e03cd9ee8d381084d4a58ef6bc58c..6bd9c890f5a0079cf5f175290dd006a76a801c23 100644 --- a/imperative/python/test/integration/test_detach.py +++ b/imperative/python/test/integration/test_detach.py @@ -32,11 +32,11 @@ def test_detach(): optim = optimizer.SGD(net.parameters(), lr=1.0) optim.clear_grad() - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) dshape = (10, 10) data = tensor(np.ones(dshape).astype(np.float32)) - with gm.record(): + with gm: loss = net(data).sum() gm.backward(loss) optim.step() diff --git a/imperative/python/test/integration/test_dp_correctness.py b/imperative/python/test/integration/test_dp_correctness.py index 8e62105a7815775793572c2d9297a91b79fee983..e35a4e8ffae6271e34876ccb6fd6d7f5fa5f646c 100644 --- a/imperative/python/test/integration/test_dp_correctness.py +++ b/imperative/python/test/integration/test_dp_correctness.py @@ -97,7 +97,7 @@ class MnistNet(Module): def train(data, label, net, opt, gm): opt.clear_grad() - with gm.record(): + with gm: pred = net(data) loss = F.cross_entropy_with_softmax(pred, label) gm.backward(loss) @@ -125,8 +125,7 @@ def update_model(model_path): lr = checkpoint["sgd_lr"] opt = SGD(net.parameters(), lr=lr) - gm = ad.GradManager() - gm.register( + gm = ad.GradManager().attach( net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)] ) @@ -171,8 +170,7 @@ def run_test( lr = checkpoint["sgd_lr"] opt = SGD(net.parameters(), lr=lr) - gm = ad.GradManager() - gm.register( + gm = ad.GradManager().attach( net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)] ) diff --git a/imperative/python/test/integration/test_hello_world.py b/imperative/python/test/integration/test_hello_world.py index af01c3f171bcce7f59621d92879d49d78b65cd8f..138518bf886fa029424bfb3367c9a5f3d0850ca5 100644 --- a/imperative/python/test/integration/test_hello_world.py +++ b/imperative/python/test/integration/test_hello_world.py @@ -33,10 +33,10 @@ def test_hello_world(): optim = optimizer.SGD(net.parameters(), lr=1.0) optim.clear_grad() - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) data = tensor([2.34]) - with gm.record(): + with gm: loss = net(data) gm.backward(loss) optim.step() diff --git a/imperative/python/test/integration/test_optimizer.py b/imperative/python/test/integration/test_optimizer.py index 62cd7b467b6d63202fff280fdcbdc182201b90bb..a1e8a2e0e7057441ce9d19c63f580af7f4637571 100644 --- a/imperative/python/test/integration/test_optimizer.py +++ b/imperative/python/test/integration/test_optimizer.py @@ 
-13,7 +13,7 @@ import megengine.functional as F from megengine import Parameter, optimizer from megengine.jit import trace from megengine.module import Linear, Module -from megengine.tensor import TensorDict, tensor +from megengine.tensor import tensor class MLP(Module): @@ -44,7 +44,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False): net = Simple() opt = getattr(optimizer, opt_str)(net.parameters(), **test_case) check_func = check_class(net, **test_case) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) step = 0 data_shape = (2, 28) @@ -57,12 +57,12 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False): data = tensor(np.random.random(data_shape).astype(np.float32)) opt.clear_grad() - with gm.record(): + with gm: pred = net(data) loss = pred.sum() gm.backward(loss) - ori_params = TensorDict() + ori_params = {} for param in net.parameters(): ori_params[param] = np.copy(param.numpy()) opt.step() @@ -75,7 +75,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False): @trace(symbolic=symbolic) def train_func(data, *, opt=None, gm=None): opt.clear_grad() - with gm.record(): + with gm: pred = net(data) loss = pred.sum() gm.backward(loss) @@ -84,7 +84,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False): # reset net and opt net = Simple() opt = getattr(optimizer, opt_str)(net.parameters(), **test_case) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) check_func = check_class(net, **test_case) step = 0 for i in range(iter_num): @@ -93,7 +93,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False): group["lr"] += 0.01 check_func.lr += 0.01 - ori_params = TensorDict() + ori_params = {} for param in net.parameters(): ori_params[param] = np.copy(param.numpy()) @@ -105,7 +105,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False): def test_sgd(): class CheckValue: def __init__(self, net, **kwarg): - self.slots = TensorDict() + self.slots = {} for param in net.parameters(): self.slots[param] = np.zeros(param.shape).astype(np.float32) for k, v in kwarg.items(): @@ -134,8 +134,8 @@ def test_sgd(): def test_adam(): class CheckValue: def __init__(self, net, **kwarg): - self.m_slots = TensorDict() - self.v_slots = TensorDict() + self.m_slots = {} + self.v_slots = {} for param in net.parameters(): self.m_slots[param] = np.zeros(param.shape).astype(np.float32) self.v_slots[param] = np.zeros(param.shape).astype(np.float32) @@ -175,7 +175,7 @@ def test_adam(): def test_adagrad(): class CheckValue: def __init__(self, net, **kwarg): - self.s_slots = TensorDict() + self.s_slots = {} for param in net.parameters(): self.s_slots[param] = np.zeros(param.shape).astype(np.float32) for k, v in kwarg.items(): @@ -207,8 +207,8 @@ def test_adagrad(): def test_adadelta(): class CheckValue: def __init__(self, net, **kwarg): - self.s_slots = TensorDict() - self.a_slots = TensorDict() + self.s_slots = {} + self.a_slots = {} for param in net.parameters(): self.s_slots[param] = np.zeros(param.shape).astype(np.float32) self.a_slots[param] = np.zeros(param.shape).astype(np.float32) diff --git a/imperative/python/test/integration/test_save_load.py b/imperative/python/test/integration/test_save_load.py index 2008a211f5288cee7575fa514007531e82d160ca..be93523f9c6af2148cb44d0b6ede69749c27db3c 100644 --- a/imperative/python/test/integration/test_save_load.py +++ b/imperative/python/test/integration/test_save_load.py @@ 
-23,11 +23,11 @@ def test_save_load(): optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9) optim.clear_grad() - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) data = tensor([2.34]) - with gm.record(): + with gm: loss = net(data) gm.backward(loss) @@ -55,7 +55,7 @@ def test_save_load(): optim.load_state_dict(checkpoint["opt_state"]) print("load done") - with gm.record(): + with gm: loss = net([1.23]) gm.backward(loss) diff --git a/imperative/python/test/integration/test_sgd_momentum.py b/imperative/python/test/integration/test_sgd_momentum.py index 34562e6b1df66e421fd5599d7c2aa0dc8a6caac8..6c8638773f30af763b763ed9c64035f8134b2bff 100644 --- a/imperative/python/test/integration/test_sgd_momentum.py +++ b/imperative/python/test/integration/test_sgd_momentum.py @@ -31,12 +31,12 @@ def test_sgd_momentum(): optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9) optim.clear_grad() - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) data = tensor([2.34]) # do a step of train - with gm.record(): + with gm: loss = net(data) gm.backward(loss) optim.step() @@ -51,7 +51,7 @@ def test_sgd_momentum(): # do a step of train optim.clear_grad() - with gm.record(): + with gm: loss = net(data) gm.backward(loss) optim.step() @@ -69,7 +69,7 @@ def test_sgd_momentum_trace(): @trace(symbolic=symbolic) def train_func(data, *, model=None, optim=None, gm=None): optim.clear_grad() - with gm.record(): + with gm: loss = net(data) gm.backward(loss) optim.step() @@ -82,7 +82,7 @@ def test_sgd_momentum_trace(): net = Simple() optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) data = tensor([2.34]) train_func(data, model=net, optim=optim, gm=gm) np.testing.assert_almost_equal( diff --git a/imperative/python/test/integration/test_trace_dump.py b/imperative/python/test/integration/test_trace_dump.py index 1cd6546ab6e86ab7ed3e77dd63b963f536d2a521..f6e2739813250c1a22f212f0e66444430bdb9986 100644 --- a/imperative/python/test/integration/test_trace_dump.py +++ b/imperative/python/test/integration/test_trace_dump.py @@ -61,15 +61,15 @@ class XORNet(M.Module): def test_xornet_trace_dump(): net = XORNet() - opt = optim.SGD(net.parameters(requires_grad=True), lr=0.01, momentum=0.9) - gm = GradManager().register(net.parameters(requires_grad=True)) + opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9) + gm = GradManager().attach(net.parameters()) batch_size = 64 train_dataset = minibatch_generator(batch_size) val_dataset = minibatch_generator(batch_size) @trace def train_fun(data, label): - with gm.record(): + with gm: net.train() pred = net(data) loss = F.cross_entropy_with_softmax(pred, label) diff --git a/imperative/python/test/unit/functional/test_functional.py b/imperative/python/test/unit/functional/test_functional.py index 4aec162711f6178661b58755a27b297312bfb134..05b280629444063c23361df7bcd34d56d4c8c873 100644 --- a/imperative/python/test/unit/functional/test_functional.py +++ b/imperative/python/test/unit/functional/test_functional.py @@ -14,7 +14,7 @@ import pytest import megengine.core.ops.builtin as builtin import megengine.core.tensor.dtype as dtype import megengine.functional as F -from megengine import Buffer, Parameter, is_cuda_available, tensor +from megengine import Parameter, Tensor, is_cuda_available, tensor from megengine.core._trace_option import use_tensor_shape from 
megengine.core.autodiff.grad import Grad from megengine.core.tensor.utils import make_shape_tuple @@ -330,7 +330,7 @@ def test_roi_pooling(): def test_add_update(): shape = (2, 3) v = np.random.random(shape).astype(np.float32) - b = Buffer(v) + b = Tensor(v) u = F.add_update(b, 1) assertTensorClose(u.numpy(), v + 1) @@ -347,7 +347,7 @@ def test_add_update(): def test_add_update_params(): b = np.random.random((2, 3)).astype(np.float32) - y = Buffer(b) + y = Tensor(b) # @jit.trace def f(x): @@ -355,7 +355,7 @@ def test_add_update_params(): f(np.zeros((2, 3)).astype(np.float32)) - z = Buffer(np.zeros((2, 3)).astype(np.float32)) + z = Tensor(np.zeros((2, 3)).astype(np.float32)) F.add_update(y, z, beta=0.1) res = f(np.ones((2, 3)).astype(np.float32)) diff --git a/imperative/python/test/unit/functional/test_tensor.py b/imperative/python/test/unit/functional/test_tensor.py index 2b86933d05fc1cbe6d9a004ce4c4611ac977a26e..253e840fd0d0f02ce4097fdf12e49a6c2e616529 100644 --- a/imperative/python/test/unit/functional/test_tensor.py +++ b/imperative/python/test/unit/functional/test_tensor.py @@ -12,7 +12,7 @@ import numpy as np import pytest import megengine.functional as F -from megengine import Buffer, Parameter, is_cuda_available, tensor +from megengine import tensor from megengine.core._trace_option import use_tensor_shape from megengine.core.tensor.utils import astensor1d from megengine.distributed.helper import get_device_count_by_fork diff --git a/imperative/python/test/unit/module/test_batchnorm.py b/imperative/python/test/unit/module/test_batchnorm.py index 213b6fc3900a02d6cd3c7c75d155d5174661da70..c99dd6b34a4e1b3ea67a32e85f4bb9fc05360f7f 100644 --- a/imperative/python/test/unit/module/test_batchnorm.py +++ b/imperative/python/test/unit/module/test_batchnorm.py @@ -14,10 +14,9 @@ import pytest import megengine as mge import megengine.distributed as dist -from megengine import tensor +from megengine import Tensor from megengine.core._trace_option import use_tensor_shape from megengine.module import BatchNorm1d, BatchNorm2d, SyncBatchNorm -from megengine.tensor import Tensor from megengine.test import assertTensorClose @@ -45,10 +44,8 @@ def test_syncbn(): return dist.init_process_group("localhost", port, nr_ranks, rank, rank) bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps) - data_tensor = tensor([]) for i in range(steps): - data_tensor.set_value(data[i]) - yv = bn(data_tensor) + yv = bn(Tensor(data[i])) assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) assertTensorClose(running_mean, bn.running_mean.numpy(), max_err=5e-6) @@ -105,7 +102,6 @@ def test_batchnorm(): bn = BatchNorm1d(nr_chan, momentum=momentum) running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32) running_var = np.ones((1, nr_chan, 1), dtype=np.float32) - data = tensor([]) for i in range(3): xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32) mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True) @@ -120,8 +116,7 @@ def test_batchnorm(): running_mean = running_mean * momentum + mean * (1 - momentum) running_var = running_var * momentum + var_unbiased * (1 - momentum) - data.set_value(xv) - yv = bn(data) + yv = bn(Tensor(xv)) yv_expect = (xv - mean) / sd assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) @@ -137,7 +132,7 @@ def test_batchnorm(): var_backup = bn.running_var.numpy() bn.training = False xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32) - data.set_value(xv) + data = Tensor(xv) yv1 = bn(data) yv2 = bn(data) assertTensorClose(yv1.numpy(), yv2.numpy(), 
max_err=0) @@ -161,7 +156,6 @@ def test_syncbn1d(): bn = SyncBatchNorm(nr_chan, momentum=momentum) running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32) running_var = np.ones((1, nr_chan, 1), dtype=np.float32) - data = tensor([]) for i in range(3): xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32) mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True) @@ -176,8 +170,7 @@ def test_syncbn1d(): running_mean = running_mean * momentum + mean * (1 - momentum) running_var = running_var * momentum + var_unbiased * (1 - momentum) - data.set_value(xv) - yv = bn(data) + yv = bn(Tensor(xv)) yv_expect = (xv - mean) / sd assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) @@ -193,7 +186,7 @@ def test_syncbn1d(): var_backup = bn.running_var.numpy() bn.training = False xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32) - data.set_value(xv) + data = Tensor(xv) yv1 = bn(data) yv2 = bn(data) assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0) @@ -210,7 +203,6 @@ def test_batchnorm2d(): bn = BatchNorm2d(nr_chan, momentum=momentum) running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32) running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32) - data = tensor([]) for i in range(3): xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32) xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape( @@ -226,8 +218,7 @@ def test_batchnorm2d(): running_mean = running_mean * momentum + mean * (1 - momentum) running_var = running_var * momentum + var_unbiased * (1 - momentum) - data.set_value(xv) - yv = bn(data) + yv = bn(Tensor(xv)) yv_expect = (xv - mean) / sd assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) @@ -239,7 +230,7 @@ def test_batchnorm2d(): var_backup = bn.running_var.numpy() bn.training = False xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32) - data.set_value(xv) + data = Tensor(xv) yv1 = bn(data) yv2 = bn(data) assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0) @@ -263,7 +254,6 @@ def test_syncbn2d(): bn = SyncBatchNorm(nr_chan, momentum=momentum) running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32) running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32) - data = tensor([]) for i in range(3): xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32) xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape( @@ -279,8 +269,7 @@ def test_syncbn2d(): running_mean = running_mean * momentum + mean * (1 - momentum) running_var = running_var * momentum + var_unbiased * (1 - momentum) - data.set_value(xv) - yv = bn(data) + yv = bn(Tensor(xv)) yv_expect = (xv - mean) / sd assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) @@ -292,7 +281,7 @@ def test_syncbn2d(): var_backup = bn.running_var.numpy() bn.training = False xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32) - data.set_value(xv) + data = Tensor(xv) yv1 = bn(data) yv2 = bn(data) assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0) @@ -306,7 +295,6 @@ def test_batchnorm_no_stats(): nr_chan = 8 data_shape = (3, nr_chan, 4) bn = BatchNorm1d(8, track_running_stats=False) - data = tensor([]) for i in range(4): if i == 2: bn.training = False @@ -320,8 +308,7 @@ def test_batchnorm_no_stats(): ).reshape((1, nr_chan, 1)) sd = np.sqrt(var + bn.eps) - data.set_value(xv) - yv = bn(data) + yv = bn(Tensor(xv)) yv_expect = (xv - mean) / sd assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) @@ -338,7 +325,6 @@ def test_syncbn_no_stats(): nr_chan = 8 data_shape = (3, nr_chan, 4) bn = SyncBatchNorm(8, 
track_running_stats=False) - data = tensor([]) for i in range(4): if i == 2: bn.training = False @@ -352,8 +338,7 @@ def test_syncbn_no_stats(): ).reshape((1, nr_chan, 1)) sd = np.sqrt(var + bn.eps) - data.set_value(xv) - yv = bn(data) + yv = bn(Tensor(xv)) yv_expect = (xv - mean) / sd assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) @@ -363,7 +348,6 @@ def test_batchnorm2d_no_stats(): nr_chan = 8 data_shape = (3, nr_chan, 16, 16) bn = BatchNorm2d(8, track_running_stats=False) - data = tensor([]) for i in range(4): if i == 2: bn.training = False @@ -376,8 +360,7 @@ def test_batchnorm2d_no_stats(): var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1)) sd = np.sqrt(var + bn.eps) - data.set_value(xv) - yv = bn(data) + yv = bn(Tensor(xv)) yv_expect = (xv - mean) / sd assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) @@ -394,7 +377,6 @@ def test_syncbn2d_no_stats(): nr_chan = 8 data_shape = (3, nr_chan, 16, 16) bn = SyncBatchNorm(8, track_running_stats=False) - data = tensor([]) for i in range(4): if i == 2: bn.training = False @@ -407,8 +389,7 @@ def test_syncbn2d_no_stats(): var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1)) sd = np.sqrt(var + bn.eps) - data.set_value(xv) - yv = bn(data) + yv = bn(Tensor(xv)) yv_expect = (xv - mean) / sd assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6) diff --git a/imperative/python/test/unit/module/test_external.py b/imperative/python/test/unit/module/test_external.py index 8ba6b09426ec1b88cfe18040a6dbf7512624cb54..cab28c457e474d983e921290586444187d494d69 100644 --- a/imperative/python/test/unit/module/test_external.py +++ b/imperative/python/test/unit/module/test_external.py @@ -12,7 +12,7 @@ import numpy as np import pytest import megengine as mge -from megengine import tensor +from megengine import Tensor from megengine.module import Module @@ -35,12 +35,12 @@ def test_cambricon_module(): with open(model, "rb") as f: data = f.read() m = MyModule(data) - inputs = [] - inputs.append(tensor(data=[], dtype=np.float16, device="cambricon0")) - inputs[0].set_value(np.random.normal(size=(1, 64, 32, 32)).astype(np.float16)) + inp = Tensor( + np.random.normal((1, 64, 32, 32)).astype(np.float16), device="cambricon0" + ) def inference(inps): pred = m(inps) return pred - pred = inference(inputs) + pred = inference([inp]) diff --git a/imperative/python/test/unit/module/test_module.py b/imperative/python/test/unit/module/test_module.py index 1b7502c7ede4c0f5314bccf120f249ef889f612d..d4a5f30479aa85d10d73e17755cce18bcb1d15ed 100644 --- a/imperative/python/test/unit/module/test_module.py +++ b/imperative/python/test/unit/module/test_module.py @@ -16,7 +16,7 @@ import pytest import megengine as mge import megengine.functional as F -from megengine import Buffer, Parameter, Tensor, tensor +from megengine import Parameter, Tensor, tensor from megengine.module import ( BatchNorm1d, BatchNorm2d, @@ -196,7 +196,7 @@ class MyModule(Module): self.i = self.InnerModule() self.bn = BatchNorm2d(4) self.param = Parameter(np.ones(1, dtype=np.float32)) - self.buff = Buffer(np.ones(1, dtype=np.float32)) + self.buff = Tensor(np.ones(1, dtype=np.float32)) def forward(self, x): x = self.i(x) @@ -464,8 +464,7 @@ def test_sequential_named_children(): def test_state_dict(): data_shape = (2, 28) - data = tensor([]) - data.set_value(np.random.random(data_shape)) + data = tensor(np.random.random(data_shape)) mlp = MLP() pred0 = mlp(data) @@ -542,8 +541,7 @@ def test_shared_param(): def test_pickle_module(): data_shape = (2, 28) - data = tensor([]) - 
data.set_value(np.random.random(data_shape)) + data = tensor(np.random.random(data_shape)) mlp = MLP() # pickle before forward with BytesIO() as fout: @@ -568,8 +566,7 @@ def test_pickle_module(): @pytest.mark.skip(reason="under development") def test_dump_model(): data_shape = (2, 28) - data = tensor([]) - data.set_value(np.random.random(data_shape)) + data = Tensor(np.random.random(data_shape)) mlp = MLP() pred = mlp(data) f = tempfile.NamedTemporaryFile(delete=False) diff --git a/imperative/python/test/unit/module/test_tensor.py b/imperative/python/test/unit/module/test_tensor.py index 63ef464ff6ef19caaea8e81fa1f7cea58c0082f8..38686b08c7b3ab18610e55268160d4b009538a29 100644 --- a/imperative/python/test/unit/module/test_tensor.py +++ b/imperative/python/test/unit/module/test_tensor.py @@ -13,7 +13,7 @@ import pytest import megengine as mge import megengine.functional as F -from megengine import Buffer, Parameter +from megengine import Parameter, Tensor from megengine.module import Conv2d from megengine.test import assertTensorClose @@ -33,7 +33,7 @@ def test_set_value(): @pytest.mark.skip(reason="fill unsupported") def test_fill(): - a = Buffer(np.zeros((2, 3), dtype=np.float32)) + a = Tensor(np.zeros((2, 3), dtype=np.float32)) a.fill(3) assertTensorClose(a.numpy(), np.full((2, 3), 3, dtype=np.float32)) a.fill(124.568) @@ -80,7 +80,7 @@ def test_fill(): # def test_shape_warning(): # with Graph() as cg: # cg.set_option("eager_evaluation", False) -# b = Buffer(np.ones((2, 3)).astype(np.float32)) +# b = Tensor(np.ones((2, 3)).astype(np.float32)) # with pytest.warns(None) as record: # print(b.shape) # if len(record) != 0: diff --git a/imperative/python/test/unit/test_function.py b/imperative/python/test/unit/test_function.py index cef30bd94d43c10375d7caf146288838e613bed7..fc01faf912d434360179067a6b5bed645e54e274 100644 --- a/imperative/python/test/unit/test_function.py +++ b/imperative/python/test/unit/test_function.py @@ -42,11 +42,11 @@ def test_single_input(): return x net = Simple(av) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) opt = optimizer.SGD(net.parameters(), lr=1.0) opt.clear_grad() - with gm.record(): + with gm: loss = net() gm.backward(loss.sum()) opt.step() @@ -81,11 +81,11 @@ def test_multi_input(): return x net = Simple(av, bv) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) opt = optimizer.SGD(net.parameters(), lr=1.0) opt.clear_grad() - with gm.record(): + with gm: loss = net() gm.backward(loss.sum()) opt.step() @@ -121,11 +121,11 @@ def test_multi_output(): return x + y net = Simple(av, bv) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) opt = optimizer.SGD(net.parameters(), lr=1.0) opt.clear_grad() - with gm.record(): + with gm: loss = net() gm.backward(loss.sum()) opt.step() @@ -163,9 +163,9 @@ def test_skip_invalid_grad(): net = Simple(av, bv) optim = optimizer.SGD(net.parameters(), lr=1.0) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) optim.clear_grad() - with gm.record(): + with gm: loss = net().sum() gm.backward(loss) optim.step() @@ -198,10 +198,10 @@ def test_ste(): av = np.random.random(data_shape).astype(np.float32) net = Simple(av) optim = optimizer.SGD(net.parameters(), lr=1.0) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) optim.clear_grad() - with gm.record(): + with gm: loss = net() 
gm.backward(loss.sum()) optim.step() @@ -256,9 +256,9 @@ def test_none_in_out_grad(): b = tensor(np.array([2.0], dtype=np.float32)) net = Simple(a, b) optim = optimizer.SGD(net.parameters(), lr=1.0) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) optim.clear_grad() - with gm.record(): + with gm: loss, _ = net() gm.backward(loss) optim.step() @@ -293,10 +293,10 @@ def test_zero_grad(): a = tensor(np.array([1.0], dtype=np.float32)) net = Simple(a) optim = optimizer.SGD(net.parameters(), lr=1.0) - gm = ad.GradManager().register(net.parameters()) + gm = ad.GradManager().attach(net.parameters()) optim.clear_grad() - with gm.record(): + with gm: loss = net() gm.backward(loss.sum()) optim.step() diff --git a/imperative/python/test/unit/test_indexing_op.py b/imperative/python/test/unit/test_indexing_op.py index d7ba3716f229e0f49b87aef7ed53a38e3d5e12ad..4eabefcbaf62a345dd9308300b072b9b80972d42 100644 --- a/imperative/python/test/unit/test_indexing_op.py +++ b/imperative/python/test/unit/test_indexing_op.py @@ -38,7 +38,7 @@ def cvt_to_shape_desc(val, inpvar, config=None): if isinstance(val, RawTensor): return as_tensor(val, device) - if not isinstance(val, collections.Iterable): + if not isinstance(val, collections.abc.Iterable): val = [val] components = [] diff --git a/imperative/python/test/unit/test_serialization.py b/imperative/python/test/unit/test_serialization.py index 8ca6a9f6d2c5d2ce181df856dce8f5dc9b889b55..5a5991aebcbdc71a27d31ffaf4ca796429b9f894 100644 --- a/imperative/python/test/unit/test_serialization.py +++ b/imperative/python/test/unit/test_serialization.py @@ -12,19 +12,18 @@ from tempfile import TemporaryFile import numpy as np import megengine as mge -from megengine import Buffer, Parameter, tensor +from megengine import Parameter, Tensor def test_tensor_serialization(): def tensor_eq(a, b): assert a.dtype == b.dtype assert a.device == b.device - assert a.requires_grad == b.requires_grad np.testing.assert_equal(a.numpy(), b.numpy()) with TemporaryFile() as f: data = np.random.randint(low=0, high=7, size=[233]) - a = tensor(data, device="xpux", dtype=np.int32) + a = Tensor(data, device="xpux", dtype=np.int32) pickle.dump(a, f) f.seek(0) b = pickle.load(f) @@ -39,19 +38,19 @@ def test_tensor_serialization(): np.testing.assert_equal(a.numpy(), b.numpy()) with TemporaryFile() as f: - a = Buffer(np.random.random(size=(2, 233)).astype(np.float32)) + a = Tensor(np.random.random(size=(2, 233)).astype(np.float32)) pickle.dump(a, f) f.seek(0) b = pickle.load(f) - assert isinstance(b, Buffer) + assert type(b) is Tensor np.testing.assert_equal(a.numpy(), b.numpy()) with TemporaryFile() as f: - a = Buffer(np.random.random(size=(2, 233)).astype(np.float32)) + a = Tensor(np.random.random(size=(2, 233)).astype(np.float32)) mge.save(a, f) f.seek(0) b = mge.load(f, map_location="cpux") - assert isinstance(b, Buffer) + assert type(b) is Tensor assert "cpu" in str(b.device) np.testing.assert_equal(a.numpy(), b.numpy()) @@ -59,12 +58,12 @@ def test_tensor_serialization(): if mge.is_cuda_available(): device_org = mge.get_default_device() mge.set_default_device("gpu0") - a = Buffer(np.random.random(size=(2, 233)).astype(np.float32)) + a = Tensor(np.random.random(size=(2, 233)).astype(np.float32)) mge.save(a, f) f.seek(0) mge.set_default_device("cpux") b = mge.load(f, map_location={"gpu0": "cpu0"}) - assert isinstance(b, Buffer) + assert type(b) is Tensor assert "cpu0" in str(b.device) np.testing.assert_equal(a.numpy(), b.numpy()) 
mge.set_default_device(device_org) diff --git a/sdk/xor-deploy/xornet.py b/sdk/xor-deploy/xornet.py index ef4935cc344e13974cc62f217f67e2c077b1b5d2..a032ef56600dda475442572b1a76613050f58305 100644 --- a/sdk/xor-deploy/xornet.py +++ b/sdk/xor-deploy/xornet.py @@ -66,7 +66,7 @@ def main(): mge.set_default_device("cpux") net = XORNet() - opt = optim.SGD(net.parameters(requires_grad=True), lr=0.01, momentum=0.9) + opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9) batch_size = 64 train_dataset = minibatch_generator(batch_size) val_dataset = minibatch_generator(batch_size)
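
The test updates above all follow the same migration pattern: GradManager.attach() replaces register(), the manager is entered directly as a context manager rather than through gm.record(), net.parameters() is called without requires_grad=True, and plain dicts keyed by parameters replace TensorDict. A minimal sketch of the resulting training step follows; the Simple module, learning rate, and input values are illustrative only and are not taken from this patch.

import numpy as np

import megengine.autodiff as ad
import megengine.optimizer as optimizer
from megengine import Parameter, tensor
from megengine.module import Module


class Simple(Module):
    # Illustrative single-parameter module; the real test fixtures differ.
    def __init__(self):
        super().__init__()
        self.a = Parameter(np.array([1.23], dtype=np.float32))

    def forward(self, x):
        return x * self.a


net = Simple()
opt = optimizer.SGD(net.parameters(), lr=1.0)    # no requires_grad=True argument
gm = ad.GradManager().attach(net.parameters())   # attach() instead of register()

data = tensor(np.array([2.34], dtype=np.float32))  # construct directly; no tensor([]) + set_value()

opt.clear_grad()
with gm:                         # enter the manager itself instead of gm.record()
    loss = net(data).sum()
    gm.backward(loss)
opt.step()

# ordinary dict keyed by parameters, instead of TensorDict
snapshot = {p: np.copy(p.numpy()) for p in net.parameters()}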
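Likewise, Buffer disappears from the public imports and plain Tensor takes its place, and the updated tests construct a fresh Tensor for each new value instead of calling set_value() on an empty tensor([]). A small sketch of the serialization check this enables, assuming in-memory pickling behaves like the file-based round trip in test_tensor_serialization; the shapes here are illustrative.

import pickle

import numpy as np

from megengine import Tensor

# A plain Tensor now covers the role previously played by Buffer.
b = Tensor(np.random.random((2, 3)).astype(np.float32))

# Round-tripping through pickle yields a plain Tensor again,
# as test_tensor_serialization now asserts with `type(b) is Tensor`.
restored = pickle.loads(pickle.dumps(b))
assert type(restored) is Tensor
np.testing.assert_equal(b.numpy(), restored.numpy())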