group-wbl/.venv/lib/python3.13/site-packages/onnxruntime/quantization/operators/pad.py

# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.  All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from __future__ import annotations

from typing import Any

import numpy as np
import onnx

from ..quant_utils import (
    TENSOR_NAME_QUANT_SUFFIX,
    QuantizedValue,
    QuantizedValueType,
    attribute_to_kwarg,
    quantize_nparray,
)
from .base_operator import QuantOperatorBase
from .qdq_base_operator import QDQOperatorBase


class QPad(QuantOperatorBase):
    """
    Rewrites a `Pad` node so that it consumes and produces quantized tensors.

    The node's output reuses the scale and zero point of input[0]. In constant mode the padding
    value is brought into that same quantized domain.
    """

    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def quantize(self):
        """
        Quantize the `Pad` node in place and append it to the quantizer's new nodes.

        Defers to the base-class `quantize()` when the opset is older than 11, when input[0] has no
        quantized counterpart, or when an explicit padding value is present but the scale/zero-point
        initializers of input[0] cannot be found.
        """
        pad_node = self.node
        assert pad_node.op_type == "Pad"

        quantizer = self.quantizer

        # The optional 'constant_value' input only exists from opset 11 onwards.
        # If input[0] is not quantized, do not quantize this node either.
        if quantizer.opset_version < 11 or pad_node.input[0] not in quantizer.quantized_value_map:
            super().quantize()
            return

        data_qvalue = quantizer.quantized_value_map[pad_node.input[0]]

        attrs = {}
        for attr in pad_node.attribute:
            attrs.update(attribute_to_kwarg(attr))

        # A missing 'mode' attribute is treated as constant mode.
        if attrs.get("mode", b"constant") == b"constant":
            has_explicit_pad_value = len(pad_node.input) > 2 and pad_node.input[2] != ""
            if not has_explicit_pad_value:
                self._use_zero_point_as_pad_value(data_qvalue)
            elif not self._quantize_explicit_pad_value(data_qvalue):
                super().quantize()
                return

        # Register the quantized output; it shares the quantization parameters of input[0].
        float_output_name = pad_node.output[0]
        output_qvalue = QuantizedValue(
            float_output_name,
            float_output_name + TENSOR_NAME_QUANT_SUFFIX,
            data_qvalue.scale_name,
            data_qvalue.zp_name,
            QuantizedValueType.Input,
        )
        quantizer.quantized_value_map[float_output_name] = output_qvalue

        pad_node.input[0] = data_qvalue.q_name
        pad_node.output[0] = output_qvalue.q_name
        quantizer.new_nodes.append(pad_node)

    def _use_zero_point_as_pad_value(self, data_qvalue):
        """
        Wire the zero point of input[0] in as the node's padding value.

        In quantized form the original `zero` maps to the zero point, so padding the original
        tensor with 0 becomes padding the quantized tensor with the zero point.
        """
        node_inputs = self.node.input
        if len(node_inputs) == 2:
            # No third input slot yet: feed the zero point to the padding node.
            node_inputs.append(data_qvalue.zp_name)
            return

        # The third slot exists but is empty: assign the zero point to it.
        assert node_inputs[2] == ""
        node_inputs[2] = data_qvalue.zp_name

    def _quantize_explicit_pad_value(self, data_qvalue):
        """
        Quantize the node's third input ('constant_value') with the parameters of input[0].

        Returns False, without modifying anything, when the scale or zero-point initializer of
        input[0] is not available in the model; returns True once input[2] has been rewired.
        """
        pad_node = self.node
        quantizer = self.quantizer
        model = quantizer.model

        zp_initializer = model.get_initializer(data_qvalue.zp_name)
        scale_initializer = model.get_initializer(data_qvalue.scale_name)
        if zp_initializer is None or scale_initializer is None:
            return False

        pad_value_initializer = model.get_initializer(pad_node.input[2])
        if pad_value_initializer is None:
            # The padding value is not an initializer, so ask the quantizer for nodes that
            # quantize input 2 using the scale and zero point of input[0].
            # TODO: check quantize_inputs after sub graph is supported
            quantize_nodes = quantizer._get_quantize_input_nodes(
                pad_node,
                2,
                quantizer.activation_qType,
                data_qvalue.scale_name,
                data_qvalue.zp_name,
                initial_type=scale_initializer.data_type,
            )
            quantizer.new_nodes.extend(quantize_nodes)
            pad_node.input[2] = quantize_nodes[0].output[0]
            return True

        # The padding value is an initializer: quantize it now and swap in a new initializer.
        # For non-scalar scale / zero-point arrays only the first element is used.
        zp_values = onnx.numpy_helper.to_array(zp_initializer)
        zero_point = zp_values.item() if zp_values.ndim == 0 else zp_values[0]
        scale_values = onnx.numpy_helper.to_array(scale_initializer)
        scale = scale_values.item() if scale_values.ndim == 0 else scale_values[0]
        float_pad_value = onnx.numpy_helper.to_array(pad_value_initializer)

        quantized_pad_value = quantize_nparray(
            quantizer.activation_qType,
            float_pad_value,
            scale,
            zero_point,
        )
        quantized_pad_name = pad_node.input[2] + TENSOR_NAME_QUANT_SUFFIX
        quantized_pad_initializer = onnx.numpy_helper.from_array(quantized_pad_value, quantized_pad_name)

        # NOTE(review): this assumes the padding constant initializer is used only by this node;
        # it is removed from the model unconditionally.
        model.remove_initializer(pad_value_initializer)
        model.add_initializer(quantized_pad_initializer)
        pad_node.input[2] = quantized_pad_name
        return True


class QDQPad(QDQOperatorBase):
    """
    Tensor-quantization handler for `Pad` nodes.

    Decides whether the node's output can share the quantization parameters of input[0] or needs
    to be quantized as an independent activation.
    """

    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def _get_pad_const_val(self, attrs_dict: dict[str, Any]) -> np.ndarray | None:
        """
        Look up the value this Pad node pads with in constant mode.

        Before opset 11 the value is read from the `value` attribute (default 0). From opset 11 it
        is read from the optional third input, falling back to 0 when that input is absent or empty.
        Returns `None` when the tensor type of input[0] is unknown, or when the padding value is
        not constant (i.e., comes from a dynamic input).
        """
        data_tensor_type = self.quantizer.model.get_tensor_type(self.node.input[0])
        if data_tensor_type is None:
            return None

        # Implicit / attribute-provided padding values take the element type of input[0].
        np_dtype = onnx.helper.tensor_dtype_to_np_dtype(data_tensor_type.elem_type)

        if self.quantizer.opset_version < 11:
            return np.array(attrs_dict.get("value", 0), dtype=np_dtype)

        node_inputs = self.node.input
        if len(node_inputs) >= 3 and node_inputs[2]:
            # Whatever the model reports as the constant value of input[2] is returned as-is.
            return self.quantizer.model.get_constant_value(node_inputs[2])

        return np.array(0, dtype=np_dtype)

    def _should_quantize_output_same_as_input(self) -> bool:
        """
        Tell whether Pad's output should reuse the quantization parameters of input[0].
        """
        attrs = {}
        for attr in self.node.attribute:
            attrs.update(attribute_to_kwarg(attr))

        mode = attrs.get("mode", b"constant")

        # These modes only replicate values that already exist in the input, so the output can be
        # quantized exactly like the input.
        if mode in (b"reflect", b"edge", b"wrap"):
            return True

        if mode != b"constant":
            return False

        # Constant mode: padding with 0 is also safe, because our quantization floating-point
        # range always includes 0.
        pad_value = self._get_pad_const_val(attrs)
        if pad_value is None or pad_value.dtype not in (np.float32, np.float16):
            return False

        return float(pad_value.item()) == 0

    def quantize(self):
        """
        Request quantization of every non-empty input and, unless disabled for this node, of the
        output (either tied to input[0] or as an independent activation).
        """
        pad_node = self.node
        assert pad_node.op_type == "Pad"

        # Optional inputs that were omitted appear as empty names; skip them.
        for tensor_name in filter(None, pad_node.input):
            self.quantizer.quantize_activation_tensor(tensor_name)

        if self.disable_qdq_for_node_output:
            return

        if self._should_quantize_output_same_as_input():
            self.quantizer.quantize_output_same_as_input(pad_node.output[0], pad_node.input[0], pad_node.name)
        else:
            self.quantizer.quantize_activation_tensor(pad_node.output[0])
Add __pycache__ and .venv directories 2026-01-09 09:48:03 +08:00			`# --------------------------------------------------------------------------`
			`# Copyright (c) Microsoft Corporation. All rights reserved.`
			`# Licensed under the MIT License.`
			`# --------------------------------------------------------------------------`
			`from __future__ import annotations`

			`from typing import Any`

			`import numpy as np`
			`import onnx`

			`from ..quant_utils import (`
			`TENSOR_NAME_QUANT_SUFFIX,`
			`QuantizedValue,`
			`QuantizedValueType,`
			`attribute_to_kwarg,`
			`quantize_nparray,`
			`)`
			`from .base_operator import QuantOperatorBase`
			`from .qdq_base_operator import QDQOperatorBase`


			`class QPad(QuantOperatorBase):`
			`def __init__(self, onnx_quantizer, onnx_node):`
			`super().__init__(onnx_quantizer, onnx_node)`

			`def quantize(self):`
			`node = self.node`
			`assert node.op_type == "Pad"`

			`# Only after version 11, it has the optional constant_value`
			`# If input[0] is not quantized, do not quanitize this node`
			`if (self.quantizer.opset_version < 11) or (node.input[0] not in self.quantizer.quantized_value_map):`
			`super().quantize()`
			`return`
			`quantized_input_value = self.quantizer.quantized_value_map[node.input[0]]`

			`kwargs = {}`
			`for attribute in node.attribute:`
			`kv = attribute_to_kwarg(attribute)`
			`kwargs.update(kv)`

			`if "mode" not in kwargs or kwargs["mode"] == b"constant":`
			`if len(node.input) > 2 and node.input[2] != "": # There is 3rd input 'constant_value'`
			`zp_tensor = self.quantizer.model.get_initializer(quantized_input_value.zp_name)`
			`scale_tensor = self.quantizer.model.get_initializer(quantized_input_value.scale_name)`
			`if zp_tensor is None or scale_tensor is None:`
			`super().quantize()`
			`return`

			`padding_constant_initializer = self.quantizer.model.get_initializer(node.input[2])`
			`if padding_constant_initializer is not None:`
			`zp_array = onnx.numpy_helper.to_array(zp_tensor)`
			`zp_value = zp_array.item() if zp_array.ndim == 0 else zp_array[0]`
			`scale_array = onnx.numpy_helper.to_array(scale_tensor)`
			`scale_value = scale_array.item() if scale_array.ndim == 0 else scale_array[0]`
			`padding_constant_array = onnx.numpy_helper.to_array(padding_constant_initializer)`
			`quantized_padding_constant_array = quantize_nparray(`
			`self.quantizer.activation_qType,`
			`padding_constant_array,`
			`scale_value,`
			`zp_value,`
			`)`
			`quantized_padding_constant_name = node.input[2] + TENSOR_NAME_QUANT_SUFFIX`
			`quantized_padding_constant_initializer = onnx.numpy_helper.from_array(`
			`quantized_padding_constant_array,`
			`quantized_padding_constant_name,`
			`)`
			`# Suppose this padding constant initializer only used by the node`
			`self.quantizer.model.remove_initializer(padding_constant_initializer)`
			`self.quantizer.model.add_initializer(quantized_padding_constant_initializer)`
			`node.input[2] = quantized_padding_constant_name`
			`else:`
			`# TODO: check quantize_inputs after sub graph is supported`
			`pad_value_qnodes = self.quantizer._get_quantize_input_nodes(`
			`node,`
			`2,`
			`self.quantizer.activation_qType,`
			`quantized_input_value.scale_name,`
			`quantized_input_value.zp_name,`
			`initial_type=scale_tensor.data_type,`
			`)`
			`self.quantizer.new_nodes.extend(pad_value_qnodes)`
			`node.input[2] = pad_value_qnodes[0].output[0]`
			`else:`
			# In quantized format, the `zero` before quantization is mapped
			`# to quantized_input_value.zp_name. Thus, padding 0 to`
			`# original tensor should become padding zero point to quantized`
			`# tensor.`
			`if len(node.input) == 2:`
			`# Feed quantization's zero point to padding node.`
			`node.input.append(quantized_input_value.zp_name)`
			`else:`
			`# Assign quantization's zero point to padding node.`
			`assert node.input[2] == ""`
			`node.input[2] = quantized_input_value.zp_name`

			`# Create an entry for output quantized value`
			`quantized_output_value = QuantizedValue(`
			`node.output[0],`
			`node.output[0] + TENSOR_NAME_QUANT_SUFFIX,`
			`quantized_input_value.scale_name,`
			`quantized_input_value.zp_name,`
			`QuantizedValueType.Input,`
			`)`
			`self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value`

			`node.input[0] = quantized_input_value.q_name`
			`node.output[0] = quantized_output_value.q_name`
			`self.quantizer.new_nodes += [node]`


			`class QDQPad(QDQOperatorBase):`
			`def __init__(self, onnx_quantizer, onnx_node):`
			`super().__init__(onnx_quantizer, onnx_node)`

			`def _get_pad_const_val(self, attrs_dict: dict[str, Any]) -> np.ndarray \| None:`
			`"""`
			Returns the Pad's constant padding value. Returns `None` if the padding value is
			`not constant (i.e., comes from a dynamic input).`
			`"""`
			`const_val = None`
			`onnx_tensor_type = self.quantizer.model.get_tensor_type(self.node.input[0])`
			`if onnx_tensor_type is None:`
			`return None`

			`np_dtype = onnx.helper.tensor_dtype_to_np_dtype(onnx_tensor_type.elem_type)`
			`if self.quantizer.opset_version < 11:`
			`const_val = np.array(attrs_dict.get("value", 0), dtype=np_dtype)`
			`elif len(self.node.input) >= 3 and self.node.input[2]:`
			`const_val = self.quantizer.model.get_constant_value(self.node.input[2])`
			`else:`
			`const_val = np.array(0, dtype=np_dtype)`

			`return const_val`

			`def _should_quantize_output_same_as_input(self) -> bool:`
			`"""`
			`Returns true if Pad's output should use the same quantization parameters as input[0]`
			`"""`
			`attrs_dict = {}`
			`for attribute in self.node.attribute:`
			`kv = attribute_to_kwarg(attribute)`
			`attrs_dict.update(kv)`

			`pad_mode = attrs_dict.get("mode", b"constant")`
			`if pad_mode in (b"reflect", b"edge", b"wrap"):`
			`# These modes pad the output with a value that already exists in the input.`
			`# So, we can quantize the output the same as the input.`
			`return True`

			`# For 'constant' mode, if padding with 0, we can also quantize the output the same as the input`
			`# because our quantization floating-point range always includes 0.`
			`if pad_mode == b"constant":`
			`pad_val = self._get_pad_const_val(attrs_dict)`
			`if pad_val is not None and pad_val.dtype in (np.float32, np.float16):`
			`return float(pad_val.item()) == 0`

			`return False`

			`def quantize(self):`
			`assert self.node.op_type == "Pad"`

			`for input_name in self.node.input:`
			`if input_name:`
			`self.quantizer.quantize_activation_tensor(input_name)`

			`if not self.disable_qdq_for_node_output:`
			`if self._should_quantize_output_same_as_input():`
			`self.quantizer.quantize_output_same_as_input(self.node.output[0], self.node.input[0], self.node.name)`
			`else:`
			`self.quantizer.quantize_activation_tensor(self.node.output[0])`