// TIM-VX/src/tim/vx/test_utils.h
//
// Test helpers for deriving quantization parameters and quantizing float data.

#ifndef TIM_VX_TEST_UTILS_H_
#define TIM_VX_TEST_UTILS_H_
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <ostream>
#include <utility>
#include <vector>
// Derives affine quantization parameters that map the real interval
// [f_min, f_max] onto the full value range of the integer type T.
//
// Returns {scale, zero_point} such that a real value r corresponds to the
// quantized value round(zero_point + r / scale).
//
// * If f_min == f_max the interval is a single point and {0, 0} is returned.
//   Note that the returned scale is 0 in that case.
// * The interval is assumed (not checked) to contain 0, so that the real value
//   0 is exactly representable by the returned zero point.
// * The zero point is always saturated into [min(T), max(T)].
template <typename T>
std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
  const T qmin = std::numeric_limits<T>::min();
  const T qmax = std::numeric_limits<T>::max();
  // Float images of the integer limits. For 32-bit and wider T the conversion
  // of qmax rounds *up* to the next power of two (e.g. INT32_MAX -> 2^31), so
  // qmax_f may lie just outside the range of T.
  const float qmin_f = qmin;
  const float qmax_f = qmax;

  // Special case where the min,max range is a point. Should be {0}.
  if (f_min == f_max) {
    return {0.0f, int32_t{0}};
  }

  // General case: first determine the scale.
  const float scale = (f_max - f_min) / (qmax_f - qmin_f);

  // Zero-point computation.
  // The zero point can be determined by solving the affine equation for any
  // known pair (real value, corresponding quantized value). Two such pairs are
  // known: (f_min, qmin) and (f_max, qmax). The arithmetic error of either
  // variant is roughly machine_epsilon * (sum of absolute values of terms), so
  // the variant that adds the smaller terms is used.
  const float zero_point_from_min = qmin_f - f_min / scale;
  const float zero_point_from_max = qmax_f - f_max / scale;
  const float zero_point_from_min_error =
      std::abs(qmin_f) + std::abs(f_min / scale);
  const float zero_point_from_max_error =
      std::abs(qmax_f) + std::abs(f_max / scale);
  const float zero_point_real =
      zero_point_from_min_error < zero_point_from_max_error
          ? zero_point_from_min
          : zero_point_from_max;

  // Nudge the zero point to an integer in [qmin, qmax]. Zero points are
  // integers so that the real value "0" is exactly representable as a
  // quantized value (required, for example, by Im2col with SAME padding).
  //
  // The comparisons are inclusive on purpose: a value equal to qmax_f must
  // saturate to qmax instead of being cast, because for wide T qmax_f is
  // larger than qmax and casting it to T would be undefined behaviour. For
  // narrow T (where the limits are exact in float) the result is unchanged.
  T nudged_zero_point = 0;
  if (zero_point_real <= qmin_f) {
    nudged_zero_point = qmin;
  } else if (zero_point_real >= qmax_f) {
    nudged_zero_point = qmax;
  } else {
    nudged_zero_point = static_cast<T>(std::round(zero_point_real));
  }

  return {scale, static_cast<int32_t>(nudged_zero_point)};
}
// Quantizes every element of `data` to the integer type T using
//   q = round(zero_point + f / scale)
// saturated to [min(T), max(T)].
//
// data:       real values to quantize.
// scale:      real-value step per quantized unit.
// zero_point: quantized value that represents the real value 0.
//
// Returns a vector with one quantized value per input element, in order.
// Values that are not a number after the division (e.g. 0 / 0 when scale is
// 0) map to max(T).
template <typename T>
inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
                               int32_t zero_point) {
  const T qmin = std::numeric_limits<T>::min();
  const T qmax = std::numeric_limits<T>::max();
  // Float images of the limits. `lo` is always exact; for 32-bit and wider T
  // `hi` rounds up past qmax, so a value equal to `hi` must never be cast.
  const float lo = static_cast<float>(qmin);
  const float hi = static_cast<float>(qmax);

  std::vector<T> q;
  q.reserve(data.size());
  for (const float f : data) {
    const float rounded = std::round(zero_point + (f / scale));
    if (rounded <= lo) {
      q.push_back(qmin);
    } else if (rounded < hi) {
      // Strictly inside (lo, hi): guaranteed to be representable in T.
      q.push_back(static_cast<T>(rounded));
    } else {
      // rounded >= hi, or rounded is NaN (all comparisons above are false).
      q.push_back(qmax);
    }
  }
  return q;
}
#endif /* TIM_VX_TEST_UTILS_H_ */