diff --git a/include/mgba-util/convolve.h b/include/mgba-util/convolve.h
index cf7aa52af..2d101d351 100644
--- a/include/mgba-util/convolve.h
+++ b/include/mgba-util/convolve.h
@@ -26,6 +26,7 @@ void ConvolutionKernelFillCircle(struct ConvolutionKernel* kernel, bool normaliz
 
 void Convolve1DPad0PackedS32(const int32_t* restrict src, int32_t* restrict dst, size_t length, const struct ConvolutionKernel* restrict kernel);
 void Convolve2DClampPacked8(const uint8_t* restrict src, uint8_t* restrict dst, size_t width, size_t height, size_t stride, const struct ConvolutionKernel* restrict kernel);
+void Convolve2DClampChannels8(const uint8_t* restrict src, uint8_t* restrict dst, size_t width, size_t height, size_t stride, size_t channels, const struct ConvolutionKernel* restrict kernel);
 
 CXX_GUARD_END
 
diff --git a/src/util/convolve.c b/src/util/convolve.c
index 358707cb3..8dfcd1b58 100644
--- a/src/util/convolve.c
+++ b/src/util/convolve.c
@@ -136,3 +136,70 @@ void Convolve2DClampPacked8(const uint8_t* restrict src, uint8_t* restrict dst,
 		}
 	}
 }
+
+/*
+ * Convolve an interleaved multi-channel 8-bit image with a rank-2 kernel,
+ * filtering each channel independently.
+ *
+ * src, dst: input and output images; rows begin every `stride` bytes and each
+ *           pixel occupies `channels` consecutive bytes
+ * width:    pixels per row
+ * height:   number of rows
+ * kernel:   dims[0] is the kernel width and dims[1] its height; the call does
+ *           nothing unless kernel->rank is 2
+ *
+ * The kernel window is shifted back by half its size in each dimension, and
+ * taps that land outside the image read the nearest edge pixel instead.
+ * Each sum is saturated to 0..255 before its fractional part is dropped,
+ * because converting an out-of-range float to uint8_t is undefined behavior.
+ */
+void Convolve2DClampChannels8(const uint8_t* restrict src, uint8_t* restrict dst, size_t width, size_t height, size_t stride, size_t channels, const struct ConvolutionKernel* restrict kernel) {
+	if (kernel->rank != 2) {
+		return;
+	}
+	const size_t kw = kernel->dims[0];
+	const size_t kh = kernel->dims[1];
+	const size_t kx2 = kw / 2;
+	const size_t ky2 = kh / 2;
+	size_t y;
+	for (y = 0; y < height; ++y) {
+		uint8_t* orow = &dst[y * stride];
+		size_t x;
+		for (x = 0; x < width; ++x) {
+			size_t c;
+			for (c = 0; c < channels; ++c) {
+				float sum = 0.f;
+				size_t ky;
+				for (ky = 0; ky < kh; ++ky) {
+					size_t cy = 0;
+					if (y + ky > ky2) {
+						cy = y + ky - ky2;
+					}
+					if (cy >= height) {
+						cy = height - 1;
+					}
+					const uint8_t* irow = &src[cy * stride];
+					size_t kx;
+					for (kx = 0; kx < kw; ++kx) {
+						size_t cx = 0;
+						if (x + kx > kx2) {
+							cx = x + kx - kx2;
+						}
+						if (cx >= width) {
+							cx = width - 1;
+						}
+						sum += irow[cx * channels + c] * kernel->kernel[ky * kw + kx];
+					}
+				}
+				// Written as !(sum >= 0) so that a NaN sum also lands on 0
+				if (!(sum >= 0.f)) {
+					sum = 0.f;
+				} else if (sum > 255.f) {
+					sum = 255.f;
+				}
+				*orow = (uint8_t) sum;
+				++orow;
+			}
+		}
+	}
+}