/* Source: CuPBoP/examples/dwt2d/components.cu (194 lines, 6.1 KiB; last changed 2022-05-22 03:55:49 +08:00) */
/*
* Copyright (c) 2009, Jiri Matela
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include <error.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include "components.h"
#include "common.h"
#define THREADS 256
/*
 * Store three float RGB components at index pos of their component buffers.
 * Each value is divided by 255 and shifted down by 0.5, so an input in
 * 0..255 ends up in -0.5..0.5.
 */
__device__ void storeComponents(float *d_r, float *d_g, float *d_b, float r, float g, float b, int pos)
{
    const float red   = r / 255.0f;
    const float green = g / 255.0f;
    const float blue  = b / 255.0f;

    d_r[pos] = red   - 0.5f;
    d_g[pos] = green - 0.5f;
    d_b[pos] = blue  - 0.5f;
}
/*
 * Store three integer RGB components at index pos of their component
 * buffers. Each value is shifted down by 128 before being written.
 */
__device__ void storeComponents(int *d_r, int *d_g, int *d_b, int r, int g, int b, int pos)
{
    const int bias = 128;

    d_r[pos] = r - bias;
    d_g[pos] = g - bias;
    d_b[pos] = b - bias;
}
/*
 * Store a single float component at index pos of d_c.
 * The value is divided by 255 and shifted down by 0.5.
 */
__device__ void storeComponent(float *d_c, float c, int pos)
{
    const float scaled = c / 255.0f;
    d_c[pos] = scaled - 0.5f;
}
/*
 * Store a single integer component at index pos of d_c.
 * The value is shifted down by 128 before being written.
 */
__device__ void storeComponent(int *d_c, int c, int pos)
{
    const int bias = 128;
    d_c[pos] = c - bias;
}
/*
 * Split interleaved 3-byte-per-pixel source data (r, g, b order) into three
 * separate component buffers.
 *
 * Each thread block first stages THREADS*3 source bytes in shared memory
 * using 32-bit word copies; after the barrier every thread converts one
 * pixel and writes it through storeComponents().
 *
 * Preconditions (not checked by the kernel):
 *  - blockDim.x is expected to equal THREADS, since the shared buffer and
 *    the staging guard are sized from THREADS;
 *  - d_src must be 4-byte aligned and readable for
 *    gridDim.x * THREADS * 3 bytes, because the staging loads are not
 *    bounds-checked against pixels;
 *  - THREADS*3 must be a multiple of 4.
 *
 * d_r, d_g, d_b  output component buffers, indexed up to pixels-1
 * d_src          interleaved source bytes
 * pixels         number of valid pixels; threads at or beyond it store nothing
 */
template<typename T>
__global__ void c_CopySrcToComponents(T *d_r, T *d_g, T *d_b,
                                      unsigned char * d_src,
                                      int pixels)
{
    int x  = threadIdx.x;
    int gX = blockDim.x*blockIdx.x;

    /* Declared as 32-bit words so the 4-byte staging stores below are
       guaranteed to be aligned; it is read back byte-wise after the barrier. */
    __shared__ unsigned int sWords[(THREADS*3)/4];

    /* Copy the block's source bytes to shared memory 4 bytes at a time.
       An integer word type is used so the copy is a plain bit-for-bit move
       of the pixel bytes. */
    if ( (x*4) < THREADS*3 ) {
        const unsigned int *srcWords = reinterpret_cast<const unsigned int *>(d_src);
        /* Widen before multiplying so the byte offset cannot overflow int. */
        size_t firstWord = (static_cast<size_t>(gX) * 3) >> 2;
        sWords[x] = srcWords[firstWord + x];
    }
    __syncthreads();

    /* Byte view of the staged data; reading through unsigned char is
       always permitted regardless of the buffer's declared type. */
    const unsigned char *sData = reinterpret_cast<const unsigned char *>(sWords);

    T r, g, b;
    int offset = x*3;
    r = (T)(sData[offset]);
    g = (T)(sData[offset+1]);
    b = (T)(sData[offset+2]);

    int globalOutputPosition = gX + x;
    if (globalOutputPosition < pixels) {
        storeComponents(d_r, d_g, d_b, r, g, b, globalOutputPosition);
    }
}
/*
 * Copy single-channel 8-bit source data into one component buffer.
 *
 * Each thread block first stages THREADS source bytes in shared memory
 * using 32-bit word copies; after the barrier every thread converts one
 * sample and writes it through storeComponent().
 *
 * Preconditions (not checked by the kernel):
 *  - blockDim.x is expected to equal THREADS, since the shared buffer and
 *    the staging guard are sized from THREADS;
 *  - d_src must be 4-byte aligned and readable for gridDim.x * THREADS
 *    bytes, because the staging loads are not bounds-checked against pixels;
 *  - THREADS must be a multiple of 4.
 *
 * d_c     output component buffer, indexed up to pixels-1
 * d_src   source bytes, one per pixel
 * pixels  number of valid pixels; threads at or beyond it store nothing
 */
template<typename T>
__global__ void c_CopySrcToComponent(T *d_c, unsigned char * d_src, int pixels)
{
    int x  = threadIdx.x;
    int gX = blockDim.x*blockIdx.x;

    /* Declared as 32-bit words so the 4-byte staging stores below are
       guaranteed to be aligned; it is read back byte-wise after the barrier. */
    __shared__ unsigned int sWords[THREADS/4];

    /* Copy the block's source bytes to shared memory 4 bytes at a time.
       An integer word type is used so the copy is a plain bit-for-bit move
       of the pixel bytes. */
    if ( (x*4) < THREADS ) {
        const unsigned int *srcWords = reinterpret_cast<const unsigned int *>(d_src);
        size_t firstWord = static_cast<size_t>(gX) >> 2;
        sWords[x] = srcWords[firstWord + x];
    }
    __syncthreads();

    /* Byte view of the staged data. */
    const unsigned char *sData = reinterpret_cast<const unsigned char *>(sWords);

    T c;
    c = (T)(sData[x]);

    int globalOutputPosition = gX + x;
    if (globalOutputPosition < pixels) {
        storeComponent(d_c, c, globalOutputPosition);
    }
}
/*
 * Separate the components of an 8-bit RGB source image.
 *
 * Uploads the interleaved host image to a temporary device buffer whose size
 * is rounded up to a whole number of thread blocks (zero padded), runs
 * c_CopySrcToComponents over it and releases the temporary buffer.
 *
 * d_r, d_g, d_b  device output buffers written by the kernel
 * src            host pointer to width*height*3 interleaved bytes
 * width, height  image dimensions in pixels
 *
 * An image with zero pixels is a no-op. Errors are reported through the
 * cudaCheckError / cudaCheckAsyncError macros (defined outside this file
 * section; presumably from common.h).
 */
template<typename T>
void rgbToComponents(T *d_r, T *d_g, T *d_b, unsigned char * src, int width, int height)
{
    unsigned char * d_src;
    int pixels = width*height;

    /* Nothing to convert; a launch with zero blocks would be rejected as an
       invalid configuration. */
    if (pixels == 0) {
        return;
    }

    /* One thread block handles THREADS pixels. Byte counts are kept in
       size_t so that pixels*3 cannot overflow int. */
    int blocks = DIVANDRND(pixels, THREADS);
    size_t alignedSize = (size_t)blocks * THREADS * 3; //aligned to thread block size -- THREADS
    size_t srcSize = (size_t)pixels * 3;

    /* Alloc d_src buffer */
    cudaMalloc((void **)&d_src, alignedSize);
    cudaCheckAsyncError("Cuda malloc")
    /* Zero the buffer so the padding past the image is defined */
    cudaMemset(d_src, 0, alignedSize);

    /* Copy data to device */
    cudaMemcpy(d_src, src, srcSize, cudaMemcpyHostToDevice);
    cudaCheckError("Copy data to device")

    /* Kernel */
    dim3 threads(THREADS);
    dim3 grid(blocks);
    assert(alignedSize%(THREADS*3) == 0);
    c_CopySrcToComponents<<<grid, threads>>>(d_r, d_g, d_b, d_src, pixels);
    cudaCheckAsyncError("CopySrcToComponents kernel")

    /* Free Memory */
    cudaFree(d_src);
    cudaCheckAsyncError("Free memory")
}
/* Explicit instantiations of rgbToComponents for float and int component buffers. */
template void rgbToComponents<float>(float *d_r, float *d_g, float *d_b, unsigned char * src, int width, int height);
template void rgbToComponents<int>(int *d_r, int *d_g, int *d_b, unsigned char * src, int width, int height);
/*
 * Copy an 8-bit single-channel source image into a color component of type T.
 *
 * Uploads the host image to a temporary device buffer whose size is rounded
 * up to a whole number of thread blocks (zero padded), runs
 * c_CopySrcToComponent over it and releases the temporary buffer.
 *
 * d_c            device output buffer written by the kernel
 * src            host pointer to width*height source bytes
 * width, height  image dimensions in pixels
 *
 * An image with zero pixels is a no-op. Errors are reported through the
 * cudaCheckError / cudaCheckAsyncError macros (defined outside this file
 * section; presumably from common.h).
 */
template<typename T>
void bwToComponent(T *d_c, unsigned char * src, int width, int height)
{
    unsigned char * d_src;
    int pixels = width*height;

    /* Nothing to convert; a launch with zero blocks would be rejected as an
       invalid configuration. */
    if (pixels == 0) {
        return;
    }

    /* One thread block handles THREADS pixels; sizes are kept in size_t. */
    int blocks = DIVANDRND(pixels, THREADS);
    size_t alignedSize = (size_t)blocks * THREADS; //aligned to thread block size -- THREADS
    size_t srcSize = (size_t)pixels;

    /* Alloc d_src buffer */
    cudaMalloc((void **)&d_src, alignedSize);
    cudaCheckAsyncError("Cuda malloc")
    /* Zero the buffer so the padding past the image is defined */
    cudaMemset(d_src, 0, alignedSize);

    /* Copy data to device */
    cudaMemcpy(d_src, src, srcSize, cudaMemcpyHostToDevice);
    cudaCheckError("Copy data to device")

    /* Kernel */
    dim3 threads(THREADS);
    dim3 grid(blocks);
    assert(alignedSize%(THREADS) == 0);
    c_CopySrcToComponent<<<grid, threads>>>(d_c, d_src, pixels);
    cudaCheckAsyncError("CopySrcToComponent kernel")

    /* Free Memory */
    cudaFree(d_src);
    cudaCheckAsyncError("Free memory")
}
/* Explicit instantiations of bwToComponent for float and int component buffers. */
template void bwToComponent<float>(float *d_c, unsigned char *src, int width, int height);
template void bwToComponent<int>(int *d_c, unsigned char *src, int width, int height);