The profiler says that 50% of the total time is spent inside this function. How would you optimize it? It converts the BMP color format to YUV. Thanks!
Update: the platform is ARMv6 (I am writing for the iPhone).
/*
 * Fixed-point RGB -> YCbCr helpers (the coefficients look like the common
 * integer approximation of ITU-R BT.601 studio-range conversion).
 *
 * ARGUMENT ORDER IS (blue, green, red), despite the "RGB" in the names;
 * this is the order every call site in this file uses.
 *
 * Y_FROM_RGB takes the 8-bit components of ONE pixel (scale 2^8).
 * V_FROM_RGB / U_FROM_RGB take each component SUMMED over a 2x2 block of
 * pixels (range 0..1020), hence the shift by 10 (2^8 * 4 pixels).
 *
 * For U and V the +128 output bias is folded in before the shift as
 * (128 << 10), so the shifted value is never negative for valid input and
 * the result does not rely on implementation-defined right shifts of
 * negative integers. With an arithmetic shift this is numerically identical
 * to "(x + 128) >> 10) + 128".
 *
 * NOTE(review): the rounding term for the >> 10 forms is 128 rather than
 * 512 (half of 2^10), so chroma is biased slightly downward. Kept as-is to
 * preserve the existing output.
 *
 * For 8-bit inputs all three results fit in an unsigned char.
 */
#define Y_FROM_RGB(b, g, r) \
    ((((66 * (r)) + (129 * (g)) + (25 * (b)) + 128) >> 8) + 16)
#define V_FROM_RGB(b, g, r) \
    (((112 * (r)) - (94 * (g)) - (18 * (b)) + 128 + (128 << 10)) >> 10)
#define U_FROM_RGB(b, g, r) \
    (((-38 * (r)) - (74 * (g)) + (112 * (b)) + 128 + (128 << 10)) >> 10)

/*!
 * \brief
 * Converts a packed 24-bit image into three separate Y, U and V planes,
 * with U and V subsampled 2x2 (one sample per 2x2 block of pixels).
 *
 * \param source
 * Source image pointer. Each pixel is 3 bytes, interpreted as byte 0 = red,
 * byte 1 = green, byte 2 = blue. Rows are assumed to be tightly packed:
 * consecutive rows are source_width * 3 bytes apart (no row padding).
 *
 * \param source_width
 * Source image width in pixels; only used to derive the source row size.
 *
 * \param dest_Y
 * Destination Y (luma) plane pointer.
 *
 * \param dest_scan_size_Y
 * Destination Y plane line size in bytes.
 *
 * \param dest_U
 * Destination U plane pointer; receives (dest_width / 2) samples for each
 * of (dest_height / 2) lines.
 *
 * \param dest_scan_size_U
 * Destination U plane line size in bytes.
 *
 * \param dest_V
 * Destination V plane pointer; same dimensions as the U plane.
 *
 * \param dest_scan_size_V
 * Destination V plane line size in bytes.
 *
 * \param dest_width
 * Destination image width in pixels (expected to equal source_width).
 *
 * \param dest_height
 * Destination image height in pixels (expected to equal the source height).
 *
 * The image is processed in 2x2 pixel blocks: four Y samples are written
 * per block, and the R, G and B values of the four pixels are summed to
 * produce one U and one V sample. Because the block counts are
 * dest_width / 2 and dest_height / 2 (integer division), a trailing odd
 * column and/or odd row is NOT converted. Bytes of the destination lines
 * beyond the converted samples are left untouched.
 */
void ImageConvert_24_YUV420P(unsigned char * source, int source_width,
                             unsigned char * dest_Y, int dest_scan_size_Y,
                             unsigned char * dest_U, int dest_scan_size_U,
                             unsigned char * dest_V, int dest_scan_size_V,
                             int dest_width, int dest_height)
{
    const int source_scan_size = source_width * 3;
    const int half_width = dest_width / 2;
    const int half_height = dest_height / 2;

    /* One iteration per pair of image rows. */
    for (int row = 0; row < half_height; row++) {
        const unsigned char *top = source;
        const unsigned char *bottom = source + source_scan_size;
        unsigned char *y_top = dest_Y;
        unsigned char *y_bottom = dest_Y + dest_scan_size_Y;
        unsigned char *u_out = dest_U;
        unsigned char *v_out = dest_V;

        /* One iteration per 2x2 block; each source byte is read once. */
        for (int col = 0; col < half_width; col++) {
            /*
             * Load the whole block before storing anything, so the
             * compiler does not have to re-read the source after each
             * store (the pointers are not declared restrict).
             */
            const int r00 = top[0], g00 = top[1], b00 = top[2];
            const int r01 = top[3], g01 = top[4], b01 = top[5];
            const int r10 = bottom[0], g10 = bottom[1], b10 = bottom[2];
            const int r11 = bottom[3], g11 = bottom[4], b11 = bottom[5];

            /* Chroma is computed from the block sums (see macro notes). */
            const int r_sum = r00 + r01 + r10 + r11;
            const int g_sum = g00 + g01 + g10 + g11;
            const int b_sum = b00 + b01 + b10 + b11;

            y_top[0] = (unsigned char)Y_FROM_RGB(b00, g00, r00);
            y_top[1] = (unsigned char)Y_FROM_RGB(b01, g01, r01);
            y_bottom[0] = (unsigned char)Y_FROM_RGB(b10, g10, r10);
            y_bottom[1] = (unsigned char)Y_FROM_RGB(b11, g11, r11);
            *u_out++ = (unsigned char)U_FROM_RGB(b_sum, g_sum, r_sum);
            *v_out++ = (unsigned char)V_FROM_RGB(b_sum, g_sum, r_sum);

            top += 6;
            bottom += 6;
            y_top += 2;
            y_bottom += 2;
        }

        /* Two source/luma lines and one chroma line consumed per pass. */
        source += 2 * source_scan_size;
        dest_Y += 2 * dest_scan_size_Y;
        dest_U += dest_scan_size_U;
        dest_V += dest_scan_size_V;
    }
}