Diff of /xvidcore/src/image/colorspace.c

-revision 1.3.2.2, Thu Nov  7 10:28:15 2002 UTC
+revision 1.5, Sat Oct 19 12:20:33 2002 UTC
 Line 1
- /**************************************************************************
+ /*****************************************************************************
   *
   *      XVID MPEG-4 VIDEO CODEC
-  *      colorspace conversions
+  *  - colorspace conversion module -
+  *
+  *  Copyright(C) 2002 Peter Ross <pross@xvid.org>
+  *               2002 Michael Militzer <isibaar@xvid.org>
+  *
+  *  This program is an implementation of a part of one or more MPEG-4
+  *  Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
+  *  to use this software module in hardware or software products are
+  *  advised that its use may infringe existing patents or copyrights, and
+  *  any such use would be at such party's own risk.  The original
+  *  developer of this software module and his/her company, and subsequent
+  *  editors and their companies, will have no liability for use of this
+  *  software or modifications or derivatives thereof.
   *
   *      This program is free software; you can redistribute it and/or modify
   *      it under the terms of the GNU General Public License as published by
-Line 15
+Line 27
   *
   *      You should have received a copy of the GNU General Public License
   *      along with this program; if not, write to the Free Software
-  *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
   *
-  *************************************************************************/
+  ****************************************************************************/
- /**************************************************************************
-  *
-  *      History:
-  *
-  *      14.04.2002      added user_to_yuv_c()
-  *      30.02.2002      out_yuv dst_stride2 fix
-  *      26.02.2002      rgb555, rgb565
-  *      24.11.2001      accuracy improvement to yuyv/vyuy conversion
-  *      28.10.2001      total rewrite <pross@cs.rmit.edu.au>
-  *
-  **************************************************************************/
  #include <string.h>                             // memcpy
 Line 39
  // function pointers
  /* input */
- packedFuncPtr rgb555_to_yv12;
+ color_inputFuncPtr rgb555_to_yv12;
- packedFuncPtr rgb565_to_yv12;
+ color_inputFuncPtr rgb565_to_yv12;
- packedFuncPtr bgr_to_yv12;
+ color_inputFuncPtr rgb24_to_yv12;
- packedFuncPtr bgra_to_yv12;
+ color_inputFuncPtr rgb32_to_yv12;
- packedFuncPtr abgr_to_yv12;
+ color_inputFuncPtr yuv_to_yv12;
- packedFuncPtr rgba_to_yv12;
+ color_inputFuncPtr yuyv_to_yv12;
- packedFuncPtr yuv_to_yv12;
+ color_inputFuncPtr uyvy_to_yv12;
- packedFuncPtr yuyv_to_yv12;
- packedFuncPtr uyvy_to_yv12;
- packedFuncPtr rgb555i_to_yv12;
- packedFuncPtr rgb565i_to_yv12;
- packedFuncPtr bgri_to_yv12;
- packedFuncPtr bgrai_to_yv12;
- packedFuncPtr abgri_to_yv12;
- packedFuncPtr rgbai_to_yv12;
- packedFuncPtr yuyvi_to_yv12;
- packedFuncPtr uyvyi_to_yv12;
  /* output */
- packedFuncPtr yv12_to_rgb555;
+ color_outputFuncPtr yv12_to_rgb555;
- packedFuncPtr yv12_to_rgb565;
+ color_outputFuncPtr yv12_to_rgb565;
- packedFuncPtr yv12_to_bgr;
+ color_outputFuncPtr yv12_to_rgb24;
- packedFuncPtr yv12_to_bgra;
+ color_outputFuncPtr yv12_to_rgb32;
- packedFuncPtr yv12_to_abgr;
+ color_outputFuncPtr yv12_to_yuv;
- packedFuncPtr yv12_to_rgba;
+ color_outputFuncPtr yv12_to_yuyv;
- packedFuncPtr yv12_to_yuv;
+ color_outputFuncPtr yv12_to_uyvy;
- packedFuncPtr yv12_to_yuyv;
- packedFuncPtr yv12_to_uyvy;
- packedFuncPtr yv12_to_rgb555i;
- packedFuncPtr yv12_to_rgb565i;
- packedFuncPtr yv12_to_bgri;
- packedFuncPtr yv12_to_bgrai;
- packedFuncPtr yv12_to_abgri;
- packedFuncPtr yv12_to_rgbai;
- packedFuncPtr yv12_to_yuyvi;
- packedFuncPtr yv12_to_uyvyi;
- planarFuncPtr yv12_to_yv12;
- int32_t RGB_Y_tab[256];
- int32_t B_U_tab[256];
- int32_t G_U_tab[256];
- int32_t G_V_tab[256];
- int32_t R_V_tab[256];
  #define MIN(A,B)        ((A)<(B)?(A):(B))       /* smaller of A and B; NOTE: each argument may be evaluated twice */
  #define MAX(A,B)        ((A)>(B)?(A):(B))       /* larger of A and B; NOTE: each argument may be evaluated twice */
- /********** generic colorspace macro **********/
- #define MAKE_COLORSPACE(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
- void    \
- NAME(uint8_t * x_ptr, int x_stride,     \
-                                  uint8_t * y_ptr, uint8_t * u_ptr, uint8_t * v_ptr,     \
-                                  int y_stride, int uv_stride,   \
-                                  int width, int height, int vflip)      \
- {       \
-         int fixed_width = (width + 1) & ~1;                             \
-         int x_dif = x_stride - (SIZE)*fixed_width;              \
-         int y_dif = y_stride - fixed_width;                             \
-         int uv_dif = uv_stride - (fixed_width / 2);             \
-         int x, y;                                                                               \
-         if (vflip) {                                                            \
-                 x_ptr += (height - 1) * x_stride;                       \
-                 x_dif = -(SIZE)*fixed_width - x_stride;         \
-                 x_stride = -x_stride;                                           \
-         }                                                                                               \
-         for (y = 0; y < height; y+=(VPIXELS)) {                 \
-                 FUNC##_ROW(SIZE,C1,C2,C3,C4);                           \
-                 for (x = 0; x < fixed_width; x+=(PIXELS)) {     \
-                         FUNC(SIZE,C1,C2,C3,C4);                         \
-                         x_ptr += (PIXELS)*(SIZE);                               \
-                         y_ptr += (PIXELS);                                              \
-                         u_ptr += (PIXELS)/2;                                    \
-                         v_ptr += (PIXELS)/2;                                    \
-                 }                                                                                       \
-                 x_ptr += x_dif + (VPIXELS-1)*x_stride;          \
-                 y_ptr += y_dif + (VPIXELS-1)*y_stride;          \
-                 u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;    \
-                 v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;    \
-         }                                                                                               \
- }
- /********** colorspace input (xxx_to_yv12) functions **********/
  /*      rgb -> yuv def's
          the following constants are "official spec"
-Line 161
+Line 88
  #define FIX_IN(x)               ((uint16_t) ((x) * (1L<<SCALEBITS_IN) + 0.5))
- /* rgb16/rgb16i input */
+ int32_t RGB_Y_tab[256];   /* FIX_OUT(RGB_Y_OUT) * (i - Y_ADD_OUT), filled by colorspace_init() */
+ int32_t B_U_tab[256];     /* FIX_OUT(B_U_OUT) * (i - U_ADD_OUT), filled by colorspace_init() */
+ int32_t G_U_tab[256];     /* FIX_OUT(G_U_OUT) * (i - U_ADD_OUT), filled by colorspace_init() */
+ int32_t G_V_tab[256];     /* FIX_OUT(G_V_OUT) * (i - V_ADD_OUT), filled by colorspace_init() */
+ int32_t R_V_tab[256];     /* FIX_OUT(R_V_OUT) * (i - V_ADD_OUT), filled by colorspace_init() */
+ /* rgb555 -> yuv 4:2:0 planar
+  *
+  * Reads 16-bit pixels from src (width * 2 bytes per row) and writes one
+  * luma byte per pixel to y_out plus one u_out and one v_out byte per
+  * 2x2 pixel block.
+  *   y_stride   byte distance between two rows of y_out; the chroma
+  *              pointers are advanced by (y_stride - width) / 2 after
+  *              every row pair
+  *   height     a negative value flips the image: the source is then
+  *              read from its last row upwards
+  * Only height / 2 row pairs are converted; width and height are
+  * presumably expected to be even -- TODO confirm against callers.
+  */
+ void
+ rgb555_to_yv12_c(uint8_t * y_out,
+                                  uint8_t * u_out,
+                                  uint8_t * v_out,
+                                  uint8_t * src,
+                                  int width,
+                                  int height,
+                                  int y_stride)
+ {
+         int32_t src_stride = width * 2;    /* bytes per source row, 2 bytes per pixel */
+         uint32_t y_dif = y_stride - width;    /* bytes from the end of a written luma row to the next row */
+         uint32_t uv_dif = (y_stride - width) / 2;    /* same gap for the half-width chroma rows */
+         uint32_t x, y;
+         if (height < 0) {
+                 height = -height;
+                 src += (height - 1) * src_stride;    /* start at the last source row */
+                 src_stride = -src_stride;    /* and step backwards through the source */
+         }
+         for (y = height / 2; y; y--) {
+                 // process one 2x2 block per iteration
+                 for (x = 0; x < (uint32_t) width; x += 2) {
+                         int rgb, r, g, b, r4, g4, b4;    /* r4/g4/b4 accumulate the four pixels of the block */
+                         rgb = *(uint16_t *) (src + x * 2);    /* current row, left pixel */
+                         b4 = b = (rgb << 3) & 0xf8;    /* bits 0-4 moved to bits 3-7 */
+                         g4 = g = (rgb >> 2) & 0xf8;    /* bits 5-9 moved to bits 3-7 */
+                         r4 = r = (rgb >> 7) & 0xf8;    /* bits 10-14 moved to bits 3-7 */
+                         y_out[0] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         rgb = *(uint16_t *) (src + x * 2 + src_stride);    /* next row, left pixel */
+                         b4 += b = (rgb << 3) & 0xf8;
+                         g4 += g = (rgb >> 2) & 0xf8;
+                         r4 += r = (rgb >> 7) & 0xf8;
+                         y_out[y_stride] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         rgb = *(uint16_t *) (src + x * 2 + 2);    /* current row, right pixel */
+                         b4 += b = (rgb << 3) & 0xf8;
+                         g4 += g = (rgb >> 2) & 0xf8;
+                         r4 += r = (rgb >> 7) & 0xf8;
+                         y_out[1] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         rgb = *(uint16_t *) (src + x * 2 + src_stride + 2);    /* next row, right pixel */
+                         b4 += b = (rgb << 3) & 0xf8;
+                         g4 += g = (rgb >> 2) & 0xf8;
+                         r4 += r = (rgb >> 7) & 0xf8;
+                         y_out[y_stride + 1] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         *u_out++ =
+                                 (uint8_t) ((-FIX_IN(U_R_IN) * r4 - FIX_IN(U_G_IN) * g4 +
+                                                         FIX_IN(U_B_IN) * b4) >> (SCALEBITS_IN + 2)) +    /* the extra 2 bits divide the four-pixel sum by 4 */
+                                 U_ADD_IN;
+                         *v_out++ =
+                                 (uint8_t) ((FIX_IN(V_R_IN) * r4 - FIX_IN(V_G_IN) * g4 -
+                                                         FIX_IN(V_B_IN) * b4) >> (SCALEBITS_IN + 2)) +
+                                 V_ADD_IN;
+                         y_out += 2;
+                 }
+                 src += src_stride * 2;    /* advance two source rows */
+                 y_out += y_dif + y_stride;    /* skip the row gap plus the second luma row already written */
+                 u_out += uv_dif;
+                 v_out += uv_dif;
+         }
+ }
+ /* rgb565 -> yuv 4:2:0 planar (rgb565_to_yv12_c)
+         NOTE:   identical to rgb555 except for shift/mask
+                         (green is masked with 0xfc instead of 0xf8, red is
+                         taken from bits 11-15)
+                         a negative height flips the image vertically
+                         not tested */
+ void
+ rgb565_to_yv12_c(uint8_t * y_out,
+                                  uint8_t * u_out,
+                                  uint8_t * v_out,
+                                  uint8_t * src,
+                                  int width,
+                                  int height,
+                                  int y_stride)
+ {
+         int32_t src_stride = width * 2;    /* bytes per source row, 2 bytes per pixel */
+         uint32_t y_dif = y_stride - width;    /* bytes from the end of a written luma row to the next row */
+         uint32_t uv_dif = (y_stride - width) / 2;    /* same gap for the half-width chroma rows */
+         uint32_t x, y;
+         if (height < 0) {
+                 height = -height;
+                 src += (height - 1) * src_stride;    /* start at the last source row */
+                 src_stride = -src_stride;    /* and step backwards through the source */
+         }
+         for (y = height / 2; y; y--) {
+                 // process one 2x2 block per iteration
+                 for (x = 0; x < (uint32_t) width; x += 2) {
+                         int rgb, r, g, b, r4, g4, b4;    /* r4/g4/b4 accumulate the four pixels of the block */
+                         rgb = *(uint16_t *) (src + x * 2);    /* current row, left pixel */
+                         b4 = b = (rgb << 3) & 0xf8;    /* bits 0-4 moved to bits 3-7 */
+                         g4 = g = (rgb >> 3) & 0xfc;    /* bits 5-10 moved to bits 2-7 */
+                         r4 = r = (rgb >> 8) & 0xf8;    /* bits 11-15 moved to bits 3-7 */
+                         y_out[0] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         rgb = *(uint16_t *) (src + x * 2 + src_stride);    /* next row, left pixel */
+                         b4 += b = (rgb << 3) & 0xf8;
+                         g4 += g = (rgb >> 3) & 0xfc;
+                         r4 += r = (rgb >> 8) & 0xf8;
+                         y_out[y_stride] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         rgb = *(uint16_t *) (src + x * 2 + 2);    /* current row, right pixel */
+                         b4 += b = (rgb << 3) & 0xf8;
+                         g4 += g = (rgb >> 3) & 0xfc;
+                         r4 += r = (rgb >> 8) & 0xf8;
+                         y_out[1] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         rgb = *(uint16_t *) (src + x * 2 + src_stride + 2);    /* next row, right pixel */
+                         b4 += b = (rgb << 3) & 0xf8;
+                         g4 += g = (rgb >> 3) & 0xfc;
+                         r4 += r = (rgb >> 8) & 0xf8;
+                         y_out[y_stride + 1] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         *u_out++ =
+                                 (uint8_t) ((-FIX_IN(U_R_IN) * r4 - FIX_IN(U_G_IN) * g4 +
+                                                         FIX_IN(U_B_IN) * b4) >> (SCALEBITS_IN + 2)) +    /* the extra 2 bits divide the four-pixel sum by 4 */
+                                 U_ADD_IN;
+                         *v_out++ =
+                                 (uint8_t) ((FIX_IN(V_R_IN) * r4 - FIX_IN(V_G_IN) * g4 -
+                                                         FIX_IN(V_B_IN) * b4) >> (SCALEBITS_IN + 2)) +
+                                 V_ADD_IN;
+                         y_out += 2;
+                 }
+                 src += src_stride * 2;    /* advance two source rows */
+                 y_out += y_dif + y_stride;    /* skip the row gap plus the second luma row already written */
+                 u_out += uv_dif;
+                 v_out += uv_dif;
+         }
+ }
+ /*      rgb24 -> yuv 4:2:0 planar
+         NOTE: always flips.
+
+         Each source pixel is three bytes, read in the order blue, green,
+         red; a source row is width * 3 bytes.  Reading starts at the
+         second-to-last source row and moves two rows back per iteration,
+         while y_out / u_out / v_out are written front to back, so the
+         last source row ends up as the first output row.
+         stride is the byte distance between rows of y_out; the chroma
+         pointers are advanced by (stride - width) / 2 after each row pair.
+         width and height are presumably expected to be even -- TODO confirm.
+ */
+ void
+ rgb24_to_yv12_c(uint8_t * y_out,
+                                 uint8_t * u_out,
+                                 uint8_t * v_out,
+                                 uint8_t * src,
+                                 int width,
+                                 int height,
+                                 int stride)
+ {
+         uint32_t width3 = (width << 1) + width; /* width * 3: bytes per source row */
+         uint32_t src_dif = (width << 3) + width;        /* width3 * 3: one row consumed by the loop plus two rows back */
+         uint32_t y_dif = (stride << 1) - width;    /* from the end of a written luma row to the next row pair */
+         uint32_t uv_dif = (stride - width) >> 1;    /* gap after each half-width chroma row */
+         uint32_t x, y;
+         src += (height - 2) * width3;    /* start at the second-to-last source row */
+         for (y = height >> 1; y; y--) {
+                 for (x = width >> 1; x; x--) {    /* one 2x2 block per iteration */
+                         uint32_t r, g, b, r4, g4, b4;    /* r4/g4/b4 accumulate the four pixels of the block */
+                         b4 = b = src[0];
+                         g4 = g = src[1];
+                         r4 = r = src[2];
+                         y_out[stride + 0] =    /* earlier source row goes to the second output row */
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         b4 += (b = src[3]);
+                         g4 += (g = src[4]);
+                         r4 += (r = src[5]);
+                         y_out[stride + 1] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         b4 += (b = src[width3 + 0]);
+                         g4 += (g = src[width3 + 1]);
+                         r4 += (r = src[width3 + 2]);
+                         y_out[0] =    /* later source row goes to the first output row */
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         b4 += (b = src[width3 + 3]);
+                         g4 += (g = src[width3 + 4]);
+                         r4 += (r = src[width3 + 5]);
+                         y_out[1] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         *u_out++ =
+                                 (uint8_t) ((-FIX_IN(U_R_IN) * r4 - FIX_IN(U_G_IN) * g4 +
+                                                         FIX_IN(U_B_IN) * b4) >> (SCALEBITS_IN + 2)) +    /* the extra 2 bits divide the four-pixel sum by 4 */
+                                 U_ADD_IN;
+                         *v_out++ =
+                                 (uint8_t) ((FIX_IN(V_R_IN) * r4 - FIX_IN(V_G_IN) * g4 -
+                                                         FIX_IN(V_B_IN) * b4) >> (SCALEBITS_IN + 2)) +
+                                 V_ADD_IN;
+                         src += 6;    /* two 3-byte pixels */
+                         y_out += 2;
+                 }
+                 src -= src_dif;
+                 y_out += y_dif;
+                 u_out += uv_dif;
+                 v_out += uv_dif;
+         }
+ }
+ /*      rgb32 -> yuv 4:2:0 planar
+         NOTE: always flips
+
+         Each source pixel is four bytes; bytes 0, 1 and 2 are read as
+         blue, green and red, and byte 3 is never read.  A source row is
+         width * 4 bytes.  Reading starts at the second-to-last source row
+         and moves two rows back per iteration, while the output planes are
+         written front to back, so the last source row ends up as the first
+         output row.
+         stride is the byte distance between rows of y_out; the chroma
+         pointers are advanced by (stride - width) / 2 after each row pair.
+         width and height are presumably expected to be even -- TODO confirm.
+ */
+ void
+ rgb32_to_yv12_c(uint8_t * y_out,
+                                 uint8_t * u_out,
+                                 uint8_t * v_out,
+                                 uint8_t * src,
+                                 int width,
+                                 int height,
+                                 int stride)
+ {
+         uint32_t width4 = (width << 2); /* width * 4: bytes per source row */
+         uint32_t src_dif = 3 * width4;    /* one row consumed by the loop plus two rows back */
+         uint32_t y_dif = (stride << 1) - width;    /* from the end of a written luma row to the next row pair */
+         uint32_t uv_dif = (stride - width) >> 1;    /* gap after each half-width chroma row */
+         uint32_t x, y;
+         src += (height - 2) * width4;    /* start at the second-to-last source row */
+         for (y = height >> 1; y; y--) {
+                 for (x = width >> 1; x; x--) {    /* one 2x2 block per iteration */
+                         uint32_t r, g, b, r4, g4, b4;    /* r4/g4/b4 accumulate the four pixels of the block */
+                         b4 = b = src[0];
+                         g4 = g = src[1];
+                         r4 = r = src[2];
+                         y_out[stride + 0] =    /* earlier source row goes to the second output row */
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         b4 += (b = src[4]);
+                         g4 += (g = src[5]);
+                         r4 += (r = src[6]);
+                         y_out[stride + 1] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         b4 += (b = src[width4 + 0]);
+                         g4 += (g = src[width4 + 1]);
+                         r4 += (r = src[width4 + 2]);
+                         y_out[0] =    /* later source row goes to the first output row */
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         b4 += (b = src[width4 + 4]);
+                         g4 += (g = src[width4 + 5]);
+                         r4 += (r = src[width4 + 6]);
+                         y_out[1] =
+                                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +
+                                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         *u_out++ =
+                                 (uint8_t) ((-FIX_IN(U_R_IN) * r4 - FIX_IN(U_G_IN) * g4 +
+                                                         FIX_IN(U_B_IN) * b4) >> (SCALEBITS_IN + 2)) +    /* the extra 2 bits divide the four-pixel sum by 4 */
+                                 U_ADD_IN;
+                         *v_out++ =
+                                 (uint8_t) ((FIX_IN(V_R_IN) * r4 - FIX_IN(V_G_IN) * g4 -
+                                                         FIX_IN(V_B_IN) * b4) >> (SCALEBITS_IN + 2)) +
+                                 V_ADD_IN;
+                         src += 8;    /* two 4-byte pixels */
+                         y_out += 2;
+                 }
+                 src -= src_dif;
+                 y_out += y_dif;
+                 u_out += uv_dif;
+                 v_out += uv_dif;
+         }
+ }
+ /*      yuv planar -> yuv 4:2:0 planar
+         NOTE: does not flip
+
+         src is read as three tightly packed planes in sequence: height rows
+         of width bytes copied to y_out, then height / 2 rows of width / 2
+         bytes copied to u_out, then the same amount copied to v_out.
+         stride is the row distance of y_out; the chroma destinations use
+         stride / 2. */
+ void
+ yuv_to_yv12_c(uint8_t * y_out,
+                           uint8_t * u_out,
+                           uint8_t * v_out,
+                           uint8_t * src,
+                           int width,
+                           int height,
+                           int stride)
+ {
+         uint32_t stride2 = stride >> 1;    /* destination chroma row distance */
+         uint32_t width2 = width >> 1;    /* bytes per chroma row */
+         uint32_t y;
+         for (y = height; y; y--) {    /* luma plane, row by row */
+                 memcpy(y_out, src, width);
+                 src += width;
+                 y_out += stride;
+         }
+         for (y = height >> 1; y; y--) {    /* first chroma plane of src goes to u_out */
+                 memcpy(u_out, src, width2);
+                 src += width2;
+                 u_out += stride2;
+         }
+         for (y = height >> 1; y; y--) {    /* second chroma plane of src goes to v_out */
+                 memcpy(v_out, src, width2);
+                 src += width2;
+                 v_out += stride2;
+         }
+ }
+ /* yuyv (YUY2) packed -> yuv 4:2:0 planar
+    NOTE: does not flip
+
+    Source bytes are consumed in the order Y, U, Y, V (two pixels per four
+    bytes, width * 2 bytes per row).  Rows are handled in pairs: for the
+    first row of a pair the luma is copied and each chroma byte is averaged
+    (truncating) with the byte at the same position in the next source row;
+    for the second row only the luma is copied.
+    stride is the row distance of y_out; the chroma pointers are advanced
+    by (stride - width) / 2 after each row pair. */
+ void
+ yuyv_to_yv12_c(uint8_t * y_out,
+                            uint8_t * u_out,
+                            uint8_t * v_out,
+                            uint8_t * src,
+                            int width,
+                            int height,
+                            int stride)
+ {
+         uint32_t width2 = width + width;    /* bytes per packed source row */
+         uint32_t y_dif = stride - width;    /* bytes from the end of a written luma row to the next row */
+         uint32_t uv_dif = y_dif >> 1;    /* same gap for the half-width chroma rows */
+         uint32_t x, y;
+         for (y = height >> 1; y; y--) {
+                 for (x = width >> 1; x; x--) {    /* first row of the pair: luma and averaged chroma */
+                         *y_out++ = *src++;
+                         //*u_out++ = *src++;
+                         *u_out++ = (*(src + width2) + *src) >> 1;    /* average with the U byte one source row further on */
+                         src++;
+                         *y_out++ = *src++;
+                         //*v_out++ = *src++;
+                         *v_out++ = (*(src + width2) + *src) >> 1;    /* average with the V byte one source row further on */
+                         src++;
+                 }
+                 y_out += y_dif;
+                 u_out += uv_dif;
+                 v_out += uv_dif;
+                 for (x = width >> 1; x; x--) {    /* second row of the pair: luma only */
+                         *y_out++ = *src++;
+                         src++;    /* skip the U byte */
+                         *y_out++ = *src++;
+                         src++;    /* skip the V byte */
+                 }
+                 y_out += y_dif;
+         }
+ }
+ /* uyvy packed -> yuv 4:2:0 planar
+    NOTE: does not flip; V is averaged over each pair of rows, while U is
+    copied from the first row of the pair only */
- #define MK_RGB555_B(RGB)  ((RGB) << 3) & 0xf8
+ void
- #define MK_RGB555_G(RGB)  ((RGB) >> 2) & 0xf8
+ uyvy_to_yv12_c(uint8_t * y_out,
- #define MK_RGB555_R(RGB)  ((RGB) >> 7) & 0xf8
+                            uint8_t * u_out,
+                            uint8_t * v_out,
- #define MK_RGB565_B(RGB)  ((RGB) << 3) & 0xf8
+                            uint8_t * src,
- #define MK_RGB565_G(RGB)  ((RGB) >> 3) & 0xfc
+                            int width,
- #define MK_RGB565_R(RGB)  ((RGB) >> 8) & 0xf8
+                            int height,
+                            int stride)
+ {
- #define READ_RGB16_Y(ROW, UVID, C1,C2,C3,C4)    \
+         uint32_t width2 = width + width;
-         rgb = *(uint16_t *) (x_ptr + ((ROW)*x_stride) + 0);     \
+         uint32_t y_dif = stride - width;
-         b##UVID += b = C1##_B(rgb);                             \
+         uint32_t uv_dif = y_dif >> 1;
-         g##UVID += g = C1##_G(rgb);                             \
+         uint32_t x, y;
-         r##UVID += r = C1##_R(rgb);                             \
-         y_ptr[(ROW)*y_stride+0] =                               \
+         for (y = height >> 1; y; y--) {
-                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +   \
-                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;        \
+                 for (x = width >> 1; x; x--) {
-         rgb = *(uint16_t *) (x_ptr + ((ROW)*x_stride) + 2);     \
+                         *u_out++ = *src++;
-         b##UVID += b = C1##_B(rgb);                             \
+                         // *u_out++ = (*(src+width2) + *src++) >> 1;
-         g##UVID += g = C1##_G(rgb);                             \
+                         *y_out++ = *src++;
-         r##UVID += r = C1##_R(rgb);                             \
+                         //*v_out++ = *src++;
-         y_ptr[(ROW)*y_stride+1] =                               \
+                         *v_out++ = (*(src + width2) + *src) >> 1;
-                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +                   \
+                         src++;
-                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
+                         *y_out++ = *src++;
+                 }
- #define READ_RGB16_UV(UV_ROW,UVID)      \
-         u_ptr[(UV_ROW)*uv_stride] =                                                                                                             \
-                 (uint8_t) ((-FIX_IN(U_R_IN) * r##UVID - FIX_IN(U_G_IN) * g##UVID +                      \
-                                         FIX_IN(U_B_IN) * b##UVID) >> (SCALEBITS_IN + 2)) + U_ADD_IN;    \
-         v_ptr[(UV_ROW)*uv_stride] =                                                                                                             \
-                 (uint8_t) ((FIX_IN(V_R_IN) * r##UVID - FIX_IN(V_G_IN) * g##UVID -                       \
-                                         FIX_IN(V_B_IN) * b##UVID) >> (SCALEBITS_IN + 2)) + V_ADD_IN;
- #define RGB16_TO_YV12_ROW(SIZE,C1,C2,C3,C4) \
-         /* nothing */
- #define RGB16_TO_YV12(SIZE,C1,C2,C3,C4) \
-         uint32_t rgb, r, g, b, r0, g0, b0;      \
-         r0 = g0 = b0 = 0;                                       \
-         READ_RGB16_Y (0, 0, C1,C2,C3,C4)        \
-         READ_RGB16_Y (1, 0, C1,C2,C3,C4)        \
-         READ_RGB16_UV(0, 0)
- #define RGB16I_TO_YV12_ROW(SIZE,C1,C2,C3,C4) \
-         /* nothing */
- #define RGB16I_TO_YV12(SIZE,C1,C2,C3,C4)        \
-         uint32_t rgb, r, g, b, r0, g0, b0, r1, g1, b1;  \
-         r0 = g0 = b0 = r1 = g1 = b1 = 0;        \
-         READ_RGB16_Y (0, 0, C1,C2,C3,C4)        \
-         READ_RGB16_Y (1, 1, C1,C2,C3,C4)        \
-         READ_RGB16_Y (2, 0, C1,C2,C3,C4)        \
-         READ_RGB16_Y (3, 1, C1,C2,C3,C4)        \
-         READ_RGB16_UV(0, 0)                                     \
-         READ_RGB16_UV(1, 1)
- /* rgb/rgbi input */
- #define READ_RGB_Y(SIZE, ROW, UVID, C1,C2,C3,C4)        \
-         r##UVID += r = x_ptr[(ROW)*x_stride+(C1)];                                              \
-         g##UVID += g = x_ptr[(ROW)*x_stride+(C2)];                                              \
-         b##UVID += b = x_ptr[(ROW)*x_stride+(C3)];                                              \
-         y_ptr[(ROW)*y_stride+0] =                                                                       \
-                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +   \
-                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;        \
-         r##UVID += r = x_ptr[(ROW)*x_stride+(SIZE)+(C1)];                               \
-         g##UVID += g = x_ptr[(ROW)*x_stride+(SIZE)+(C2)];                               \
-         b##UVID += b = x_ptr[(ROW)*x_stride+(SIZE)+(C3)];                               \
-         y_ptr[(ROW)*y_stride+1] =                                                                       \
-                 (uint8_t) ((FIX_IN(Y_R_IN) * r + FIX_IN(Y_G_IN) * g +   \
-                                         FIX_IN(Y_B_IN) * b) >> SCALEBITS_IN) + Y_ADD_IN;
- #define READ_RGB_UV(UV_ROW,UVID)        \
-         u_ptr[(UV_ROW)*uv_stride] =                                                                                                             \
-                 (uint8_t) ((-FIX_IN(U_R_IN) * r##UVID - FIX_IN(U_G_IN) * g##UVID +                      \
-                                         FIX_IN(U_B_IN) * b##UVID) >> (SCALEBITS_IN + 2)) + U_ADD_IN;    \
-         v_ptr[(UV_ROW)*uv_stride] =                                                                                                             \
-                 (uint8_t) ((FIX_IN(V_R_IN) * r##UVID - FIX_IN(V_G_IN) * g##UVID -                       \
-                                         FIX_IN(V_B_IN) * b##UVID) >> (SCALEBITS_IN + 2)) + V_ADD_IN;
- #define RGB_TO_YV12_ROW(SIZE,C1,C2,C3,C4) \
-         /* nothing */
- #define RGB_TO_YV12(SIZE,C1,C2,C3,C4)   \
-         uint32_t r, g, b, r0, g0, b0;           \
-         r0 = g0 = b0 = 0;                                       \
-         READ_RGB_Y(SIZE, 0, 0, C1,C2,C3,C4)     \
-         READ_RGB_Y(SIZE, 1, 0, C1,C2,C3,C4)     \
-         READ_RGB_UV(     0, 0)
- #define RGBI_TO_YV12_ROW(SIZE,C1,C2,C3,C4) \
-         /* nothing */
- #define RGBI_TO_YV12(SIZE,C1,C2,C3,C4)  \
-         uint32_t r, g, b, r0, g0, b0, r1, g1, b1;       \
-         r0 = g0 = b0 = r1 = g1 = b1 = 0;        \
-         READ_RGB_Y(SIZE, 0, 0, C1,C2,C3,C4)     \
-         READ_RGB_Y(SIZE, 1, 1, C1,C2,C3,C4)     \
-         READ_RGB_Y(SIZE, 2, 0, C1,C2,C3,C4)     \
-         READ_RGB_Y(SIZE, 3, 1, C1,C2,C3,C4)     \
-         READ_RGB_UV(     0, 0)                          \
-         READ_RGB_UV(     1, 1)
- /* yuyv/yuyvi input */
- #define READ_YUYV_Y(ROW,C1,C2,C3,C4)    \
-         y_ptr[(ROW)*y_stride+0] = x_ptr[(ROW)*x_stride+(C1)];   \
-         y_ptr[(ROW)*y_stride+1] = x_ptr[(ROW)*x_stride+(C3)];
- #define READ_YUYV_UV(UV_ROW,ROW1,ROW2,C1,C2,C3,C4) \
-         u_ptr[(UV_ROW)*uv_stride] = (x_ptr[(ROW1)*x_stride+(C2)] + x_ptr[(ROW2)*x_stride+(C2)] + 1) / 2;        \
-         v_ptr[(UV_ROW)*uv_stride] = (x_ptr[(ROW1)*x_stride+(C4)] + x_ptr[(ROW2)*x_stride+(C4)] + 1) / 2;
- #define YUYV_TO_YV12_ROW(SIZE,C1,C2,C3,C4) \
-         /* nothing */
- #define YUYV_TO_YV12(SIZE,C1,C2,C3,C4)  \
-         READ_YUYV_Y (0,      C1,C2,C3,C4)       \
-         READ_YUYV_Y (1,      C1,C2,C3,C4)       \
-         READ_YUYV_UV(0, 0,1, C1,C2,C3,C4)
- #define YUYVI_TO_YV12_ROW(SIZE,C1,C2,C3,C4) \
-         /* nothing */
- #define YUYVI_TO_YV12(SIZE,C1,C2,C3,C4) \
-         READ_YUYV_Y (0, C1,C2,C3,C4)    \
-         READ_YUYV_Y (1, C1,C2,C3,C4)    \
-         READ_YUYV_Y (2, C1,C2,C3,C4)    \
-         READ_YUYV_Y (3, C1,C2,C3,C4)    \
-         READ_YUYV_UV(0, 0,2, C1,C2,C3,C4)       \
-         READ_YUYV_UV(1, 1,3, C1,C2,C3,C4)
- MAKE_COLORSPACE(rgb555_to_yv12_c,  2,2,2, RGB16_TO_YV12,  MK_RGB555, 0,0,0) /* NOTE(review): MAKE_COLORSPACE is defined outside this section; the three numbers look like bytes per pixel, pixels per step and rows per step -- confirm */
- MAKE_COLORSPACE(rgb565_to_yv12_c,  2,2,2, RGB16_TO_YV12,  MK_RGB565, 0,0,0)
- MAKE_COLORSPACE(bgr_to_yv12_c,     3,2,2, RGB_TO_YV12,    2,1,0, 0)
- MAKE_COLORSPACE(bgra_to_yv12_c,    4,2,2, RGB_TO_YV12,    2,1,0, 0)
- MAKE_COLORSPACE(abgr_to_yv12_c,    4,2,2, RGB_TO_YV12,    3,2,1, 0)
- MAKE_COLORSPACE(rgba_to_yv12_c,    4,2,2, RGB_TO_YV12,    0,1,2, 0)
- MAKE_COLORSPACE(yuyv_to_yv12_c,    2,2,2, YUYV_TO_YV12,   0,1,2,3) /* last four arguments become C1..C4; READ_YUYV_UV reads u at offset C2 and v at offset C4 */
- MAKE_COLORSPACE(uyvy_to_yv12_c,    2,2,2, YUYV_TO_YV12,   1,0,3,2) /* same macro as yuyv with the offsets swapped pairwise */
- MAKE_COLORSPACE(rgb555i_to_yv12_c, 2,2,4, RGB16I_TO_YV12, MK_RGB555, 0,0,0) /* the "i" variants below pass 4 as the third number and use the *I_TO_YV12 macros */
- MAKE_COLORSPACE(rgb565i_to_yv12_c, 2,2,4, RGB16I_TO_YV12, MK_RGB565, 0,0,0)
- MAKE_COLORSPACE(bgri_to_yv12_c,    3,2,4, RGBI_TO_YV12,   2,1,0, 0)
- MAKE_COLORSPACE(bgrai_to_yv12_c,   4,2,4, RGBI_TO_YV12,   2,1,0, 0)
- MAKE_COLORSPACE(abgri_to_yv12_c,   4,2,4, RGBI_TO_YV12,   3,2,1, 0)
- MAKE_COLORSPACE(rgbai_to_yv12_c,   4,2,4, RGBI_TO_YV12,   0,1,2, 0)
- MAKE_COLORSPACE(yuyvi_to_yv12_c,   2,2,4, YUYVI_TO_YV12,  0,1,2,3)
- MAKE_COLORSPACE(uyvyi_to_yv12_c,   2,2,4, YUYVI_TO_YV12,  1,0,3,2)
+                 y_out += y_dif;
+                 u_out += uv_dif;;
+                 v_out += uv_dif;;
+                 for (x = width >> 1; x; x--) {
+                         src++;
+                         *y_out++ = *src++;
+                         src++;
+                         *y_out++ = *src++;
+                 }
+                 y_out += y_dif;
+         }
+ }
- /********** colorspace output (yv12_to_xxx) functions **********/
  /* yuv -> rgb def's */
-Line 327
+Line 564
  #define R_V_OUT                 1.596
  #define V_ADD_OUT               128
  #define SCALEBITS_OUT   13
  #define FIX_OUT(x)              ((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
- /* rgb16/rgb16i output */
+ /* initialize rgb lookup tables
+  *
+  * Fills all 256 entries of RGB_Y_tab, B_U_tab, G_U_tab, G_V_tab and R_V_tab.
+  * Entry i holds the fixed-point coefficient FIX_OUT(...) (scaled by
+  * 1 << SCALEBITS_OUT) multiplied by the sample value i minus its offset
+  * (Y_ADD_OUT, U_ADD_OUT or V_ADD_OUT).  The yv12_to_rgb* converters add
+  * these entries and shift the sum right by SCALEBITS_OUT.
+  * NOTE(review): presumably has to run once before any of those converters
+  * is called -- confirm against the callers.
+  */
+ void
+ colorspace_init(void)
+ {
+         int32_t i;
+         for (i = 0; i < 256; i++) {
+                 RGB_Y_tab[i] = FIX_OUT(RGB_Y_OUT) * (i - Y_ADD_OUT);
+                 B_U_tab[i] = FIX_OUT(B_U_OUT) * (i - U_ADD_OUT);
+                 G_U_tab[i] = FIX_OUT(G_U_OUT) * (i - U_ADD_OUT);
+                 G_V_tab[i] = FIX_OUT(G_V_OUT) * (i - V_ADD_OUT);
+                 R_V_tab[i] = FIX_OUT(R_V_OUT) * (i - V_ADD_OUT);
+         }
+ }
+ /* yuv 4:2:0 planar -> rgb555 + very simple error diffusion
+  *
+  * Each inner-loop step converts a 2x2 block of luma samples that share one
+  * u/v sample into four 16-bit pixels: two on the row at dst and two on the
+  * row at dst2.
+  *
+  * dst          output buffer; pixels are stored through a uint16_t pointer
+  *              (NOTE(review): assumes dst is suitably aligned -- confirm)
+  * dst_stride   output row pitch, counted in pixels (2 bytes each)
+  * y_src, u_src, v_src    source planes
+  * y_stride, uv_stride    source row pitches in bytes
+  * width, height          picture size; width/2 x height/2 blocks are
+  *              converted, so an odd last column or row is not written.
+  *              A negative height makes the source be read bottom-up while
+  *              the output is still written top-down (vertical flip).
+  *
+  * Error diffusion: the low 3 bits of the previous pixel's stored (unclamped)
+  * component value on the same row are added to the next pixel's component;
+  * the carried values are reset to 0 at the start of every row pair.
+ */
- #define MK_RGB555(R,G,B)        \
+ #define MK_RGB555(R,G,B)        ((MAX(0,MIN(255, R)) << 7) & 0x7c00) | \
-         ((MAX(0,MIN(255, R)) << 7) & 0x7c00) | \
          ((MAX(0,MIN(255, G)) << 2) & 0x03e0) | \
          ((MAX(0,MIN(255, B)) >> 3) & 0x001f)
- #define MK_RGB565(R,G,B)        \
-         ((MAX(0,MIN(255, R)) << 8) & 0xf800) | \
+ void
+ yv12_to_rgb555_c(uint8_t * dst,
+                                  int dst_stride,
+                                  uint8_t * y_src,
+                                  uint8_t * u_src,
+                                  uint8_t * v_src,
+                                  int y_stride,
+                                  int uv_stride,
+                                  int width,
+                                  int height)
+ {
+         const uint32_t dst_dif = 4 * dst_stride - 2 * width; // bytes from the end of a converted row to the row two below its start
+         int32_t y_dif = 2 * y_stride - width; // same idea for the luma plane (one byte per sample)
+         uint8_t *dst2 = dst + 2 * dst_stride; // second output row of each row pair
+         uint8_t *y_src2 = y_src + y_stride; // second luma row of each row pair
+         uint32_t x, y;
+         if (height < 0) { // flip: start at the last source rows and walk upwards
+                 height = -height;
+                 y_src += (height - 1) * y_stride;
+                 y_src2 = y_src - y_stride;
+                 u_src += (height / 2 - 1) * uv_stride;
+                 v_src += (height / 2 - 1) * uv_stride;
+                 y_dif = -width - 2 * y_stride;
+                 uv_stride = -uv_stride;
+         }
+         for (y = height / 2; y; y--) {
+                 int r, g, b; // running values for the row at dst (carry the diffusion bits)
+                 int r2, g2, b2; // running values for the row at dst2
+                 r = g = b = 0;
+                 r2 = g2 = b2 = 0;
+                 // process one 2x2 block per iteration
+                 for (x = 0; x < (uint32_t) width / 2; x++) {
+                         int u, v;
+                         int b_u, g_uv, r_v, rgb_y;
+                         u = u_src[x];
+                         v = v_src[x];
+                         b_u = B_U_tab[u]; // chroma contributions are shared by all four pixels of the block
+                         g_uv = G_U_tab[u] + G_V_tab[v];
+                         r_v = R_V_tab[v];
+                         rgb_y = RGB_Y_tab[*y_src]; // top-left pixel
+                         b = (b & 0x7) + ((rgb_y + b_u) >> SCALEBITS_OUT);
+                         g = (g & 0x7) + ((rgb_y - g_uv) >> SCALEBITS_OUT);
+                         r = (r & 0x7) + ((rgb_y + r_v) >> SCALEBITS_OUT);
+                         *(uint16_t *) dst = MK_RGB555(r, g, b);
+                         y_src++;
+                         rgb_y = RGB_Y_tab[*y_src]; // top-right pixel
+                         b = (b & 0x7) + ((rgb_y + b_u) >> SCALEBITS_OUT);
+                         g = (g & 0x7) + ((rgb_y - g_uv) >> SCALEBITS_OUT);
+                         r = (r & 0x7) + ((rgb_y + r_v) >> SCALEBITS_OUT);
+                         *(uint16_t *) (dst + 2) = MK_RGB555(r, g, b);
+                         y_src++;
+                         rgb_y = RGB_Y_tab[*y_src2]; // bottom-left pixel
+                         b2 = (b2 & 0x7) + ((rgb_y + b_u) >> SCALEBITS_OUT);
+                         g2 = (g2 & 0x7) + ((rgb_y - g_uv) >> SCALEBITS_OUT);
+                         r2 = (r2 & 0x7) + ((rgb_y + r_v) >> SCALEBITS_OUT);
+                         *(uint16_t *) (dst2) = MK_RGB555(r2, g2, b2);
+                         y_src2++;
+                         rgb_y = RGB_Y_tab[*y_src2]; // bottom-right pixel
+                         b2 = (b2 & 0x7) + ((rgb_y + b_u) >> SCALEBITS_OUT);
+                         g2 = (g2 & 0x7) + ((rgb_y - g_uv) >> SCALEBITS_OUT);
+                         r2 = (r2 & 0x7) + ((rgb_y + r_v) >> SCALEBITS_OUT);
+                         *(uint16_t *) (dst2 + 2) = MK_RGB555(r2, g2, b2);
+                         y_src2++;
+                         dst += 4;
+                         dst2 += 4;
+                 }
+                 dst += dst_dif; // advance every pointer to the next row pair
+                 dst2 += dst_dif;
+                 y_src += y_dif;
+                 y_src2 += y_dif;
+                 u_src += uv_stride;
+                 v_src += uv_stride;
+         }
+ }
+ /* yuv 4:2:0 planar -> rgb565 + very simple error diffusion
+  * NOTE: identical to rgb555 except for shift/mask (MK_RGB565 keeps 5 bits
+  * of red, 6 of green and 5 of blue).
+  *
+  * Each inner-loop step converts a 2x2 luma block sharing one u/v sample
+  * into four 16-bit pixels.  dst_stride is counted in pixels (2 bytes each).
+  * A negative height makes the source be read bottom-up while the output is
+  * written top-down.  The low 3 bits of the previous pixel's stored component
+  * value on the same row are added to the next pixel's component.
+  */
+ #define MK_RGB565(R,G,B)        ((MAX(0,MIN(255, R)) << 8) & 0xf800) | \
          ((MAX(0,MIN(255, G)) << 3) & 0x07e0) | \
          ((MAX(0,MIN(255, B)) >> 3) & 0x001f)
- #define WRITE_RGB16(ROW,UV_ROW,C1)      \
+ void
-         rgb_y = RGB_Y_tab[ y_ptr[y_stride + 0] ]; /* NOTE(review): luma index does not use ROW, unlike WRITE_RGB's (ROW)*y_stride -- confirm */ \
+ yv12_to_rgb565_c(uint8_t * dst,
-         b[ROW] = (b[ROW] & 0x7) + ((rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT);     \
+                                  int dst_stride,
-         g[ROW] = (g[ROW] & 0x7) + ((rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT);    \
+                                  uint8_t * y_src,
-         r[ROW] = (r[ROW] & 0x7) + ((rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT);             \
+                                  uint8_t * u_src,
-         *(uint16_t *) (x_ptr+((ROW)*x_stride)+0) = C1(r[ROW], g[ROW], b[ROW]);  \
+                                  uint8_t * v_src,
-         rgb_y = RGB_Y_tab[ y_ptr[y_stride + 1] ];                               \
+                                  int y_stride,
-         b[ROW] = (b[ROW] & 0x7) + ((rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT);             \
+                                  int uv_stride,
-         g[ROW] = (g[ROW] & 0x7) + ((rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT);    \
+                                  int width,
-         r[ROW] = (r[ROW] & 0x7) + ((rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT);             \
+                                  int height)
-         *(uint16_t *) (x_ptr+((ROW)*x_stride)+2) = C1(r[ROW], g[ROW], b[ROW]);
+ {
+         const uint32_t dst_dif = 4 * dst_stride - 2 * width; // bytes from the end of a converted row to the row two below its start
- #define YV12_TO_RGB16_ROW(SIZE,C1,C2,C3,C4) \
+         int32_t y_dif = 2 * y_stride - width;
-         int r[2], g[2], b[2];                                   \
-         r[0] = r[1] = g[0] = g[1] = b[0] = b[1] = 0;
+         uint8_t *dst2 = dst + 2 * dst_stride; // second output row of each row pair
- #define YV12_TO_RGB16(SIZE,C1,C2,C3,C4)         \
+         uint8_t *y_src2 = y_src + y_stride; // second luma row of each row pair
-         int rgb_y;                                                                                              \
+         uint32_t x, y;
-         int b_u0 = B_U_tab[ u_ptr[0] ];                                                         \
-         int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];          \
+         if (height < 0) {                       // flip image?
-         int r_v0 = R_V_tab[ v_ptr[0] ];                                                         \
+                 height = -height;
-         WRITE_RGB16(0, 0, C1)                                                                           \
+                 y_src += (height - 1) * y_stride;
-         WRITE_RGB16(1, 0, C1)
+                 y_src2 = y_src - y_stride;
+                 u_src += (height / 2 - 1) * uv_stride;
- #define YV12_TO_RGB16I_ROW(SIZE,C1,C2,C3,C4) \
+                 v_src += (height / 2 - 1) * uv_stride;
-         int r[4], g[4], b[4];                                   \
+                 y_dif = -width - 2 * y_stride;
-         r[0] = r[1] = r[2] = r[3] = 0;                  \
+                 uv_stride = -uv_stride;
-         g[0] = g[1] = g[2] = g[3] = 0;                  \
+         }
-         b[0] = b[1] = b[2] = b[3] = 0;
- #define YV12_TO_RGB16I(SIZE,C1,C2,C3,C4)                \
+         for (y = height / 2; y; y--) {
-         int rgb_y;                                                                                                      \
+                 int r, g, b;
-         int b_u0 = B_U_tab[ u_ptr[0] ];                                                         \
+                 int r2, g2, b2;
-         int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];          \
-         int r_v0 = R_V_tab[ v_ptr[0] ];                                                         \
+                 r = g = b = 0;
-     int b_u1 = B_U_tab[ u_ptr[uv_stride] ];                                             \
+                 r2 = g2 = b2 = 0;
-         int g_uv1 = G_U_tab[ u_ptr[uv_stride] ] + G_V_tab[ v_ptr[uv_stride] ];  \
-         int r_v1 = R_V_tab[ v_ptr[uv_stride] ];                                         \
+                 // process one 2x2 block per iteration
-     WRITE_RGB16(0, 0, C1)                                                                               \
+                 for (x = 0; x < (uint32_t) width / 2; x++) {
-         WRITE_RGB16(1, 1, C1)                                                                           \
+                         int u, v;
-     WRITE_RGB16(2, 0, C1)                                                                               \
+                         int b_u, g_uv, r_v, rgb_y;
-         WRITE_RGB16(3, 1, C1)                                                                           \
+                         u = u_src[x];
+                         v = v_src[x];
- /* rgb/rgbi output */
+                         b_u = B_U_tab[u];
- #define WRITE_RGB(SIZE,ROW,UV_ROW,C1,C2,C3,C4)  \
+                         g_uv = G_U_tab[u] + G_V_tab[v];
-         rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];                                         \
+                         r_v = R_V_tab[v];
-         x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));  \
-         x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT)); \
+                         rgb_y = RGB_Y_tab[*y_src]; // top-left pixel
-         x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));  \
+                         b = (b & 0x7) + ((rgb_y + b_u) >> SCALEBITS_OUT);
-         if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;                                                                   \
+                         g = (g & 0x7) + ((rgb_y - g_uv) >> SCALEBITS_OUT);
-         rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];                                                                 \
+                         r = (r & 0x7) + ((rgb_y + r_v) >> SCALEBITS_OUT);
-         x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));   \
+                         *(uint16_t *) dst = MK_RGB565(r, g, b);
-         x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));  \
-         x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));   \
+                         y_src++;
-         if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
+                         rgb_y = RGB_Y_tab[*y_src]; // top-right pixel
+                         b = (b & 0x7) + ((rgb_y + b_u) >> SCALEBITS_OUT);
+                         g = (g & 0x7) + ((rgb_y - g_uv) >> SCALEBITS_OUT);
- #define YV12_TO_RGB_ROW(SIZE,C1,C2,C3,C4)       /* nothing */
+                         r = (r & 0x7) + ((rgb_y + r_v) >> SCALEBITS_OUT);
- #define YV12_TO_RGB(SIZE,C1,C2,C3,C4)                           \
+                         *(uint16_t *) (dst + 2) = MK_RGB565(r, g, b);
-         int rgb_y;                                                                                              \
+                         y_src++;
-         int b_u0 = B_U_tab[ u_ptr[0] ];                                                 \
-         int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];  \
+                         rgb_y = RGB_Y_tab[*y_src2]; // bottom-left pixel
-         int r_v0 = R_V_tab[ v_ptr[0] ];                                                 \
+                         b2 = (b2 & 0x7) + ((rgb_y + b_u) >> SCALEBITS_OUT);
-         WRITE_RGB(SIZE, 0, 0, C1,C2,C3,C4)                                              \
+                         g2 = (g2 & 0x7) + ((rgb_y - g_uv) >> SCALEBITS_OUT);
-         WRITE_RGB(SIZE, 1, 0, C1,C2,C3,C4)
+                         r2 = (r2 & 0x7) + ((rgb_y + r_v) >> SCALEBITS_OUT);
+                         *(uint16_t *) (dst2) = MK_RGB565(r2, g2, b2);
- #define YV12_TO_RGBI_ROW(SIZE,C1,C2,C3,C4)      /* nothing */
+                         y_src2++;
- #define YV12_TO_RGBI(SIZE,C1,C2,C3,C4)                          \
-         int rgb_y;                                                                                              \
+                         rgb_y = RGB_Y_tab[*y_src2]; // bottom-right pixel
-         int b_u0 = B_U_tab[ u_ptr[0] ];                                                 \
+                         b2 = (b2 & 0x7) + ((rgb_y + b_u) >> SCALEBITS_OUT);
-         int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];  \
+                         g2 = (g2 & 0x7) + ((rgb_y - g_uv) >> SCALEBITS_OUT);
-         int r_v0 = R_V_tab[ v_ptr[0] ];                                                 \
+                         r2 = (r2 & 0x7) + ((rgb_y + r_v) >> SCALEBITS_OUT);
-     int b_u1 = B_U_tab[ u_ptr[uv_stride] ];                                     \
+                         *(uint16_t *) (dst2 + 2) = MK_RGB565(r2, g2, b2);
-         int g_uv1 = G_U_tab[ u_ptr[uv_stride] ] + G_V_tab[ v_ptr[uv_stride] ];  \
+                         y_src2++;
-         int r_v1 = R_V_tab[ v_ptr[uv_stride] ];                                 \
-         WRITE_RGB(SIZE, 0, 0, C1,C2,C3,C4)              \
+                         dst += 4;
-         WRITE_RGB(SIZE, 1, 1, C1,C2,C3,C4)              \
+                         dst2 += 4;
-         WRITE_RGB(SIZE, 2, 0, C1,C2,C3,C4)              \
+                 }
-         WRITE_RGB(SIZE, 3, 1, C1,C2,C3,C4)
+                 dst += dst_dif; // advance every pointer to the next row pair
+                 dst2 += dst_dif;
- /* yuyv/yuyvi output */
+                 y_src += y_dif;
- #define WRITE_YUYV(ROW,UV_ROW,C1,C2,C3,C4)      \
+                 y_src2 += y_dif;
-         x_ptr[(ROW)*x_stride+(C1)] = y_ptr[   (ROW)*y_stride +0];       \
-         x_ptr[(ROW)*x_stride+(C2)] = u_ptr[(UV_ROW)*uv_stride+0];       \
+                 u_src += uv_stride;
-         x_ptr[(ROW)*x_stride+(C3)] = y_ptr[   (ROW)*y_stride +1];       \
+                 v_src += uv_stride;
-         x_ptr[(ROW)*x_stride+(C4)] = v_ptr[(UV_ROW)*uv_stride+0];       \
+         }
+ }
- #define YV12_TO_YUYV_ROW(SIZE,C1,C2,C3,C4)      /* nothing */
- #define YV12_TO_YUYV(SIZE,C1,C2,C3,C4)  \
-         WRITE_YUYV(0, 0, C1,C2,C3,C4)           \
-         WRITE_YUYV(1, 0, C1,C2,C3,C4)
+ /* yuv 4:2:0 planar -> rgb24
+  *
+  * Each inner-loop step converts a 2x2 luma block sharing one u/v sample
+  * into four 3-byte pixels, stored in b, g, r byte order, each component
+  * clamped to 0..255.  dst_stride is counted in pixels (3 bytes each).
+  * width/2 x height/2 blocks are converted.  A negative height makes the
+  * source be read bottom-up while the output is written top-down.
+  */
- #define YV12_TO_YUYVI_ROW(SIZE,C1,C2,C3,C4) /* nothing */
+ void
- #define YV12_TO_YUYVI(SIZE,C1,C2,C3,C4) \
+ yv12_to_rgb24_c(uint8_t * dst,
-         WRITE_YUYV(0, 0, C1,C2,C3,C4)           \
+                                 int dst_stride,
-         WRITE_YUYV(1, 1, C1,C2,C3,C4)           \
+                                 uint8_t * y_src,
-         WRITE_YUYV(2, 0, C1,C2,C3,C4)           \
+                                 uint8_t * u_src,
-         WRITE_YUYV(3, 1, C1,C2,C3,C4)
+                                 uint8_t * v_src,
+                                 int y_stride,
+                                 int uv_stride,
- MAKE_COLORSPACE(yv12_to_rgb555_c,  2,2,2, YV12_TO_RGB16,  MK_RGB555, 0,0,0)
+                                 int width,
- MAKE_COLORSPACE(yv12_to_rgb565_c,  2,2,2, YV12_TO_RGB16,  MK_RGB565, 0,0,0)
+                                 int height)
- MAKE_COLORSPACE(yv12_to_bgr_c,     3,2,2, YV12_TO_RGB,    2,1,0, 0)
+ {
- MAKE_COLORSPACE(yv12_to_bgra_c,    4,2,2, YV12_TO_RGB,    2,1,0,3)
+         const uint32_t dst_dif = 6 * dst_stride - 3 * width; // bytes from the end of a converted row to the row two below its start
- MAKE_COLORSPACE(yv12_to_abgr_c,    4,2,2, YV12_TO_RGB,    3,2,1,0)
+         int32_t y_dif = 2 * y_stride - width;
- MAKE_COLORSPACE(yv12_to_rgba_c,    4,2,2, YV12_TO_RGB,    0,1,2,3)
- MAKE_COLORSPACE(yv12_to_yuyv_c,    2,2,2, YV12_TO_YUYV,   0,1,2,3)
+         uint8_t *dst2 = dst + 3 * dst_stride; // second output row of each row pair
- MAKE_COLORSPACE(yv12_to_uyvy_c,    2,2,2, YV12_TO_YUYV,   1,0,3,2)
+         uint8_t *y_src2 = y_src + y_stride; // second luma row of each row pair
+         uint32_t x, y;
- MAKE_COLORSPACE(yv12_to_rgb555i_c, 2,2,4, YV12_TO_RGB16I, MK_RGB555, 0,0,0)
- MAKE_COLORSPACE(yv12_to_rgb565i_c, 2,2,4, YV12_TO_RGB16I, MK_RGB565, 0,0,0)
+         if (height < 0) {                       // flip image?
- MAKE_COLORSPACE(yv12_to_bgri_c,    3,2,4, YV12_TO_RGBI,   2,1,0, 0)
+                 height = -height;
- MAKE_COLORSPACE(yv12_to_bgrai_c,   4,2,4, YV12_TO_RGBI,   2,1,0,3)
+                 y_src += (height - 1) * y_stride;
- MAKE_COLORSPACE(yv12_to_abgri_c,   4,2,4, YV12_TO_RGBI,   3,2,1,0)
+                 y_src2 = y_src - y_stride;
- MAKE_COLORSPACE(yv12_to_rgbai_c,   4,2,4, YV12_TO_RGBI,   0,1,2,3)
+                 u_src += (height / 2 - 1) * uv_stride;
- MAKE_COLORSPACE(yv12_to_yuyvi_c,   2,2,4, YV12_TO_YUYVI,  0,1,2,3)
+                 v_src += (height / 2 - 1) * uv_stride;
- MAKE_COLORSPACE(yv12_to_uyvyi_c,   2,2,4, YV12_TO_YUYVI,  1,0,3,2)
+                 y_dif = -width - 2 * y_stride;
+                 uv_stride = -uv_stride;
+         }
- /* yv12 to yv12 copy function */
+         for (y = height / 2; y; y--) {
+                 // process one 2x2 block per iteration
- void
+                 for (x = 0; x < (uint32_t) width / 2; x++) {
- yv12_to_yv12_c(uint8_t * y_dst, uint8_t * u_dst, uint8_t * v_dst,
+                         int u, v;
-                                 int y_dst_stride, int uv_dst_stride,
+                         int b_u, g_uv, r_v, rgb_y;
-                                 uint8_t * y_src, uint8_t * u_src, uint8_t * v_src,
+                         int r, g, b;
-                                 int y_src_stride, int uv_src_stride,
-                                 int width, int height, int vflip)
+                         u = u_src[x];
- {
+                         v = v_src[x];
-         int width2 = width / 2;
-         int height2 = height / 2;
+                         b_u = B_U_tab[u]; // chroma contributions are shared by all four pixels of the block
-         int y;
+                         g_uv = G_U_tab[u] + G_V_tab[v];
+                         r_v = R_V_tab[v];
-         if (vflip) {
-                 y_src += (height - 1) * y_src_stride;
+                         rgb_y = RGB_Y_tab[*y_src]; // top-left pixel
-                 u_src += (height2 - 1) * uv_src_stride;
+                         b = (rgb_y + b_u) >> SCALEBITS_OUT;
-                 v_src += (height2 - 1) * uv_src_stride;
+                         g = (rgb_y - g_uv) >> SCALEBITS_OUT;
-                 y_src_stride = -y_src_stride;
+                         r = (rgb_y + r_v) >> SCALEBITS_OUT;
-                 uv_src_stride = -uv_src_stride;
+                         dst[0] = MAX(0, MIN(255, b));
+                         dst[1] = MAX(0, MIN(255, g));
+                         dst[2] = MAX(0, MIN(255, r));
+                         y_src++;
+                         rgb_y = RGB_Y_tab[*y_src]; // top-right pixel
+                         b = (rgb_y + b_u) >> SCALEBITS_OUT;
+                         g = (rgb_y - g_uv) >> SCALEBITS_OUT;
+                         r = (rgb_y + r_v) >> SCALEBITS_OUT;
+                         dst[3] = MAX(0, MIN(255, b));
+                         dst[4] = MAX(0, MIN(255, g));
+                         dst[5] = MAX(0, MIN(255, r));
+                         y_src++;
+                         rgb_y = RGB_Y_tab[*y_src2]; // bottom-left pixel
+                         b = (rgb_y + b_u) >> SCALEBITS_OUT;
+                         g = (rgb_y - g_uv) >> SCALEBITS_OUT;
+                         r = (rgb_y + r_v) >> SCALEBITS_OUT;
+                         dst2[0] = MAX(0, MIN(255, b));
+                         dst2[1] = MAX(0, MIN(255, g));
+                         dst2[2] = MAX(0, MIN(255, r));
+                         y_src2++;
+                         rgb_y = RGB_Y_tab[*y_src2]; // bottom-right pixel
+                         b = (rgb_y + b_u) >> SCALEBITS_OUT;
+                         g = (rgb_y - g_uv) >> SCALEBITS_OUT;
+                         r = (rgb_y + r_v) >> SCALEBITS_OUT;
+                         dst2[3] = MAX(0, MIN(255, b));
+                         dst2[4] = MAX(0, MIN(255, g));
+                         dst2[5] = MAX(0, MIN(255, r));
+                         y_src2++;
+                         dst += 6;
+                         dst2 += 6;
+                 }
+                 dst += dst_dif; // advance every pointer to the next row pair
+                 dst2 += dst_dif;
+                 y_src += y_dif;
+                 y_src2 += y_dif;
+                 u_src += uv_stride;
+                 v_src += uv_stride;
+         }
+ }
+ /* yuv 4:2:0 planar -> rgb32
+  *
+  * Each inner-loop step converts a 2x2 luma block sharing one u/v sample
+  * into four 4-byte pixels.  Every pixel is stored as the bytes r, g, b, 0
+  * with each colour component clamped to 0..255.  dst_stride is counted in
+  * pixels (4 bytes each).  width/2 x height/2 blocks are converted.  A
+  * negative height makes the source be read bottom-up while the output is
+  * written top-down.
+  *
+  * NOTE(review): the parameter list declares v_src BEFORE u_src, the reverse
+  * of the sibling converters, and the bytes are stored r,g,b whereas
+  * yv12_to_rgb24_c stores b,g,r.  Verify against the callers whether this
+  * pairing is intentional before touching either.
+  */
+ void
+ yv12_to_rgb32_c(uint8_t * dst,
+                                 int dst_stride,
+                                 uint8_t * y_src,
+                                 uint8_t * v_src,
+                                 uint8_t * u_src,
+                                 int y_stride,
+                                 int uv_stride,
+                                 int width,
+                                 int height)
+ {
+         const uint32_t dst_dif = 8 * dst_stride - 4 * width; // bytes from the end of a converted row to the row two below its start
+         int32_t y_dif = 2 * y_stride - width;
+         uint8_t *dst2 = dst + 4 * dst_stride; // second output row of each row pair
+         uint8_t *y_src2 = y_src + y_stride; // second luma row of each row pair
+         uint32_t x, y;
+         if (height < 0) {                       // flip image?
+                 height = -height;
+                 y_src += (height - 1) * y_stride;
+                 y_src2 = y_src - y_stride;
+                 u_src += (height / 2 - 1) * uv_stride;
+                 v_src += (height / 2 - 1) * uv_stride;
+                 y_dif = -width - 2 * y_stride;
+                 uv_stride = -uv_stride;
+         }
+         for (y = height / 2; y; y--) {
+                 // process one 2x2 block per iteration
+                 for (x = 0; x < (uint32_t) width / 2; x++) {
+                         int u, v;
+                         int b_u, g_uv, r_v, rgb_y;
+                         int r, g, b;
+                         u = u_src[x];
+                         v = v_src[x];
+                         b_u = B_U_tab[u]; // chroma contributions are shared by all four pixels of the block
+                         g_uv = G_U_tab[u] + G_V_tab[v];
+                         r_v = R_V_tab[v];
+                         rgb_y = RGB_Y_tab[*y_src]; // top-left pixel
+                         b = (rgb_y + b_u) >> SCALEBITS_OUT;
+                         g = (rgb_y - g_uv) >> SCALEBITS_OUT;
+                         r = (rgb_y + r_v) >> SCALEBITS_OUT;
+                         dst[0] = MAX(0, MIN(255, r));
+                         dst[1] = MAX(0, MIN(255, g));
+                         dst[2] = MAX(0, MIN(255, b));
+                         dst[3] = 0; // fourth byte of every pixel is written as zero
+                         y_src++;
+                         rgb_y = RGB_Y_tab[*y_src]; // top-right pixel
+                         b = (rgb_y + b_u) >> SCALEBITS_OUT;
+                         g = (rgb_y - g_uv) >> SCALEBITS_OUT;
+                         r = (rgb_y + r_v) >> SCALEBITS_OUT;
+                         dst[4] = MAX(0, MIN(255, r));
+                         dst[5] = MAX(0, MIN(255, g));
+                         dst[6] = MAX(0, MIN(255, b));
+                         dst[7] = 0;
+                         y_src++;
+                         rgb_y = RGB_Y_tab[*y_src2]; // bottom-left pixel
+                         b = (rgb_y + b_u) >> SCALEBITS_OUT;
+                         g = (rgb_y - g_uv) >> SCALEBITS_OUT;
+                         r = (rgb_y + r_v) >> SCALEBITS_OUT;
+                         dst2[0] = MAX(0, MIN(255, r));
+                         dst2[1] = MAX(0, MIN(255, g));
+                         dst2[2] = MAX(0, MIN(255, b));
+                         dst2[3] = 0;
+                         y_src2++;
+                         rgb_y = RGB_Y_tab[*y_src2]; // bottom-right pixel
+                         b = (rgb_y + b_u) >> SCALEBITS_OUT;
+                         g = (rgb_y - g_uv) >> SCALEBITS_OUT;
+                         r = (rgb_y + r_v) >> SCALEBITS_OUT;
+                         dst2[4] = MAX(0, MIN(255, r));
+                         dst2[5] = MAX(0, MIN(255, g));
+                         dst2[6] = MAX(0, MIN(255, b));
+                         dst2[7] = 0;
+                         y_src2++;
+                         dst += 8;
+                         dst2 += 8;
+                 }
+                 dst += dst_dif; // advance every pointer to the next row pair
+                 dst2 += dst_dif;
+                 y_src += y_dif;
+                 y_src2 += y_dif;
+                 u_src += uv_stride;
+                 v_src += uv_stride;
+         }
+ }
+ /*      yuv 4:2:0 planar -> yuv planar
+  *
+  * Copies the three source planes one after another into the single buffer
+  * dst: height rows of width bytes (pitch dst_stride), then height/2 rows
+  * of width/2 bytes from u_src, then the same from v_src (chroma pitch
+  * dst_stride/2).  A negative height makes the source planes be read
+  * bottom-up, so the copy is vertically flipped.
+  */
+ void
+ yv12_to_yuv_c(uint8_t * dst,
+                           int dst_stride,
+                           uint8_t * y_src,
+                           uint8_t * u_src,
+                           uint8_t * v_src,
+                           int y_stride,
+                           int uv_stride,
+                           int width,
+                           int height)
+ {
+         uint32_t dst_stride2 = dst_stride >> 1; // chroma pitch in the output buffer
+         uint32_t width2 = width >> 1; // bytes per chroma row
+         uint32_t y;
+         if (height < 0) { // flip: start at the last source rows and use negative strides
+                 height = -height;
+                 y_src += (height - 1) * y_stride;
+                 u_src += (height / 2 - 1) * uv_stride;
+                 v_src += (height / 2 - 1) * uv_stride;
+                 y_stride = -y_stride;
+                 uv_stride = -uv_stride;
          }
          for (y = height; y; y--) {
-                 memcpy(y_dst, y_src, width);
+                 memcpy(dst, y_src, width);
-                 y_src += y_src_stride;
+                 dst += dst_stride;
-                 y_dst += y_dst_stride;
+                 y_src += y_stride;
          }
-         for (y = height2; y; y--) {
+         for (y = height >> 1; y; y--) {
-                 memcpy(u_dst, u_src, width2);
+                 memcpy(dst, u_src, width2); // dst now points just past the copied luma rows
-                 u_src += uv_src_stride;
+                 dst += dst_stride2;
-                 u_dst += uv_dst_stride;
+                 u_src += uv_stride;
          }
-         for (y = height2; y; y--) {
+         for (y = height >> 1; y; y--) {
-                 memcpy(v_dst, v_src, width2);
+                 memcpy(dst, v_src, width2);
-                 v_src += uv_src_stride;
+                 dst += dst_stride2;
-                 v_dst += uv_dst_stride;
+                 v_src += uv_stride;
          }
  }
- /* initialize rgb lookup tables */
+ /* yuv 4:2:0 planar -> yuyv (yuy2) packed
+  *
+  * Writes every pair of horizontally adjacent pixels as the four bytes
+  * y0, u, y1, v.  Each chroma row is used for two consecutive output rows.
+  * dst_stride is counted in pixels (2 bytes each).  A negative height makes
+  * the source planes be read bottom-up while the output is written top-down.
+  */
  void
- colorspace_init(void)
+ yv12_to_yuyv_c(uint8_t * dst,
+                            int dst_stride,
+                            uint8_t * y_src,
+                            uint8_t * u_src,
+                            uint8_t * v_src,
+                            int y_stride,
+                            int uv_stride,
+                            int width,
+                            int height)
  {
-         int32_t i;
+         const uint32_t dst_dif = 2 * (dst_stride - width); // padding bytes between the end of a written row and the next row
+         uint32_t x, y;
-         for (i = 0; i < 256; i++) {
+         if (height < 0) {
-                 RGB_Y_tab[i] = FIX_OUT(RGB_Y_OUT) * (i - Y_ADD_OUT);
+                 height = -height;
-                 B_U_tab[i] = FIX_OUT(B_U_OUT) * (i - U_ADD_OUT);
+                 y_src += (height - 1) * y_stride;
-                 G_U_tab[i] = FIX_OUT(G_U_OUT) * (i - U_ADD_OUT);
+                 u_src += (height / 2 - 1) * uv_stride;
-                 G_V_tab[i] = FIX_OUT(G_V_OUT) * (i - V_ADD_OUT);
+                 v_src += (height / 2 - 1) * uv_stride;
-                 R_V_tab[i] = FIX_OUT(R_V_OUT) * (i - V_ADD_OUT);
+                 y_stride = -y_stride;
+                 uv_stride = -uv_stride;
+         }
+         for (y = 0; y < (uint32_t) height; y++) {
+                 for (x = 0; x < (uint32_t) width / 2; x++) {
+                         dst[0] = y_src[2 * x];
+                         dst[1] = u_src[x];
+                         dst[2] = y_src[2 * x + 1];
+                         dst[3] = v_src[x];
+                         dst += 4;
+                 }
+                 dst += dst_dif;
+                 y_src += y_stride;
+                 if (y & 1) { // move to the next chroma row only after every second luma row
+                         u_src += uv_stride;
+                         v_src += uv_stride;
+                 }
+         }
+ }
+ /* yuv 4:2:0 planar -> uyvy packed
+  *
+  * Writes every pair of horizontally adjacent pixels as the four bytes
+  * u, y0, v, y1.  Each chroma row is used for two consecutive output rows.
+  * dst_stride is counted in pixels (2 bytes each).  A negative height makes
+  * the source planes be read bottom-up while the output is written top-down.
+  */
+ void
+ yv12_to_uyvy_c(uint8_t * dst,
+                            int dst_stride,
+                            uint8_t * y_src,
+                            uint8_t * u_src,
+                            uint8_t * v_src,
+                            int y_stride,
+                            int uv_stride,
+                            int width,
+                            int height)
+ {
+         const uint32_t dst_dif = 2 * (dst_stride - width); // padding bytes between the end of a written row and the next row
+         uint32_t x, y;
+         if (height < 0) { // flip: start at the last source rows and use negative strides
+                 height = -height;
+                 y_src += (height - 1) * y_stride;
+                 u_src += (height / 2 - 1) * uv_stride;
+                 v_src += (height / 2 - 1) * uv_stride;
+                 y_stride = -y_stride;
+                 uv_stride = -uv_stride;
+         }
+         for (y = 0; y < (uint32_t) height; y++) {
+                 for (x = 0; x < (uint32_t) width / 2; x++) {
+                         dst[0] = u_src[x];
+                         dst[1] = y_src[2 * x];
+                         dst[2] = v_src[x];
+                         dst[3] = y_src[2 * x + 1];
+                         dst += 4;
+                 }
+                 dst += dst_dif;
+                 y_src += y_stride;
+                 if (y & 1) { // move to the next chroma row only after every second luma row
+                         u_src += uv_stride;
+                         v_src += uv_stride;
+                 }
+         }
+ }
+ /*      user yuv planar -> yuv 4:2:0 planar
+         NOTE: does not flip
+
+         Copies the three planes referenced by picture (y, u, v with row
+         pitches stride_y and stride_uv) into y_out, u_out and v_out.
+         The luma plane is copied as height rows of width bytes with output
+         pitch stride; each chroma plane as height/2 rows of width/2 bytes
+         with output pitch stride/2.  height is used as given: there is no
+         negative-height handling here. */
+ void
+ user_to_yuv_c(uint8_t * y_out,
+                           uint8_t * u_out,
+                           uint8_t * v_out,
+                           int stride,
+                           DEC_PICTURE * picture,
+                           int width,
+                           int height)
+ {
+         uint32_t stride2 = stride >> 1; // output chroma pitch
+         uint32_t width2 = width >> 1; // bytes per chroma row
+         uint32_t y;
+         uint8_t *src;
+         src = picture->y;
+         for (y = height; y; y--) {
+                 memcpy(y_out, src, width);
+                 src += picture->stride_y;
+                 y_out += stride;
+         }
+         src = picture->u;
+         for (y = height >> 1; y; y--) {
+                 memcpy(u_out, src, width2);
+                 src += picture->stride_uv;
+                 u_out += stride2;
+         }
+         src = picture->v;
+         for (y = height >> 1; y; y--) {
+                 memcpy(v_out, src, width2);
+                 src += picture->stride_uv;
+                 v_out += stride2;
          }
  }

 Legend:



Removed from v.1.3.2.2
 


changed lines


 
Added in v.1.5
 Legend:



Removed from v.1.3.2.2
 


changed lines


 
Added in v.1.5
-Removed from v.1.3.2.2
+Added in v.1.5

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4