--- mem_transfer.c 2002/09/05 20:42:47 1.5 +++ mem_transfer.c 2005/09/13 12:12:15 1.16 @@ -1,34 +1,25 @@ /***************************************************************************** * * XVID MPEG-4 VIDEO CODEC - * - 8bit<->16bit transfer - + * - 8bit<->16bit transfer - * - * Copyright(C) 2001-2002 Peter Ross + * Copyright(C) 2001-2003 Peter Ross * - * This program is an implementation of a part of one or more MPEG-4 - * Video tools as specified in ISO/IEC 14496-2 standard. Those intending - * to use this software module in hardware or software products are - * advised that its use may infringe existing patents or copyrights, and - * any such use would be at such party's own risk. The original - * developer of this software module and his/her company, and subsequent - * editors and their companies, will have no liability for use of this - * software or modifications or derivatives thereof. - * - * This program is free software; you can redistribute it and/or modify + * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation ; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of + * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: mem_transfer.c,v 1.5 2002/09/05 20:42:47 edgomez Exp $ + * $Id: mem_transfer.c,v 1.16 2005/09/13 12:12:15 suxen_drol Exp $ * ****************************************************************************/ @@ -41,11 +32,15 @@ TRANSFER_16TO8COPY_PTR transfer_16to8copy; TRANSFER_8TO16SUB_PTR transfer_8to16sub; +TRANSFER_8TO16SUBRO_PTR transfer_8to16subro; TRANSFER_8TO16SUB2_PTR transfer_8to16sub2; +TRANSFER_8TO16SUB2RO_PTR transfer_8to16sub2ro; TRANSFER_16TO8ADD_PTR transfer_16to8add; TRANSFER8X8_COPY_PTR transfer8x8_copy; +TRANSFER8X4_COPY_PTR transfer8x4_copy; +#define USE_REFERENCE_C /***************************************************************************** * @@ -72,8 +67,7 @@ const uint8_t * const src, uint32_t stride) { - uint32_t i, j; - + int i, j; for (j = 0; j < 8; j++) { for (i = 0; i < 8; i++) { dst[j * 8 + i] = (int16_t) src[j * stride + i]; @@ -95,10 +89,11 @@ const int16_t * const src, uint32_t stride) { - uint32_t i, j; + int i, j; for (j = 0; j < 8; j++) { for (i = 0; i < 8; i++) { +#ifdef USE_REFERENCE_C int16_t pixel = src[j * 8 + i]; if (pixel < 0) { @@ -107,7 +102,12 @@ pixel = 255; } dst[j * stride + i] = (uint8_t) pixel; - } +#else + const int16_t pixel = src[j * 8 + i]; + const uint8_t value = (uint8_t)( (pixel&~255) ? 
(-pixel)>>(8*sizeof(pixel)-1) : pixel ); + dst[j*stride + i] = value; +#endif + } } } @@ -131,12 +131,12 @@ const uint8_t * ref, const uint32_t stride) { - uint32_t i, j; + int i, j; for (j = 0; j < 8; j++) { for (i = 0; i < 8; i++) { - uint8_t c = cur[j * stride + i]; - uint8_t r = ref[j * stride + i]; + const uint8_t c = cur[j * stride + i]; + const uint8_t r = ref[j * stride + i]; cur[j * stride + i] = r; dct[j * 8 + i] = (int16_t) c - (int16_t) r; @@ -145,6 +145,25 @@ } +void +transfer_8to16subro_c(int16_t * const dct, + const uint8_t * const cur, + const uint8_t * ref, + const uint32_t stride) +{ + int i, j; + + for (j = 0; j < 8; j++) { + for (i = 0; i < 8; i++) { + const uint8_t c = cur[j * stride + i]; + const uint8_t r = ref[j * stride + i]; + dct[j * 8 + i] = (int16_t) c - (int16_t) r; + } + } +} + + + /* * C - the current buffer * R1 - the 1st reference buffer @@ -155,8 +174,9 @@ * * R1 (8bit) = R1 * R2 (8bit) = R2 - * C (8bit) = C - * DCT (16bit)= C - min((R1 + R2)/2, 255) + * R (temp) = min((R1 + R2)/2, 255) + * DCT (16bit)= C - R + * C (8bit) = R */ void transfer_8to16sub2_c(int16_t * const dct, @@ -169,13 +189,27 @@ for (j = 0; j < 8; j++) { for (i = 0; i < 8; i++) { - uint8_t c = cur[j * stride + i]; - int r = (ref1[j * stride + i] + ref2[j * stride + i] + 1) / 2; + const uint8_t c = cur[j * stride + i]; + const uint8_t r = (ref1[j * stride + i] + ref2[j * stride + i] + 1) >> 1; + cur[j * stride + i] = r; + dct[j * 8 + i] = (int16_t) c - (int16_t) r; + } + } +} - if (r > 255) { - r = 255; - } - //cur[j * stride + i] = r; +void +transfer_8to16sub2ro_c(int16_t * const dct, + const uint8_t * const cur, + const uint8_t * ref1, + const uint8_t * ref2, + const uint32_t stride) +{ + uint32_t i, j; + + for (j = 0; j < 8; j++) { + for (i = 0; i < 8; i++) { + const uint8_t c = cur[j * stride + i]; + const uint8_t r = (ref1[j * stride + i] + ref2[j * stride + i] + 1) >> 1; dct[j * 8 + i] = (int16_t) c - (int16_t) r; } } @@ -196,10 +230,11 @@ const int16_t * 
const src, uint32_t stride) { - uint32_t i, j; + int i, j; for (j = 0; j < 8; j++) { for (i = 0; i < 8; i++) { +#ifdef USE_REFERENCE_C int16_t pixel = (int16_t) dst[j * stride + i] + src[j * 8 + i]; if (pixel < 0) { @@ -208,6 +243,12 @@ pixel = 255; } dst[j * stride + i] = (uint8_t) pixel; +#else + const int16_t pixel = (int16_t) dst[j * stride + i] + src[j * 8 + i]; + const uint8_t value = (uint8_t)( (pixel&~255) ? (-pixel)>>(8*sizeof(pixel)-1) : pixel ); + dst[j*stride + i] = value; +#endif + } } } @@ -226,11 +267,39 @@ const uint8_t * const src, const uint32_t stride) { - uint32_t i, j; + int j, i; - for (j = 0; j < 8; j++) { - for (i = 0; i < 8; i++) { - dst[j * stride + i] = src[j * stride + i]; + for (j = 0; j < 8; ++j) { + uint8_t *d = dst + j*stride; + const uint8_t *s = src + j*stride; + + for (i = 0; i < 8; ++i) + { + *d++ = *s++; } } } + +/* + * SRC - the source buffer + * DST - the destination buffer + * + * Then the function does the 8->8 bit transfer and this series of operations : + * + * SRC (8bit) = SRC + * DST (8bit) = SRC + */ +void +transfer8x4_copy_c(uint8_t * const dst, + const uint8_t * const src, + const uint32_t stride) +{ + uint32_t j; + + for (j = 0; j < 4; j++) { + uint32_t *d= (uint32_t*)(dst + j*stride); + const uint32_t *s = (const uint32_t*)(src + j*stride); + *(d+0) = *(s+0); + *(d+1) = *(s+1); + } +}