--- mem_transfer.c	2002/11/17 00:51:11	1.7
+++ mem_transfer.c	2005/06/14 13:58:21	1.14
@@ -1,56 +1,25 @@
 /*****************************************************************************
  *
  *  XVID MPEG-4 VIDEO CODEC
- *  - 8bit<->16bit transfer -
+ *  - 8bit<->16bit transfer  -
  *
- *  Copyright(C) 2001-2002 Peter Ross <pross@xvid.org>
+ *  Copyright(C) 2001-2003 Peter Ross <pross@xvid.org>
  *
- *  This file is part of XviD, a free MPEG-4 video encoder/decoder
- *
- *  XviD is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
+ *  This program is free software ; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation ; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
+ *  along with this program ; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
- *  Under section 8 of the GNU General Public License, the copyright
- *  holders of XVID explicitly forbid distribution in the following
- *  countries:
- *
- *    - Japan
- *    - United States of America
- *
- *  Linking XviD statically or dynamically with other modules is making a
- *  combined work based on XviD.  Thus, the terms and conditions of the
- *  GNU General Public License cover the whole combination.
- *
- *  As a special exception, the copyright holders of XviD give you
- *  permission to link XviD with independent modules that communicate with
- *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
- *  license terms of these independent modules, and to copy and distribute
- *  the resulting combined work under terms of your choice, provided that
- *  every copy of the combined work is accompanied by a complete copy of
- *  the source code of XviD (the version of XviD used to produce the
- *  combined work), being distributed under the terms of the GNU General
- *  Public License plus this exception.  An independent module is a module
- *  which is not derived from or based on XviD.
- *
- *  Note that people who make modified versions of XviD are not obligated
- *  to grant this special exception for their modified versions; it is
- *  their choice whether to do so.  The GNU General Public License gives
- *  permission to release a modified version without this exception; this
- *  exception also makes it possible to release a modified version which
- *  carries forward this exception.
- *
- * $Id: mem_transfer.c,v 1.7 2002/11/17 00:51:11 edgomez Exp $
+ * $Id: mem_transfer.c,v 1.14 2005/06/14 13:58:21 Skal Exp $
  *
  ****************************************************************************/
 
@@ -63,11 +32,14 @@
 TRANSFER_16TO8COPY_PTR transfer_16to8copy;
 
 TRANSFER_8TO16SUB_PTR  transfer_8to16sub;
+TRANSFER_8TO16SUBRO_PTR  transfer_8to16subro;
 TRANSFER_8TO16SUB2_PTR transfer_8to16sub2;
+TRANSFER_8TO16SUB2RO_PTR transfer_8to16sub2ro;
 TRANSFER_16TO8ADD_PTR  transfer_16to8add;
 
 TRANSFER8X8_COPY_PTR transfer8x8_copy;
 
+#define USE_REFERENCE_C	/* compile the if-based pixel clamps in the #ifdef USE_REFERENCE_C sections below, not the shift-based expressions in their #else branches */
 
 /*****************************************************************************
  *
@@ -94,12 +66,11 @@
 					 const uint8_t * const src,
 					 uint32_t stride)
 {
-	uint32_t i, j;
-
+	int i, j;
 	for (j = 0; j < 8; j++) {
 		for (i = 0; i < 8; i++) {
 			dst[j * 8 + i] = (int16_t) src[j * stride + i];
-		}
+    }
 	}
 }
 
@@ -117,10 +88,11 @@
 					 const int16_t * const src,
 					 uint32_t stride)
 {
-	uint32_t i, j;
+	int i, j;
 
 	for (j = 0; j < 8; j++) {
 		for (i = 0; i < 8; i++) {
+#ifdef USE_REFERENCE_C
 			int16_t pixel = src[j * 8 + i];
 
 			if (pixel < 0) {
@@ -129,7 +101,12 @@
 				pixel = 255;
 			}
 			dst[j * stride + i] = (uint8_t) pixel;
-		}
+#else
+			const int16_t pixel = src[j * 8 + i];
+			const uint8_t value = (uint8_t)( (pixel&~255) ? (-pixel)>>(8*sizeof(pixel)-1) : pixel );
+			dst[j*stride + i] = value;
+#endif
+    }
 	}
 }
 
@@ -153,12 +130,12 @@
 					const uint8_t * ref,
 					const uint32_t stride)
 {
-	uint32_t i, j;
+	int i, j;
 
 	for (j = 0; j < 8; j++) {
 		for (i = 0; i < 8; i++) {
-			uint8_t c = cur[j * stride + i];
-			uint8_t r = ref[j * stride + i];
+			const uint8_t c = cur[j * stride + i];
+			const uint8_t r = ref[j * stride + i];
 
 			cur[j * stride + i] = r;
 			dct[j * 8 + i] = (int16_t) c - (int16_t) r;
@@ -167,6 +144,25 @@
 }
 
 
+void	/* dct = cur - ref over one 8x8 block; neither cur nor ref is written */
+transfer_8to16subro_c(int16_t * const dct,	/* out: 64 differences, stored row by row with a pitch of 8 */
+					const uint8_t * const cur,	/* in: current 8x8 block, only read */
+					const uint8_t * ref,	/* in: reference 8x8 block, only read */
+					const uint32_t stride)	/* distance between consecutive rows of cur and ref, in bytes */
+{
+	int i, j;
+	/* j selects the row, i the column; cur and ref share the same stride */
+	for (j = 0; j < 8; j++) {
+		for (i = 0; i < 8; i++) {
+			const uint8_t c = cur[j * stride + i];
+			const uint8_t r = ref[j * stride + i];
+			dct[j * 8 + i] = (int16_t) c - (int16_t) r;	/* difference lies in [-255, 255], so it fits int16_t */
+		}
+	}
+}
+
+
+
 /*
  * C   - the current buffer
  * R1  - the 1st reference buffer
@@ -177,8 +173,9 @@
  *
  *    R1  (8bit) = R1
  *    R2  (8bit) = R2
- *    C   (8bit) = C
- *    DCT (16bit)= C - min((R1 + R2)/2, 255)
+ *    R   (temp) = min((R1 + R2 + 1)/2, 255)
+ *    DCT (16bit)= C - R
+ *    C   (8bit) = R
  */
 void
 transfer_8to16sub2_c(int16_t * const dct,
@@ -191,13 +188,27 @@
 
 	for (j = 0; j < 8; j++) {
 		for (i = 0; i < 8; i++) {
-			uint8_t c = cur[j * stride + i];
-			int r = (ref1[j * stride + i] + ref2[j * stride + i] + 1) / 2;
+			const uint8_t c = cur[j * stride + i];
+			const uint8_t r = (ref1[j * stride + i] + ref2[j * stride + i] + 1) >> 1;
+			cur[j * stride + i] = r;
+			dct[j * 8 + i] = (int16_t) c - (int16_t) r;
+		}
+	}
+}
 
-			if (r > 255) {
-				r = 255;
-			}
-			//cur[j * stride + i] = r;
+void
+transfer_8to16sub2ro_c(int16_t * const dct,
+					 const uint8_t * const cur,
+					 const uint8_t * ref1,
+					 const uint8_t * ref2,
+					 const uint32_t stride)
+{
+	uint32_t i, j;
+
+	for (j = 0; j < 8; j++) {
+		for (i = 0; i < 8; i++) {
+			const uint8_t c = cur[j * stride + i];
+			const uint8_t r = (ref1[j * stride + i] + ref2[j * stride + i] + 1) >> 1;
 			dct[j * 8 + i] = (int16_t) c - (int16_t) r;
 		}
 	}
@@ -218,10 +229,11 @@
 					const int16_t * const src,
 					uint32_t stride)
 {
-	uint32_t i, j;
+	int i, j;
 
 	for (j = 0; j < 8; j++) {
 		for (i = 0; i < 8; i++) {
+#ifdef USE_REFERENCE_C
 			int16_t pixel = (int16_t) dst[j * stride + i] + src[j * 8 + i];
 
 			if (pixel < 0) {
@@ -230,6 +242,12 @@
 				pixel = 255;
 			}
 			dst[j * stride + i] = (uint8_t) pixel;
+#else
+      const int16_t pixel = (int16_t) dst[j * stride + i] + src[j * 8 + i];
+			const uint8_t value = (uint8_t)( (pixel&~255) ? (-pixel)>>(8*sizeof(pixel)-1) : pixel );
+			dst[j*stride + i] = value;
+#endif
+
 		}
 	}
 }
@@ -248,11 +266,15 @@
 				   const uint8_t * const src,
 				   const uint32_t stride)
 {
-	uint32_t i, j;
+	int j, i;
 
-	for (j = 0; j < 8; j++) {
-		for (i = 0; i < 8; i++) {
-			dst[j * stride + i] = src[j * stride + i];
+	for (j = 0; j < 8; ++j) {
+	    uint8_t *d = dst + j*stride;
+		const uint8_t *s = src + j*stride;
+
+		for (i = 0; i < 8; ++i)
+		{
+			*d++ = *s++;
 		}
 	}
 }