358 |
ret |
ret |
359 |
|
|
360 |
|
|
361 |
|
;===========================================================================
;
; void transfer_8to16sub2_xmm(int16_t * const dct,
;                             uint8_t * const cur,
;                             const uint8_t * ref1,
;                             const uint8_t * ref2,
;                             const uint32_t stride);
;
; For every pixel (x, y) of an 8x8 block:
;
;   avg          = (ref1[y*stride + x] + ref2[y*stride + x] + 1) >> 1
;   dct[y*8 + x] = cur[y*stride + x] - avg
;
; ABI:    32-bit cdecl, all arguments passed on the stack
; In:     [esp+4]  = dct     destination, written as 8 packed rows of 8 int16
;         [esp+8]  = cur     current picture, rows `stride` bytes apart (read only)
;         [esp+12] = ref1    first reference,  rows `stride` bytes apart
;         [esp+16] = ref2    second reference, rows `stride` bytes apart
;         [esp+20] = stride  byte distance between two source rows
; Out:    64 int16 differences stored at dct; no return value
; Clobb:  eax, edx, mm0-mm3, mm7, flags
;         (edi, esi, ebx are saved on entry and restored on exit)
; Notes:  - uses pavgb, so the CPU must support the MMX extensions / SSE
;         - no emms is issued by this routine
;
;===========================================================================

align 16
cglobal transfer_8to16sub2_xmm
transfer_8to16sub2_xmm:

    push edi
    push esi
    push ebx

    ; The three pushes above moved esp by 12 bytes, hence "12 + n" below.
    mov edi, [esp + 12 +  4]    ; edi = dct
    mov esi, [esp + 12 +  8]    ; esi = cur
    mov ebx, [esp + 12 + 12]    ; ebx = ref1
    mov edx, [esp + 12 + 16]    ; edx = ref2
    mov eax, [esp + 12 + 20]    ; eax = stride (source row pitch in bytes)

    pxor mm7, mm7               ; mm7 = 0, zero half used to widen u8 -> u16

    ; One 8-pixel row per repetition, fully unrolled by the assembler.
    ; Invariant at the top of each repetition:
    ;   esi/ebx/edx point at the current row of cur/ref1/ref2,
    ;   edi points at the matching row of dct.
%rep 8
    movq mm0, [esi]             ; mm0 = cur row (8 x u8)
    movq mm2, [ebx]             ; mm2 = ref1 row
    movq mm3, [edx]             ; mm3 = ref2 row
    movq mm1, mm0               ; mm1 = cur row (copy for the high half)

    pavgb mm2, mm3              ; mm2 = (ref1 + ref2 + 1) >> 1, per byte
    movq mm3, mm2               ; mm3 = avg (copy for the high half)

    punpcklbw mm0, mm7          ; mm0 = cur(3-0) as 16-bit
    punpckhbw mm1, mm7          ; mm1 = cur(7-4) as 16-bit
    punpcklbw mm2, mm7          ; mm2 = avg(3-0) as 16-bit
    punpckhbw mm3, mm7          ; mm3 = avg(7-4) as 16-bit

    psubw mm0, mm2              ; mm0 = cur(3-0) - avg(3-0)
    psubw mm1, mm3              ; mm1 = cur(7-4) - avg(7-4)

    movq [edi + 0], mm0         ; dct(3-0)
    movq [edi + 8], mm1         ; dct(7-4)

    ; Step to the next row: sources advance by the picture stride, the
    ; destination by one packed row (8 coefficients * 2 bytes = 16 bytes).
    ; After the last row these updates are redundant but harmless.
    add esi, eax
    add ebx, eax
    add edx, eax
    add edi, 16
%endrep

    ; Exit
    pop ebx
    pop esi
    pop edi

    ret
593 |
|
|
594 |
;=========================================================================== |
;=========================================================================== |
595 |
; |
; |