/* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */

/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
*         added by Chris Demetriou.
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
*  Software Distribution Coordinator  or  [email protected]
*  School of Computer Science
*  Carnegie Mellon University
*  Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/

#include <machine/asm.h>

/*
 * Pick the entry-point name from the build-time flags:
 * MEMCOPY selects memcpy, MEMMOVE selects memmove, and with neither
 * defined the routine is built as bcopy.
 */
#if defined(MEMCOPY)
#define FUNCTION        memcpy
#elif defined(MEMMOVE)
#define FUNCTION        memmove
#else
#define FUNCTION        bcopy
#endif

/*
 * memcpy()/memmove() are called as (to, from, len) while bcopy() is
 * called as (from, to, len), so the registers holding the source and
 * destination pointers swap between the two flavours.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define DSTREG          a0
#define SRCREG          a1
#else
#define DSTREG          a1
#define SRCREG          a0
#endif

/* The byte count lives in the same register for every flavour. */
#define SIZEREG         a2

/*
* Copy bytes.
*
* void bcopy(char *from, char *to, size_t len);
* void *memcpy(void *to, const void *from, size_t len);
* void *memmove(void *to, const void *from, size_t len);
*
* No matter how invoked, the source and destination registers are used
* directly for calculation.  There's no point in copying them to "working"
* registers, since the code uses their values "in place," and
* copying them would be slower.
*/

/*
 * FUNCTION (bcopy, memcpy or memmove, depending on the build flags):
 * copy SIZEREG bytes from the buffer at SRCREG to the buffer at DSTREG.
 *
 * In:    SRCREG  = source address
 *        DSTREG  = destination address
 *        SIZEREG = byte count (treated as signed; <= 0 copies nothing)
 * Out:   v0 = original destination address (memcpy/memmove builds only)
 * Uses:  SRCREG, DSTREG, SIZEREG are modified in place; a3, a4 and
 *        t0-t7 are used as scratch.
 *
 * Structure:
 *   - If the destination starts inside [src, src+len) the copy is done
 *     backwards (bcopy_overlap); otherwise it is done forwards.
 *   - Forward copies with equal src/dst alignment move whole quadwords
 *     with ldq_u/stq_u and merge the partial quadwords at either end.
 *   - Forward copies with different alignment assemble each destination
 *     quadword from two source quadwords with extql/extqh.
 *   - Copies of 8 bytes or fewer with different alignment (and all short
 *     overlapping copies) go through bcopy_da_finish, which reads every
 *     source byte into registers before storing anything.
 *
 * NOTE(review): the second LEAF argument is presumably the argument
 * count expected by <machine/asm.h> -- confirm against that header.
 */
LEAF(FUNCTION,3)

#if defined(MEMCOPY) || defined(MEMMOVE)
       /* set up return value, while we still can */
       mov     DSTREG,v0
#endif

       /* Nothing to do for a zero or negative length (signed test) */
       ble     SIZEREG,bcopy_done

       /*
        * Check for overlap: t5 = dst - src, compared unsigned against
        * the length.  True when dst lies in [src, src+len), where a
        * forward copy would overwrite source bytes before reading them.
        * A destination below the source gives a wrapped unsigned
        * difference instead.
        */
       subq    DSTREG,SRCREG,t5
       cmpult  t5,SIZEREG,t5
       bne     t5,bcopy_overlap

       /* a3 = end address (one past the last source byte) */
       addq    SRCREG,SIZEREG,a3

       /* Get the first word (the aligned quadword containing *src) */
       ldq_u   t2,0(SRCREG)

       /*
        * Do they have the same alignment?
        * t0 = (src ^ dst) & 7 is non-zero when the low three address
        * bits differ; t1 = dst & 7 is the destination's byte offset.
        */
       xor     SRCREG,DSTREG,t0
       and     t0,7,t0
       and     DSTREG,7,t1
       bne     t0,bcopy_different_alignment

       /* src & dst have same alignment */
       beq     t1,bcopy_all_aligned

       /*
        * Same alignment but not on a quadword boundary: build the first
        * quadword from the destination's existing low bytes (mskql) and
        * the source's bytes from the start offset upward (mskqh).  The
        * length is increased by the offset so that it is counted from
        * the quadword boundary below the start.
        */
       ldq_u   t3,0(DSTREG)
       addq    SIZEREG,t1,SIZEREG
       mskqh   t2,SRCREG,t2
       mskql   t3,SRCREG,t3
       or      t2,t3,t2

       /* Dst is 8-byte aligned */

bcopy_all_aligned:
       /*
        * If less than 8 bytes,skip loop.
        * t0      = (len - 1) & ~7: bytes stored by the loop, i.e. every
        *           quadword except the last one
        * SIZEREG = len & 7: bytes used in the last quadword
        *           (0 means the last quadword is complete)
        */
       subq    SIZEREG,1,t0
       and     SIZEREG,7,SIZEREG
       bic     t0,7,t0
       beq     t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
       /*
        * Store the quadword held in t2, then fetch the next source
        * quadword; t0 counts down the remaining loop bytes.
        */
       stq_u   t2,0(DSTREG)
       addq    DSTREG,8,DSTREG
       ldq_u   t2,8(SRCREG)
       subq    t0,8,t0
       addq    SRCREG,8,SRCREG
       bne     t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
       /*
        * t2 holds the final quadword of data.  If it is only partly
        * used (SIZEREG != 0) merge it with the destination; otherwise
        * store it whole and exit.
        */
       bne     SIZEREG,bcopy_small_left
       stq_u   t2,0(DSTREG)
       RET

bcopy_small_left:
       /*
        * Keep the low SIZEREG bytes of the new data and the remaining
        * high bytes of the existing destination quadword.
        */
       mskql   t2,SIZEREG,t4
       ldq_u   t3,0(DSTREG)
       mskqh   t3,SIZEREG,t3
       or      t4,t3,t4
       stq_u   t4,0(DSTREG)
       RET

bcopy_different_alignment:
       /*
        * this is the fun part
        */
       /* a3 = src end again (src and len are unchanged since entry) */
       addq    SRCREG,SIZEREG,a3
       /* 8 bytes or fewer: handled entirely by bcopy_da_finish */
       cmpule  SIZEREG,8,t0
       bne     t0,bcopy_da_finish

       /* Destination already quadword aligned: no leading fragment */
       beq     t1,bcopy_da_noentry

       /*
        * Do the initial partial word.
        * t0 = (-dst) & 7 = bytes up to the next destination quadword
        * boundary.  Assemble 8 unaligned source bytes (extql/extqh),
        * shift them up to the destination offset (insql), keep the
        * destination's bytes below that offset (mskql) and store the
        * merged quadword.  Then advance src/dst by t0, shrink the
        * length, and reload t2 with the quadword containing the new src.
        */
       subq    zero,DSTREG,t0
       and     t0,7,t0
       ldq_u   t3,7(SRCREG)
       extql   t2,SRCREG,t2
       extqh   t3,SRCREG,t3
       or      t2,t3,t5
       insql   t5,DSTREG,t5
       ldq_u   t6,0(DSTREG)
       mskql   t6,DSTREG,t6
       or      t5,t6,t5
       stq_u   t5,0(DSTREG)
       addq    SRCREG,t0,SRCREG
       addq    DSTREG,t0,DSTREG
       subq    SIZEREG,t0,SIZEREG
       ldq_u   t2,0(SRCREG)

bcopy_da_noentry:
       /*
        * DSTREG is quadword aligned from here on.
        * t0      = (len - 1) & ~7: bytes stored by the loop
        * SIZEREG = len & 7: bytes used in the final quadword (0 = all 8)
        */
       subq    SIZEREG,1,t0
       bic     t0,7,t0
       and     SIZEREG,7,SIZEREG
       beq     t0,bcopy_da_finish2

bcopy_da_lp:
       /*
        * Loop unrolled twice.  Each half loads the next source quadword
        * and combines it with the previous one to form one aligned
        * destination quadword.  t2 and t3 swap roles between the halves
        * so the previously loaded quadword is reused without a move.
        * Adding 8 to SRCREG leaves its low three bits, and therefore
        * the extract shift amounts, unchanged.
        */
       ldq_u   t3,7(SRCREG)
       addq    SRCREG,8,SRCREG
       extql   t2,SRCREG,t4
       extqh   t3,SRCREG,t5
       subq    t0,8,t0
       or      t4,t5,t5
       stq     t5,0(DSTREG)
       addq    DSTREG,8,DSTREG
       /* Exit after the first half: newest source quadword is in t3 */
       beq     t0,bcopy_da_finish1
       ldq_u   t2,7(SRCREG)
       addq    SRCREG,8,SRCREG
       extql   t3,SRCREG,t4
       extqh   t2,SRCREG,t5
       subq    t0,8,t0
       or      t4,t5,t5
       stq     t5,0(DSTREG)
       addq    DSTREG,8,DSTREG
       bne     t0,bcopy_da_lp

bcopy_da_finish2:
       /*
        * Do the last new word: the newest source quadword is in t2
        * here; move it to t3, where bcopy_da_finish1 expects it.
        */
       mov     t2,t3

bcopy_da_finish1:
       /*
        * Do the last partial word: combine the newest source quadword
        * (t3) with the quadword holding the last source byte (a3 - 1),
        * then let the same-alignment tail code store or merge it.
        */
       ldq_u   t2,-1(a3)
       extql   t3,SRCREG,t3
       extqh   t2,SRCREG,t2
       or      t2,t3,t2
       br      zero,bcopy_samealign_lp_end

bcopy_da_finish:
       /*
        * Short copy (1..8 bytes) that may straddle a quadword boundary
        * in both source and destination.  On entry t2 holds the
        * quadword containing *src and a3 = src + len.
        *
        * Do the last word in the next source word: assemble the source
        * bytes into t2, then split them into the parts that land in
        * the destination's low quadword (insql -> t2) and high
        * quadword (insqh -> t3).
        */
       ldq_u   t3,-1(a3)
       extql   t2,SRCREG,t2
       extqh   t3,SRCREG,t3
       or      t2,t3,t2
       insqh   t2,DSTREG,t3
       insql   t2,DSTREG,t2
       /*
        * Build a byte mask covering len bytes: start from all ones,
        * keep the low (len & 7) bytes; when that is empty (len == 8)
        * cmovne leaves the all-ones mask in t4.  Then split the mask
        * the same way as the data (t4 = low part, t5 = high part).
        */
       lda     t4,-1(zero)
       mskql   t4,SIZEREG,t5
       cmovne  t5,t5,t4
       insqh   t4,DSTREG,t5
       insql   t4,DSTREG,t4
       /* a4 = dst + len; t6/t7 = first/last destination quadwords */
       addq    DSTREG,SIZEREG,a4
       ldq_u   t6,0(DSTREG)
       ldq_u   t7,-1(a4)
       /* Clear the bytes being replaced, then merge in the new data */
       bic     t6,t4,t6
       bic     t7,t5,t7
       and     t2,t4,t2
       and     t3,t5,t3
       or      t2,t6,t2
       or      t3,t7,t3
       /*
        * The high quadword is stored first.  When the destination fits
        * in a single quadword both stores hit the same location and
        * the high mask is empty, so the second (low) store must be the
        * one that lands last.
        */
       stq_u   t3,-1(a4)
       stq_u   t2,0(DSTREG)
       RET

bcopy_overlap:
       /*
        * Basically equivalent to previous case, only backwards.
        * Not quite as highly optimized
        */
       /* a3 = src end, a4 = dst end (one past the last byte of each) */
       addq    SRCREG,SIZEREG,a3
       addq    DSTREG,SIZEREG,a4

       /* 8 bytes or fewer - don't worry about overlap */
       cmpule  SIZEREG,8,t0
       bne     t0,bcopy_ov_short

       /*
        * Possibly do a partial first word (the tail of the buffer).
        * t4 = dst_end & 7 = bytes that spill into the last destination
        * quadword.  Step both end pointers back by t4 (a4 becomes
        * quadword aligned), fetch 8 unaligned source bytes at a3, and
        * merge their low t4 bytes with the destination quadword's
        * existing high bytes.
        */
       and     a4,7,t4
       beq     t4,bcopy_ov_nostart2
       subq    a3,t4,a3
       subq    a4,t4,a4
       ldq_u   t1,0(a3)
       subq    SIZEREG,t4,SIZEREG
       ldq_u   t2,7(a3)
       ldq     t3,0(a4)
       extql   t1,a3,t1
       extqh   t2,a3,t2
       or      t1,t2,t1
       mskqh   t3,t4,t3
       mskql   t1,t4,t1
       or      t1,t3,t1
       stq     t1,0(a4)

bcopy_ov_nostart2:
       /*
        * t4      = len & ~7: bytes copied by the backward loop
        * SIZEREG = len & 7: bytes left over at the start of the buffer
        */
       bic     SIZEREG,7,t4
       and     SIZEREG,7,SIZEREG
       beq     t4,bcopy_ov_lp_end

bcopy_ov_lp:
       /* This could be more pipelined, but it doesn't seem worth it */
       /*
        * Fetch the 8 source bytes ending at a3 as an unaligned
        * quadword and store them to the aligned quadword ending at a4,
        * moving both pointers down by 8.
        */
       ldq_u   t0,-8(a3)
       subq    a4,8,a4
       ldq_u   t1,-1(a3)
       subq    a3,8,a3
       extql   t0,a3,t0
       extqh   t1,a3,t1
       subq    t4,8,t4
       or      t0,t1,t0
       stq     t0,0(a4)
       bne     t4,bcopy_ov_lp

bcopy_ov_lp_end:
       /* No leading fragment left: finished */
       beq     SIZEREG,bcopy_done

       /*
        * Leading fragment: fetch 8 unaligned bytes from the start of
        * the source, shift them up to the destination's byte offset
        * (insql), keep the destination's bytes below that offset
        * (mskql) and store the merged first quadword.
        */
       ldq_u   t0,0(SRCREG)
       ldq_u   t1,7(SRCREG)
       ldq_u   t2,0(DSTREG)
       extql   t0,SRCREG,t0
       extqh   t1,SRCREG,t1
       or      t0,t1,t0
       insql   t0,DSTREG,t0
       mskql   t2,DSTREG,t2
       or      t2,t0,t2
       stq_u   t2,0(DSTREG)

bcopy_done:
       RET

bcopy_ov_short:
       /*
        * Short overlapping copy: preload t2 with the first source
        * quadword, as bcopy_da_finish expects, and reuse that path.
        * It loads all source bytes before it stores anything.
        */
       ldq_u   t2,0(SRCREG)
       br      zero,bcopy_da_finish

       END(FUNCTION)