Neon-Shifts examples

Here are some examples of output from the NEON shifts patch.

On the left is the output without the patch, and on the right is the output with it.

Shift by Register

   1 /* We use a pointer so as not to tie VAL to core-registers.
   2    AMOUNT will be extended and moved to neon-registers. */
   3 void
   4 ashift_by_register (long long *val, int *amount)
   5 {
   6   *val = *val << *amount;
   7 }
   8 
   9 void
  10 ashiftrt_by_register (long long *val, int *amount)
  11 {
  12   *val = *val >> *amount;
  13 }
  14 
  15 void
  16 lshiftrt_by_register (unsigned long long *val, int *amount)
  17 { 
  18   *val = *val >> *amount;
  19 }

ashift_by_register:
        ldr     r2, [r1, #0]  | flds   s14, [r1, #0]   @ int                                                                                                                                         
        push    {r4, r5}      | fldd   d17, [r0, #0]   @ int                                                                                                                                         
        ldr     r3, [r0, #4]  | vshl.u64       d16, d17, d7    @ ashl d16, d17, s14                                                                                                                  
        sub     r4, r2, #32   | fstd   d16, [r0, #0]   @ int                                                                                                                                         
        rsb     r1, r2, #32   | bx     lr                                                                                                                                                            
        ldr     r5, [r0, #0]  |                                                                                                                                                                      
        lsls    r3, r3, r2    |                                                                                                                                                                      
        lsl     r4, r5, r4    |                                                                                                                                                                      
        lsr     r1, r5, r1    |                                                                                                                                                                      
        lsl     r2, r5, r2    |                                                                                                                                                                      
        orrs    r3, r3, r4    |                                                                                                                                                                      
        orrs    r3, r3, r1    |                                                                                                                                                                      
        str     r2, [r0, #0]  |                                                                                                                                                                      
        str     r3, [r0, #4]  |                                                                                                                                                                      
        pop     {r4, r5}      |                                                                                                                                                                      
        bx      lr            |                                                                                                                                                                      

ashiftrt_by_register:
        ldr     r3, [r1, #0]  | ldr    r3, [r1, #0]
        push    {r4, r5}      | fldd   d17, [r0, #0]   @ int
        ldmia   r0, {r2, r4}  | push   {r4, r5}
        rsb     r1, r3, #32   | negs   r3, r3
        subs    r5, r3, #32   | fmsr   s14, r3 @ int
        lsr     r2, r2, r3    | vshl.s64       d16, d17, d7    @ ashr d16, d17, s14
        lsl     r1, r4, r1    | fstd   d16, [r0, #0]   @ int
        it      pl            | pop    {r4, r5}
        asrpl   r5, r4, r5    | bx     lr
        asr     r3, r4, r3    |
        orr     r2, r2, r1    |
        it      pl            |
        orrpl   r2, r2, r5    |
        stmia   r0, {r2, r3}  |
        pop     {r4, r5}      |
        bx      lr            |

lshiftrt_by_register:
        ldr     r2, [r1, #0]  | ldr    r3, [r1, #0]
        push    {r4, r5}      | fldd   d17, [r0, #0]   @ int
        ldmia   r0, {r3, r5}  | push   {r4, r5}
        rsb     r4, r2, #32   | negs   r3, r3
        sub     r1, r2, #32   | fmsr   s14, r3 @ int
        lsrs    r3, r3, r2    | vshl.u64       d16, d17, d7    @ lshr d16, d17, s14
        lsl     r4, r5, r4    | fstd   d16, [r0, #0]   @ int
        lsr     r1, r5, r1    | pop    {r4, r5}
        lsr     r2, r5, r2    | bx     lr
        orrs    r3, r3, r4    |
        str     r2, [r0, #4]  |
        orrs    r3, r3, r1    |
        str     r3, [r0, #0]  |
        pop     {r4, r5}      |
        bx      lr            |

Shift by constant "1"

   1 /* Shift VAL by 1.  This has its own alternative *and* also gets "optimized"
   2    to *val+*val so test it separately.  */
   3 void
   4 ashift_by_constant_1 (long long *val)
   5 {
   6   *val = *val << 1;
   7 }
   8 
   9 void
  10 ashiftrt_by_constant_1 (long long *val)
  11 {
  12   *val = *val >> 1;
  13 }
  14 
  15 void
  16 lshiftrt_by_constant_1 (unsigned long long *val)
  17 {
  18   *val = *val >> 1;
  19 }

ashift_by_constant_1:

        fldd    d16, [r0, #0]   @ int | fldd   d16, [r0, #0]   @ int
        vadd.i64        d16, d16, d16 | vadd.i64       d16, d16, d16
        fstd    d16, [r0, #0]   @ int | fstd   d16, [r0, #0]   @ int
        bx      lr                    | bx     lr

ashiftrt_by_constant_1:
        ldrd    r2, [r0]              | fldd   d17, [r0, #0]   @ int
        movs    r3, r3, asr #1        | vshr.s64       d16, d17, #1
        mov     r2, r2, rrx           | fstd   d16, [r0, #0]   @ int
        strd    r2, [r0]              | bx     lr
        bx      lr                    |

lshiftrt_by_constant_1:
        ldrd    r2, [r0]              | fldd   d17, [r0, #0]   @ int
        movs    r3, r3, lsr #1        | vshr.u64       d16, d17, #1
        mov     r2, r2, rrx           | fstd   d16, [r0, #0]   @ int
        strd    r2, [r0]              | bx     lr
        bx      lr                    |

Shift by constant "2"

   1 /* Shift VAL by 2.  This is treated as a normal immediate constant.  */
   2 void
   3 ashift_by_constant_2 (long long *val)
   4 {
   5   *val = *val << 2;
   6 }
   7 
   8 void
   9 ashiftrt_by_constant_2 (long long *val)
  10 {
  11   *val = *val >> 2;
  12 }
  13 
  14 void
  15 lshiftrt_by_constant_2 (unsigned long long *val)
  16 {
  17   *val = *val >> 2;
  18 }

ashift_by_constant_2:
        ldmia   r0, {r2, r3}          | fldd   d17, [r0, #0]   @ int
        lsls    r3, r3, #2            | vshl.u64       d16, d17, #2
        lsls    r1, r2, #2            | fstd   d16, [r0, #0]   @ int
        orr     r3, r3, r2, lsr #30   | bx     lr
        stmia   r0, {r1, r3}          |
        bx      lr                    |

ashiftrt_by_constant_2:

        ldr     r3, [r0, #0]          | fldd   d17, [r0, #0]   @ int
        ldr     r2, [r0, #4]          | vshr.s64       d16, d17, #2
        lsrs    r3, r3, #2            | fstd   d16, [r0, #0]   @ int
        asrs    r1, r2, #2            | bx     lr
        orr     r3, r3, r2, lsl #30   |
        str     r1, [r0, #4]          |
        str     r3, [r0, #0]          |
        bx      lr                    |

lshiftrt_by_constant_2:
        ldr     r3, [r0, #0]          | fldd   d17, [r0, #0]   @ int
        ldr     r2, [r0, #4]          | vshr.u64       d16, d17, #2
        lsrs    r3, r3, #2            | fstd   d16, [r0, #0]   @ int
        lsrs    r1, r2, #2            | bx     lr
        orr     r3, r3, r2, lsl #30   |
        str     r1, [r0, #4]          |
        str     r3, [r0, #0]          |
        bx      lr                    |

AndrewStubbs/Sandbox/neon-shifts (last modified 2012-05-22 12:28:35)