/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>

/*
 * Endian-dependent helpers for the unaligned-source paths of memcpy.
 *
 * Those paths read whole words from the (now aligned) source and keep the
 * bytes that did not fit into the previous output word as a "carry" in r5.
 *
 *  SHIFT_1   - positions the bytes of a freshly loaded word that complete
 *              the current output word (OR-ed with the carry afterwards).
 *  SHIFT_2   - turns the leftover bytes of that loaded word into the new
 *              carry.
 *  MERGE_1/2 - used once on the off-by-1 path to build the initial 3-byte
 *              carry out of a halfword (MERGE_1) and a byte (MERGE_2).
 *              On little endian MERGE_2 expands to nothing because the byte
 *              is already in the low bits.
 *  EXTRACT_1/2 - used once after the off-by-1 loop to pull the halfword and
 *              then the byte out of the carry for the final sth/stb.
 *              Note that the little-endian EXTRACT_1 and the big-endian
 *              EXTRACT_2 ignore IMM and use a fixed mask/shift instead.
 *
 * The trailing "; <<" / "; >>" are assembler comments that become part of
 * the expansion; they only annotate the shift direction.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Load/store primitives for the fully aligned bulk loop.
 *
 * With CONFIG_ARC_HAS_LL64 each LOADX/STOREX moves 8 bytes (ldd/std),
 * otherwise 4 bytes (ld/st); both forms advance the pointer register by
 * the same amount. The bulk loop issues four of them per iteration, so:
 *
 *  ZOLSHFT - log2 of the bytes copied per bulk-loop iteration (32 or 16).
 *  ZOLAND  - mask giving the bytes left over after the bulk loop.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * memcpy - copy r2 bytes from the buffer at r1 to the buffer at r0.
 *
 * In:   r0 = destination, r1 = source, r2 = length in bytes
 * Out:  r0 = destination (r0 is never written; r3 is the store pointer)
 * Uses: r1-r10, lp_count and the flags; presumably r11 as well when LOADX
 *       is the 64-bit ldd filling a register pair (confirm against the ISA).
 *
 * Outline:
 *   - length 0      : return immediately.
 *   - length <= 8   : plain byte loop.
 *   - otherwise     : byte-copy until the destination is 4-byte aligned,
 *                     then dispatch on (source & 3):
 *       0 : unrolled LOADX/STOREX loop, then a byte loop for the tail.
 *       1 : pre-read 3 bytes, then word loads + shift/merge (CASE 1).
 *       2 : pre-read 2 bytes, then word loads + shift/merge (CASE 2).
 *       3 : pre-read 1 byte,  then word loads + shift/merge (CASE 3).
 *     In cases 1-3 the pre-read bytes travel in r5 as a carry, every loop
 *     iteration stores two aligned words (8 bytes), and the carry is
 *     flushed after the loop before a final byte loop copies the tail.
 *
 * All loops are zero-overhead loops (lpnz); each one is skipped when the
 * flags set by the preceding ".f" instruction indicate zero.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2			; only sets the flags from the length
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0			; delay slot: do not clobber ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2		; delay slot: byte count for the small copy

	;; size > 8: copy single bytes until the destination is 32bit aligned
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4		; 4 - (dst & 3), flags left untouched
	lpnz	@.Laligndestination	; skipped when dst is already aligned
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to Dwords, unfold x4
	;; The lsr.f below is the delay slot of the branch above, so it runs
	;; on the unaligned paths too; they recompute flags and lp_count
	;; before relying on either.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; bytes left over after the unrolled loop
.Lsmallchunk:
	;; Entered either by fall-through (tail of CASE 0) or directly from
	;; the size <= 8 check; lp_count and the flags are valid in both cases.
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = source & 3, which is 1, 2 or 3 here
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1		; delay slot: runs on all three paths

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]		; delay slot: runs for off by 3 and off by 1

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2		; 3 bytes pre-read in total
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; Build the initial 3-byte carry in r5 from the byte and the halfword
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; up to 7 trailing bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1		; with the delay-slot sub: 2 bytes pre-read

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; Big endian only: move the carry into the upper half, but only when
	;; the loop will run; the matching lsr.nz after the loop undoes it
	;; under the same (unchanged) flags.
#ifdef __BIG_ENDIAN__
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]		; flush the 2-byte carry

	and.f	lp_count, r2, 0x07	; up to 7 trailing bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1 byte to achieve the 32bit alignment
	;; That byte was already loaded into r5, and r2 already decremented,
	;; by the two delay slots on the way here.

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; Big endian only: same conditional carry shift as in CASE 2
#ifdef __BIG_ENDIAN__
	asl.nz	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]		; flush the 1-byte carry

	and.f	lp_count, r2, 0x07	; up to 7 trailing bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)