/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>

/*
 * Endian-dependent helpers used by the unaligned-source paths of memcpy.
 * Each one expands to a single shift/mask instruction (or to nothing).
 *
 * SHIFT_1 / SHIFT_2   - applied to every source word loaded in the copy loop.
 *                       SHIFT_1 yields the bytes that are OR-ed with the
 *                       carry register to form the word being stored;
 *                       SHIFT_2 yields the new carry for the next word.
 * MERGE_1 / MERGE_2   - applied before the off-by-1 loop to the halfword
 *                       (MERGE_1) and the byte (MERGE_2) read ahead of it,
 *                       so that both can be OR-ed into one carry register.
 * EXTRACT_1/EXTRACT_2 - applied after the off-by-1 loop to get the final
 *                       halfword and the final byte out of the carry register.
 *
 * Note: the little-endian MERGE_2 is intentionally empty, and the big-endian
 * EXTRACT_2 ignores IMM and always shifts by 8.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Transfer unit of the aligned copy loop in memcpy, which is unrolled x4.
 *
 * LOADX / STOREX - load/store one unit with post-increment of the pointer:
 *                  8 bytes (ldd/std) when CONFIG_ARC_HAS_LL64 is set,
 *                  4 bytes (ld/st) otherwise.
 * ZOLSHFT        - log2 of the bytes moved per loop iteration
 *                  (4 * 8 = 32 -> 5, 4 * 4 = 16 -> 4); the byte count is
 *                  shifted right by this to get the loop count.
 * ZOLAND         - matching mask (bytes per iteration - 1) that selects the
 *                  bytes left over for the byte-wise tail loop.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * In:   r0 = dst, r1 = src, r2 = n
 * Out:  r0 = dst (r0 is never written; r3 is the running store pointer)
 * Uses: r1-r10, lp_count and the status flags.  When LOADX/STOREX are the
 *       64-bit ldd/std forms each operand names a register pair, so r11
 *       is used as well.
 *
 * Outline:
 *   1. n == 0 returns at once; n <= 8 is copied one byte at a time.
 *   2. Otherwise single bytes are copied until dst is 32bit aligned.
 *   3. The code then dispatches on (src & 3):
 *        0     - CASE 0: unrolled LOADX/STOREX loop plus a byte-wise tail.
 *        1,2,3 - CASE 1/2/3: the 3/2/1 bytes in front of the next aligned
 *                source word are read into the carry register r5.  The
 *                loop then loads aligned source words, combines each one
 *                with the carry (SHIFT_1/SHIFT_2) and stores aligned words
 *                to dst.  After the loop the carry is flushed and the
 *                remaining (n & 7) bytes are copied one by one.
 *
 * All loops are zero-overhead loops set up with lpnz: when Z is set the
 * loop body is skipped entirely.  Instructions following a ".d" branch or
 * jump sit in its delay slot and execute whether or not it is taken.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2			; only sets the flags: Z when n == 0
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0			; (delay slot) do not clobber ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2		; (delay slot) byte count for .Lsmallchunk

;;; Copy single bytes until the destination is 32bit aligned
	and.f	r4, r0, 0x03		; Z set: destination already aligned
	rsub	lp_count, r4, 4		; lp_count = 4 - (dst & 3)
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to Dwords, unfold x4
	;; The lsr.f below is also the delay slot of the bnz.d above; the
	;; unaligned paths set the flags and lp_count again before using them.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; bytes left over by the unrolled loop
.Lsmallchunk:
	;; Entered from above, or directly with lp_count = n when n <= 8
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = src & 3 (1, 2 or 3).  The two delay slots below run on the
	;; way to more than one case:
	;;   sub r2     - executed for all three cases
	;;   ldb.ab r5  - executed for CASE 1 and CASE 3 (beq not taken)
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2		; 3 bytes are now held in registers
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; Build the 3 byte carry in r5 (these do not touch the flags)
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1 (r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2 (r5, r6, 8)

	SHIFT_1 (r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2 (r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; Last 8bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	;; Read one halfword to reach 32bit alignment; r5 is the carry
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1		; 2 bytes in total with the delay slot sub

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only move the carry into the upper half when the loop will run
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1 (r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2 (r5, r6, 16)

	SHIFT_1 (r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2 (r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	;; Flags still come from the lsr.f above: the loop body sets none
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]		; flush the 2 byte carry

	and.f	lp_count, r2, 0x07	; Last 8bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1 byte to achieve the 32bit alignment
	;; That byte is already in r5 and already subtracted from r2
	;; (both done in the delay slots after .Lsourceunaligned).

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only move the carry into the top byte when the loop will run
	asl.nz	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1 (r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2 (r5, r6, 24)

	SHIFT_1 (r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2 (r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	;; Flags still come from the lsr.f above: the loop body sets none
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]		; flush the 1 byte carry

	and.f	lp_count, r2, 0x07	; Last 8bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)