Skip to content

LoongArch64: fixed cscal and zscal #5078

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions kernel/loongarch64/cscal_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
CMPEQ $fcc1, ALPHAI, a1
bge $r0, I, .L19
/////// INCX == 1 && N >= 4 ////////
bnez DUMMY2, .L17 // if DUMMPY2 == 1, called from c/zscal.
bnez DUMMY2, .L17 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L17

Expand Down Expand Up @@ -146,6 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi.d I, I, -1
blt $r0, I, .L17
b .L19

.align 3

/////// INCX == 1 && N < 8 ///////
Expand All @@ -156,7 +157,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
andi I, N, 7
#endif
beqz I, .L999
bnez DUMMY2, .L998 // if DUMMPY2 == 1, called from c/zscal.
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L998

Expand All @@ -171,7 +172,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
CMPEQ $fcc1, ALPHAI, a1
move XX, X
bge $r0, I, .L29
bnez DUMMY2, .L25 // if DUMMPY2 == 1, called from c/zscal.
bnez DUMMY2, .L25 // if DUMMY2 == 1, called from c/zscal.
bceqz $fcc0, .L25

bceqz $fcc1, .L25
Expand Down Expand Up @@ -341,7 +342,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
andi I, N, 7
#endif
beqz I, .L999
bnez DUMMY2, .L998 // if DUMMPY2 == 1, called from c/zscal.
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L998

Expand Down
218 changes: 58 additions & 160 deletions kernel/loongarch64/cscal_lsx.S
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHAI $f1
#define X $r7
#define INCX $r8
#define DUMMY2 $r9

#define I $r12
#define TEMP $r13
Expand Down Expand Up @@ -65,6 +66,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

bge $r0, N, .L999
bge $r0, INCX, .L999
ld.d DUMMY2, $sp, 0
li.d TEMP, 1
movgr2fr.d a1, $r0
FFINT a1, a1
Expand All @@ -84,24 +86,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
srai.d I, N, 2
bne INCX, TEMP, .L22

/////// INCX == 1 ////////
.L11:
bge $r0, I, .L997
CMPEQ $fcc0, ALPHAR, a1
CMPEQ $fcc1, ALPHAI, a1
bceqz $fcc0, .L13
b .L14
.align 3
bge $r0, I, .L19

.L13:
bceqz $fcc1, .L114 //alpha_r != 0.0 && alpha_i != 0.0
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
/////// INCX == 1 && N >= 4 ////////
bnez DUMMY2, .L17 // if DUMMY2 == 1, called from c/zscal.

.L14:
bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
.align 3
bceqz $fcc0, .L17

.L111: //alpha_r == 0.0 && alpha_i == 0.0
bceqz $fcc1, .L17

.L15: //alpha_r == 0.0 && alpha_i == 0.0
vst VXZ, X, 0 * SIZE
#ifdef DOUBLE
vst VXZ, X, 2 * SIZE
Expand All @@ -112,50 +110,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
addi.d X, X, 8 * SIZE
addi.d I, I, -1
blt $r0, I, .L111
b .L997
.align 3

.L113: //alpha_r != 0.0 && alpha_i == 0.0
vld VX0, X, 0 * SIZE
#ifdef DOUBLE
vld VX1, X, 2 * SIZE
vpickev.d x1, VX1, VX0
vpickod.d x2, VX1, VX0
vfmul.d x3, VXAR, x1
vfmul.d x4, VXAR, x2
vilvl.d VX2, x4 ,x3
vilvh.d VX3, x4, x3
vst VX2, X, 0 * SIZE
vst VX3, X, 2 * SIZE
vld VX0, X, 4 * SIZE
vld VX1, X, 6 * SIZE
vpickev.d x1, VX1, VX0
vpickod.d x2, VX1, VX0
vfmul.d x3, VXAR, x1
vfmul.d x4, VXAR, x2
vilvl.d VX2, x4 ,x3
vilvh.d VX3, x4, x3
vst VX2, X, 4 * SIZE
vst VX3, X, 6 * SIZE
#else
vld VX1, X, 4 * SIZE
vpickev.w x1, VX1, VX0
vpickod.w x2, VX1, VX0
vfmul.s x3, VXAR, x1
vfmul.s x4, VXAR, x2
vilvl.w VX2, x4 ,x3
vilvh.w VX3, x4, x3
vst VX2, X, 0 * SIZE
vst VX3, X, 4 * SIZE
#endif
addi.d X, X, 8 * SIZE
addi.d I, I, -1
blt $r0, I, .L113
b .L997
blt $r0, I, .L15
b .L19
.align 3

.L114: //alpha_r != 0.0 && alpha_i != 0.0
.L17:
vld VX0, X, 0 * SIZE
#ifdef DOUBLE
vld VX1, X, 2 * SIZE
Expand Down Expand Up @@ -196,29 +155,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
addi.d X, X, 8 * SIZE
addi.d I, I, -1
blt $r0, I, .L114
b .L997
blt $r0, I, .L17
b .L19
.align 3

/////// INCX == 1 && N < 4 (tail: N & 3 elements) ///////
.L19:
andi I, N, 3
beqz I, .L999
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L998

bceqz $fcc1, .L998

b .L995 // alpha_r == 0.0 && alpha_i == 0.0

/////// INCX != 1 ////////
.L22:
bge $r0, I, .L997
move XX, X
CMPEQ $fcc0, ALPHAR, a1
CMPEQ $fcc1, ALPHAI, a1
bceqz $fcc0, .L23
b .L24
.align 3
move XX, X
bge $r0, I, .L29
bnez DUMMY2, .L25 // if DUMMY2 == 1, called from c/zscal.

.L23:
bceqz $fcc1, .L224 //alpha_r != 0.0 && alpha_i != 0.0
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
bceqz $fcc0, .L25

.L24:
bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
.align 3
bceqz $fcc1, .L25

.L221: //alpha_r == 0.0 && alpha_i == 0.0
.L27: //alpha_r == 0.0 && alpha_i == 0.0
#ifdef DOUBLE
vstelm.d VXZ, X, 0, 0
vstelm.d VXZ, X, 1 * SIZE, 0
Expand Down Expand Up @@ -246,92 +211,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
add.d X, X, INCX
addi.d I, I, -1
blt $r0, I, .L221
b .L997
blt $r0, I, .L27
b .L29
.align 3

.L223: //alpha_r != 0.0 && alpha_i == 0.0
#ifdef DOUBLE
ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
add.d X, X, INCX
ld.d t3, X, 0 * SIZE
ld.d t4, X, 1 * SIZE
add.d X, X, INCX
vinsgr2vr.d x1, t1, 0
vinsgr2vr.d x2, t2, 0
vinsgr2vr.d x1, t3, 1
vinsgr2vr.d x2, t4, 1
vfmul.d x3, VXAR, x1
vfmul.d x4, VXAR, x2
vstelm.d x3, XX, 0 * SIZE, 0
vstelm.d x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
vstelm.d x3, XX, 0 * SIZE, 1
vstelm.d x4, XX, 1 * SIZE, 1
add.d XX, XX, INCX

ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
add.d X, X, INCX
ld.d t3, X, 0 * SIZE
ld.d t4, X, 1 * SIZE
vinsgr2vr.d x1, t1, 0
vinsgr2vr.d x2, t2, 0
vinsgr2vr.d x1, t3, 1
vinsgr2vr.d x2, t4, 1
add.d X, X, INCX
vfmul.d x3, VXAR, x1
vfmul.d x4, VXAR, x2
addi.d I, I, -1
vstelm.d x3, XX, 0 * SIZE, 0
vstelm.d x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
vstelm.d x3, XX, 0 * SIZE, 1
vstelm.d x4, XX, 1 * SIZE, 1
#else
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
add.d X, X, INCX
vinsgr2vr.w x1, t1, 0
vinsgr2vr.w x2, t2, 0
vinsgr2vr.w x1, t3, 1
vinsgr2vr.w x2, t4, 1
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
vinsgr2vr.w x1, t1, 2
vinsgr2vr.w x2, t2, 2
vinsgr2vr.w x1, t3, 3
vinsgr2vr.w x2, t4, 3
add.d X, X, INCX

vfmul.s x3, VXAR, x1
vfmul.s x4, VXAR, x2
addi.d I, I, -1
vstelm.w x3, XX, 0 * SIZE, 0
vstelm.w x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
vstelm.w x3, XX, 0 * SIZE, 1
vstelm.w x4, XX, 1 * SIZE, 1
add.d XX, XX, INCX
vstelm.w x3, XX, 0 * SIZE, 2
vstelm.w x4, XX, 1 * SIZE, 2
add.d XX, XX, INCX
vstelm.w x3, XX, 0 * SIZE, 3
vstelm.w x4, XX, 1 * SIZE, 3
#endif
add.d XX, XX, INCX
blt $r0, I, .L223
b .L997
.align 3

.L224: //alpha_r != 0.0 && alpha_i != 0.0
.L25:
#ifdef DOUBLE
ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
Expand Down Expand Up @@ -414,15 +298,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstelm.w x4, XX, 1 * SIZE, 3
#endif
add.d XX, XX, INCX
blt $r0, I, .L224
b .L997
blt $r0, I, .L25
b .L29
.align 3

.L997:
andi I, N, 3
bge $r0, I, .L999
.align 3
/////// INCX != 1 && N < 4 (tail: N & 3 elements) ///////
.L29:
andi I, N, 3
beqz I, .L999
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L998

bceqz $fcc1, .L998

b .L995 // alpha_r == 0.0 && alpha_i == 0.0

.L995: // alpha_r == 0.0 && alpha_i == 0.0
ST a1, X, 0 * SIZE
ST a1, X, 1 * SIZE
addi.d I, I, -1
add.d X, X, INCX
blt $r0, I, .L995
b .L999
.L998:
LD a1, X, 0 * SIZE
LD a2, X, 1 * SIZE
Expand All @@ -435,7 +333,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ST s2, X, 1 * SIZE
add.d X, X, INCX
blt $r0, I, .L998
.align 3
b .L999

.L999:
move $r4, $r12
Expand Down
3 changes: 3 additions & 0 deletions kernel/loongarch64/zscal.S
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PROLOGUE

li.d TEMP, 2 * SIZE
ld.d XX, $sp, 0 // Load dummy2
slli.d XX, XX, ZBASE_SHIFT
MTC a1, $r0
slli.d INCX, INCX, ZBASE_SHIFT
bge $r0, N, .L999
CMPEQ $fcc0, ALPHA_R, a1
CMPEQ $fcc1, ALPHA_I, a1
bceqz $fcc0, .L50
bceqz $fcc1, .L50
beq XX, TEMP, .L50 // if dummy2 == 1, do not directly copy 0
srai.d I, N, 2
bne INCX, TEMP, .L20
bge $r0, I, .L15
Expand Down
Loading