From 6f48c0ae7a188b335facfb5b4adcb85bc7959bcf Mon Sep 17 00:00:00 2001
From: Joern Rennecke
Date: Wed, 9 Apr 2003 10:48:02 +0000
Subject: [PATCH] * libc/machine/sh/memset.S: Avoid clobbering volatile
 objects following a tiny to-be-set array in the same quadword.

---
 newlib/ChangeLog                |  5 +++++
 newlib/libc/machine/sh/memset.S | 40 +++++++++++++++++++++++++--------
 2 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/newlib/ChangeLog b/newlib/ChangeLog
index 6efbd70eb..a2266a6d2 100644
--- a/newlib/ChangeLog
+++ b/newlib/ChangeLog
@@ -1,3 +1,8 @@
+2003-04-09 J"orn Rennecke
+
+	* libc/machine/sh/memset.S: Avoid clobbering volatile
+	objects following a tiny to-be-set array in the same quadword.
+
 2001-04-09 Corinna Vinschen
 
 	* libc/include/wchar.h: Add definitions for wcswidth and wcwidth.
diff --git a/newlib/libc/machine/sh/memset.S b/newlib/libc/machine/sh/memset.S
index ab71d9580..2b45aff36 100644
--- a/newlib/libc/machine/sh/memset.S
+++ b/newlib/libc/machine/sh/memset.S
@@ -12,9 +12,7 @@
 ENTRY(memset)
 #if __SHMEDIA__
 	pta/l multiquad, tr0
-	mshflo.b r3,r3,r3
 	ptabs r18, tr2
-	mperm.w r3, r63, r3 // Fill pattern now in every byte of r3
 
 	andi r2, -8, r25
 	add r2, r4, r5
@@ -23,16 +21,40 @@ ENTRY(memset)
 	cmveq r4, r25, r20
 	bne/u r25, r20, tr0 // multiquad
 
-	ldlo.q r2, 0, r7
-	shlli r4, 2, r4
-	movi -1, r8
-	SHHI r8, r4, r8
-	SHHI r8, r4, r8
-	mcmv r7, r8, r3
-	stlo.q r2, 0, r3
+! This sequence could clobber volatile objects that are in the same
+! quadword as a very short char array.
+!	ldlo.q r2, 0, r7
+!	shlli r4, 2, r4
+!	movi -1, r8
+!	SHHI r8, r4, r8
+!	SHHI r8, r4, r8
+!	mcmv r7, r8, r3
+!	stlo.q r2, 0, r3
+
+	pta/l setlongs, tr0
+	movi 4, r8
+	bgeu/u r4, r8, tr0
+	pta/l endset, tr0
+	beqi/u r4, 0, tr0
+	st.b r2, 0, r3
+	beqi/u r4, 1, tr0
+	nop
+	st.b r2, 1, r3
+	beqi/l r4, 2, tr0
+	st.b r2,2,r3
+endset: blink tr2, r63
+setlongs:
+	mshflo.b r3, r3, r3
+	mperm.w r3, r63, r3 // Fill pattern now in every byte of r3
+	stlo.l r2, 0, r3
+	nop
+	nop
+	sthi.l r5, -1, r3
 	blink tr2, r63
 
 multiquad:
+	mshflo.b r3, r3, r3
+	mperm.w r3, r63, r3 // Fill pattern now in every byte of r3
 	pta/l lastquad, tr0
 	stlo.q r2, 0, r3
 	sub r20, r25, r24