patch-2.4.20 linux-2.4.20/arch/ia64/lib/clear_page.S

Next file: linux-2.4.20/arch/ia64/lib/copy_page.S
Previous file: linux-2.4.20/arch/ia64/lib/checksum.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.19/arch/ia64/lib/clear_page.S linux-2.4.20/arch/ia64/lib/clear_page.S
@@ -1,51 +1,77 @@
 /*
- *
- * Optimized function to clear a page of memory.
- *
- * Inputs:
- *	in0:	address of page
- *
- * Output:
- * 	none
- *
- * Copyright (C) 1999-2001 Hewlett-Packard Co
- * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
- * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999-2002 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
  *
  * 1/06/01 davidm	Tuned for Itanium.
+ * 2/12/02 kchen	Tuned for both Itanium and McKinley
+ * 3/08/02 davidm	Some more tweaking
  */
+#include <linux/config.h>
+
 #include <asm/asmmacro.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_ITANIUM
+# define L3_LINE_SIZE	64	// Itanium L3 line size in bytes; also the stride of the dst_fetch stores
+# define PREFETCH_LINES	9	// magic number: lines the .fetch loop stores ahead before the main loop starts
+#else
+# define L3_LINE_SIZE	128	// McKinley L3 line size in bytes; also the stride of the dst_fetch stores
+# define PREFETCH_LINES	12	// magic number: lines the .fetch loop stores ahead before the main loop starts
+#endif
+
 #define saved_lc	r2
-#define dst0		in0
+#define dst_fetch	r3	// store-ahead pointer; NOTE(review): also the base operand of an addl, which IA-64 limits to r0-r3 -- confirm before renumbering
 #define dst1		r8
 #define dst2		r9
 #define dst3		r10
-#define dst_fetch	r11
+#define dst4		r11	// starts at in0+64; stored through only in the McKinley (#else) loop
+
+#define dst_last	r31	// bound that dst_fetch is compared against to stop the store-ahead
 
 GLOBAL_ENTRY(clear_page)
 	.prologue
 	.regstk 1,0,0,0
-	mov r16 = PAGE_SIZE/64-1	// -1 = repeat/until
-	;;
+	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
 	.save ar.lc, saved_lc
 	mov saved_lc = ar.lc
+	// clear_page: in0 = address of the page to clear; ar.lc is saved in saved_lc and restored on exit
 	.body
+	mov ar.lc = (PREFETCH_LINES - 1)	// .fetch loop count, -1=repeat/until, so PREFETCH_LINES iterations
+	mov dst_fetch = in0	// store-ahead starts at the first byte of the page
+	adds dst1 = 16, in0	// dst1/dst2/dst3 start at offsets 16/32/48 and step by 64
+	adds dst2 = 32, in0
+	;;
+.fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE	// store f0 at dst_fetch, then advance it by one L3 line
+	adds dst3 = 48, in0		// executing this multiple times is harmless
+	br.cloop.sptk.few .fetch
+	;;
+	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch	// = in0 + PAGE_SIZE: dst_fetch is now in0 + PREFETCH_LINES*L3_LINE_SIZE
+	mov ar.lc = r16			// one L3 line per iteration
+	adds dst4 = 64, in0	// computed on both configs, but only the McKinley loop stores through it
+	;;
+#ifdef CONFIG_ITANIUM
+	// Optimized for Itanium
+1:	stf.spill.nta [dst1] = f0, 64	// offsets 16 and 32 of the current 64-byte line
+	stf.spill.nta [dst2] = f0, 64
+	cmp.lt p8,p0=dst_fetch, dst_last	// p8 = store-ahead pointer is still below in0 + PAGE_SIZE
+	;;
+#else
+	// Optimized for McKinley
+1:	stf.spill.nta [dst1] = f0, 64	// first 64 bytes of the 128-byte line: offsets 16, 32, 48
+	stf.spill.nta [dst2] = f0, 64
+	stf.spill.nta [dst3] = f0, 64
+	stf.spill.nta [dst4] = f0, 128	// offset 64; dst4 is used once per line, so it steps by 128
+	cmp.lt p8,p0=dst_fetch, dst_last	// p8 = store-ahead pointer is still below in0 + PAGE_SIZE
 	;;
-1:	stf.spill.nta [dst0] = f0, 64
 	stf.spill.nta [dst1] = f0, 64
 	stf.spill.nta [dst2] = f0, 64
+#endif
 	stf.spill.nta [dst3] = f0, 64
-
-	lfetch [dst_fetch], 64
-	br.cloop.dptk.few 1b
+(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE	// keep storing one line ahead; dst_fetch is the only pointer that writes offset 0 of a line
+	br.cloop.sptk.few 1b
 	;;
-	mov ar.lc = r2		// restore lc
+	mov ar.lc = saved_lc		// restore lc
 	br.ret.sptk.many rp
 END(clear_page)

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)