patch-2.4.19 linux-2.4.19/include/asm-ia64/sn/bte_copy.h


diff -urN linux-2.4.18/include/asm-ia64/sn/bte_copy.h linux-2.4.19/include/asm-ia64/sn/bte_copy.h
@@ -0,0 +1,311 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * 
+ * Copyright (c) 2001-2002 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_BTE_COPY_H
+#define _ASM_IA64_SN_BTE_COPY_H
+
+#ident "$Revision: 1.1 $"
+
+#include <asm/sn/bte.h>
+#include <asm/sn/sgi.h>
+#include <asm/sn/pda.h>
+#include <asm/delay.h>
+
+/*
+ * BTE_LOCKING support - With CONFIG_IA64_SGI_BTE_LOCKING defined
+ * (the default below), spin locks and busy checks are placed
+ * around the initiation of a BTE transfer, and multiple BTEs per
+ * cpu are supported.  Undefining it adapts the bte_copy code to
+ * one BTE per cpu in synchronous mode: even if bte_copy is called
+ * with a notify address, the BTE will spin and wait for the
+ * transfer to complete.
+ */
+#define CONFIG_IA64_SGI_BTE_LOCKING 1
+
+/*
+ * Some macros to simplify reading.
+ *
+ * Start with macros to locate the BTE control registers.
+ */
+
+#define BTEREG_LNSTAT_ADDR (bte->bte_base_addr)
+#define BTEREG_SOURCE_ADDR (bte->bte_base_addr + IIO_IBSA0 - IIO_IBLS0)
+#define BTEREG_DEST_ADDR (bte->bte_base_addr + IIO_IBDA0 - IIO_IBLS0)
+#define BTEREG_CTRL_ADDR ((volatile char *)bte->bte_base_addr + IIO_IBCT0 - IIO_IBLS0)
+#define BTEREG_NOTIF_ADDR (bte->bte_base_addr + IIO_IBNA0 - IIO_IBLS0)
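+
+/*
+ * bte_base_addr points at the engine's IBLS (length/status)
+ * register, so adding the delta between each IIO_IB* register and
+ * IIO_IBLS0 locates the same register of whichever BTE is in use.
+ */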
+
+/* Macros to force the IBCT0 value to be valid. */
+
+#define BTE_VALID_MODES BTE_NOTIFY
+#define BTE_VLD_MODE(x) ((x) & BTE_VALID_MODES)
+
+// #define DEBUG_BTE
+// #define DEBUG_BTE_VERBOSE
+// #define DEBUG_TIME_BTE
+
+#ifdef DEBUG_BTE
+#  define DPRINTK(x) printk x	// Terse
+#  ifdef DEBUG_BTE_VERBOSE
+#    define DPRINTKV(x) printk x	// Verbose
+#  else
+#    define DPRINTKV(x)
+#  endif
+#else
+#  define DPRINTK(x)
+#  define DPRINTKV(x)
+#endif
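+
+/*
+ * Note the double-parenthesis calling convention, e.g.
+ * DPRINTK(("status 0x%lx\n", status)): the extra parentheses let a
+ * full printk argument list pass through the single macro argument.
+ */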
+
+#ifdef DEBUG_TIME_BTE
+extern u64 BteSetupTime;
+extern u64 BteTransferTime;
+extern u64 BteTeardownTime;
+extern u64 BteExecuteTime;
+#endif
+
+/*
+ * bte_copy(src, dest, len, mode, notification)
+ *
+ * use the block transfer engine to move kernel
+ * memory from src to dest using the assigned mode.
+ *
+ * Parameters:
+ *   src - physical address of the transfer source.
+ *   dest - physical address of the transfer destination.
+ *   len - number of bytes to transfer from source to dest.
+ *   mode - hardware defined.  See the reference information
+ *          for IBCT0/1 in the SHUB Programmer's Reference.
+ *   notification - kernel virtual address of the notification cache
+ *                  line.  If NULL, the default is used and
+ *                  the bte_copy is synchronous.
+ *
+ * NOTE:  This function requires src, dest, and len to
+ * be cache line aligned.
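+ *
+ * A minimal usage sketch (hypothetical buffers, both assumed to be
+ * cache line aligned):
+ *
+ *	// Synchronous: bte_copy spins until the engine finishes.
+ *	if (bte_copy(__pa(src_buf), __pa(dst_buf), nbytes,
+ *		     BTE_NOTIFY, NULL) != BTE_SUCCESS)
+ *		... recover ...
+ *
+ *	// Asynchronous: supply a cache-aligned notification line
+ *	// and poll its busy bit yourself.
+ *	bte_copy(__pa(src_buf), __pa(dst_buf), nbytes,
+ *		 BTE_NOTIFY, &notify_line);
+ *	while (notify_line & IBLS_BUSY)
+ *		... do other work ...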
+ */
+extern __inline__ bte_result_t
+bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+{
+#ifdef CONFIG_IA64_SGI_BTE_LOCKING
+	int bte_to_use;
+#endif
+
+#ifdef DEBUG_TIME_BTE
+	u64 invokeTime = 0;
+	u64 completeTime = 0;
+	u64 xferStartTime = 0;
+	u64 xferCompleteTime = 0;
+#endif
+	u64 transferSize;
+	bteinfo_t *bte;
+
+#ifdef DEBUG_TIME_BTE
+	invokeTime = ia64_get_itc();
+#endif
+
+	DPRINTK(("bte_copy (0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+		 src, dest, len, mode, notification));
+
+	if (len == 0) {
+		return (BTE_SUCCESS);
+	}
+
+	ASSERT(!((len & L1_CACHE_MASK) ||
+		 (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)));
+
+	ASSERT(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT));
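+	/* i.e., the length, expressed in cache lines, must fit in
+	 * the IBLS length field (BTE_LEN_MASK). */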
+
+#ifdef CONFIG_IA64_SGI_BTE_LOCKING
+	do {
+		bte_to_use = 0;
+
+		/*
+		 * Attempt to lock one of the BTE interfaces.  Check
+		 * the array bound first so that we never dereference
+		 * pda.cpubte[BTES_PER_NODE].
+		 */
+		while ((bte_to_use < BTES_PER_NODE) &&
+		       (*pda.cpubte[bte_to_use]->
+			mostRecentNotification & IBLS_BUSY) &&
+		       (!(spin_trylock
+			  (&(pda.cpubte[bte_to_use]->spinlock))))) {
+			bte_to_use++;
+		}
+
+		if ((bte_to_use >= BTES_PER_NODE) &&
+		    !(mode & BTE_WACQUIRE)) {
+			return (BTEFAIL_NOTAVAIL);
+		}
+
+		/* With BTE_WACQUIRE, loop until a bte is available. */
+	} while (bte_to_use >= BTES_PER_NODE);
+
+	bte = pda.cpubte[bte_to_use];
+	DPRINTKV(("Got a lock on bte %d\n", bte_to_use));
+#else
+	/* Assuming one BTE per CPU. */
+	bte = pda.cpubte[0];
+#endif
+
+	/*
+	 * The following are removed for optimization but are
+	 * available in the event that the SHUB exhibits
+	 * notification problems similar to the hub, bedrock et al.
+	 *
+	 * bte->mostRecentSrc = src;
+	 * bte->mostRecentDest = dest;
+	 * bte->mostRecentLen = len;
+	 * bte->mostRecentMode = mode;
+	 */
+	if (notification == NULL) {
+		/* User does not want to be notified. */
+		bte->mostRecentNotification = &bte->notify;
+	} else {
+		bte->mostRecentNotification = notification;
+	}
+
+	/* Calculate the number of cache lines to transfer. */
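+	/*
+	 * E.g., with 128-byte L1 cache lines (L1_CACHE_SHIFT == 7),
+	 * a 64KB request becomes 0x10000 >> 7 == 512 lines, and
+	 * BTE_LEN_MASK bounds the count to the width of the IBLS
+	 * length field.
+	 */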
+	transferSize = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);
+
+	DPRINTKV(("Calculated transfer size of %d cache lines\n",
+		  transferSize));
+
+	/* Initialize the notification to a known value. */
+	*bte->mostRecentNotification = -1L;
+
+	DPRINTKV(("Before, status is 0x%lx and notify is 0x%lx\n",
+		  HUB_L(BTEREG_LNSTAT_ADDR),
+		  *bte->mostRecentNotification));
+
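+	/*
+	 * Program the engine: length/status, then source, destination,
+	 * and notification address.  The write to the control register
+	 * below is what actually starts the transfer.
+	 */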
+	/* Set the status reg busy bit and transfer length */
+	DPRINTKV(("IBLS - HUB_S(0x%lx, 0x%lx)\n",
+		  BTEREG_LNSTAT_ADDR, IBLS_BUSY | transferSize));
+	HUB_S(BTEREG_LNSTAT_ADDR, IBLS_BUSY | transferSize);
+
+	DPRINTKV(("After setting status, status is 0x%lx and notify is 0x%lx\n", HUB_L(BTEREG_LNSTAT_ADDR), *bte->mostRecentNotification));
+
+	/* Set the source and destination registers */
+	DPRINTKV(("IBSA - HUB_S(0x%lx, 0x%lx)\n", BTEREG_SOURCE_ADDR,
+		  src));
+	HUB_S(BTEREG_SOURCE_ADDR, src);
+	DPRINTKV(("IBDA - HUB_S(0x%lx, 0x%lx)\n", BTEREG_DEST_ADDR, dest));
+	HUB_S(BTEREG_DEST_ADDR, dest);
+
+	/* Set the notification register */
+	DPRINTKV(("IBNA - HUB_S(0x%lx, 0x%lx)\n", BTEREG_NOTIF_ADDR,
+		  __pa(bte->mostRecentNotification)));
+	HUB_S(BTEREG_NOTIF_ADDR, (__pa(bte->mostRecentNotification)));
+
+	DPRINTKV(("Set Notify, status is 0x%lx and notify is 0x%lx\n",
+		  HUB_L(BTEREG_LNSTAT_ADDR),
+		  *bte->mostRecentNotification));
+
+	/* Initiate the transfer */
+	DPRINTKV(("IBCT - HUB_S(0x%lx, 0x%lx)\n", BTEREG_CTRL_ADDR, mode));
+#ifdef DEBUG_TIME_BTE
+	xferStartTime = ia64_get_itc();
+#endif
+	HUB_S(BTEREG_CTRL_ADDR, BTE_VLD_MODE(mode));
+
+	DPRINTKV(("Initiated, status is 0x%lx and notify is 0x%lx\n",
+		  HUB_L(BTEREG_LNSTAT_ADDR),
+		  *bte->mostRecentNotification));
+
+	// >>> Temporarily work around not getting a notification
+	// from medusa.
+	// *bte->mostRecentNotification = HUB_L(bte->bte_base_addr);
+
+	if (notification == NULL) {
+		/*
+		 * Calculate our timeout.
+		 *
+		 * What are we doing here?  We are trying to determine
+		 * the fastest time the BTE could have transferred our
+		 * block of data.  Taking the clock frequency (ticks/sec)
+		 * divided by the BTE maximum transfer rate
+		 * (BTE_MAXT_LINES_PER_SECOND, in lines/sec), times the
+		 * transfer size (lines), gives a tick offset from the
+		 * current time at which the transfer should complete.
+		 *
+		 * Why do this?  We are watching for a notification
+		 * failure from the BTE.  This behavior has been seen
+		 * on SN0 and SN1 hardware in rare circumstances and is
+		 * expected on SN2.  By checking at the ideal transfer
+		 * timeout, we minimize the delay between the hardware
+		 * completing our request and our detecting the failure.
+		 */
+		bte->idealTransferTimeout = jiffies +
+		    (HZ / BTE_MAXT_LINES_PER_SECOND * transferSize);
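+		/*
+		 * Note: this arithmetic assumes HZ >=
+		 * BTE_MAXT_LINES_PER_SECOND; if the line rate
+		 * exceeded HZ, the integer division would truncate
+		 * the per-line tick cost to zero and the timeout
+		 * would degenerate to the current jiffies value.
+		 */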
+
+		while ((IBLS_BUSY & bte->notify)) {
+			/*
+			 * Notification Workaround: When the max
+			 * theoretical time has elapsed, read the hub
+			 * status register into the notification area.
+			 * This fakes the shub performing the copy.
+			 */
+			if (jiffies > bte->idealTransferTimeout) {
+				bte->notify = HUB_L(bte->bte_base_addr);
+				bte->idealTransferTimeoutReached++;
+				bte->idealTransferTimeout = jiffies +
+				    (HZ / BTE_MAXT_LINES_PER_SECOND *
+				     (bte->notify & BTE_LEN_MASK));
+			}
+		}
+#ifdef DEBUG_TIME_BTE
+		xferCompleteTime = ia64_get_itc();
+#endif
+		if (bte->notify & IBLS_ERROR) {
+			/* >>> Need to do real error checking. */
+			transferSize = 0;
+
+#ifdef CONFIG_IA64_SGI_BTE_LOCKING
+			spin_unlock(&(bte->spinlock));
+#endif
+			return (BTEFAIL_ERROR);
+		}
+
+	}
+#ifdef CONFIG_IA64_SGI_BTE_LOCKING
+	spin_unlock(&(bte->spinlock));
+#endif
+#ifdef DEBUG_TIME_BTE
+	completeTime = ia64_get_itc();
+
+	BteSetupTime = xferStartTime - invokeTime;
+	BteTransferTime = xferCompleteTime - xferStartTime;
+	BteTeardownTime = completeTime - xferCompleteTime;
+	BteExecuteTime = completeTime - invokeTime;
+#endif
+	return (BTE_SUCCESS);
+}
+
+/*
+ * bte_unaligned_copy() is declared here and defined out of line.
+ */
+extern bte_result_t bte_unaligned_copy(u64, u64, u64, u64, char *);
+
+/*
+ * The following is the preferred way of calling bte_unaligned_copy.
+ * If the copy is fully cache line aligned, then bte_copy is used
+ * instead.  Since bte_copy is inlined, this avoids the overhead of
+ * a function call.  NOTE: bte_copy is called synchronously and
+ * blocks until the transfer is complete.  To get the asynchronous
+ * version of bte_copy, you must perform this check yourself.
+ */
+#define BTE_UNALIGNED_COPY(src, dest, len, mode, bteBlock) \
+	do { \
+		if (((len) & L1_CACHE_MASK) || \
+		    ((src) & L1_CACHE_MASK) || \
+		    ((dest) & L1_CACHE_MASK)) { \
+			bte_unaligned_copy((src), (dest), (len), \
+					   (mode), (bteBlock)); \
+		} else { \
+			bte_copy((src), (dest), (len), (mode), NULL); \
+		} \
+	} while (0)
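+
+/*
+ * Usage sketch (hypothetical names; bteBlock is scratch space that
+ * bte_unaligned_copy can use as a cache-line-aligned bounce buffer):
+ *
+ *	BTE_UNALIGNED_COPY(__pa(src_buf), __pa(dst_buf), nbytes,
+ *			   BTE_NOTIFY, scratch_block);
+ */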
+
+#endif				/* _ASM_IA64_SN_BTE_COPY_H */
