patch-2.4.11-dontuse linux/arch/i386/kernel/smpboot.c
Next file: linux/arch/i386/kernel/trampoline.S
Previous file: linux/arch/i386/kernel/smp.c
Back to the patch index
Back to the overall index
- Lines: 553
- Date:
Thu Oct 4 18:42:54 2001
- Orig file:
v2.4.10/linux/arch/i386/kernel/smpboot.c
- Orig date:
Sun Sep 23 11:40:55 2001
diff -u --recursive --new-file v2.4.10/linux/arch/i386/kernel/smpboot.c linux/arch/i386/kernel/smpboot.c
@@ -29,6 +29,7 @@
* Ingo Molnar : various cleanups and rewrites
* Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
* Maciej W. Rozycki : Bits for genuine 82489DX APICs
+ * Martin J. Bligh : Added support for multi-quad systems
*/
#include <linux/config.h>
@@ -44,6 +45,7 @@
#include <linux/mc146818rtc.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
+#include <asm/smpboot.h>
/* Set if we find a B stepping CPU */
static int smp_b_stepping;
@@ -57,11 +59,6 @@
/* Bitmask of currently online CPUs */
unsigned long cpu_online_map;
-/* which CPU (physical APIC ID) maps to which logical CPU number */
-volatile int x86_apicid_to_cpu[NR_CPUS];
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-volatile int x86_cpu_to_apicid[NR_CPUS];
-
static volatile unsigned long cpu_callin_map;
static volatile unsigned long cpu_callout_map;
@@ -357,7 +354,8 @@
* our local APIC. We have to wait for the IPI or we'll
* lock up on an APIC access.
*/
- while (!atomic_read(&init_deasserted));
+ if (!clustered_apic_mode)
+ while (!atomic_read(&init_deasserted));
/*
* (This works even if the APIC is not enabled.)
@@ -406,9 +404,15 @@
*/
Dprintk("CALLIN, before setup_local_APIC().\n");
+ /*
+ * Because we use NMIs rather than the INIT-STARTUP sequence to
+ * bootstrap the CPUs, the APIC may be in a weird state. Kick it.
+ */
+ if (clustered_apic_mode)
+ clear_local_APIC();
setup_local_APIC();
- sti();
+ __sti();
#ifdef CONFIG_MTRR
/*
@@ -501,6 +505,61 @@
return do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0);
}
+/* which physical APIC ID maps to which logical CPU number */
+volatile int physical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which physical APIC ID */
+volatile int cpu_2_physical_apicid[NR_CPUS];
+
+/* which logical APIC ID maps to which logical CPU number */
+volatile int logical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which logical APIC ID */
+volatile int cpu_2_logical_apicid[NR_CPUS];
+
+static inline void init_cpu_to_apicid(void)
+/* Initialize all maps between cpu number and apicids */
+{
+ int apicid, cpu;
+
+ for (apicid = 0; apicid < MAX_APICID; apicid++) {
+ physical_apicid_2_cpu[apicid] = -1;
+ logical_apicid_2_cpu[apicid] = -1;
+ }
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ cpu_2_physical_apicid[cpu] = -1;
+ cpu_2_logical_apicid[cpu] = -1;
+ }
+}
+
+static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
+/*
+ * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+ if (clustered_apic_mode) {
+ logical_apicid_2_cpu[apicid] = cpu;
+ cpu_2_logical_apicid[cpu] = apicid;
+ } else {
+ physical_apicid_2_cpu[apicid] = cpu;
+ cpu_2_physical_apicid[cpu] = apicid;
+ }
+}
+
+static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
+/*
+ * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+ if (clustered_apic_mode) {
+ logical_apicid_2_cpu[apicid] = -1;
+ cpu_2_logical_apicid[cpu] = -1;
+ } else {
+ physical_apicid_2_cpu[apicid] = -1;
+ cpu_2_physical_apicid[cpu] = -1;
+ }
+}
+
#if APIC_DEBUG
static inline void inquire_remote_apic(int apicid)
{
@@ -539,89 +598,65 @@
}
#endif
-static void __init do_boot_cpu (int apicid)
+static int wakeup_secondary_via_NMI(int logical_apicid)
+/*
+ * Poke the other CPU in the eye to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc later.
+ */
{
- struct task_struct *idle;
- unsigned long send_status, accept_status, boot_status, maxlvt;
- int timeout, num_starts, j, cpu;
- unsigned long start_eip;
-
- cpu = ++cpucount;
- /*
- * We can't use kernel_thread since we must avoid to
- * reschedule the child.
- */
- if (fork_by_hand() < 0)
- panic("failed fork for CPU %d", cpu);
-
- /*
- * We remove it from the pidhash and the runqueue
- * once we got the process:
- */
- idle = init_task.prev_task;
- if (!idle)
- panic("No idle process for CPU %d", cpu);
-
- idle->processor = cpu;
- x86_cpu_to_apicid[cpu] = apicid;
- x86_apicid_to_cpu[apicid] = cpu;
- idle->has_cpu = 1; /* we schedule the first task manually */
- idle->thread.eip = (unsigned long) start_secondary;
+ unsigned long send_status = 0, accept_status = 0;
+ int timeout, maxlvt;
- del_from_runqueue(idle);
- unhash_process(idle);
- init_tasks[cpu] = idle;
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
- /* start_eip had better be page-aligned! */
- start_eip = setup_trampoline();
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
- /* So we see what's up */
- printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
/*
- * This grunge runs the startup process for
- * the targeted processor.
+ * Give the other CPU some time to accept the IPI.
*/
-
- atomic_set(&init_deasserted, 0);
-
- Dprintk("Setting warm reset code and vector.\n");
-
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
- Dprintk("3.\n");
-
+ udelay(200);
/*
- * Be paranoid about clearing APIC errors.
+ * Due to the Pentium erratum 3AP.
*/
- if (APIC_INTEGRATED(apic_version[apicid])) {
+ maxlvt = get_maxlvt();
+ if (maxlvt > 3) {
apic_read_around(APIC_SPIV);
apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
}
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ Dprintk("NMI sent.\n");
- /*
- * Status is now clean
- */
- send_status = 0;
- accept_status = 0;
- boot_status = 0;
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
- /*
- * Starting actual IPI sequence...
- */
+ return (send_status | accept_status);
+}
+
+static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
Dprintk("Asserting INIT.\n");
/*
* Turn INIT on target chip
*/
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/*
* Send IPI
@@ -642,7 +677,7 @@
Dprintk("Deasserting INIT.\n");
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Send IPI */
apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
@@ -661,10 +696,9 @@
* Should we send STARTUP IPIs ?
*
* Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't
- * send the STARTUP IPIs.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
*/
- if (APIC_INTEGRATED(apic_version[apicid]))
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
num_starts = 2;
else
num_starts = 0;
@@ -688,7 +722,7 @@
*/
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Boot on the stack */
/* Kick the second */
@@ -732,7 +766,104 @@
if (accept_status)
printk("APIC delivery error (%lx).\n", accept_status);
- if (!send_status && !accept_status) {
+ return (send_status | accept_status);
+}
+
+extern unsigned long cpu_initialized;
+
+static void __init do_boot_cpu (int apicid)
+/*
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ */
+{
+ struct task_struct *idle;
+ unsigned long boot_error = 0;
+ int timeout, cpu;
+ unsigned long start_eip;
+ unsigned short nmi_high, nmi_low;
+
+ cpu = ++cpucount;
+ /*
+ * We can't use kernel_thread since we must avoid to
+ * reschedule the child.
+ */
+ if (fork_by_hand() < 0)
+ panic("failed fork for CPU %d", cpu);
+
+ /*
+ * We remove it from the pidhash and the runqueue
+ * once we got the process:
+ */
+ idle = init_task.prev_task;
+ if (!idle)
+ panic("No idle process for CPU %d", cpu);
+
+ idle->processor = cpu;
+
+ map_cpu_to_boot_apicid(cpu, apicid);
+
+ idle->has_cpu = 1; /* we schedule the first task manually */
+ idle->thread.eip = (unsigned long) start_secondary;
+
+ del_from_runqueue(idle);
+ unhash_process(idle);
+ init_tasks[cpu] = idle;
+
+ /* start_eip had better be page-aligned! */
+ start_eip = setup_trampoline();
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+ stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+ Dprintk("Setting warm reset code and vector.\n");
+
+ if (clustered_apic_mode) {
+ /* stash the current NMI vector, so we can put things back */
+ nmi_high = *((volatile unsigned short *) TRAMPOLINE_HIGH);
+ nmi_low = *((volatile unsigned short *) TRAMPOLINE_LOW);
+ }
+
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ Dprintk("3.\n");
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if (!clustered_apic_mode && APIC_INTEGRATED(apic_version[apicid])) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ /*
+ * Status is now clean
+ */
+ boot_error = 0;
+
+ /*
+ * Starting actual IPI sequence...
+ */
+
+ if (clustered_apic_mode)
+ boot_error = wakeup_secondary_via_NMI(apicid);
+ else
+ boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
+
+ if (!boot_error) {
/*
* allow APs to start initializing.
*/
@@ -756,7 +887,7 @@
print_cpu_info(&cpu_data[cpu]);
Dprintk("CPU has booted.\n");
} else {
- boot_status = 1;
+ boot_error= 1;
if (*((volatile unsigned char *)phys_to_virt(8192))
== 0xA5)
/* trampoline started but...? */
@@ -765,18 +896,28 @@
/* trampoline code not run */
printk("Not responding.\n");
#if APIC_DEBUG
- inquire_remote_apic(apicid);
+ if (!clustered_apic_mode)
+ inquire_remote_apic(apicid);
#endif
}
}
- if (send_status || accept_status || boot_status) {
- x86_cpu_to_apicid[cpu] = -1;
- x86_apicid_to_cpu[apicid] = -1;
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_boot_apicid(cpu, apicid);
+ clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
+ clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+ clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */
cpucount--;
}
/* mark "stuck" area as not stuck */
*((volatile unsigned long *)phys_to_virt(8192)) = 0;
+
+ if(clustered_apic_mode) {
+ printk("Restoring NMI vector\n");
+ *((volatile unsigned short *) TRAMPOLINE_HIGH) = nmi_high;
+ *((volatile unsigned short *) TRAMPOLINE_LOW) = nmi_low;
+ }
}
cycles_t cacheflush_time;
@@ -826,9 +967,20 @@
extern int prof_old_multiplier[NR_CPUS];
extern int prof_counter[NR_CPUS];
+static int boot_cpu_logical_apicid;
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio = NULL;
+
void __init smp_boot_cpus(void)
{
- int apicid, cpu;
+ int apicid, cpu, bit;
+
+ if (clustered_apic_mode) {
+ /* remap the 1st quad's 256k range for cross-quad I/O */
+ xquad_portio = ioremap (XQUAD_PORTIO_BASE, XQUAD_PORTIO_LEN);
+ printk("Cross quad port I/O vaddr 0x%08lx, len %08lx\n",
+ (u_long) xquad_portio, (u_long) XQUAD_PORTIO_LEN);
+ }
#ifdef CONFIG_MTRR
/* Must be done before other processors booted */
@@ -839,13 +991,14 @@
* and the per-CPU profiling counter/multiplier
*/
- for (apicid = 0; apicid < NR_CPUS; apicid++) {
- x86_apicid_to_cpu[apicid] = -1;
- prof_counter[apicid] = 1;
- prof_old_multiplier[apicid] = 1;
- prof_multiplier[apicid] = 1;
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ prof_counter[cpu] = 1;
+ prof_old_multiplier[cpu] = 1;
+ prof_multiplier[cpu] = 1;
}
+ init_cpu_to_apicid();
+
/*
* Setup boot CPU information
*/
@@ -857,8 +1010,9 @@
* We have the boot CPU online for sure.
*/
set_bit(0, &cpu_online_map);
- x86_apicid_to_cpu[boot_cpu_id] = 0;
- x86_cpu_to_apicid[0] = boot_cpu_id;
+ boot_cpu_logical_apicid = logical_smp_processor_id();
+ map_cpu_to_boot_apicid(0, boot_cpu_apicid);
+
global_irq_holder = 0;
current->processor = 0;
init_idle();
@@ -884,20 +1038,22 @@
/*
* Should not be necessary because the MP table should list the boot
* CPU too, but we do it for the sake of robustness anyway.
+ * Makes no sense to do this check in clustered apic mode, so skip it
*/
- if (!test_bit(boot_cpu_id, &phys_cpu_present_map)) {
+ if (!clustered_apic_mode &&
+ !test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_id);
+ boot_cpu_physical_apicid);
phys_cpu_present_map |= (1 << hard_smp_processor_id());
}
/*
* If we couldn't find a local APIC, then get out of here now!
*/
- if (APIC_INTEGRATED(apic_version[boot_cpu_id]) &&
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
!test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_id);
+ boot_cpu_physical_apicid);
printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
#ifndef CONFIG_VISWS
io_apic_irqs = 0;
@@ -926,22 +1082,27 @@
connect_bsp_APIC();
setup_local_APIC();
- if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id)
+ if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
BUG();
/*
- * Now scan the CPU present map and fire up the other CPUs.
+ * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+ *
+ * In clustered apic mode, phys_cpu_present_map is constructed thus:
+ * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
+ * clustered apic ID.
*/
Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
- for (apicid = 0; apicid < NR_CPUS; apicid++) {
+ for (bit = 0; bit < NR_CPUS; bit++) {
+ apicid = cpu_present_to_apicid(bit);
/*
* Don't even attempt to start the boot CPU!
*/
- if (apicid == boot_cpu_id)
+ if (apicid == boot_cpu_apicid)
continue;
- if (!(phys_cpu_present_map & (1 << apicid)))
+ if (!(phys_cpu_present_map & (1 << bit)))
continue;
if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
continue;
@@ -951,9 +1112,10 @@
/*
* Make sure we unmap all failed CPUs
*/
- if ((x86_apicid_to_cpu[apicid] == -1) &&
- (phys_cpu_present_map & (1 << apicid)))
- printk("phys CPU #%d not responding - cannot use it.\n",apicid);
+ if ((boot_apicid_to_cpu(apicid) == -1) &&
+ (phys_cpu_present_map & (1 << bit)))
+ printk("CPU #%d not responding - cannot use it.\n",
+ apicid);
}
/*
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)