http://lia64.bkbits.net/linux-ia64-test-2.6.12
davidm@hpl.hp.com|ChangeSet|20050324204925|15199 davidm

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/03/24 12:49:25-08:00 davidm@hpl.hp.com 
#   [IA64] Annotate fsys_bubble_down() with McKinley dispatch info.
#   
#   This patch changes comments & formatting only.  There is no code
#   change.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/fsys.S
#   2005/03/24 12:48:32-08:00 davidm@hpl.hp.com +76 -46
#   Annotate fsys_bubble_down() with McKinley dispatch info.
# 
# ChangeSet
#   2005/03/24 12:48:10-08:00 davidm@hpl.hp.com 
#   [IA64] Reschedule fsys_bubble_down().
#   
#   Improvements come from eliminating srlz.i, not scheduling AR/CR-reads
#   too early (while there are others still pending), scheduling the
#   backing-store switch as well as possible, splitting the BBB bundle
#   into a MIB/MBB pair.
#   
#   Why is it safe to eliminate the srlz.i?  Observe
#   that we used to clear bits ~PSR_PRESERVED_BITS in PSR.L.  Since
#   PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
#   ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.  However,
#   
#    PSR.BE : already is turned off in __kernel_syscall_via_epc()
#    PSR.AC : don't care (kernel normally turns PSR.AC on)
#    PSR.I  : already turned off by the time fsys_bubble_down gets invoked
#    PSR.DFL: always 0 (kernel never turns it on)
#    PSR.DFH: don't care --- kernel never touches f32-f127 on its own
#   	  initiative
#    PSR.DI : always 0 (kernel never turns it on)
#    PSR.SI : always 0 (kernel never turns it on)
#    PSR.DB : don't care --- kernel never enables kernel-level breakpoints
#    PSR.TB : must be 0 already; if it wasn't zero on entry to
#   	  __kernel_syscall_via_epc, the branch to fsys_bubble_down
#   	  will trigger a taken branch; the taken-trap-handler then
#   	  converts the syscall into a break-based system-call.
#   
#   In other words: all the bits we're clearying are either 0 already or
#   are don't cares!  Thus, we don't have to write PSR.L at all and we
#   don't have to do a srlz.i either.
#   
#   Good for another ~20 cycle improvement for EPC-based heavy-weight
#   syscalls.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/fsys.S
#   2005/03/24 12:46:53-08:00 davidm@hpl.hp.com +33 -40
#   Reschedule fsys_bubble_down().
# 
# ChangeSet
#   2005/03/24 12:46:29-08:00 davidm@hpl.hp.com 
#   [IA64] Annotate __kernel_syscall_via_epc() with McKinley dispatch info.
#   
#   Two other very minor changes: use "mov.i" instead of "mov" for reading
#   ar.pfs (for clarity; doesn't affect the code at all).  Also, predicate
#   the load of r14 for consistency.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/gate.S
#   2005/03/24 12:45:33-08:00 davidm@hpl.hp.com +23 -23
#   Annotate __kernel_syscall_via_epc() with McKinley dispatch info.
# 
# ChangeSet
#   2005/03/24 12:45:08-08:00 davidm@hpl.hp.com 
#   [IA64] Reschedule __kernel_syscall_via_epc().
#   
#   Avoid some stalls, which is good for about 2 cycles when invoking a
#   light-weight handler.  When invoking a heavy-weight handler, this
#   helps by about 7 cycles, with most of the improvement coming from the
#   improved branch-prediction achieved by splitting the BBB bundle into
#   two MIB bundles.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/gate.S
#   2005/03/24 12:44:21-08:00 davidm@hpl.hp.com +18 -13
#   Reschedule __kernel_syscall_via_epc().
# 
# ChangeSet
#   2005/03/24 12:44:01-08:00 davidm@hpl.hp.com 
#   [IA64] Reschedule break_fault() for better performance.
#   
#   This patch reorganizes break_fault() to optimistically assume that a
#   system-call is being performed from user-space (which is almost always
#   the case).  If it turns out that (a) we're not being called due to a
#   system call or (b) we're being called from within the kernel, we fixup
#   the no-longer-valid assumptions in non_syscall() and .break_fixup(),
#   respectively.
#   
#   With this approach, there are 3 major phases:
#   
#    - Phase 1: Read various control & application registers, in
#   	    particular the current task pointer from AR.K6.
#    - Phase 2: Do all memory loads (load system-call entry,
#   	    load current_thread_info()->flags, prefetch
#   	    kernel register-backing store) and switch
#   	    to kernel register-stack.
#    - Phase 3: Call ia64_syscall_setup() and invoke
#   	    syscall-handler.
#   
#   Good for 26-30 cycles of improvement on break-based syscall-path.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/ivt.S
#   2005/03/24 12:42:33-08:00 davidm@hpl.hp.com +108 -69
#   Reschedule break_fault() for better performance.
# 
# ChangeSet
#   2005/03/24 12:22:47-08:00 davidm@hpl.hp.com 
#   [IA64] In ia64_leave_syscall(), fix comments and whitespace only.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/entry.S
#   2005/03/24 12:21:36-08:00 davidm@hpl.hp.com +47 -48
#   In ia64_leave_syscall(), fix comments and whitespace only.
# 
# ChangeSet
#   2005/03/24 12:21:09-08:00 davidm@hpl.hp.com 
#   [IA64] Schedule ia64_leave_syscall() to read ar.bsp earlier
#   
#   Reschedule code to read ar.bsp as early as possible.  To enable this,
#   don't bother clearing some of the registers when we're returning to
#   kernel stacks.  Also, instead of trying to support the pNonSys case
#   (which makes no sense), do a bugcheck instead (with break 0).  Finally,
#   remove a clear of r14 which is a left-over from the previous patch.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/entry.S
#   2005/03/24 12:20:01-08:00 davidm@hpl.hp.com +19 -24
#   Schedule ia64_leave_syscall() to read ar.bsp earlier
# 
# ChangeSet
#   2005/03/24 12:19:41-08:00 davidm@hpl.hp.com 
#   [IA64] In syscall-entry, use st8 instead of stf8 to clear pt_regs.r8
#   
#   Using stf8 seemed like a clever idea at the time, but stf8 forces
#   the cache-line to be invalidated in the L1D (if it happens to be
#   there already).  This patch eliminates a guaranteed L1D cache-miss
#   and, by itself, is good for a 1-2 cycle improvement for heavy-weight
#   syscalls.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/ivt.S
#   2005/03/24 12:18:16-08:00 davidm@hpl.hp.com +3 -3
#   In syscall-entry, use st8 instead of stf8 to clear pt_regs.r8
# 
# ChangeSet
#   2005/03/24 12:17:36-08:00 davidm@hpl.hp.com 
#   [IA64] On return from syscall, hint b7 with __kernel_syscall_via_epc().
#   
#   Why is this a good idea?  Clearing b7 to 0 is guaranteed to do us no
#   good and writing it with __kernel_syscall_via_epc() yields a 6 cycle
#   improvement _if_ the application performs another EPC-based system-
#   call without overwriting b7, which is not all that uncommon.  Well
#   worth the minimal cost of 1 bundle of code.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/entry.S
#   2005/03/24 12:16:31-08:00 davidm@hpl.hp.com +8 -1
#   On return from syscall, hint b7 with __kernel_syscall_via_epc().
# 
# ChangeSet
#   2005/03/24 12:15:58-08:00 davidm@hpl.hp.com 
#   [IA64] Schedule fp-clearing insns at least 6 cycles after reading ar.bsp.
#   
#   Decreases syscall overhead by approximately 6 cycles.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/entry.S
#   2005/03/24 12:14:56-08:00 davidm@hpl.hp.com +15 -10
#   Schedule fp-clearing insns at least 6 cycles after reading ar.bsp.
# 
# ChangeSet
#   2005/03/24 12:14:18-08:00 davidm@hpl.hp.com 
#   [IA64] Use dynamic prediction for RSE-clearing branches.
#   
#   This by itself is good for a 1-2 cycle speed up.  Effect is bigger
#   when combined with the later patches.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/entry.S
#   2005/03/24 12:13:07-08:00 davidm@hpl.hp.com +2 -2
#   Use dynamic prediction for RSE-clearing branches.
# 
# ChangeSet
#   2005/03/24 12:09:16-08:00 davidm@hpl.hp.com 
#   [IA64] __ia64_syscall() is no longer used anywhere in the kernel.  Remove it.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/ia64_ksyms.c
#   2005/03/24 12:07:15-08:00 davidm@hpl.hp.com +0 -3
#    __ia64_syscall() is no longer used anywhere in the kernel.  Remove it.
# 
# arch/ia64/kernel/entry.S
#   2005/03/24 12:07:04-08:00 davidm@hpl.hp.com +0 -12
#    __ia64_syscall() is no longer used anywhere in the kernel.  Remove it.
# 
# ChangeSet
#   2005/03/24 11:53:46-08:00 tony.luck@intel.com 
#   Merge intel.com:/data/home/aegl/BK/work/3
#   into intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.12
# 
# include/asm-ia64/sn/sn_sal.h
#   2005/03/24 11:53:40-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# drivers/char/snsc.c
#   2005/03/24 11:53:40-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# drivers/char/Makefile
#   2005/03/24 11:53:40-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/24 11:52:52-08:00 ghoward@sgi.com 
#   [IA64] Altix system controller event handling
#   
#   The following is an update of the patch I sent yesterday
#   (3/9/05) incorporating suggestions from Christoph Hellwig and
#   Andreas Schwab.  It allows Altix and Altix-like systems to
#   handle environmental events generated by the system controllers,
#   and should apply on top of Jack Steiner's patch of 3/1/05 ("New
#   chipset support for SN platform") and Mark Goodwin's patch of
#   3/8/05 ("Altix SN topology support for new chipsets and pci
#   topology").
#   
#   Signed-off-by: Greg Howard <ghoward@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/sn_sal.h
#   2005/03/24 11:51:40-08:00 ghoward@sgi.com +14 -0
#   Altix system controller event handling
# 
# drivers/char/snsc.h
#   2005/03/24 11:51:39-08:00 ghoward@sgi.com +40 -0
#   Altix system controller event handling
# 
# drivers/char/snsc.c
#   2005/03/24 11:51:38-08:00 ghoward@sgi.com +8 -0
#   Altix system controller event handling
# 
# drivers/char/Makefile
#   2005/03/24 11:51:30-08:00 ghoward@sgi.com +1 -1
#   Altix system controller event handling
# 
# ChangeSet
#   2005/03/23 09:52:04-08:00 tony.luck@intel.com 
#   Merge intel.com:/data/home/aegl/BK/work/23
#   into intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.12
# 
# arch/ia64/kernel/iosapic.c
#   2005/03/23 09:51:55-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/23 09:50:53-08:00 kaneshige.kenji@jp.fujitsu.com 
#   [IA64] iosapic.c: typo ... s/spin_unlock_irq/spin_unlock/
#   
#   vector sharing patch had a typo ... mismatched spin_lock() with
#   a spin_unlock_irq().  Fix from Kenji Kaneshige.
#   
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/iosapic.c
#   2005/03/23 09:47:02-08:00 kaneshige.kenji@jp.fujitsu.com +1 -1
#   typo ... s/spin_unlock_irq/spin_unlock/
# 
# ChangeSet
#   2005/03/22 16:13:26-08:00 akpm@bix.(none) 
#   Merge http://lia64.bkbits.net/linux-ia64-test-2.6.12
#   into bix.(none):/usr/src/bk-ia64
# 
# arch/ia64/kernel/smpboot.c
#   2005/03/22 16:13:21-08:00 akpm@bix.(none) +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/22 13:33:10-08:00 tony.luck@intel.com 
#   [IA64] print "siblings" before {physical,core,thread} id
#   
#   Rohit and Suresh changed their mind about the order to print things
#   in /proc/cpuinfo, but didn't include the change in the version of
#   the patch they sent to me.
#   
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/setup.c
#   2005/03/22 13:30:52-08:00 tony.luck@intel.com +1 -1
#   print "siblings" before {physical,core,thread} id
# 
# ChangeSet
#   2005/03/22 11:29:50-08:00 tony.luck@intel.com 
#   Merge intel.com:/data/home/aegl/BK/work/23
#   into intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.12
# 
# arch/ia64/kernel/iosapic.c
#   2005/03/22 11:29:44-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/22 11:28:48-08:00 kaneshige.kenji@jp.fujitsu.com 
#   [IA64] vector sharing (Large I/O system support)
#   
#   Current ia64 linux cannot handle greater than 184 interrupt sources
#   because of the lack of vectors. The following patch enables ia64 linux
#   to handle greater than 184 interrupt sources by allowing the same
#   vector number to be shared by multiple IOSAPIC's RTEs. The design of
#   this patch is besed on "Intel(R) Itanium(R) Processor Family Interrupt
#   Architecture Guide".
#   
#   Even if you don't have a large I/O system, you can see the behavior of
#   vector sharing by changing IOSAPIC_LAST_DEVICE_VECTOR to fewer value.
#   
#   Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/hw_irq.h
#   2005/03/22 11:26:58-08:00 kaneshige.kenji@jp.fujitsu.com +1 -0
#   vector sharing (Large I/O system support)
# 
# arch/ia64/kernel/irq_ia64.c
#   2005/03/22 11:26:57-08:00 kaneshige.kenji@jp.fujitsu.com +13 -3
#   vector sharing (Large I/O system support)
# 
# arch/ia64/kernel/iosapic.c
#   2005/03/22 11:26:54-08:00 kaneshige.kenji@jp.fujitsu.com +273 -87
#   vector sharing (Large I/O system support)
# 
# ChangeSet
#   2005/03/21 10:39:53-08:00 rohit.seth@intel.com 
#   [IA64] multi-core/multi-thread identification
#   
#   Version 3 - rediffed to apply on top of Ashok's hotplug cpu
#   patch.  /proc/cpuinfo output in step with x86.
#   
#   This is an updated MC/MT identification patch based on the 
#   previous discussions on list. 
#   
#   Add the Multi-core and Multi-threading detection for IPF.
#     - Add new core and threading related fields in /proc/cpuinfo.
#   		Physical id
#   		Core id
#   		Thread id
#   		Siblings
#     - setup the cpu_core_map and cpu_sibling_map appropriately
#     - Handles Hot plug CPU
#    
#   Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
#   Signed-off-by: Gordon Jin <gordon.jin@intel.com>
#   Signed-off-by: Rohit Seth <rohit.seth@intel.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/smp.h
#   2005/03/21 10:35:19-08:00 rohit.seth@intel.com +5 -0
#   multi-core/multi-thread identification
# 
# include/asm-ia64/sal.h
#   2005/03/21 10:35:18-08:00 rohit.seth@intel.com +12 -0
#   multi-core/multi-thread identification
# 
# include/asm-ia64/processor.h
#   2005/03/21 10:35:17-08:00 rohit.seth@intel.com +7 -0
#   multi-core/multi-thread identification
# 
# include/asm-ia64/pal.h
#   2005/03/21 10:35:16-08:00 rohit.seth@intel.com +68 -0
#   multi-core/multi-thread identification
# 
# arch/ia64/kernel/smpboot.c
#   2005/03/21 10:35:10-08:00 rohit.seth@intel.com +206 -0
#   multi-core/multi-thread identification
# 
# arch/ia64/kernel/setup.c
#   2005/03/21 10:34:05-08:00 rohit.seth@intel.com +67 -2
#   multi-core/multi-thread identification
# 
# ChangeSet
#   2005/03/18 16:09:27-08:00 kaos@sgi.com 
#   [IA64] __copy_user breaks on unaligned src
#   
#   memcpy_mck.S::__copy_user breaks in the prefetch code under these conditions :-
#   
#   * src is unaligned and
#   * dst is near the end of a page and
#   * the page after dst is unmapped.
#   
#   Signed-off-by: Keith Owens <kaos@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/lib/memcpy_mck.S
#   2005/03/18 16:07:39-08:00 kaos@sgi.com +1 -1
#   __copy_user breaks on unaligned src
# 
# ChangeSet
#   2005/03/18 15:43:04-08:00 tony.luck@intel.com 
#   [IA64] Need to handle lfetch in "no_context" case.
#   
#   Thanks to Mark for tracking down this one.  Users of __copy_from_user_inatomic()
#   will be sad if we don't handle lfetch faults for the "no_context" case.
#   
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/mm/fault.c
#   2005/03/18 15:39:11-08:00 tony.luck@intel.com +6 -3
#   Need to handle lfetch in "no_context" case.
# 
# ChangeSet
#   2005/03/18 14:18:57-08:00 markgw@sgi.com 
#   [IA64-SGI] Altix SN add support for slots in geoid_t locator
#   
#   This patch against ia64-test-2.6.12 is needed for forthcoming
#   Altix chipsets. It renames geoid_any_t to geoid_common_t and
#   splits the 8bit 'slab' field into two 4bit fields for 'slab'
#   and 'slot'. Similar changes in the Altix SAL will retain backward
#   compatibility for old kernels.
#   
#   Signed-off-by: Mark Goodwin <markgw@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/types.h
#   2005/03/18 14:17:18-08:00 markgw@sgi.com +2 -1
#   Altix SN add support for slots in geoid_t locator
# 
# include/asm-ia64/sn/sn_sal.h
#   2005/03/18 14:17:16-08:00 markgw@sgi.com +2 -2
#   Altix SN add support for slots in geoid_t locator
# 
# include/asm-ia64/sn/geo.h
#   2005/03/18 14:17:14-08:00 markgw@sgi.com +27 -18
#   Altix SN add support for slots in geoid_t locator
# 
# arch/ia64/sn/kernel/sn2/sn_hwperf.c
#   2005/03/18 14:17:07-08:00 markgw@sgi.com +23 -46
#   Altix SN add support for slots in geoid_t locator
# 
# ChangeSet
#   2005/03/18 13:42:03-08:00 davidm@hpl.hp.com 
#   [IA64] fix syscall-optimization goof
#   
#   Sadly, I goofed in this syscall-tuning patch:
#   
#   ChangeSet 1.1966.1.40 2005/01/22 13:31:05 davidm@hpl.hp.com
#     [IA64] Improve ia64_leave_syscall() for McKinley-type cores.
#   
#     Optimize ia64_leave_syscall() a bit better for McKinley-type cores.
#     The patch looks big, but that's mostly due to renaming r16/r17 to r2/r3.
#     Good for a 13 cycle improvement.
#   
#   The problem is that the size of the physical stacked registers was
#   loaded into the wrong register (r3 instead of r17).  Since r17 by
#   coincidence always had the value 1, this had the effect of turning
#   rse_clear_invalid into a no-op.  That poses the risk of leaking kernel
#   state back to user-land and is hence not acceptable.
#   
#   The fix below is simple, but unfortunately it costs us about 28 cycles
#   in syscall overhead. ;-(
#   
#   Unfortunately, there isn't much we can do about that since those
#   registers have to be cleared one way or another.
#   
#   	--david
#   
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/entry.S
#   2005/03/18 13:38:42-08:00 davidm@hpl.hp.com +1 -1
#   fix syscall-optimization goof
# 
# ChangeSet
#   2005/03/18 10:12:52-08:00 rja@sgi.com 
#   [IA64-SGI] Shub2 BTE support - BTE recovery code
#   
#   patch 2:
#   	Shub2 BTE recovery code will be implemented in SAL.  
#   	Define the SAL interface.
#   	Modify bte_error to call SAL for shub2.
#   
#   Signed-off-by: Russ Anderson <rja@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/sn_sal.h
#   2005/03/18 10:11:26-08:00 rja@sgi.com +17 -2
#   Shub2 BTE support - BTE recovery code
# 
# arch/ia64/sn/kernel/huberror.c
#   2005/03/18 10:11:25-08:00 rja@sgi.com +6 -3
#   Shub2 BTE support - BTE recovery code
# 
# arch/ia64/sn/kernel/bte_error.c
#   2005/03/18 10:11:05-08:00 rja@sgi.com +48 -28
#   Shub2 BTE support - BTE recovery code
# 
# ChangeSet
#   2005/03/18 10:10:06-08:00 rja@sgi.com 
#   [IA64-SGI] Add new MMR definitions/Modify BTE initialiation&copy.
#   
#   patch 1:
#   	Add new MMR definitions.
#   	Modify BTE initialiation.
#   	Modify BTE copy.
#   
#   Signed-off-by: Russ Anderson <rja@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/shub_mmr.h
#   2005/03/18 10:08:45-08:00 rja@sgi.com +19 -1
#   Add new MMR definitions/Modify BTE initialiation&copy.
# 
# include/asm-ia64/sn/pda.h
#   2005/03/18 10:08:43-08:00 rja@sgi.com +1 -2
#   Add new MMR definitions/Modify BTE initialiation&copy.
# 
# include/asm-ia64/sn/nodepda.h
#   2005/03/18 10:08:41-08:00 rja@sgi.com +2 -2
#   Add new MMR definitions/Modify BTE initialiation&copy.
# 
# include/asm-ia64/sn/bte.h
#   2005/03/18 10:08:34-08:00 rja@sgi.com +47 -6
#   Add new MMR definitions/Modify BTE initialiation&copy.
# 
# arch/ia64/sn/kernel/bte.c
#   2005/03/18 10:07:50-08:00 rja@sgi.com +11 -9
#   Add new MMR definitions/Modify BTE initialiation&copy.
# 
# ChangeSet
#   2005/03/16 15:23:47-08:00 maule@sgi.com 
#   [IA64-SGI] disable TIOCA GART TLB prefetching
#   
#   Patch to disable SGI TIOCA GART TLB prefetching due to hw bug.
#   
#   Signed-off-by: Mark Maule <maule@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/sn/pci/tioca_provider.c
#   2005/03/16 15:22:12-08:00 maule@sgi.com +3 -3
#    disable TIOCA GART TLB prefetching
# 
# ChangeSet
#   2005/03/15 14:45:13-08:00 tony.luck@intel.com 
#   [IA64] MAX_PGT_FREES_PER_PASS must be 'L' to avoid warning
#   
#   'min' is very picky about types of arguments, make it happy
#   
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/mm/init.c
#   2005/03/15 14:43:28-08:00 tony.luck@intel.com +1 -1
#   MAX_PGT_FREES_PER_PASS must be 'L' to avoid warning
# 
# ChangeSet
#   2005/03/15 14:34:56-08:00 tony.luck@intel.com 
#   [IA64] fix: warning: `ql_size' might be used uninitialized
#   
#   Oops.  Should have caught this before I checked it in.
#   
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/pgalloc.h
#   2005/03/15 14:33:21-08:00 tony.luck@intel.com +1 -1
#   fix: warning: `ql_size' might be used uninitialized
# 
# ChangeSet
#   2005/03/15 13:38:46-08:00 alex.williamson@hp.com 
#   [IA64] sba_iommu bug fixes
#   
#      This fixes a couple of bugs in the zx1/sx1000 sba_iommu.  These are
#   all pretty low likelihood of hitting.  The first problem is a simple off
#   by one, deep in the sba_alloc_range() error path.  Surrounding that was
#   a lock ordering problem that could have potentially deadlocked with the
#   order the locks are grabbed in sba_unmap_single().  I moved the resource
#   locking into sba_search_bitmap() to prevent this.  Finally, there's a
#   potential race between unmapping pdir entries and marking incoming DMA
#   pages clean.  If you see any oddities, please let me know, but I've
#   tested it pretty thoroughly here.  Tony, please apply.  Thanks,
#   
#   BTW, many of the options in this driver not on by default are becoming
#   more and more broken.  I'll be working on some patches to clean them
#   out, but I wanted to get this bug fix out first.
#   
#   Signed-off-by: Alex Williamson <alex.williamson@hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/hp/common/sba_iommu.c
#   2005/03/15 13:37:24-08:00 alex.williamson@hp.com +56 -40
#   sba_iommu bug fixes
# 
# ChangeSet
#   2005/03/15 13:27:34-08:00 holt@sgi.com 
#   [IA64] Percpu quicklist for combined allocator for pgd/pmd/pte.
#   
#   This patch introduces using the quicklists for pgd, pmd, and pte levels
#   by combining the alloc and free functions into a common set of routines.
#   This greatly simplifies the reading of this header file.
#   
#   This patch is simple but necessary for large numa configurations.
#   It simply ensures that only pages from the local node are added to a
#   cpus quicklist.  This prevents the trapping of pages on a remote nodes
#   quicklist by starting a process, touching a large number of pages to
#   fill pmd and pte entries, migrating to another node, and then unmapping
#   or exiting.  With those conditions, the pages get trapped and if the
#   machine has more than 100 nodes of the same size, the calculation of
#   the pgtable high water mark will be larger than any single node so page
#   table cache flushing will never occur.
#   
#   I ran lmbench lat_proc fork and lat_proc exec on a zx1 with and without
#   this patch and did not notice any change.
#   
#   On an sn2 machine, there was a slight improvement which is possibly
#   due to pages from other nodes trapped on the test node before starting
#   the run.  I did not investigate further.
#   
#   This patch shrinks the quicklist based upon free memory on the node
#   instead of the high/low water marks.  I have written it to enable
#   preemption periodically and recalculate the amount to shrink every time
#   we have freed enough pages that the quicklist size should have grown.
#   I rescan the nodes zones each pass because other processess may be
#   draining node memory at the same time as we are adding.
#   
#   Signed-off-by: Robin Holt <holt@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# ChangeSet
#   2005/03/15 13:26:18-08:00 akpm@bix.(none) 
#   Merge http://lia64.bkbits.net/linux-ia64-test-2.6.12
#   into bix.(none):/usr/src/bk-ia64
# 
# arch/ia64/kernel/smpboot.c
#   2005/03/15 13:26:14-08:00 akpm@bix.(none) +0 -0
#   Auto merged
# 
# arch/ia64/ia32/ia32_signal.c
#   2005/03/15 13:26:14-08:00 akpm@bix.(none) +0 -0
#   Auto merged
# 
# include/asm-ia64/processor.h
#   2005/03/15 13:23:49-08:00 holt@sgi.com +0 -3
#   Percpu quicklist for combined allocator for pte/pmd.
# 
# include/asm-ia64/pgalloc.h
#   2005/03/15 13:23:39-08:00 holt@sgi.com +61 -83
#   Percpu quicklist for combined allocator for pte/pmd.
# 
# arch/ia64/mm/init.c
#   2005/03/15 13:21:58-08:00 holt@sgi.com +47 -27
#   Percpu quicklist for combined allocator for pte/pmd.
#   Node aware shrink code for quicklists.
# 
# arch/ia64/mm/discontig.c
#   2005/03/15 13:21:50-08:00 holt@sgi.com +2 -1
#   sum size of percpu quicklists
# 
# arch/ia64/mm/contig.c
#   2005/03/15 13:21:20-08:00 holt@sgi.com +2 -1
#   sum size of percpu quicklists
# 
# ChangeSet
#   2005/03/15 12:02:14-08:00 blosure@sgi.com 
#   [IA64-SGI]
#   
#   Missed the "bk new" for this file in the last commit.
#   
#   Signed-off-by: Bruce Losure <blosure@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/sn/kernel/tiocx.c
#   2005/03/15 12:00:02-08:00 blosure@sgi.com +548 -0
# 
# arch/ia64/sn/kernel/tiocx.c
#   2005/03/15 12:00:02-08:00 blosure@sgi.com +0 -0
#   BitKeeper file /data/home/aegl/BK/work/13/arch/ia64/sn/kernel/tiocx.c
# 
# ChangeSet
#   2005/03/15 09:59:25-08:00 tony.luck@intel.com 
#   Merge intel.com:/data/home/aegl/BK/work/14
#   into intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.12
# 
# arch/ia64/sn/kernel/setup.c
#   2005/03/15 09:59:19-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/15 09:58:20-08:00 ayoung@sgi.com 
#   [IA64-SGI] Altix: enable poweroff
#   
#   This patch adds the necessary "hook" to allow SGI/SN
#   machines to perform a system power off upon a 
#   'init 0', 'halt -p', 'poweroff' or 'shutdown -h'.
#   
#   The "hook" is to set the pm_power_off callback
#   to ia64_sn_power_down(). pm_power_off is checked
#   in machine_power_off()/do_poweroff() and, if set, is executed. 
#   ia64_sn_power_down() is a function already present (but not 
#   used currently) in the sn kernel.
#   ia64_sn_power_down() makes a SAL call to execute the
#   power off.
#   
#   Signed-off-by: Aaron J Young <ayoung@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/sn/kernel/setup.c
#   2005/03/15 09:56:37-08:00 ayoung@sgi.com +9 -0
#   Altix: enable poweroff
# 
# ChangeSet
#   2005/03/15 09:33:31-08:00 tony.luck@intel.com 
#   Merge intel.com:/data/home/aegl/BK/work/13
#   into intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.12
# 
# include/asm-ia64/sn/addrs.h
#   2005/03/15 09:33:25-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/14 23:52:19-08:00 akpm@bix.(none) 
#   Merge http://lia64.bkbits.net/linux-ia64-test-2.6.12
#   into bix.(none):/usr/src/bk-ia64
# 
# arch/ia64/kernel/smpboot.c
#   2005/03/14 23:52:14-08:00 akpm@bix.(none) +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/14 15:21:16-08:00 blosure@sgi.com 
#   [IA64-SGI] Bus driver for the CX port of SGI's TIO chip.
#   
#   This patch is to provide CX port infrastructure for SGI TIO-based
#   h/w.   Also a 'core services' driver for SGI FPGA-based h/w.
#                                                                                   
#   Signed-off-by: Bruce Losure <blosure@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/tiocx.h
#   2005/03/14 15:19:54-08:00 blosure@sgi.com +71 -0
# 
# drivers/char/mbcs.h
#   2005/03/14 15:19:54-08:00 blosure@sgi.com +553 -0
# 
# include/asm-ia64/sn/tiocx.h
#   2005/03/14 15:19:54-08:00 blosure@sgi.com +0 -0
#   BitKeeper file /data/home/aegl/BK/work/13/include/asm-ia64/sn/tiocx.h
# 
# drivers/char/mbcs.h
#   2005/03/14 15:19:54-08:00 blosure@sgi.com +0 -0
#   BitKeeper file /data/home/aegl/BK/work/13/drivers/char/mbcs.h
# 
# drivers/char/mbcs.c
#   2005/03/14 15:19:53-08:00 blosure@sgi.com +849 -0
# 
# drivers/char/mbcs.c
#   2005/03/14 15:19:53-08:00 blosure@sgi.com +0 -0
#   BitKeeper file /data/home/aegl/BK/work/13/drivers/char/mbcs.c
# 
# include/asm-ia64/sn/addrs.h
#   2005/03/14 15:13:57-08:00 blosure@sgi.com +3 -0
#   Altix: Bus driver for the CX port of SGI's TIO chip.
# 
# drivers/char/Makefile
#   2005/03/14 15:13:56-08:00 blosure@sgi.com +1 -0
#   Altix: Bus driver for the CX port of SGI's TIO chip.
# 
# drivers/char/Kconfig
#   2005/03/14 15:13:55-08:00 blosure@sgi.com +14 -0
#   Altix: Bus driver for the CX port of SGI's TIO chip.
# 
# arch/ia64/sn/kernel/Makefile
#   2005/03/14 15:13:50-08:00 blosure@sgi.com +1 -0
#   Altix: Bus driver for the CX port of SGI's TIO chip.
# 
# arch/ia64/configs/sn2_defconfig
#   2005/03/14 15:13:42-08:00 blosure@sgi.com +2 -0
#   Altix: Bus driver for the CX port of SGI's TIO chip.
# 
# ChangeSet
#   2005/03/14 10:53:23-08:00 tony.luck@intel.com 
#   Merge intel.com:/data/home/aegl/BK/work/12
#   into intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.12
# 
# arch/ia64/kernel/smpboot.c
#   2005/03/14 10:53:16-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# arch/ia64/ia32/ia32_signal.c
#   2005/03/14 10:53:16-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/14 10:51:44-08:00 eranian@hpl.hp.com 
#   [IA64] perfmon: make pfm_sysctl a global, and other cleanup
#   
#   - make pfm_sysctl a global such that it is possible
#     to enable/disable debug printk in sampling formats
#     using PFM_DEBUG.
#   
#   - remove unused pfm_debug_var variable
#   
#   - fix a bug in pfm_handle_work where an BUG_ON() could
#     be triggered. There is a path where pfm_handle_work()
#     can be called with interrupts enabled, i.e., when
#     TIF_NEED_RESCHED is set. The fix correct the masking
#     and unmasking of interrupts in pfm_handle_work() such
#     that we restore the interrupt mask as it was upon entry.
#   
#   signed-off-by: stephane eranian <eranian@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/perfmon.h
#   2005/03/14 10:49:52-08:00 eranian@hpl.hp.com +12 -0
#   make pfm_sysctl a global, and other cleanup
# 
# arch/ia64/kernel/perfmon_default_smpl.c
#   2005/03/14 10:49:21-08:00 eranian@hpl.hp.com +2 -11
#   make pfm_sysctl a global, and other cleanup
# 
# arch/ia64/kernel/perfmon.c
#   2005/03/14 10:47:56-08:00 eranian@hpl.hp.com +28 -31
#   make pfm_sysctl a global, and other cleanup
# 
# ChangeSet
#   2005/03/14 10:30:40-08:00 tony.luck@intel.com 
#   Merge intel.com:/data/home/aegl/BK/work/4
#   into intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.12
# 
# arch/ia64/sn/kernel/irq.c
#   2005/03/14 10:30:34-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/14 10:29:46-08:00 cngam@sgi.com 
#   [IA64-SGI] support variable length nasids in shub2
#   
#   This patch enables our TIO IO chipset to support variable length nasids in 
#   Shub2 chipset.
#   
#   Signed-off-by: Colin Ngam <cngam@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/addrs.h
#   2005/03/14 10:28:20-08:00 cngam@sgi.com +2 -1
#   support variable length nasids in shub2
# 
# ChangeSet
#   2005/03/14 10:26:21-08:00 cngam@sgi.com 
#   [IA64-SGI] Shub2 provides an addition of 2 External Interrupt events.
#   
#   Signed-off-by: Colin Ngam <cngam@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/shub_mmr.h
#   2005/03/14 10:25:18-08:00 cngam@sgi.com +17 -0
#   Shub2 provides an addition of 2 External Interrupt events.
# 
# arch/ia64/sn/kernel/irq.c
#   2005/03/14 10:25:02-08:00 cngam@sgi.com +2 -13
#   Shub2 provides an addition of 2 External Interrupt events.
# 
# ChangeSet
#   2005/03/14 10:17:55-08:00 markgw@sgi.com 
#   [IA64-SGI] Altix SN topology fix potential infinite loop
#   
#   Fix infinite loop if sn_hwperf_location_to_bpos() fails.
#   
#   Signed-off-by: Mark Goodwin <markgw@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/sn/kernel/sn2/sn_hwperf.c
#   2005/03/14 10:16:19-08:00 markgw@sgi.com +7 -5
#   Altix SN topology fix potential infinite loop
# 
# ChangeSet
#   2005/03/14 10:15:24-08:00 markgw@sgi.com 
#   [IA64-SGI] Altix SN topology support for new chipsets and pci topology
#   
#   please accept this patch to the Altix SN platform topology export
#   interface to support new chipsets and to export PCI topology.
#   
#   This follows on top of Jack Steiner's patch dated March 1st
#   ("New chipset support for SN platform").
#   
#   Signed-off-by: Mark Goodwin <markgw@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/sn_sal.h
#   2005/03/14 10:14:12-08:00 markgw@sgi.com +11 -0
#   Altix SN topology support for new chipsets and pci topology
# 
# arch/ia64/sn/kernel/sn2/sn_hwperf.c
#   2005/03/14 10:13:55-08:00 markgw@sgi.com +124 -9
#   Altix SN topology support for new chipsets and pci topology
# 
# ChangeSet
#   2005/03/13 18:31:59-08:00 akpm@bix.(none) 
#   Merge bix.(none):/usr/src/bk25 into bix.(none):/usr/src/bk-ia64
# 
# arch/ia64/kernel/smpboot.c
#   2005/03/13 18:31:53-08:00 akpm@bix.(none) +0 -0
#   Auto merged
# 
# arch/ia64/ia32/ia32_signal.c
#   2005/03/13 18:31:53-08:00 akpm@bix.(none) +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/11 14:45:47-08:00 davidm@hpl.hp.com 
#   [IA64] speed up syscall path a bit more
#   
#   Recently I noticed that clearing ar.ssd/ar.csd right before srlz.d is
#   causing significant stalling in the syscall path.  The patch below
#   fixes that by moving the register-writes after srlz.d.  On a Madison,
#   this drops break-based getpid() from 241 to 226 cycles (-15 cycles).
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/entry.S
#   2005/03/11 14:44:41-08:00 davidm@hpl.hp.com +6 -6
#   speed up syscall path a bit more
# 
# ChangeSet
#   2005/03/11 14:35:43-08:00 kaos@sgi.com 
#   [IA64] Tighten up unw_unwind_to_user check
#   
#   Detect user space by the unwind frame with predicate PRED_USER_STACK
#   set, instead of a user space IP.  Tighten up the last ditch check for
#   running off the top of the kernel stack.
#   
#   Based on a suggestion by David Mosberger, reworked to fit the current
#   tree.  This survives my stress test which used to break 2.6.9 kernels.
#   Unlike 2.6.11, the stress test now unwinds to the correct point, so
#   gdb can get the user space registers.
#   
#   Signed-off-by: Keith Owens <kaos@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/unwind.c
#   2005/03/11 14:33:45-08:00 kaos@sgi.com +17 -10
#   Tighten up unw_unwind_to_user check
# 
# ChangeSet
#   2005/03/11 13:57:16-08:00 tony.luck@intel.com 
#   Merge intel.com:/data/home/aegl/BK/work/8
#   into intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.12
# 
# arch/ia64/kernel/smpboot.c
#   2005/03/11 13:57:09-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/11 13:55:21-08:00 davidm@hpl.hp.com 
#   [IA64] add missing cpu_relax() in ITC syncing code
#   
#   Call cpu_relax() in busy-waiting loops of the ITC-syncing code.
#   
#   Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/smpboot.c
#   2005/03/11 13:53:18-08:00 davidm@hpl.hp.com +7 -4
#   add missing cpu_relax() in ITC syncing code
# 
# ChangeSet
#   2005/03/11 13:26:01-08:00 steiner@sgi.com 
#   [IA64-SGI] Change SAL call request code for SN systems
#   
#   Change the value of the SAL call number for a new SAL request. The
#   initial implementation in the PROM did not match what the OS expected. 
#   Since the OS can run on PROMs that do not implement the new call, 
#   changing the call number avoids the issue. New PROMs will implement
#   the new call number. (This avoids problems with the 4.05 PROM).
#   
#   Signed-off-by: Jack Steiner <steiner@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/sn_sal.h
#   2005/03/11 13:24:49-08:00 steiner@sgi.com +1 -1
#   Change SAL call request code for SN systems
# 
# ChangeSet
#   2005/03/10 14:34:51-08:00 maule@sgi.com 
#   [IA64-SGI] altix: tioca chip driver (agp)
#   
#   Provide a driver for the altix TIOCA AGP chipset.  An agpgart backend will
#   be provided as a separate patch.
#   
#   Signed-off-by: Mark Maule <maule@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/tioca_provider.h
#   2005/03/10 14:33:05-08:00 maule@sgi.com +206 -0
# 
# include/asm-ia64/sn/tioca.h
#   2005/03/10 14:33:05-08:00 maule@sgi.com +596 -0
# 
# include/asm-ia64/sn/tioca_provider.h
#   2005/03/10 14:33:05-08:00 maule@sgi.com +0 -0
#   BitKeeper file /data/home/aegl/BK/work/7/include/asm-ia64/sn/tioca_provider.h
# 
# include/asm-ia64/sn/tioca.h
#   2005/03/10 14:33:05-08:00 maule@sgi.com +0 -0
#   BitKeeper file /data/home/aegl/BK/work/7/include/asm-ia64/sn/tioca.h
# 
# arch/ia64/sn/pci/tioca_provider.c
#   2005/03/10 14:33:04-08:00 maule@sgi.com +668 -0
# 
# arch/ia64/sn/pci/tioca_provider.c
#   2005/03/10 14:33:04-08:00 maule@sgi.com +0 -0
#   BitKeeper file /data/home/aegl/BK/work/7/arch/ia64/sn/pci/tioca_provider.c
# 
# include/asm-ia64/sn/pcibus_provider_defs.h
#   2005/03/10 14:32:28-08:00 maule@sgi.com +2 -1
#   altix: tioca chip driver (agp)
# 
# arch/ia64/sn/pci/Makefile
#   2005/03/10 14:32:27-08:00 maule@sgi.com +1 -1
#   altix: tioca chip driver (agp)
# 
# arch/ia64/sn/kernel/io_init.c
#   2005/03/10 14:32:18-08:00 maule@sgi.com +2 -0
#   altix: tioca chip driver (agp)
# 
# ChangeSet
#   2005/03/10 14:30:55-08:00 maule@sgi.com 
#   [IA64-SGI] sn2-move-pci-headers.patch
#   
#   Move a couple of headers out of arch/ia64/sn/include/pci and into
#   include/asm-ia64/sn.
#   
#   Signed-off-by: Mark Maule <maule@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/sn/pci/pcibr/pcibr_reg.c
#   2005/03/10 14:29:13-08:00 maule@sgi.com +2 -2
#   move sn includes pcibus_provider_defs.h and pcidev.h to include/asm-ia64/sn/
# 
# arch/ia64/sn/pci/pcibr/pcibr_provider.c
#   2005/03/10 14:29:11-08:00 maule@sgi.com +2 -2
#   move sn includes pcibus_provider_defs.h and pcidev.h to include/asm-ia64/sn/
# 
# arch/ia64/sn/pci/pcibr/pcibr_dma.c
#   2005/03/10 14:29:10-08:00 maule@sgi.com +2 -2
#   move sn includes pcibus_provider_defs.h and pcidev.h to include/asm-ia64/sn/
# 
# arch/ia64/sn/pci/pcibr/pcibr_ate.c
#   2005/03/10 14:29:08-08:00 maule@sgi.com +2 -2
#   move sn includes pcibus_provider_defs.h and pcidev.h to include/asm-ia64/sn/
# 
# arch/ia64/sn/pci/pci_dma.c
#   2005/03/10 14:29:07-08:00 maule@sgi.com +2 -2
#   move sn includes pcibus_provider_defs.h and pcidev.h to include/asm-ia64/sn/
# 
# arch/ia64/sn/kernel/irq.c
#   2005/03/10 14:28:59-08:00 maule@sgi.com +2 -2
#   move sn includes pcibus_provider_defs.h and pcidev.h to include/asm-ia64/sn/
# 
# arch/ia64/sn/kernel/io_init.c
#   2005/03/10 14:27:58-08:00 maule@sgi.com +2 -2
#   move sn includes pcibus_provider_defs.h and pcidev.h to include/asm-ia64/sn/
# 
# include/asm-ia64/sn/pcidev.h
#   2005/03/10 14:26:49-08:00 maule@sgi.com +0 -0
#   Rename: arch/ia64/sn/include/pci/pcidev.h -> include/asm-ia64/sn/pcidev.h
# 
# include/asm-ia64/sn/pcibus_provider_defs.h
#   2005/03/10 14:26:31-08:00 maule@sgi.com +0 -0
#   Rename: arch/ia64/sn/include/pci/pcibus_provider_defs.h -> include/asm-ia64/sn/pcibus_provider_defs.h
# 
# ChangeSet
#   2005/03/10 14:25:47-08:00 maule@sgi.com 
#   [IA64-SGI] sn2-pci-dma-abstraction.patch
#   
#   Provide an abstraction of the altix pci dma runtime layer so that multiple
#   pci-based bridges can be supported.
#   
#   Signed-off-by: Mark Maule <maule@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/sn/pci/pcibr/pcibr_provider.c
#   2005/03/10 14:23:06-08:00 maule@sgi.com +20 -0
#   sn2-pci-dma-abstraction.patch
# 
# arch/ia64/sn/pci/pcibr/pcibr_dma.c
#   2005/03/10 14:23:05-08:00 maule@sgi.com +50 -51
#   sn2-pci-dma-abstraction.patch
# 
# arch/ia64/sn/pci/pci_dma.c
#   2005/03/10 14:23:04-08:00 maule@sgi.com +20 -15
#   sn2-pci-dma-abstraction.patch
# 
# arch/ia64/sn/kernel/io_init.c
#   2005/03/10 14:23:03-08:00 maule@sgi.com +64 -8
#   sn2-pci-dma-abstraction.patch
# 
# arch/ia64/sn/include/pci/pcidev.h
#   2005/03/10 14:23:02-08:00 maule@sgi.com +4 -0
#   sn2-pci-dma-abstraction.patch
# 
# arch/ia64/sn/include/pci/pcibus_provider_defs.h
#   2005/03/10 14:23:00-08:00 maule@sgi.com +10 -2
#   sn2-pci-dma-abstraction.patch
# 
# arch/ia64/sn/include/pci/pcibr_provider.h
#   2005/03/10 14:22:58-08:00 maule@sgi.com +4 -2
#   sn2-pci-dma-abstraction.patch
# 
# ChangeSet
#   2005/03/10 12:57:33-08:00 ashok.raj@intel.com 
#   [IA64] Fix build errors for !HOTPLUG case.
#   
#   Signed-off-by: Ashok Raj <ashok.raj@intel.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/head.S
#   2005/03/10 12:56:17-08:00 ashok.raj@intel.com +7 -3
#   Fix build errors for !HOTPLUG case.
# 
# ChangeSet
#   2005/03/09 11:56:28-08:00 ashok.raj@intel.com 
#   [IA64] cpu hotplug: return offlined cpus to SAL
#   
#   This patch is required to support cpu removal for IPF systems. Existing code
#   just fakes the real offline by keeping it run the idle thread, and polling
#   for the bit to re-appear in the cpu_state to get out of the idle loop.
#   
#   For the cpu-offline to work correctly, we need to pass control of this CPU 
#   back to SAL so it can continue in the boot-rendez mode. This gives the
#   SAL control to not pick this cpu as the monarch processor for global MCA
#   events, and addition does not wait for this cpu to checkin with SAL
#   for global MCA events as well. The handoff is implemented as documented in 
#   SAL specification section 3.2.5.1 "OS_BOOT_RENDEZ to SAL return State"
#   
#   Signed-off-by: Ashok Raj <ashok.raj@intel.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sal.h
#   2005/03/09 11:54:05-08:00 ashok.raj@intel.com +38 -0
#   cpu hotplug: return offlined cpus to SAL
# 
# arch/ia64/kernel/smpboot.c
#   2005/03/09 11:54:04-08:00 ashok.raj@intel.com +50 -31
#   cpu hotplug: return offlined cpus to SAL
# 
# arch/ia64/kernel/process.c
#   2005/03/09 11:54:03-08:00 ashok.raj@intel.com +8 -14
#   cpu hotplug: return offlined cpus to SAL
# 
# arch/ia64/kernel/mca_asm.S
#   2005/03/09 11:53:56-08:00 ashok.raj@intel.com +53 -35
#   cpu hotplug: return offlined cpus to SAL
# 
# arch/ia64/kernel/head.S
#   2005/03/09 11:53:33-08:00 ashok.raj@intel.com +250 -30
#   cpu hotplug: return offlined cpus to SAL
# 
# ChangeSet
#   2005/03/09 11:41:18-08:00 arun.sharma@intel.com 
#   [IA64] ia32_signal.c: erroneous use of memset/memcpy
#   
#   Found by Alexander Nyberg, improved by Bjorn Helgaas.
#   
#   - Fix the incorrect argument to sizeof()
#   - looks like memcpy() code pass was dervived from code that used
#     copy_from_user().  But in this case we are doing to kernel space
#     to kernel space copy, so memcpy is the right routine, but it
#     doesn't return an error code.
#    
#   Signed-off-by: Arun Sharma <arun.sharma@intel.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/ia32/ia32_signal.c
#   2005/03/09 11:33:19-08:00 arun.sharma@intel.com +2 -3
#   erroneous use of memset/memcpy
# 
# ChangeSet
#   2005/03/08 16:44:21-08:00 bob.montgomery@hp.com 
#   [IA64] fix bad emulation of unaligned semaphore opcodes
#   The method used to categorize the load/store instructions in
#   arch/ia64/kernel/unaligned.c is masking the entire set of instructions
#   described in Table 4-33 of the 2002 Intel Itanium Volume 3: Instruction
#   Set Reference.
#   
#   It's the set of instructions for opcode 4, mbit 0, x bit 1, described
#   as Semaphore/Get FR/16-Byte Opcode Instructions.
#   
#   Because the IA64_OPCODE_SHIFT and IA64_OPCODE_MASK operations ignore
#   the x bit, this set of instructions (including cmpxchg, xchg, and
#   fetchadd among others) are processed like the corresponding opcode 4,
#   mbit 0, x bit 0 instructions.
#   
#   This means that a cmpxchg.acq with a misaligned pointer will return the
#   old value without setting the new one (rendering spin locks as No-ops)
#   and that the other instructions also appear not to update memory.
#   A cmpxchg.rel will be treated like a ld.s and just retry forever.
#   
#   The correct behavior for this class of instructions is documented
#   in the file as producing failures for user code and kernel oops in
#   kernel mode, but the code as implemented does not behave this way.
#   
#   I have user test code to demonstrate the problem if you would like
#   a copy.
#   
#   The simple fix in this patch has been discussed with Stephane Eranian
#   and David Mosberger.  It has the advantage of not requiring the
#   redesign of the opcode discrimination in unaligned.c.
#   
#   Signed-off-by: Bob Montgomery <bob.montgomery@hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/kernel/unaligned.c
#   2005/03/08 16:41:26-08:00 bob.montgomery@hp.com +16 -0
#   fix bad emulation of unaligned semaphore opcodes
# 
# ChangeSet
#   2005/03/08 15:04:50-08:00 rja@sgi.com 
#   [IA64-SGI] Remove unused cpu_bte_if from pda_s
#   
#   Signed-off-by: Russ Anderson <rja@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/pda.h
#   2005/03/08 15:03:47-08:00 rja@sgi.com +0 -2
#   Remove unused cpu_bte_if from pda_s
# 
# ChangeSet
#   2005/03/08 15:02:27-08:00 steiner@sgi.com 
#   [IA64-SGI] [PATCH 2/2] - New chipset support for SN platform
#   
#   - move a number of fields out of the SN pda & into 
#     per-cpu data. The pda is ugly & will be deleted. 
#     This is a first step.  Additional patches will follow.
#   
#   Signed-off-by: Jack Steiner <steiner@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/pda.h
#   2005/03/08 14:47:28-08:00 steiner@sgi.com +0 -13
#   New chipset support for SN platform
# 
# include/asm-ia64/sn/arch.h
#   2005/03/08 14:47:27-08:00 steiner@sgi.com +24 -0
#   New chipset support for SN platform
# 
# include/asm-ia64/sn/addrs.h
#   2005/03/08 14:47:25-08:00 steiner@sgi.com +4 -3
#   New chipset support for SN platform
# 
# arch/ia64/sn/kernel/setup.c
#   2005/03/08 14:46:53-08:00 steiner@sgi.com +7 -5
#   New chipset support for SN platform
# 
# ChangeSet
#   2005/03/08 14:45:52-08:00 steiner@sgi.com 
#   [IA64-SGI] [PATCH 1/2] - New chipset support for SN platform
#   
#   - add new parameters to a platform-specific SAL 
#     call to retrieve addition chipset specific info.
#   
#   - change partition_coherence_id() so that it works
#     on platforms using the new chipset.
#   
#   Signed-off-by: Jack Steiner <steiner@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/sn/sn_sal.h
#   2005/03/08 14:42:37-08:00 steiner@sgi.com +37 -13
#   define new ia64_sn_get_sn_info() (replaces old call
#   ia64_sn_get_hub_info()) to support shub2 chipset
# 
# include/asm-ia64/sn/sn_cpuid.h
#   2005/03/08 14:41:53-08:00 steiner@sgi.com +3 -2
#   define new ia64_sn_get_sn_info() (replaces old call
#   ia64_sn_get_hub_info()) to support shub2 chipset
# 
# arch/ia64/sn/kernel/setup.c
#   2005/03/08 14:40:58-08:00 steiner@sgi.com +13 -5
#   Use new ia64_sn_get_sn_info() call to get system config
# 
# ChangeSet
#   2005/03/08 12:11:37-08:00 tony.luck@intel.com 
#   Merge intel.com:/data/home/aegl/BK/work/2
#   into intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.12
# 
# arch/ia64/pci/pci.c
#   2005/03/08 12:11:27-08:00 tony.luck@intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/08 11:57:10-08:00 matthew@wil.cx 
#   [IA64] pci.c: PCI root busses need resources
#   
#   Using the generic setup-bus.c code currently fails on HP's Integrity
#   servers because the root busses have their resources set to the whole
#   of ioport space and the whole of iomem space, instead of just the ranges
#   that are routed to that particular bus.
#   
#   This patch uses the resources in the pci_windows to provide the ranges
#   to each PCI root bus.  In order to do that, I had to change them from
#   being bus addresses to system addresses.
#   
#   Signed-off-by: Matthew Wilcox <matthew@wil.cx>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/pci/pci.c
#   2005/03/08 11:55:40-08:00 matthew@wil.cx +69 -61
#   PCI root busses need resources
# 
# ChangeSet
#   2005/02/23 14:40:01-08:00 bjorn.helgaas@hp.com 
#   [IA64] fix IOSAPIC destinations when CONFIG_SMP=n
#   
#   Always use cpu_physical_id() (which is really the ID/EID of
#   a processor's Local SAPIC) when programming IOSAPIC entries.
#   
#   Previously we sometimes used hard_smp_processor_id(), which
#   is correct when CONFIG_SMP=y but wrong otherwise.
#   
#   Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/smp.h
#   2005/02/23 14:38:36-08:00 bjorn.helgaas@hp.com +24 -21
#   fix IOSAPIC destinations when CONFIG_SMP=n
# 
# arch/ia64/kernel/iosapic.c
#   2005/02/23 14:38:24-08:00 bjorn.helgaas@hp.com +4 -5
#   fix IOSAPIC destinations when CONFIG_SMP=n
# 
diff -Nru a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
--- a/arch/ia64/configs/sn2_defconfig	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/configs/sn2_defconfig	2005-03-24 18:21:52 -08:00
@@ -574,6 +574,8 @@
 # CONFIG_N_HDLC is not set
 # CONFIG_STALDRV is not set
 CONFIG_SGI_SNSC=y
+CONFIG_SGI_TIOCX=y
+CONFIG_SGI_MBCS=m
 
 #
 # Serial drivers
diff -Nru a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
--- a/arch/ia64/hp/common/sba_iommu.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/hp/common/sba_iommu.c	2005-03-24 18:21:52 -08:00
@@ -1,9 +1,9 @@
 /*
 **  IA64 System Bus Adapter (SBA) I/O MMU manager
 **
-**	(c) Copyright 2002-2004 Alex Williamson
+**	(c) Copyright 2002-2005 Alex Williamson
 **	(c) Copyright 2002-2003 Grant Grundler
-**	(c) Copyright 2002-2004 Hewlett-Packard Company
+**	(c) Copyright 2002-2005 Hewlett-Packard Company
 **
 **	Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
 **	Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
@@ -459,21 +459,32 @@
  * sba_search_bitmap - find free space in IO PDIR resource bitmap
  * @ioc: IO MMU structure which owns the pdir we are interested in.
  * @bits_wanted: number of entries we need.
+ * @use_hint: use res_hint to indicate where to start looking
  *
  * Find consecutive free bits in resource bitmap.
  * Each bit represents one entry in the IO Pdir.
  * Cool perf optimization: search for log2(size) bits at a time.
  */
 static SBA_INLINE unsigned long
-sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted)
+sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 {
-	unsigned long *res_ptr = ioc->res_hint;
+	unsigned long *res_ptr;
 	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
-	unsigned long pide = ~0UL;
+	unsigned long flags, pide = ~0UL;
 
 	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
 	ASSERT(res_ptr < res_end);
 
+	spin_lock_irqsave(&ioc->res_lock, flags);
+
+	/* Allow caller to force a search through the entire resource space */
+	if (likely(use_hint)) {
+		res_ptr = ioc->res_hint;
+	} else {
+		res_ptr = (ulong *)ioc->res_map;
+		ioc->res_bitshift = 0;
+	}
+
 	/*
 	 * N.B.  REO/Grande defect AR2305 can cause TLB fetch timeouts
 	 * if a TLB entry is purged while in use.  sba_mark_invalid()
@@ -570,10 +581,12 @@
 	prefetch(ioc->res_map);
 	ioc->res_hint = (unsigned long *) ioc->res_map;
 	ioc->res_bitshift = 0;
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
 	return (pide);
 
 found_it:
 	ioc->res_hint = res_ptr;
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
 	return (pide);
 }
 
@@ -594,36 +607,36 @@
 	unsigned long itc_start;
 #endif
 	unsigned long pide;
-	unsigned long flags;
 
 	ASSERT(pages_needed);
 	ASSERT(0 == (size & ~iovp_mask));
 
-	spin_lock_irqsave(&ioc->res_lock, flags);
-
 #ifdef PDIR_SEARCH_TIMING
 	itc_start = ia64_get_itc();
 #endif
 	/*
 	** "seek and ye shall find"...praying never hurts either...
 	*/
-	pide = sba_search_bitmap(ioc, pages_needed);
+	pide = sba_search_bitmap(ioc, pages_needed, 1);
 	if (unlikely(pide >= (ioc->res_size << 3))) {
-		pide = sba_search_bitmap(ioc, pages_needed);
+		pide = sba_search_bitmap(ioc, pages_needed, 0);
 		if (unlikely(pide >= (ioc->res_size << 3))) {
 #if DELAYED_RESOURCE_CNT > 0
+			unsigned long flags;
+
 			/*
 			** With delayed resource freeing, we can give this one more shot.  We're
 			** getting close to being in trouble here, so do what we can to make this
 			** one count.
 			*/
-			spin_lock(&ioc->saved_lock);
+			spin_lock_irqsave(&ioc->saved_lock, flags);
 			if (ioc->saved_cnt > 0) {
 				struct sba_dma_pair *d;
 				int cnt = ioc->saved_cnt;
 
-				d = &(ioc->saved[ioc->saved_cnt]);
+				d = &(ioc->saved[ioc->saved_cnt - 1]);
 
+				spin_lock(&ioc->res_lock);
 				while (cnt--) {
 					sba_mark_invalid(ioc, d->iova, d->size);
 					sba_free_range(ioc, d->iova, d->size);
@@ -631,10 +644,11 @@
 				}
 				ioc->saved_cnt = 0;
 				READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+				spin_unlock(&ioc->res_lock);
 			}
-			spin_unlock(&ioc->saved_lock);
+			spin_unlock_irqrestore(&ioc->saved_lock, flags);
 
-			pide = sba_search_bitmap(ioc, pages_needed);
+			pide = sba_search_bitmap(ioc, pages_needed, 0);
 			if (unlikely(pide >= (ioc->res_size << 3)))
 				panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
 				      ioc->ioc_hpa);
@@ -664,8 +678,6 @@
 		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
 		ioc->res_bitshift );
 
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-
 	return (pide);
 }
 
@@ -950,6 +962,30 @@
 	return SBA_IOVA(ioc, iovp, offset);
 }
 
+#ifdef ENABLE_MARK_CLEAN
+static SBA_INLINE void
+sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
+{
+	u32	iovp = (u32) SBA_IOVP(ioc,iova);
+	int	off = PDIR_INDEX(iovp);
+	void	*addr;
+
+	if (size <= iovp_size) {
+		addr = phys_to_virt(ioc->pdir_base[off] &
+		                    ~0xE000000000000FFFULL);
+		mark_clean(addr, size);
+	} else {
+		do {
+			addr = phys_to_virt(ioc->pdir_base[off] &
+			                    ~0xE000000000000FFFULL);
+			mark_clean(addr, min(size, iovp_size));
+			off++;
+			size -= iovp_size;
+		} while (size > 0);
+	}
+}
+#endif
+
 /**
  * sba_unmap_single - unmap one IOVA and free resources
  * @dev: instance of PCI owned by the driver that's asking.
@@ -995,6 +1031,10 @@
 	size += offset;
 	size = ROUNDUP(size, iovp_size);
 
+#ifdef ENABLE_MARK_CLEAN
+	if (dir == DMA_FROM_DEVICE)
+		sba_mark_clean(ioc, iova, size);
+#endif
 
 #if DELAYED_RESOURCE_CNT > 0
 	spin_lock_irqsave(&ioc->saved_lock, flags);
@@ -1021,30 +1061,6 @@
 	READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif /* DELAYED_RESOURCE_CNT == 0 */
-#ifdef ENABLE_MARK_CLEAN
-	if (dir == DMA_FROM_DEVICE) {
-		u32 iovp = (u32) SBA_IOVP(ioc,iova);
-		int off = PDIR_INDEX(iovp);
-		void *addr;
-
-		if (size <= iovp_size) {
-			addr = phys_to_virt(ioc->pdir_base[off] &
-					    ~0xE000000000000FFFULL);
-			mark_clean(addr, size);
-		} else {
-			size_t byte_cnt = size;
-
-			do {
-				addr = phys_to_virt(ioc->pdir_base[off] &
-				                    ~0xE000000000000FFFULL);
-				mark_clean(addr, min(byte_cnt, iovp_size));
-				off++;
-				byte_cnt -= iovp_size;
-
-			   } while (byte_cnt > 0);
-		}
-	}
-#endif
 }
 
 
diff -Nru a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c
--- a/arch/ia64/ia32/ia32_signal.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/ia32/ia32_signal.c	2005-03-24 18:21:52 -08:00
@@ -460,10 +460,9 @@
 	sigset_t oldset, set;
 
 	scr->scratch_unat = 0;	/* avoid leaking kernel bits to user level */
-	memset(&set, 0, sizeof(&set));
+	memset(&set, 0, sizeof(set));
 
-	if (memcpy(&set.sig, &sset->sig, sigsetsize))
-		return -EFAULT;
+	memcpy(&set.sig, &sset->sig, sigsetsize);
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
 
diff -Nru a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
--- a/arch/ia64/kernel/entry.S	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/entry.S	2005-03-24 18:21:52 -08:00
@@ -470,18 +470,6 @@
 	br.cond.sptk.many b7
 END(load_switch_stack)
 
-GLOBAL_ENTRY(__ia64_syscall)
-	.regstk 6,0,0,0
-	mov r15=in5				// put syscall number in place
-	break __BREAK_SYSCALL
-	movl r2=errno
-	cmp.eq p6,p7=-1,r10
-	;;
-(p6)	st4 [r2]=r8
-(p6)	mov r8=-1
-	br.ret.sptk.many rp
-END(__ia64_syscall)
-
 GLOBAL_ENTRY(execve)
 	mov r15=__NR_execve			// put syscall number in place
 	break __BREAK_SYSCALL
@@ -637,7 +625,7 @@
  *	      r8-r11: restored (syscall return value(s))
  *		 r12: restored (user-level stack pointer)
  *		 r13: restored (user-level thread pointer)
- *		 r14: cleared
+ *		 r14: set to __kernel_syscall_via_epc
  *		 r15: restored (syscall #)
  *	     r16-r17: cleared
  *		 r18: user-level b6
@@ -658,7 +646,7 @@
  *		  pr: restored (user-level pr)
  *		  b0: restored (user-level rp)
  *	          b6: restored
- *		  b7: cleared
+ *		  b7: set to __kernel_syscall_via_epc
  *	     ar.unat: restored (user-level ar.unat)
  *	      ar.pfs: restored (user-level ar.pfs)
  *	      ar.rsc: restored (user-level ar.rsc)
@@ -704,72 +692,79 @@
 	;;
 (p6)	ld4 r31=[r18]				// load current_thread_info()->flags
 	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
-	mov b7=r0		// clear b7
+	nop.i 0
 	;;
-	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
+	mov r16=ar.bsp				// M2  get existing backing store pointer
 	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
 (p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
 	;;
-	mov r16=ar.bsp				// M2  get existing backing store pointer
+	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
 (p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
 (p6)	br.cond.spnt .work_pending_syscall
 	;;
 	// start restoring the state saved on the kernel stack (struct pt_regs):
 	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
 	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
-	mov f6=f0		// clear f6
+(pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
 	;;
 	invala			// M0|1 invalidate ALAT
-	rsm psr.i | psr.ic	// M2 initiate turning off of interrupt and interruption collection
-	mov f9=f0		// clear f9
+	rsm psr.i | psr.ic	// M2   turn off interrupts and interruption collection
+	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
 
-	ld8 r29=[r2],16		// load cr.ipsr
-	ld8 r28=[r3],16			// load cr.iip
-	mov f8=f0		// clear f8
+	ld8 r29=[r2],16		// M0|1 load cr.ipsr
+	ld8 r28=[r3],16		// M0|1 load cr.iip
+	mov r22=r0		// A    clear r22
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
-	mov.m ar.ssd=r0		// M2 clear ar.ssd
-	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
-	;;
 	ld8 r25=[r3],16		// M0|1 load ar.unat
-	mov.m ar.csd=r0		// M2 clear ar.csd
-	mov r22=r0		// clear r22
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
 	;;
 	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
-(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
-	mov f10=f0		// clear f10
-	;;
-	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
-	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// load ar.rsc
-	mov f11=f0		// clear f11
+(pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
+	nop 0
 	;;
-	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// load ar.rnat (may be garbage)
-	ld8 r31=[r3],PT(R1)-PT(PR)		// load predicates
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
+	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// M0|1 load ar.rsc
+	mov f6=f0			// F    clear f6
+	;;
+	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// M0|1 load ar.rnat (may be garbage)
+	ld8 r31=[r3],PT(R1)-PT(PR)		// M0|1 load predicates
+	mov f7=f0				// F    clear f7
+	;;
+	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// M0|1 load ar.fpsr
+	ld8.fill r1=[r3],16			// M0|1 load r1
+(pUStk) mov r17=1				// A
+	;;
+(pUStk) st1 [r14]=r17				// M2|3
+	ld8.fill r13=[r3],16			// M0|1
+	mov f8=f0				// F    clear f8
+	;;
+	ld8.fill r12=[r2]			// M0|1 restore r12 (sp)
+	ld8.fill r15=[r3]			// M0|1 restore r15
+	mov b6=r18				// I0   restore b6
+
+	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+	mov f9=f0					// F    clear f9
+(pKStk) br.cond.dpnt.many skip_rbs_switch		// B
+
+	srlz.d				// M0   ensure interruption collection is off (for cover)
+	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
+	cover				// B    add current frame into dirty partition & set cr.ifs
+	;;
+(pUStk) ld4 r17=[r17]			// M0|1 r17 = cpu_data->phys_stacked_size_p8
+	mov r19=ar.bsp			// M2   get new backing store pointer
+	mov f10=f0			// F    clear f10
+
+	nop.m 0
+	movl r14=__kernel_syscall_via_epc // X
 	;;
-	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// load ar.fpsr
-	ld8.fill r1=[r3],16	// load r1
-(pUStk) mov r17=1
-	;;
-	srlz.d			// M0  ensure interruption collection is off
-	ld8.fill r13=[r3],16
-	mov f7=f0		// clear f7
-	;;
-	ld8.fill r12=[r2]	// restore r12 (sp)
-	ld8.fill r15=[r3]	// restore r15
-	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
-	;;
-(pUStk)	ld4 r3=[r3]		// r3 = cpu_data->phys_stacked_size_p8
-(pUStk) st1 [r14]=r17
-	mov b6=r18		// I0  restore b6
-	;;
-	mov r14=r0		// clear r14
-	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
-(pKStk) br.cond.dpnt.many skip_rbs_switch
-
-	mov.m ar.ccv=r0		// clear ar.ccv
-(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
-	br.cond.sptk.many rbs_switch
+	mov.m ar.csd=r0			// M2   clear ar.csd
+	mov.m ar.ccv=r0			// M2   clear ar.ccv
+	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
+
+	mov.m ar.ssd=r0			// M2   clear ar.ssd
+	mov f11=f0			// F    clear f11
+	br.cond.sptk.many rbs_switch	// B
 END(ia64_leave_syscall)
 
 #ifdef CONFIG_IA32_SUPPORT
@@ -945,11 +940,10 @@
 	 * NOTE: alloc, loadrs, and cover can't be predicated.
 	 */
 (pNonSys) br.cond.dpnt dont_preserve_current_frame
-
-rbs_switch:
 	cover				// add current frame into dirty partition and set cr.ifs
 	;;
 	mov r19=ar.bsp			// get new backing store pointer
+rbs_switch:
 	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
 	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
 	;;
@@ -1024,14 +1018,14 @@
 	mov loc5=0
 	mov loc6=0
 	mov loc7=0
-(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+(pRecurse) br.call.dptk.few b0=rse_clear_invalid
 	;;
 	mov loc8=0
 	mov loc9=0
 	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
 	mov loc10=0
 	mov loc11=0
-(pReturn) br.ret.sptk.many b0
+(pReturn) br.ret.dptk.many b0
 #endif /* !CONFIG_ITANIUM */
 #	undef pRecurse
 #	undef pReturn
diff -Nru a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
--- a/arch/ia64/kernel/fsys.S	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/fsys.S	2005-03-24 18:21:52 -08:00
@@ -531,91 +531,114 @@
 	.altrp b6
 	.body
 	/*
-	 * We get here for syscalls that don't have a lightweight handler.  For those, we
-	 * need to bubble down into the kernel and that requires setting up a minimal
-	 * pt_regs structure, and initializing the CPU state more or less as if an
-	 * interruption had occurred.  To make syscall-restarts work, we setup pt_regs
-	 * such that cr_iip points to the second instruction in syscall_via_break.
-	 * Decrementing the IP hence will restart the syscall via break and not
-	 * decrementing IP will return us to the caller, as usual.  Note that we preserve
-	 * the value of psr.pp rather than initializing it from dcr.pp.  This makes it
-	 * possible to distinguish fsyscall execution from other privileged execution.
+	 * We get here for syscalls that don't have a lightweight
+	 * handler.  For those, we need to bubble down into the kernel
+	 * and that requires setting up a minimal pt_regs structure,
+	 * and initializing the CPU state more or less as if an
+	 * interruption had occurred.  To make syscall-restarts work,
+	 * we setup pt_regs such that cr_iip points to the second
+	 * instruction in syscall_via_break.  Decrementing the IP
+	 * hence will restart the syscall via break and not
+	 * decrementing IP will return us to the caller, as usual.
+	 * Note that we preserve the value of psr.pp rather than
+	 * initializing it from dcr.pp.  This makes it possible to
+	 * distinguish fsyscall execution from other privileged
+	 * execution.
 	 *
 	 * On entry:
-	 *	- normal fsyscall handler register usage, except that we also have:
+	 *	- normal fsyscall handler register usage, except
+	 *	  that we also have:
 	 *	- r18: address of syscall entry point
 	 *	- r21: ar.fpsr
 	 *	- r26: ar.pfs
 	 *	- r27: ar.rsc
 	 *	- r29: psr
+	 *
+	 * We used to clear some PSR bits here but that requires slow
+	 * serialization.  Fortuntely, that isn't really necessary.
+	 * The rationale is as follows: we used to clear bits
+	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
+	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
+	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
+	 * However,
+	 *
+	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
+	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
+	 * PSR.I  : already turned off by the time fsys_bubble_down gets
+	 *	    invoked
+	 * PSR.DFL: always 0 (kernel never turns it on)
+	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
+	 *	    initiative
+	 * PSR.DI : always 0 (kernel never turns it on)
+	 * PSR.SI : always 0 (kernel never turns it on)
+	 * PSR.DB : don't care --- kernel never enables kernel-level
+	 *	    breakpoints
+	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
+	 *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
+	 *          will trigger a taken branch; the taken-trap-handler then
+	 *          converts the syscall into a break-based system-call.
 	 */
-#	define PSR_PRESERVED_BITS	(IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
-					 | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
-					 | IA64_PSR_IC)
 	/*
-	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.  The rest we have
-	 * to synthesize.
+	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
+	 * The rest we have to synthesize.
 	 */
-#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
+#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
+					 | (0x1 << IA64_PSR_RI_BIT)	\
 					 | IA64_PSR_BN | IA64_PSR_I)
 
-	invala
-	movl r8=PSR_ONE_BITS
-
-	mov r25=ar.unat			// save ar.unat (5 cyc)
-	movl r9=PSR_PRESERVED_BITS
+	invala					// M0|1
+	movl r14=ia64_ret_from_syscall		// X
 
-	mov ar.rsc=0			// set enforced lazy mode, pl 0, little-endian, loadrs=0
-	movl r28=__kernel_syscall_via_break
+	nop.m 0
+	movl r28=__kernel_syscall_via_break	// X	create cr.iip
 	;;
-	mov r23=ar.bspstore		// save ar.bspstore (12 cyc)
-	mov r31=pr			// save pr (2 cyc)
-	mov r20=r1			// save caller's gp in r20
-	;;
-	mov r2=r16			// copy current task addr to addl-addressable register
-	and r9=r9,r29
-	mov r19=b6			// save b6 (2 cyc)
-	;;
-	mov psr.l=r9			// slam the door (17 cyc to srlz.i)
-	or r29=r8,r29			// construct cr.ipsr value to save
-	addl r22=IA64_RBS_OFFSET,r2	// compute base of RBS
-	;;
-	// GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks
-	// we may be reading ar.itc after writing to psr.l.  Avoid that message with
-	// this directive:
-	dv_serialize_data
-	mov.m r24=ar.rnat		// read ar.rnat (5 cyc lat)
-	lfetch.fault.excl.nt1 [r22]
-	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2
 
-	// ensure previous insn group is issued before we stall for srlz.i:
-	;;
-	srlz.i				// ensure new psr.l has been established
-	/////////////////////////////////////////////////////////////////////////////
-	////////// from this point on, execution is not interruptible anymore
-	/////////////////////////////////////////////////////////////////////////////
-	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2	// compute base of memory stack
-	cmp.ne pKStk,pUStk=r0,r0	// set pKStk <- 0, pUStk <- 1
-	;;
-	st1 [r16]=r0			// clear current->thread.on_ustack flag
-	mov ar.bspstore=r22		// switch to kernel RBS
-	mov b6=r18			// copy syscall entry-point to b6 (7 cyc)
-	add r3=TI_FLAGS+IA64_TASK_SIZE,r2
-	;;
-	ld4 r3=[r3]				// r2 = current_thread_info()->flags
-	mov r18=ar.bsp			// save (kernel) ar.bsp (12 cyc)
-	mov ar.rsc=0x3			// set eager mode, pl 0, little-endian, loadrs=0
-	br.call.sptk.many b7=ia64_syscall_setup
-	;;
-	ssm psr.i
-	movl r2=ia64_ret_from_syscall
-	;;
-	mov rp=r2				// set the real return addr
-	tbit.z p8,p0=r3,TIF_SYSCALL_TRACE
-	;;
-(p10)	br.cond.spnt.many ia64_ret_from_syscall	// p10==true means out registers are more than 8
-(p8)	br.call.sptk.many b6=b6		// ignore this return addr
-	br.cond.sptk ia64_trace_syscall
+	mov r2=r16				// A    get task addr to addl-addressable register
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
+	mov r31=pr				// I0   save pr (2 cyc)
+	;;
+	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
+	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
+	;;
+	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
+	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
+	nop.i 0
+	;;
+	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
+	nop.m 0
+	nop.i 0
+	;;
+	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
+	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
+	nop.i 0
+	;;
+	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
+	movl r8=PSR_ONE_BITS			// X
+	;;
+	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
+	mov r19=b6				// I0   save b6 (2 cyc)
+	mov r20=r1				// A    save caller's gp in r20
+	;;
+	or r29=r8,r29				// A    construct cr.ipsr value to save
+	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
+	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
+
+	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
+	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
+	br.call.sptk.many b7=ia64_syscall_setup	// B
+	;;
+	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
+	mov rp=r14				// I0   set the real return addr
+	nop.i 0
+	;;
+	ssm psr.i				// M2   we're on kernel stacks now, reenable irqs
+	tbit.z p8,p0=r3,TIF_SYSCALL_TRACE	// I0
+(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
+
+	nop.m 0
+(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
+	br.cond.spnt ia64_trace_syscall		// B
 END(fsys_bubble_down)
 
 	.rodata
diff -Nru a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
--- a/arch/ia64/kernel/gate.S	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/gate.S	2005-03-24 18:21:52 -08:00
@@ -72,38 +72,41 @@
 	 * bundle get executed.  The remaining code must be safe even if
 	 * they do not get executed.
 	 */
-	adds r17=-1024,r15
-	mov r10=0				// default to successful syscall execution
-	epc
+	adds r17=-1024,r15			// A
+	mov r10=0				// A    default to successful syscall execution
+	epc					// B	causes split-issue
 }
 	;;
-	rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be"
-	LOAD_FSYSCALL_TABLE(r14)
-
-	mov r16=IA64_KR(CURRENT)		// 12 cycle read latency
-	tnat.nz p10,p9=r15
-	mov r19=NR_syscalls-1
+	rsm psr.be				// M2 (5 cyc to srlz.d)
+	LOAD_FSYSCALL_TABLE(r14)		// X
+	;;
+	mov r16=IA64_KR(CURRENT)		// M2 (12 cyc)
+	shladd r18=r17,3,r14			// A
+	mov r19=NR_syscalls-1			// A
+	;;
+	lfetch [r18]				// M0|1
+	mov r29=psr				// M2 (12 cyc)
+	// If r17 is a NaT, p6 will be zero
+	cmp.geu p6,p7=r19,r17			// A    (sysnr > 0 && sysnr < 1024+NR_syscalls)?
+	;;
+	mov r21=ar.fpsr				// M2 (12 cyc)
+	tnat.nz p10,p9=r15			// I0
+	mov.i r26=ar.pfs			// I0 (would stall anyhow due to srlz.d...)
 	;;
-	shladd r18=r17,3,r14
+	srlz.d					// M0 (forces split-issue) ensure PSR.BE==0
+(p6)	ld8 r18=[r18]				// M0|1
+	nop.i 0
+	;;
+	nop.m 0
+(p6)	mov b7=r18				// I0
+(p6)	tbit.z.unc p8,p0=r18,0			// I0 (dual-issues with "mov b7=r18"!)
 
-	srlz.d
-	cmp.ne p8,p0=r0,r0			// p8 <- FALSE
-	/* Note: if r17 is a NaT, p6 will be set to zero.  */
-	cmp.geu p6,p7=r19,r17			// (syscall > 0 && syscall < 1024+NR_syscalls)?
-	;;
-(p6)	ld8 r18=[r18]
-	mov r21=ar.fpsr
-	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
-	;;
-(p6)	mov b7=r18
-(p6)	tbit.z p8,p0=r18,0
-(p8)	br.dptk.many b7
+	nop.m 0
+	nop.i 0
+(p8)	br.dptk.many b7				// B
 
-(p6)	rsm psr.i
-	mov r27=ar.rsc
-	mov r26=ar.pfs
-	;;
-	mov r29=psr				// read psr (12 cyc load latency)
+	mov r27=ar.rsc				// M2 (12 cyc)
+(p6)	rsm psr.i				// M2
 /*
  * brl.cond doesn't work as intended because the linker would convert this branch
  * into a branch to a PLT.  Perhaps there will be a way to avoid this with some
@@ -111,6 +114,8 @@
  * instead.
  */
 #ifdef CONFIG_ITANIUM
+(p6)	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
+	;;
 (p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
 	;;
 (p6)	mov b7=r14
diff -Nru a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
--- a/arch/ia64/kernel/head.S	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/head.S	2005-03-24 18:21:52 -08:00
@@ -15,6 +15,8 @@
  * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
  * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
  *   -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
+ * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com>
+ *   Support for CPU Hotplug
  */
 
 #include <linux/config.h>
@@ -29,6 +31,139 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
+#include <asm/mca_asm.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define SAL_PSR_BITS_TO_SET				\
+	(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL)
+
+#define SAVE_FROM_REG(src, ptr, dest)	\
+	mov dest=src;;						\
+	st8 [ptr]=dest,0x08
+
+#define RESTORE_REG(reg, ptr, _tmp)		\
+	ld8 _tmp=[ptr],0x08;;				\
+	mov reg=_tmp
+
+#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\
+	mov ar.lc=IA64_NUM_DBG_REGS-1;; 			\
+	mov _idx=0;; 								\
+1: 												\
+	SAVE_FROM_REG(_breg[_idx], ptr, _dest);;	\
+	add _idx=1,_idx;;							\
+	br.cloop.sptk.many 1b
+
+#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\
+	mov ar.lc=IA64_NUM_DBG_REGS-1;;			\
+	mov _idx=0;;							\
+_lbl:  RESTORE_REG(_breg[_idx], ptr, _tmp);;	\
+	add _idx=1, _idx;;						\
+	br.cloop.sptk.many _lbl
+
+#define SAVE_ONE_RR(num, _reg, _tmp) \
+	movl _tmp=(num<<61);;	\
+	mov _reg=rr[_tmp]
+
+#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
+	SAVE_ONE_RR(0,_r0, _tmp);; \
+	SAVE_ONE_RR(1,_r1, _tmp);; \
+	SAVE_ONE_RR(2,_r2, _tmp);; \
+	SAVE_ONE_RR(3,_r3, _tmp);; \
+	SAVE_ONE_RR(4,_r4, _tmp);; \
+	SAVE_ONE_RR(5,_r5, _tmp);; \
+	SAVE_ONE_RR(6,_r6, _tmp);; \
+	SAVE_ONE_RR(7,_r7, _tmp);;
+
+#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
+	st8 [ptr]=_r0, 8;; \
+	st8 [ptr]=_r1, 8;; \
+	st8 [ptr]=_r2, 8;; \
+	st8 [ptr]=_r3, 8;; \
+	st8 [ptr]=_r4, 8;; \
+	st8 [ptr]=_r5, 8;; \
+	st8 [ptr]=_r6, 8;; \
+	st8 [ptr]=_r7, 8;;
+
+#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \
+	mov		ar.lc=0x08-1;;						\
+	movl	_idx1=0x00;;						\
+RestRR:											\
+	dep.z	_idx2=_idx1,61,3;;					\
+	ld8		_tmp=[ptr],8;;						\
+	mov		rr[_idx2]=_tmp;;					\
+	srlz.d;;									\
+	add		_idx1=1,_idx1;;						\
+	br.cloop.sptk.few	RestRR
+
+#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \
+	movl reg1=sal_state_for_booting_cpu;;	\
+	ld8 reg2=[reg1];;
+
+/*
+ * Adjust region registers saved before starting to save
+ * break regs and rest of the states that need to be preserved.
+ */
+#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred)  \
+	SAVE_FROM_REG(b0,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b1,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b2,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b3,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b4,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b5,_reg1,_reg2);;						\
+	st8 [_reg1]=r1,0x08;;								\
+	st8 [_reg1]=r12,0x08;;								\
+	st8 [_reg1]=r13,0x08;;								\
+	SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);;				\
+	SAVE_FROM_REG(ar.pfs,_reg1,_reg2);;					\
+	SAVE_FROM_REG(ar.rnat,_reg1,_reg2);;				\
+	SAVE_FROM_REG(ar.unat,_reg1,_reg2);;				\
+	SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);;			\
+	SAVE_FROM_REG(cr.dcr,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.iva,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.pta,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.itv,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.pmv,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);;				\
+	SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);;				\
+	SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);;				\
+	st8 [_reg1]=r4,0x08;;								\
+	st8 [_reg1]=r5,0x08;;								\
+	st8 [_reg1]=r6,0x08;;								\
+	st8 [_reg1]=r7,0x08;;								\
+	st8 [_reg1]=_pred,0x08;;							\
+	SAVE_FROM_REG(ar.lc, _reg1, _reg2);;				\
+	stf.spill.nta [_reg1]=f2,16;;						\
+	stf.spill.nta [_reg1]=f3,16;;						\
+	stf.spill.nta [_reg1]=f4,16;;						\
+	stf.spill.nta [_reg1]=f5,16;;						\
+	stf.spill.nta [_reg1]=f16,16;;						\
+	stf.spill.nta [_reg1]=f17,16;;						\
+	stf.spill.nta [_reg1]=f18,16;;						\
+	stf.spill.nta [_reg1]=f19,16;;						\
+	stf.spill.nta [_reg1]=f20,16;;						\
+	stf.spill.nta [_reg1]=f21,16;;						\
+	stf.spill.nta [_reg1]=f22,16;;						\
+	stf.spill.nta [_reg1]=f23,16;;						\
+	stf.spill.nta [_reg1]=f24,16;;						\
+	stf.spill.nta [_reg1]=f25,16;;						\
+	stf.spill.nta [_reg1]=f26,16;;						\
+	stf.spill.nta [_reg1]=f27,16;;						\
+	stf.spill.nta [_reg1]=f28,16;;						\
+	stf.spill.nta [_reg1]=f29,16;;						\
+	stf.spill.nta [_reg1]=f30,16;;						\
+	stf.spill.nta [_reg1]=f31,16;;
+
+#else
+#define SET_AREA_FOR_BOOTING_CPU(a1, a2)
+#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3)
+#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
+#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
+#endif
+
+#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \
+	movl _tmp1=(num << 61);;	\
+	mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \
+	mov rr[_tmp1]=_tmp2
 
 	.section __special_page_section,"ax"
 
@@ -64,6 +199,12 @@
 	srlz.i
 	;;
 	/*
+	 * Save the region registers, predicate before they get clobbered
+	 */
+	SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15);
+	mov r25=pr;;
+
+	/*
 	 * Initialize kernel region registers:
 	 *	rr[0]: VHPT enabled, page size = PAGE_SHIFT
 	 *	rr[1]: VHPT enabled, page size = PAGE_SHIFT
@@ -76,32 +217,14 @@
 	 * We initialize all of them to prevent inadvertently assuming
 	 * something about the state of address translation early in boot.
 	 */
-	mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
-	movl r7=(0<<61)
-	mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
-	movl r9=(1<<61)
-	mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
-	movl r11=(2<<61)
-	mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
-	movl r13=(3<<61)
-	mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
-	movl r15=(4<<61)
-	mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
-	movl r17=(5<<61)
-	mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
-	movl r19=(6<<61)
-	mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
-	movl r21=(7<<61)
-	;;
-	mov rr[r7]=r6
-	mov rr[r9]=r8
-	mov rr[r11]=r10
-	mov rr[r13]=r12
-	mov rr[r15]=r14
-	mov rr[r17]=r16
-	mov rr[r19]=r18
-	mov rr[r21]=r20
-	;;
+	SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);;
+	SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);;
 	/*
 	 * Now pin mappings into the TLB for kernel text and data
 	 */
@@ -142,6 +265,12 @@
 	;;
 1:	// now we are in virtual mode
 
+	SET_AREA_FOR_BOOTING_CPU(r2, r16);
+
+	STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15);
+	SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25)
+	;;
+
 	// set IVT entry point---can't access I/O ports without it
 	movl r3=ia64_ivt
 	;;
@@ -211,12 +340,13 @@
 	mov IA64_KR(CURRENT_STACK)=r16
 	mov r13=r2
 	/*
-	 * Reserve space at the top of the stack for "struct pt_regs".  Kernel threads
-	 * don't store interesting values in that structure, but the space still needs
-	 * to be there because time-critical stuff such as the context switching can
-	 * be implemented more efficiently (for example, __switch_to()
+	 * Reserve space at the top of the stack for "struct pt_regs".  Kernel
+	 * threads don't store interesting values in that structure, but the space
+	 * still needs to be there because time-critical stuff such as the context
+	 * switching can be implemented more efficiently (for example, __switch_to()
 	 * always sets the psr.dfh bit of the task it is switching to).
 	 */
+
 	addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
 	addl r2=IA64_RBS_OFFSET,r2	// initialize the RSE
 	mov ar.rsc=0		// place RSE in enforced lazy mode
@@ -992,5 +1122,99 @@
 END(ia64_spinlock_contention)
 
 #endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+GLOBAL_ENTRY(ia64_jump_to_sal)
+	alloc r16=ar.pfs,1,0,0,0;;
+	rsm psr.i  | psr.ic
+{
+	flushrs
+	srlz.i
+}
+	tpa r25=in0
+	movl r18=tlb_purge_done;;
+	DATA_VA_TO_PA(r18);;
+	mov b1=r18 	// Return location
+	movl r18=ia64_do_tlb_purge;;
+	DATA_VA_TO_PA(r18);;
+	mov b2=r18 	// doing tlb_flush work
+	mov ar.rsc=0  // Put RSE  in enforced lazy, LE mode
+	movl r17=1f;;
+	DATA_VA_TO_PA(r17);;
+	mov cr.iip=r17
+	movl r16=SAL_PSR_BITS_TO_SET;;
+	mov cr.ipsr=r16
+	mov cr.ifs=r0;;
+	rfi;;
+1:
+	/*
+	 * Invalidate all TLB data/inst
+	 */
+	br.sptk.many b2;; // jump to tlb purge code
+
+tlb_purge_done:
+	RESTORE_REGION_REGS(r25, r17,r18,r19);;
+	RESTORE_REG(b0, r25, r17);;
+	RESTORE_REG(b1, r25, r17);;
+	RESTORE_REG(b2, r25, r17);;
+	RESTORE_REG(b3, r25, r17);;
+	RESTORE_REG(b4, r25, r17);;
+	RESTORE_REG(b5, r25, r17);;
+	ld8 r1=[r25],0x08;;
+	ld8 r12=[r25],0x08;;
+	ld8 r13=[r25],0x08;;
+	RESTORE_REG(ar.fpsr, r25, r17);;
+	RESTORE_REG(ar.pfs, r25, r17);;
+	RESTORE_REG(ar.rnat, r25, r17);;
+	RESTORE_REG(ar.unat, r25, r17);;
+	RESTORE_REG(ar.bspstore, r25, r17);;
+	RESTORE_REG(cr.dcr, r25, r17);;
+	RESTORE_REG(cr.iva, r25, r17);;
+	RESTORE_REG(cr.pta, r25, r17);;
+	RESTORE_REG(cr.itv, r25, r17);;
+	RESTORE_REG(cr.pmv, r25, r17);;
+	RESTORE_REG(cr.cmcv, r25, r17);;
+	RESTORE_REG(cr.lrr0, r25, r17);;
+	RESTORE_REG(cr.lrr1, r25, r17);;
+	ld8 r4=[r25],0x08;;
+	ld8 r5=[r25],0x08;;
+	ld8 r6=[r25],0x08;;
+	ld8 r7=[r25],0x08;;
+	ld8 r17=[r25],0x08;;
+	mov pr=r17,-1;;
+	RESTORE_REG(ar.lc, r25, r17);;
+	/*
+	 * Now Restore floating point regs
+	 */
+	ldf.fill.nta f2=[r25],16;;
+	ldf.fill.nta f3=[r25],16;;
+	ldf.fill.nta f4=[r25],16;;
+	ldf.fill.nta f5=[r25],16;;
+	ldf.fill.nta f16=[r25],16;;
+	ldf.fill.nta f17=[r25],16;;
+	ldf.fill.nta f18=[r25],16;;
+	ldf.fill.nta f19=[r25],16;;
+	ldf.fill.nta f20=[r25],16;;
+	ldf.fill.nta f21=[r25],16;;
+	ldf.fill.nta f22=[r25],16;;
+	ldf.fill.nta f23=[r25],16;;
+	ldf.fill.nta f24=[r25],16;;
+	ldf.fill.nta f25=[r25],16;;
+	ldf.fill.nta f26=[r25],16;;
+	ldf.fill.nta f27=[r25],16;;
+	ldf.fill.nta f28=[r25],16;;
+	ldf.fill.nta f29=[r25],16;;
+	ldf.fill.nta f30=[r25],16;;
+	ldf.fill.nta f31=[r25],16;;
+
+	/*
+	 * Now that we have done all the register restores
+	 * we are now ready for the big DIVE to SAL Land
+	 */
+	ssm psr.ic;;
+	srlz.d;;
+	br.ret.sptk.many b0;;
+END(ia64_jump_to_sal)
+#endif /* CONFIG_HOTPLUG_CPU */
 
 #endif /* CONFIG_SMP */
diff -Nru a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
--- a/arch/ia64/kernel/ia64_ksyms.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/ia64_ksyms.c	2005-03-24 18:21:52 -08:00
@@ -58,9 +58,6 @@
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
 
-#include <asm/unistd.h>
-EXPORT_SYMBOL(__ia64_syscall);
-
 /* from arch/ia64/lib */
 extern void __divsi3(void);
 extern void __udivsi3(void);
diff -Nru a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
--- a/arch/ia64/kernel/iosapic.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/iosapic.c	2005-03-24 18:21:52 -08:00
@@ -79,6 +79,7 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/string.h>
+#include <linux/bootmem.h>
 
 #include <asm/delay.h>
 #include <asm/hw_irq.h>
@@ -91,7 +92,6 @@
 
 
 #undef DEBUG_INTERRUPT_ROUTING
-#undef OVERRIDE_DEBUG
 
 #ifdef DEBUG_INTERRUPT_ROUTING
 #define DBG(fmt...)	printk(fmt)
@@ -99,19 +99,30 @@
 #define DBG(fmt...)
 #endif
 
+#define NR_PREALLOCATE_RTE_ENTRIES	(PAGE_SIZE / sizeof(struct iosapic_rte_info))
+#define RTE_PREALLOCATED	(1)
+
 static DEFINE_SPINLOCK(iosapic_lock);
 
 /* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
 
-static struct iosapic_intr_info {
+struct iosapic_rte_info {
+	struct list_head rte_list;	/* node in list of RTEs sharing the same vector */
 	char __iomem	*addr;		/* base address of IOSAPIC */
-	u32		low32;		/* current value of low word of Redirection table entry */
 	unsigned int	gsi_base;	/* first GSI assigned to this IOSAPIC */
-	char		rte_index;	/* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */
+	char		rte_index;	/* IOSAPIC RTE index */
+	int		refcnt;		/* reference counter */
+	unsigned int	flags;		/* flags */
+} ____cacheline_aligned;
+
+static struct iosapic_intr_info {
+	struct list_head rtes;		/* RTEs using this vector (empty => not an IOSAPIC interrupt) */
+	int		count;		/* # of RTEs that shares this vector */
+	u32		low32;		/* current value of low word of Redirection table entry */
+	unsigned int	dest;		/* destination CPU physical ID */
 	unsigned char	dmode	: 3;	/* delivery mode (see iosapic.h) */
 	unsigned char 	polarity: 1;	/* interrupt polarity (see iosapic.h) */
 	unsigned char	trigger	: 1;	/* trigger mode (see iosapic.h) */
-	int		refcnt;		/* reference counter */
 } iosapic_intr_info[IA64_NUM_VECTORS];
 
 static struct iosapic {
@@ -127,6 +138,8 @@
 
 static unsigned char pcat_compat __initdata;	/* 8259 compatibility flag */
 
+static int iosapic_kmalloc_ok;
+static LIST_HEAD(free_rte_list);
 
 /*
  * Find an IOSAPIC associated with a GSI
@@ -148,10 +161,12 @@
 _gsi_to_vector (unsigned int gsi)
 {
 	struct iosapic_intr_info *info;
+	struct iosapic_rte_info *rte;
 
 	for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
-		if (info->gsi_base + info->rte_index == gsi)
-			return info - iosapic_intr_info;
+		list_for_each_entry(rte, &info->rtes, rte_list)
+			if (rte->gsi_base + rte->rte_index == gsi)
+				return info - iosapic_intr_info;
 	return -1;
 }
 
@@ -168,33 +183,52 @@
 int
 gsi_to_irq (unsigned int gsi)
 {
+	unsigned long flags;
+	int irq;
 	/*
 	 * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq
 	 * numbers...
 	 */
-	return _gsi_to_vector(gsi);
+	spin_lock_irqsave(&iosapic_lock, flags);
+	{
+		irq = _gsi_to_vector(gsi);
+	}
+	spin_unlock_irqrestore(&iosapic_lock, flags);
+
+	return irq;
+}
+
+static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec)
+{
+	struct iosapic_rte_info *rte;
+
+	list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
+		if (rte->gsi_base + rte->rte_index == gsi)
+			return rte;
+	return NULL;
 }
 
 static void
-set_rte (unsigned int vector, unsigned int dest, int mask)
+set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
 {
 	unsigned long pol, trigger, dmode;
 	u32 low32, high32;
 	char __iomem *addr;
 	int rte_index;
 	char redir;
+	struct iosapic_rte_info *rte;
 
 	DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
 
-	rte_index = iosapic_intr_info[vector].rte_index;
-	if (rte_index < 0)
+	rte = gsi_vector_to_rte(gsi, vector);
+	if (!rte)
 		return;		/* not an IOSAPIC interrupt */
 
-	addr    = iosapic_intr_info[vector].addr;
+	rte_index = rte->rte_index;
+	addr	= rte->addr;
 	pol     = iosapic_intr_info[vector].polarity;
 	trigger = iosapic_intr_info[vector].trigger;
 	dmode   = iosapic_intr_info[vector].dmode;
-	vector &= (~IA64_IRQ_REDIRECTED);
 
 	redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
 
@@ -222,6 +256,7 @@
 	iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
 	iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
 	iosapic_intr_info[vector].low32 = low32;
+	iosapic_intr_info[vector].dest = dest;
 }
 
 static void
@@ -238,18 +273,20 @@
 	u32 low32;
 	int rte_index;
 	ia64_vector vec = irq_to_vector(irq);
+	struct iosapic_rte_info *rte;
 
-	addr = iosapic_intr_info[vec].addr;
-	rte_index = iosapic_intr_info[vec].rte_index;
-
-	if (rte_index < 0)
+	if (list_empty(&iosapic_intr_info[vec].rtes))
 		return;			/* not an IOSAPIC interrupt! */
 
 	spin_lock_irqsave(&iosapic_lock, flags);
 	{
 		/* set only the mask bit */
 		low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
-		iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+			addr = rte->addr;
+			rte_index = rte->rte_index;
+			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		}
 	}
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 }
@@ -262,16 +299,19 @@
 	u32 low32;
 	int rte_index;
 	ia64_vector vec = irq_to_vector(irq);
+	struct iosapic_rte_info *rte;
 
-	addr = iosapic_intr_info[vec].addr;
-	rte_index = iosapic_intr_info[vec].rte_index;
-	if (rte_index < 0)
+	if (list_empty(&iosapic_intr_info[vec].rtes))
 		return;			/* not an IOSAPIC interrupt! */
 
 	spin_lock_irqsave(&iosapic_lock, flags);
 	{
 		low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
-		iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+			addr = rte->addr;
+			rte_index = rte->rte_index;
+			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		}
 	}
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 }
@@ -287,6 +327,7 @@
 	char __iomem *addr;
 	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
 	ia64_vector vec;
+	struct iosapic_rte_info *rte;
 
 	irq &= (~IA64_IRQ_REDIRECTED);
 	vec = irq_to_vector(irq);
@@ -296,10 +337,7 @@
 
 	dest = cpu_physical_id(first_cpu(mask));
 
-	rte_index = iosapic_intr_info[vec].rte_index;
-	addr = iosapic_intr_info[vec].addr;
-
-	if (rte_index < 0)
+	if (list_empty(&iosapic_intr_info[vec].rtes))
 		return;			/* not an IOSAPIC interrupt */
 
 	set_irq_affinity_info(irq, dest, redir);
@@ -319,8 +357,13 @@
 			low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
 
 		iosapic_intr_info[vec].low32 = low32;
-		iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
-		iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		iosapic_intr_info[vec].dest = dest;
+		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+			addr = rte->addr;
+			rte_index = rte->rte_index;
+			iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
+			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+		}
 	}
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 #endif
@@ -341,9 +384,11 @@
 iosapic_end_level_irq (unsigned int irq)
 {
 	ia64_vector vec = irq_to_vector(irq);
+	struct iosapic_rte_info *rte;
 
 	move_irq(irq);
-	iosapic_eoi(iosapic_intr_info[vec].addr, vec);
+	list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
+		iosapic_eoi(rte->addr, vec);
 }
 
 #define iosapic_shutdown_level_irq	mask_irq
@@ -423,6 +468,34 @@
 	return iosapic_read(addr, IOSAPIC_VERSION);
 }
 
+static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol)
+{
+	int i, vector = -1, min_count = -1;
+	struct iosapic_intr_info *info;
+
+	/*
+	 * shared vectors for edge-triggered interrupts are not
+	 * supported yet
+	 */
+	if (trigger == IOSAPIC_EDGE)
+		return -1;
+
+	for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
+		info = &iosapic_intr_info[i];
+		if (info->trigger == trigger && info->polarity == pol &&
+		    (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) {
+			if (min_count == -1 || info->count < min_count) {
+				vector = i;
+				min_count = info->count;
+			}
+		}
+	}
+	if (vector < 0)
+		panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+
+	return vector;
+}
+
 /*
  * if the given vector is already owned by other,
  *  assign a new vector for the other and make the vector available
@@ -432,19 +505,63 @@
 {
 	int new_vector;
 
-	if (iosapic_intr_info[vector].rte_index >= 0 || iosapic_intr_info[vector].addr
-	    || iosapic_intr_info[vector].gsi_base || iosapic_intr_info[vector].dmode
-	    || iosapic_intr_info[vector].polarity || iosapic_intr_info[vector].trigger)
-	{
+	if (!list_empty(&iosapic_intr_info[vector].rtes)) {
 		new_vector = assign_irq_vector(AUTO_ASSIGN);
 		printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
 		memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
 		       sizeof(struct iosapic_intr_info));
+		INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
+		list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes);
 		memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
-		iosapic_intr_info[vector].rte_index = -1;
+		iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
+		INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
 	}
 }
 
+static struct iosapic_rte_info *iosapic_alloc_rte (void)
+{
+	int i;
+	struct iosapic_rte_info *rte;
+	int preallocated = 0;
+
+	if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
+		rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES);
+		if (!rte)
+			return NULL;
+		for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
+			list_add(&rte->rte_list, &free_rte_list);
+	}
+
+	if (!list_empty(&free_rte_list)) {
+		rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list);
+		list_del(&rte->rte_list);
+		preallocated++;
+	} else {
+		rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
+		if (!rte)
+			return NULL;
+	}
+
+	memset(rte, 0, sizeof(struct iosapic_rte_info));
+	if (preallocated)
+		rte->flags |= RTE_PREALLOCATED;
+
+	return rte;
+}
+
+static void iosapic_free_rte (struct iosapic_rte_info *rte)
+{
+	if (rte->flags & RTE_PREALLOCATED)
+		list_add_tail(&rte->rte_list, &free_rte_list);
+	else
+		kfree(rte);
+}
+
+static inline int vector_is_shared (int vector)
+{
+	return (iosapic_intr_info[vector].count > 1);
+}
+
 static void
 register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	       unsigned long polarity, unsigned long trigger)
@@ -455,6 +572,7 @@
 	int index;
 	unsigned long gsi_base;
 	void __iomem *iosapic_address;
+	struct iosapic_rte_info *rte;
 
 	index = find_iosapic(gsi);
 	if (index < 0) {
@@ -465,14 +583,33 @@
 	iosapic_address = iosapic_lists[index].addr;
 	gsi_base = iosapic_lists[index].gsi_base;
 
-	rte_index = gsi - gsi_base;
-	iosapic_intr_info[vector].rte_index = rte_index;
+	rte = gsi_vector_to_rte(gsi, vector);
+	if (!rte) {
+		rte = iosapic_alloc_rte();
+		if (!rte) {
+			printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
+			return;
+		}
+
+		rte_index = gsi - gsi_base;
+		rte->rte_index	= rte_index;
+		rte->addr	= iosapic_address;
+		rte->gsi_base	= gsi_base;
+		rte->refcnt++;
+		list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes);
+		iosapic_intr_info[vector].count++;
+	}
+	else if (vector_is_shared(vector)) {
+		struct iosapic_intr_info *info = &iosapic_intr_info[vector];
+		if (info->trigger != trigger || info->polarity != polarity) {
+			printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
+			return;
+		}
+	}
+
 	iosapic_intr_info[vector].polarity = polarity;
 	iosapic_intr_info[vector].dmode    = delivery;
-	iosapic_intr_info[vector].addr     = iosapic_address;
-	iosapic_intr_info[vector].gsi_base = gsi_base;
 	iosapic_intr_info[vector].trigger  = trigger;
-	iosapic_intr_info[vector].refcnt++;
 
 	if (trigger == IOSAPIC_EDGE)
 		irq_type = &irq_type_iosapic_edge;
@@ -495,18 +632,25 @@
 	static int cpu = -1;
 
 	/*
+	 * In case of vector shared by multiple RTEs, all RTEs that
+	 * share the vector need to use the same destination CPU.
+	 */
+	if (!list_empty(&iosapic_intr_info[vector].rtes))
+		return iosapic_intr_info[vector].dest;
+
+	/*
 	 * If the platform supports redirection via XTP, let it
 	 * distribute interrupts.
 	 */
 	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
-		return hard_smp_processor_id();
+		return cpu_physical_id(smp_processor_id());
 
 	/*
 	 * Some interrupts (ACPI SCI, for instance) are registered
 	 * before the BSP is marked as online.
 	 */
 	if (!cpu_online(smp_processor_id()))
-		return hard_smp_processor_id();
+		return cpu_physical_id(smp_processor_id());
 
 #ifdef CONFIG_NUMA
 	{
@@ -553,7 +697,7 @@
 
 	return cpu_physical_id(cpu);
 #else
-	return hard_smp_processor_id();
+	return cpu_physical_id(smp_processor_id());
 #endif
 }
 
@@ -566,10 +710,12 @@
 iosapic_register_intr (unsigned int gsi,
 		       unsigned long polarity, unsigned long trigger)
 {
-	int vector;
+	int vector, mask = 1;
 	unsigned int dest;
 	unsigned long flags;
-
+	struct iosapic_rte_info *rte;
+	u32 low32;
+again:
 	/*
 	 * If this GSI has already been registered (i.e., it's a
 	 * shared interrupt, or we lost a race to register it),
@@ -579,19 +725,45 @@
 	{
 		vector = gsi_to_vector(gsi);
 		if (vector > 0) {
-			iosapic_intr_info[vector].refcnt++;
+			rte = gsi_vector_to_rte(gsi, vector);
+			rte->refcnt++;
 			spin_unlock_irqrestore(&iosapic_lock, flags);
 			return vector;
 		}
+	}
+	spin_unlock_irqrestore(&iosapic_lock, flags);
+
+	/* If vector is running out, we try to find a sharable vector */
+	vector = assign_irq_vector_nopanic(AUTO_ASSIGN);
+	if (vector < 0)
+		vector = iosapic_find_sharable_vector(trigger, polarity);
+
+	spin_lock_irqsave(&irq_descp(vector)->lock, flags);
+	spin_lock(&iosapic_lock);
+	{
+		if (gsi_to_vector(gsi) > 0) {
+			if (list_empty(&iosapic_intr_info[vector].rtes))
+				free_irq_vector(vector);
+			spin_unlock(&iosapic_lock);
+			spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+			goto again;
+		}
 
-		vector = assign_irq_vector(AUTO_ASSIGN);
 		dest = get_target_cpu(gsi, vector);
 		register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
-			polarity, trigger);
+			      polarity, trigger);
 
-		set_rte(vector, dest, 1);
+		/*
+		 * If the vector is shared and already unmasked for
+		 * other interrupt sources, don't mask it.
+		 */
+		low32 = iosapic_intr_info[vector].low32;
+		if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK))
+			mask = 0;
+		set_rte(gsi, vector, dest, mask);
 	}
-	spin_unlock_irqrestore(&iosapic_lock, flags);
+	spin_unlock(&iosapic_lock);
+	spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
 
 	printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
 	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
@@ -608,8 +780,10 @@
 	unsigned long flags;
 	int irq, vector;
 	irq_desc_t *idesc;
-	int rte_index;
+	u32 low32;
 	unsigned long trigger, polarity;
+	unsigned int dest;
+	struct iosapic_rte_info *rte;
 
 	/*
 	 * If the irq associated with the gsi is not found,
@@ -628,54 +802,56 @@
 	spin_lock_irqsave(&idesc->lock, flags);
 	spin_lock(&iosapic_lock);
 	{
-		rte_index = iosapic_intr_info[vector].rte_index;
-		if (rte_index < 0) {
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&idesc->lock, flags);
+		if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
 			printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
 			WARN_ON(1);
-			return;
+			goto out;
 		}
 
-		if (--iosapic_intr_info[vector].refcnt > 0) {
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&idesc->lock, flags);
-			return;
-		}
+		if (--rte->refcnt > 0)
+			goto out;
 
-		/*
-		 * If interrupt handlers still exist on the irq
-		 * associated with the gsi, don't unregister the
-		 * interrupt.
-		 */
-		if (idesc->action) {
-			iosapic_intr_info[vector].refcnt++;
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&idesc->lock, flags);
-			printk(KERN_WARNING "Cannot unregister GSI. IRQ %u is still in use.\n", irq);
-			return;
-		}
+		/* Mask the interrupt */
+		low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
+		iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32);
+
+		/* Remove the rte entry from the list */
+		list_del(&rte->rte_list);
+		iosapic_intr_info[vector].count--;
+		iosapic_free_rte(rte);
 
-		/* Clear the interrupt controller descriptor. */
-		idesc->handler = &no_irq_type;
-
-		trigger  = iosapic_intr_info[vector].trigger;
+		trigger	 = iosapic_intr_info[vector].trigger;
 		polarity = iosapic_intr_info[vector].polarity;
+		dest     = iosapic_intr_info[vector].dest;
+		printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
+		       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
+		       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
+		       cpu_logical_id(dest), dest, vector);
+
+		if (list_empty(&iosapic_intr_info[vector].rtes)) {
+			/* Sanity check */
+			BUG_ON(iosapic_intr_info[vector].count);
+
+			/* Clear the interrupt controller descriptor */
+			idesc->handler = &no_irq_type;
+
+			/* Clear the interrupt information */
+			memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+			iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
+			INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
+
+			if (idesc->action) {
+				printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq);
+				WARN_ON(1);
+			}
 
-		/* Clear the interrupt information. */
-		memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
-		iosapic_intr_info[vector].rte_index = -1;	/* mark as unused */
+			/* Free the interrupt vector */
+			free_irq_vector(vector);
+		}
 	}
+ out:
 	spin_unlock(&iosapic_lock);
 	spin_unlock_irqrestore(&idesc->lock, flags);
-
-	/* Free the interrupt vector */
-	free_irq_vector(vector);
-
-	printk(KERN_INFO "GSI %u (%s, %s) -> vector %d unregisterd.\n",
-	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
-	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
-	       vector);
 }
 #endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
 
@@ -725,7 +901,7 @@
 	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
 	       cpu_logical_id(dest), dest, vector);
 
-	set_rte(vector, dest, mask);
+	set_rte(gsi, vector, dest, mask);
 	return vector;
 }
 
@@ -740,7 +916,7 @@
 			  unsigned long trigger)
 {
 	int vector;
-	unsigned int dest = hard_smp_processor_id();
+	unsigned int dest = cpu_physical_id(smp_processor_id());
 
 	vector = isa_irq_to_vector(isa_irq);
 
@@ -751,7 +927,7 @@
 	    polarity == IOSAPIC_POL_HIGH ? "high" : "low",
 	    cpu_logical_id(dest), dest, vector);
 
-	set_rte(vector, dest, 1);
+	set_rte(gsi, vector, dest, 1);
 }
 
 void __init
@@ -759,8 +935,10 @@
 {
 	int vector;
 
-	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
-		iosapic_intr_info[vector].rte_index = -1;	/* mark as unused */
+	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
+		iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
+		INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);	/* mark as unused */
+	}
 
 	pcat_compat = system_pcat_compat;
 	if (pcat_compat) {
@@ -826,3 +1004,10 @@
 	return;
 }
 #endif
+
+static int __init iosapic_enable_kmalloc (void)
+{
+	iosapic_kmalloc_ok = 1;
+	return 0;
+}
+core_initcall (iosapic_enable_kmalloc);
diff -Nru a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
--- a/arch/ia64/kernel/irq_ia64.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/irq_ia64.c	2005-03-24 18:21:52 -08:00
@@ -63,17 +63,27 @@
 static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
 
 int
-assign_irq_vector (int irq)
+assign_irq_vector_nopanic (int irq)
 {
 	int pos, vector;
  again:
 	pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
 	vector = IA64_FIRST_DEVICE_VECTOR + pos;
 	if (vector > IA64_LAST_DEVICE_VECTOR)
-		/* XXX could look for sharable vectors instead of panic'ing... */
-		panic("assign_irq_vector: out of interrupt vectors!");
+		return -1;
 	if (test_and_set_bit(pos, ia64_vector_mask))
 		goto again;
+	return vector;
+}
+
+int
+assign_irq_vector (int irq)
+{
+	int vector = assign_irq_vector_nopanic(irq);
+
+	if (vector < 0)
+		panic("assign_irq_vector: out of interrupt vectors!");
+
 	return vector;
 }
 
diff -Nru a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
--- a/arch/ia64/kernel/ivt.S	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/ivt.S	2005-03-24 18:21:52 -08:00
@@ -1,7 +1,7 @@
 /*
  * arch/ia64/kernel/ivt.S
  *
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
  *	Stephane Eranian <eranian@hpl.hp.com>
  *	David Mosberger <davidm@hpl.hp.com>
  * Copyright (C) 2000, 2002-2003 Intel Co
@@ -687,82 +687,118 @@
 	 * to prevent leaking bits from kernel to user level.
 	 */
 	DBG_FAULT(11)
-	mov r16=IA64_KR(CURRENT)		// r16 = current task; 12 cycle read lat.
-	mov r17=cr.iim
-	mov r18=__IA64_BREAK_SYSCALL
-	mov r21=ar.fpsr
-	mov r29=cr.ipsr
-	mov r19=b6
-	mov r25=ar.unat
-	mov r27=ar.rsc
-	mov r26=ar.pfs
-	mov r28=cr.iip
-	mov r31=pr				// prepare to save predicates
-	mov r20=r1
-	;;
+	mov.m r16=IA64_KR(CURRENT)		// M2 r16 <- current task (12 cyc)
+	mov r29=cr.ipsr				// M2 (12 cyc)
+	mov r31=pr				// I0 (2 cyc)
+
+	mov r17=cr.iim				// M2 (2 cyc)
+	mov.m r27=ar.rsc			// M2 (12 cyc)
+	mov r18=__IA64_BREAK_SYSCALL		// A
+
+	mov.m ar.rsc=0				// M2
+	mov.m r21=ar.fpsr			// M2 (12 cyc)
+	mov r19=b6				// I0 (2 cyc)
+	;;
+	mov.m r23=ar.bspstore			// M2 (12 cyc)
+	mov.m r24=ar.rnat			// M2 (5 cyc)
+	mov.i r26=ar.pfs			// I0 (2 cyc)
+
+	invala					// M0|1
+	nop.m 0					// M
+	mov r20=r1				// A			save r1
+
+	nop.m 0
+	movl r30=sys_call_table			// X
+
+	mov r28=cr.iip				// M2 (2 cyc)
+	cmp.eq p0,p7=r18,r17			// I0 is this a system call?
+(p7)	br.cond.spnt non_syscall		// B  no ->
+	//
+	// From this point on, we are definitely on the syscall-path
+	// and we can use (non-banked) scratch registers.
+	//
+///////////////////////////////////////////////////////////////////////
+	mov r1=r16				// A    move task-pointer to "addl"-addressable reg
+	mov r2=r16				// A    setup r2 for ia64_syscall_setup
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// A	r9 = &current_thread_info()->flags
+
 	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
-	cmp.eq p0,p7=r18,r17			// is this a system call? (p7 <- false, if so)
-(p7)	br.cond.spnt non_syscall
+	adds r15=-1024,r15			// A    subtract 1024 from syscall number
+	mov r3=NR_syscalls - 1
 	;;
-	ld1 r17=[r16]				// load current->thread.on_ustack flag
-	st1 [r16]=r0				// clear current->thread.on_ustack flag
-	add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16	// set r1 for MINSTATE_START_SAVE_MIN_VIRT
+	ld1.bias r17=[r16]			// M0|1 r17 = current->thread.on_ustack flag
+	ld4 r9=[r9]				// M0|1 r9 = current_thread_info()->flags
+	extr.u r8=r29,41,2			// I0   extract ei field from cr.ipsr
+
+	shladd r30=r15,3,r30			// A    r30 = sys_call_table + 8*(syscall-1024)
+	addl r22=IA64_RBS_OFFSET,r1		// A    compute base of RBS
+	cmp.leu p6,p7=r15,r3			// A    syscall number in range?
 	;;
-	invala
 
-	/* adjust return address so we skip over the break instruction: */
+	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch RBS
+(p6)	ld8 r30=[r30]				// M0|1 load address of syscall entry point
+	tnat.nz.or p7,p0=r15			// I0	is syscall nr a NaT?
 
-	extr.u r8=r29,41,2			// extract ei field from cr.ipsr
-	;;
-	cmp.eq p6,p7=2,r8			// isr.ei==2?
-	mov r2=r1				// setup r2 for ia64_syscall_setup
-	;;
-(p6)	mov r8=0				// clear ei to 0
-(p6)	adds r28=16,r28				// switch cr.iip to next bundle cr.ipsr.ei wrapped
-(p7)	adds r8=1,r8				// increment ei to next slot
-	;;
-	cmp.eq pKStk,pUStk=r0,r17		// are we in kernel mode already?
-	dep r29=r8,r29,41,2			// insert new ei into cr.ipsr
+	mov.m ar.bspstore=r22			// M2   switch to kernel RBS
+	cmp.eq p8,p9=2,r8			// A    isr.ei==2?
 	;;
 
-	// switch from user to kernel RBS:
-	MINSTATE_START_SAVE_MIN_VIRT
-	br.call.sptk.many b7=ia64_syscall_setup
-	;;
-	MINSTATE_END_SAVE_MIN_VIRT		// switch to bank 1
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
-	mov r3=NR_syscalls - 1
-	;;
-(p15)	ssm psr.i				// restore psr.i
-	// p10==true means out registers are more than 8 or r15's Nat is true
-(p10)	br.cond.spnt.many ia64_ret_from_syscall
-	;;
-	movl r16=sys_call_table
+(p8)	mov r8=0				// A    clear ei to 0
+(p7)	movl r30=sys_ni_syscall			// X
 
-	adds r15=-1024,r15			// r15 contains the syscall number---subtract 1024
-	movl r2=ia64_ret_from_syscall
-	;;
-	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
-	cmp.leu p6,p7=r15,r3			// (syscall > 0 && syscall < 1024 + NR_syscalls) ?
-	mov rp=r2				// set the real return addr
+(p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
+(p9)	adds r8=1,r8				// A    increment ei to next slot
+	nop.i 0
 	;;
-(p6)	ld8 r20=[r20]				// load address of syscall entry point
-(p7)	movl r20=sys_ni_syscall
 
-	add r2=TI_FLAGS+IA64_TASK_SIZE,r13
-	;;
-	ld4 r2=[r2]				// r2 = current_thread_info()->flags
-	;;
-	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// mask trace or audit
-	;;
-	cmp.eq p8,p0=r2,r0
-	mov b6=r20
-	;;
-(p8)	br.call.sptk.many b6=b6			// ignore this return addr
-	br.cond.sptk ia64_trace_syscall
+	mov.m r25=ar.unat			// M2 (5 cyc)
+	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
+	adds r15=1024,r15			// A    restore original syscall number
+	//
+	// If any of the above loads miss in L1D, we'll stall here until
+	// the data arrives.
+	//
+///////////////////////////////////////////////////////////////////////
+	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+	mov b6=r30				// I0   setup syscall handler branch reg early
+	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
+
+	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
+	mov r18=ar.bsp				// M2 (12 cyc)
+(pKStk)	br.cond.spnt .break_fixup		// B	we're already in kernel-mode -- fix up RBS
+	;;
+.back_from_break_fixup:
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A    compute base of memory stack
+	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
+	br.call.sptk.many b7=ia64_syscall_setup	// B
+1:
+	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
+	nop 0
+	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
+	;;
+
+	ssm psr.ic | PSR_DEFAULT_BITS		// M2	now it's safe to re-enable intr.-collection
+	movl r3=ia64_ret_from_syscall		// X
+	;;
+
+	srlz.i					// M0   ensure interruption collection is on
+	mov rp=r3				// I0   set the real return addr
+(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
+
+(p15)	ssm psr.i				// M2   restore psr.i
+(p14)	br.call.sptk.many b6=b6			// B    invoke syscall-handker (ignore return addr)
+	br.cond.spnt.many ia64_trace_syscall	// B	do syscall-tracing thingamagic
 	// NOT REACHED
+///////////////////////////////////////////////////////////////////////
+	// On entry, we optimistically assumed that we're coming from user-space.
+	// For the rare cases where a system-call is done from within the kernel,
+	// we fix things up at this point:
+.break_fixup:
+	add r1=-IA64_PT_REGS_SIZE,sp		// A    allocate space for pt_regs structure
+	mov ar.rnat=r24				// M2	restore kernel's AR.RNAT
+	;;
+	mov ar.bspstore=r23			// M2	restore kernel's AR.BSPSTORE
+	br.cond.sptk .back_from_break_fixup
 END(break_fault)
 
 	.org ia64_ivt+0x3000
@@ -837,8 +873,6 @@
 	 *	- r31: saved pr
 	 *	-  b0: original contents (to be saved)
 	 * On exit:
-	 *	- executing on bank 1 registers
-	 *	- psr.ic enabled, interrupts restored
 	 *	-  p10: TRUE if syscall is invoked with more than 8 out
 	 *		registers or r15's Nat is true
 	 *	-  r1: kernel's gp
@@ -846,8 +880,11 @@
 	 *	-  r8: -EINVAL if p10 is true
 	 *	- r12: points to kernel stack
 	 *	- r13: points to current task
+	 *	- r14: preserved (same as on entry)
+	 *	- p13: preserved
 	 *	- p15: TRUE if interrupts need to be re-enabled
 	 *	- ar.fpsr: set to kernel settings
+	 *	-  b6: preserved (same as on entry)
 	 */
 GLOBAL_ENTRY(ia64_syscall_setup)
 #if PT(B6) != 0
@@ -915,10 +952,10 @@
 (p13)	mov in5=-1
 	;;
 	st8 [r16]=r21,PT(R8)-PT(AR_FPSR)	// save ar.fpsr
-	tnat.nz p14,p0=in6
+	tnat.nz p13,p0=in6
 	cmp.lt p10,p9=r11,r8	// frame size can't be more than local+8
 	;;
-	stf8 [r16]=f1		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
+	mov r8=1
 (p9)	tnat.nz p10,p0=r15
 	adds r12=-16,r1		// switch to kernel memory stack (with 16 bytes of scratch)
 
@@ -929,9 +966,9 @@
 	mov r13=r2				// establish `current'
 	movl r1=__gp				// establish kernel global pointer
 	;;
-(p14)	mov in6=-1
+	st8 [r16]=r8		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
+(p13)	mov in6=-1
 (p8)	mov in7=-1
-	nop.i 0
 
 	cmp.eq pSys,pNonSys=r0,r0		// set pSys=1, pNonSys=0
 	movl r17=FPSR_DEFAULT
@@ -999,6 +1036,8 @@
 	FAULT(17)
 
 ENTRY(non_syscall)
+	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
+	;;
 	SAVE_MIN_WITH_COVER
 
 	// There is no particular reason for this code to be here, other than that
diff -Nru a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
--- a/arch/ia64/kernel/mca_asm.S	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/mca_asm.S	2005-03-24 18:21:52 -08:00
@@ -110,46 +110,19 @@
 	.global ia64_os_mca_dispatch_end
 	.global ia64_sal_to_os_handoff_state
 	.global	ia64_os_to_sal_handoff_state
+	.global ia64_do_tlb_purge
 
 	.text
 	.align 16
 
-ia64_os_mca_dispatch:
-
-	// Serialize all MCA processing
-	mov	r3=1;;
-	LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
-ia64_os_mca_spin:
-	xchg8	r4=[r2],r3;;
-	cmp.ne	p6,p0=r4,r0
-(p6)	br ia64_os_mca_spin
-
-	// Save the SAL to OS MCA handoff state as defined
-	// by SAL SPEC 3.0
-	// NOTE : The order in which the state gets saved
-	//	  is dependent on the way the C-structure
-	//	  for ia64_mca_sal_to_os_state_t has been
-	//	  defined in include/asm/mca.h
-	SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
-	;;
-
-	// LOG PROCESSOR STATE INFO FROM HERE ON..
-begin_os_mca_dump:
-	br	ia64_os_mca_proc_state_dump;;
-
-ia64_os_mca_done_dump:
-
-	LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56)
-	;;
-	ld8 r18=[r16]		// Get processor state parameter on existing PALE_CHECK.
-	;;
-	tbit.nz p6,p7=r18,60
-(p7)	br.spnt done_tlb_purge_and_reload
-
-	// The following code purges TC and TR entries. Then reload all TC entries.
-	// Purge percpu data TC entries.
-begin_tlb_purge_and_reload:
+/*
+ * Just the TLB purge part is moved to a separate function
+ * so we can re-use the code for cpu hotplug code as well
+ * Caller should now setup b1, so we can branch once the
+ * tlb flush is complete.
+ */
 
+ia64_do_tlb_purge:
 #define O(member)	IA64_CPUINFO_##member##_OFFSET
 
 	GET_THIS_PADDR(r2, cpu_info)	// load phys addr of cpu_info into r2
@@ -230,6 +203,51 @@
 	;;
 	srlz.i
 	;;
+	// Now branch away to caller.
+	br.sptk.many b1
+	;;
+
+ia64_os_mca_dispatch:
+
+	// Serialize all MCA processing
+	mov	r3=1;;
+	LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
+ia64_os_mca_spin:
+	xchg8	r4=[r2],r3;;
+	cmp.ne	p6,p0=r4,r0
+(p6)	br ia64_os_mca_spin
+
+	// Save the SAL to OS MCA handoff state as defined
+	// by SAL SPEC 3.0
+	// NOTE : The order in which the state gets saved
+	//	  is dependent on the way the C-structure
+	//	  for ia64_mca_sal_to_os_state_t has been
+	//	  defined in include/asm/mca.h
+	SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+	;;
+
+	// LOG PROCESSOR STATE INFO FROM HERE ON..
+begin_os_mca_dump:
+	br	ia64_os_mca_proc_state_dump;;
+
+ia64_os_mca_done_dump:
+
+	LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56)
+	;;
+	ld8 r18=[r16]		// Get processor state parameter on existing PALE_CHECK.
+	;;
+	tbit.nz p6,p7=r18,60
+(p7)	br.spnt done_tlb_purge_and_reload
+
+	// The following code purges TC and TR entries. Then reload all TC entries.
+	// Purge percpu data TC entries.
+begin_tlb_purge_and_reload:
+	movl r18=ia64_reload_tr;;
+	LOAD_PHYSICAL(p0,r18,ia64_reload_tr);;
+	mov b1=r18;;
+	br.sptk.many ia64_do_tlb_purge;;
+
+ia64_reload_tr:
 	// Finally reload the TR registers.
 	// 1. Reload DTR/ITR registers for kernel.
 	mov r18=KERNEL_TR_PAGE_SHIFT<<2
diff -Nru a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
--- a/arch/ia64/kernel/perfmon.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/perfmon.c	2005-03-24 18:21:52 -08:00
@@ -480,14 +480,6 @@
 #define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
 
 typedef struct {
-	int	debug;		/* turn on/off debugging via syslog */
-	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
-	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
-	int	expert_mode;	/* turn on/off value checking */
-	int 	debug_pfm_read;
-} pfm_sysctl_t;
-
-typedef struct {
 	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
 	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
 	unsigned long pfm_ovfl_intr_count; 		/* keep track of ovfl interrupts */
@@ -514,8 +506,8 @@
 static pmu_config_t		*pmu_conf;
 
 /* sysctl() controls */
-static pfm_sysctl_t pfm_sysctl;
-int pfm_debug_var;
+pfm_sysctl_t pfm_sysctl;
+EXPORT_SYMBOL(pfm_sysctl);
 
 static ctl_table pfm_ctl_table[]={
 	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
@@ -1576,7 +1568,7 @@
 		goto abort_locked;
 	}
 
-	DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
+	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
 
 	ret = -EFAULT;
   	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
@@ -3695,8 +3687,6 @@
 
 	pfm_sysctl.debug = m == 0 ? 0 : 1;
 
-	pfm_debug_var = pfm_sysctl.debug;
-
 	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
 
 	if (m == 0) {
@@ -4996,13 +4986,21 @@
 }
 
 static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
-
+ /*
+  * pfm_handle_work() can be called with interrupts enabled
+  * (TIF_NEED_RESCHED) or disabled. The down_interruptible
+  * call may sleep, therefore we must re-enable interrupts
+  * to avoid deadlocks. It is safe to do so because this function
+  * is called ONLY when returning to user level (PUStk=1), in which case
+  * there is no risk of kernel stack overflow due to deep
+  * interrupt nesting.
+  */
 void
 pfm_handle_work(void)
 {
 	pfm_context_t *ctx;
 	struct pt_regs *regs;
-	unsigned long flags;
+	unsigned long flags, dummy_flags;
 	unsigned long ovfl_regs;
 	unsigned int reason;
 	int ret;
@@ -5039,18 +5037,15 @@
 	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
 	if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
 
+	/*
+	 * restore interrupt mask to what it was on entry.
+	 * Could be enabled/diasbled.
+	 */
 	UNPROTECT_CTX(ctx, flags);
 
-	 /*
-	  * pfm_handle_work() is currently called with interrupts disabled.
-	  * The down_interruptible call may sleep, therefore we
-	  * must re-enable interrupts to avoid deadlocks. It is
-	  * safe to do so because this function is called ONLY
-	  * when returning to user level (PUStk=1), in which case
-	  * there is no risk of kernel stack overflow due to deep
-	  * interrupt nesting.
-	  */
-	BUG_ON(flags & IA64_PSR_I);
+	/*
+	 * force interrupt enable because of down_interruptible()
+	 */
 	local_irq_enable();
 
 	DPRINT(("before block sleeping\n"));
@@ -5064,12 +5059,12 @@
 	DPRINT(("after block sleeping ret=%d\n", ret));
 
 	/*
-	 * disable interrupts to restore state we had upon entering
-	 * this function
+	 * lock context and mask interrupts again
+	 * We save flags into a dummy because we may have
+	 * altered interrupts mask compared to entry in this
+	 * function.
 	 */
-	local_irq_disable();
-
-	PROTECT_CTX(ctx, flags);
+	PROTECT_CTX(ctx, dummy_flags);
 
 	/*
 	 * we need to read the ovfl_regs only after wake-up
@@ -5095,7 +5090,9 @@
 	ctx->ctx_ovfl_regs[0] = 0UL;
 
 nothing_to_do:
-
+	/*
+	 * restore flags as they were upon entry
+	 */
 	UNPROTECT_CTX(ctx, flags);
 }
 
diff -Nru a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
--- a/arch/ia64/kernel/perfmon_default_smpl.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/perfmon_default_smpl.c	2005-03-24 18:21:52 -08:00
@@ -20,32 +20,23 @@
 MODULE_DESCRIPTION("perfmon default sampling format");
 MODULE_LICENSE("GPL");
 
-MODULE_PARM(debug, "i");
-MODULE_PARM_DESC(debug, "debug");
-
-MODULE_PARM(debug_ovfl, "i");
-MODULE_PARM_DESC(debug_ovfl, "debug ovfl");
-
-
 #define DEFAULT_DEBUG 1
 
 #ifdef DEFAULT_DEBUG
 #define DPRINT(a) \
 	do { \
-		if (unlikely(debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
 	} while (0)
 
 #define DPRINT_ovfl(a) \
 	do { \
-		if (unlikely(debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
 	} while (0)
 
 #else
 #define DPRINT(a)
 #define DPRINT_ovfl(a)
 #endif
-
-static int debug, debug_ovfl;
 
 static int
 default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
diff -Nru a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
--- a/arch/ia64/kernel/process.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/process.c	2005-03-24 18:21:52 -08:00
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 1998-2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * 04/11/17 Ashok Raj	<ashok.raj@intel.com> Added CPU Hotplug Support
  */
 #define __KERNEL_SYSCALLS__	/* see <asm/unistd.h> */
 #include <linux/config.h>
@@ -200,27 +201,20 @@
 static inline void play_dead(void)
 {
 	extern void ia64_cpu_local_tick (void);
+	unsigned int this_cpu = smp_processor_id();
+
 	/* Ack it */
 	__get_cpu_var(cpu_state) = CPU_DEAD;
 
-	/* We shouldn't have to disable interrupts while dead, but
-	 * some interrupts just don't seem to go away, and this makes
-	 * it "work" for testing purposes. */
 	max_xtp();
 	local_irq_disable();
-	/* Death loop */
-	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
-		cpu_relax();
-
+	idle_task_exit();
+	ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
 	/*
-	 * Enable timer interrupts from now on
-	 * Not required if we put processor in SAL_BOOT_RENDEZ mode.
+	 * The above is a point of no-return, the processor is
+	 * expected to be in SAL loop now.
 	 */
-	local_flush_tlb_all();
-	cpu_set(smp_processor_id(), cpu_online_map);
-	wmb();
-	ia64_cpu_local_tick ();
-	local_irq_enable();
+	BUG();
 }
 #else
 static inline void play_dead(void)
diff -Nru a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
--- a/arch/ia64/kernel/setup.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/setup.c	2005-03-24 18:21:52 -08:00
@@ -4,10 +4,15 @@
  * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  *	Stephane Eranian <eranian@hpl.hp.com>
- * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 2000, 2004 Intel Corp
+ * 	Rohit Seth <rohit.seth@intel.com>
+ * 	Suresh Siddha <suresh.b.siddha@intel.com>
+ * 	Gordon Jin <gordon.jin@intel.com>
  * Copyright (C) 1999 VA Linux Systems
  * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  *
+ * 12/26/04 S.Siddha, G.Jin, R.Seth
+ *			Add multi-threading and multi-core detection
  * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
  * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
  * 03/31/00 R.Seth	cpu_initialized and current->processor fixes
@@ -296,6 +301,34 @@
 #endif
 }
 
+#ifdef CONFIG_SMP
+static void
+check_for_logical_procs (void)
+{
+	pal_logical_to_physical_t info;
+	s64 status;
+
+	status = ia64_pal_logical_to_phys(0, &info);
+	if (status == -1) {
+		printk(KERN_INFO "No logical to physical processor mapping "
+		       "available\n");
+		return;
+	}
+	if (status) {
+		printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
+		       status);
+		return;
+	}
+	/*
+	 * Total number of siblings that BSP has.  Though not all of them 
+	 * may have booted successfully. The correct number of siblings 
+	 * booted is in info.overview_num_log.
+	 */
+	smp_num_siblings = info.overview_tpc;
+	smp_num_cpucores = info.overview_cpp;
+}
+#endif
+
 void __init
 setup_arch (char **cmdline_p)
 {
@@ -356,6 +389,19 @@
 
 #ifdef CONFIG_SMP
 	cpu_physical_id(0) = hard_smp_processor_id();
+
+	cpu_set(0, cpu_sibling_map[0]);
+	cpu_set(0, cpu_core_map[0]);
+
+	check_for_logical_procs();
+	if (smp_num_cpucores > 1)
+		printk(KERN_INFO
+		       "cpu package is Multi-Core capable: number of cores=%d\n",
+		       smp_num_cpucores);
+	if (smp_num_siblings > 1)
+		printk(KERN_INFO
+		       "cpu package is Multi-Threading capable: number of siblings=%d\n",
+		       smp_num_siblings);
 #endif
 
 	cpu_init();	/* initialize the bootstrap CPU */
@@ -459,12 +505,23 @@
 		   "cpu regs   : %u\n"
 		   "cpu MHz    : %lu.%06lu\n"
 		   "itc MHz    : %lu.%06lu\n"
-		   "BogoMIPS   : %lu.%02lu\n\n",
+		   "BogoMIPS   : %lu.%02lu\n",
 		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
 		   features, c->ppn, c->number,
 		   c->proc_freq / 1000000, c->proc_freq % 1000000,
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
 		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
+#ifdef CONFIG_SMP
+	seq_printf(m, "siblings   : %u\n", c->num_log);
+	if (c->threads_per_core > 1 || c->cores_per_socket > 1)
+		seq_printf(m,
+		   	   "physical id: %u\n"
+		   	   "core id    : %u\n"
+		   	   "thread id  : %u\n",
+		   	   c->socket_id, c->core_id, c->thread_id);
+#endif
+	seq_printf(m,"\n");
+
 	return 0;
 }
 
@@ -533,6 +590,14 @@
 	memcpy(c->vendor, cpuid.field.vendor, 16);
 #ifdef CONFIG_SMP
 	c->cpu = smp_processor_id();
+
+	/* below default values will be overwritten  by identify_siblings() 
+	 * for Multi-Threading/Multi-Core capable cpu's
+	 */
+	c->threads_per_core = c->cores_per_socket = c->num_log = 1;
+	c->socket_id = -1;
+
+	identify_siblings(c);
 #endif
 	c->ppn = cpuid.field.ppn;
 	c->number = cpuid.field.number;
diff -Nru a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
--- a/arch/ia64/kernel/smpboot.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/smpboot.c	2005-03-24 18:21:52 -08:00
@@ -1,14 +1,25 @@
 /*
  * SMP boot-related support
  *
- * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2001, 2004-2005 Intel Corp
+ * 	Rohit Seth <rohit.seth@intel.com>
+ * 	Suresh Siddha <suresh.b.siddha@intel.com>
+ * 	Gordon Jin <gordon.jin@intel.com>
+ *	Ashok Raj  <ashok.raj@intel.com>
  *
  * 01/05/16 Rohit Seth <rohit.seth@intel.com>	Moved SMP booting functions from smp.c to here.
  * 01/04/27 David Mosberger <davidm@hpl.hp.com>	Added ITC synching code.
  * 02/07/31 David Mosberger <davidm@hpl.hp.com>	Switch over to hotplug-CPU boot-sequence.
  *						smp_boot_cpus()/smp_commence() is replaced by
  *						smp_prepare_cpus()/__cpu_up()/smp_cpus_done().
+ * 04/06/21 Ashok Raj		<ashok.raj@intel.com> Added CPU Hotplug Support
+ * 04/12/26 Jin Gordon <gordon.jin@intel.com>
+ * 04/12/26 Rohit Seth <rohit.seth@intel.com>
+ *						Add multi-threading and multi-core detection
+ * 05/01/30 Suresh Siddha <suresh.b.siddha@intel.com>
+ *						Setup cpu_sibling_map and cpu_core_map
  */
 #include <linux/config.h>
 
@@ -58,6 +69,37 @@
 #define Dprintk(x...)
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Store all idle threads, this can be reused instead of creating
+ * a new thread. Also avoids complicated thread destroy functionality
+ * for idle threads.
+ */
+struct task_struct *idle_thread_array[NR_CPUS];
+
+/*
+ * Global array allocated for NR_CPUS at boot time
+ */
+struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS];
+
+/*
+ * start_ap in head.S uses this to store current booting cpu
+ * info.
+ */
+struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
+
+#define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]);
+
+#define get_idle_for_cpu(x)		(idle_thread_array[(x)])
+#define set_idle_for_cpu(x,p)	(idle_thread_array[(x)] = (p))
+
+#else
+
+#define get_idle_for_cpu(x)		(NULL)
+#define set_idle_for_cpu(x,p)
+#define set_brendez_area(x)
+#endif
+
 
 /*
  * ITC synchronization related stuff:
@@ -90,6 +132,11 @@
 cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 
+cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+int smp_num_siblings = 1;
+int smp_num_cpucores = 1;
+
 /* which logical CPU number maps to which CPU (physical APIC ID) */
 volatile int ia64_cpu_to_sapicid[NR_CPUS];
 EXPORT_SYMBOL(ia64_cpu_to_sapicid);
@@ -124,7 +171,8 @@
 	local_irq_save(flags);
 	{
 		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
-			while (!go[MASTER]);
+			while (!go[MASTER])
+				cpu_relax();
 			go[MASTER] = 0;
 			go[SLAVE] = ia64_get_itc();
 		}
@@ -147,7 +195,8 @@
 	for (i = 0; i < NUM_ITERS; ++i) {
 		t0 = ia64_get_itc();
 		go[MASTER] = 1;
-		while (!(tm = go[SLAVE]));
+		while (!(tm = go[SLAVE]))
+			cpu_relax();
 		go[SLAVE] = 0;
 		t1 = ia64_get_itc();
 
@@ -226,7 +275,8 @@
 		return;
 	}
 
-	while (go[MASTER]);	/* wait for master to be ready */
+	while (go[MASTER])
+		cpu_relax();	/* wait for master to be ready */
 
 	spin_lock_irqsave(&itc_sync_lock, flags);
 	{
@@ -345,7 +395,6 @@
 {
 	/* Early console may use I/O ports */
 	ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
-
 	Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
 	efi_map_pal_code();
 	cpu_init();
@@ -384,6 +433,13 @@
 		.done	= COMPLETION_INITIALIZER(c_idle.done),
 	};
 	DECLARE_WORK(work, do_fork_idle, &c_idle);
+
+ 	c_idle.idle = get_idle_for_cpu(cpu);
+ 	if (c_idle.idle) {
+		init_idle(c_idle.idle, cpu);
+ 		goto do_rest;
+	}
+
 	/*
 	 * We can't use kernel_thread since we must avoid to reschedule the child.
 	 */
@@ -396,10 +452,15 @@
 
 	if (IS_ERR(c_idle.idle))
 		panic("failed fork for CPU %d", cpu);
+
+	set_idle_for_cpu(cpu, c_idle.idle);
+
+do_rest:
 	task_for_booting_cpu = c_idle.idle;
 
 	Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
 
+	set_brendez_area(cpu);
 	platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
 
 	/*
@@ -552,19 +613,70 @@
 	cpu_set(smp_processor_id(), cpu_callin_map);
 }
 
+/*
+ * mt_info[] is a temporary store for all info returned by
+ * PAL_LOGICAL_TO_PHYSICAL, to be copied into cpuinfo_ia64 when the
+ * specific cpu comes.
+ */
+static struct {
+	__u32   socket_id;
+	__u16   core_id;
+	__u16   thread_id;
+	__u16   proc_fixed_addr;
+	__u8    valid;
+}mt_info[NR_CPUS] __devinit;
+
 #ifdef CONFIG_HOTPLUG_CPU
-extern void fixup_irqs(void);
-/* must be called with cpucontrol mutex held */
-static int __devinit cpu_enable(unsigned int cpu)
+static inline void
+remove_from_mtinfo(int cpu)
 {
-	per_cpu(cpu_state,cpu) = CPU_UP_PREPARE;
-	wmb();
+	int i;
 
-	while (!cpu_online(cpu))
-		cpu_relax();
-	return 0;
+	for_each_cpu(i)
+		if (mt_info[i].valid &&  mt_info[i].socket_id ==
+		    				cpu_data(cpu)->socket_id)
+			mt_info[i].valid = 0;
+}
+
+static inline void
+clear_cpu_sibling_map(int cpu)
+{
+	int i;
+
+	for_each_cpu_mask(i, cpu_sibling_map[cpu])
+		cpu_clear(cpu, cpu_sibling_map[i]);
+	for_each_cpu_mask(i, cpu_core_map[cpu])
+		cpu_clear(cpu, cpu_core_map[i]);
+
+	cpu_sibling_map[cpu] = cpu_core_map[cpu] = CPU_MASK_NONE;
+}
+
+static void
+remove_siblinginfo(int cpu)
+{
+	int last = 0;
+
+	if (cpu_data(cpu)->threads_per_core == 1 &&
+	    cpu_data(cpu)->cores_per_socket == 1) {
+		cpu_clear(cpu, cpu_core_map[cpu]);
+		cpu_clear(cpu, cpu_sibling_map[cpu]);
+		return;
+	}
+
+	last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
+
+	/* remove it from all sibling map's */
+	clear_cpu_sibling_map(cpu);
+
+	/* if this cpu is the last in the core group, remove all its info 
+	 * from mt_info structure
+	 */
+	if (last)
+		remove_from_mtinfo(cpu);
 }
 
+extern void fixup_irqs(void);
+/* must be called with cpucontrol mutex held */
 int __cpu_disable(void)
 {
 	int cpu = smp_processor_id();
@@ -575,9 +687,10 @@
 	if (cpu == 0)
 		return -EBUSY;
 
+	remove_siblinginfo(cpu);
 	fixup_irqs();
 	local_flush_tlb_all();
-	printk ("Disabled cpu %u\n", smp_processor_id());
+	cpu_clear(cpu, cpu_callin_map);
 	return 0;
 }
 
@@ -589,12 +702,7 @@
 		/* They ack this in play_dead by setting CPU_DEAD */
 		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
 		{
-			/*
-			 * TBD: Enable this when physical removal
-			 * or when we put the processor is put in
-			 * SAL_BOOT_RENDEZ mode
-			 * cpu_clear(cpu, cpu_callin_map);
-			 */
+			printk ("CPU %d is now offline\n", cpu);
 			return;
 		}
 		msleep(100);
@@ -602,11 +710,6 @@
  	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
 #else /* !CONFIG_HOTPLUG_CPU */
-static int __devinit cpu_enable(unsigned int cpu)
-{
-	return 0;
-}
-
 int __cpu_disable(void)
 {
 	return -ENOSYS;
@@ -637,6 +740,23 @@
 	       (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
 }
 
+static inline void __devinit
+set_cpu_sibling_map(int cpu)
+{
+	int i;
+
+	for_each_online_cpu(i) {
+		if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) {
+			cpu_set(i, cpu_core_map[cpu]);
+			cpu_set(cpu, cpu_core_map[i]);
+			if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
+				cpu_set(i, cpu_sibling_map[cpu]);
+				cpu_set(cpu, cpu_sibling_map[i]);
+			}
+		}
+	}
+}
+
 int __devinit
 __cpu_up (unsigned int cpu)
 {
@@ -648,21 +768,26 @@
 		return -EINVAL;
 
 	/*
-	 * Already booted.. just enable and get outa idle lool
+	 * Already booted cpu? not valid anymore since we dont
+	 * do idle loop tightspin anymore.
 	 */
 	if (cpu_isset(cpu, cpu_callin_map))
-	{
-		cpu_enable(cpu);
-		local_irq_enable();
-		while (!cpu_isset(cpu, cpu_online_map))
-			mb();
-		return 0;
-	}
+		return -EINVAL;
+
 	/* Processor goes to start_secondary(), sets online flag */
 	ret = do_boot_cpu(sapicid, cpu);
 	if (ret < 0)
 		return ret;
 
+	if (cpu_data(cpu)->threads_per_core == 1 &&
+	    cpu_data(cpu)->cores_per_socket == 1) {
+		cpu_set(cpu, cpu_sibling_map[cpu]);
+		cpu_set(cpu, cpu_core_map[cpu]);
+		return 0;
+	}
+
+	set_cpu_sibling_map(cpu);
+
 	return 0;
 }
 
@@ -690,3 +815,106 @@
 		       ia64_sal_strerror(sal_ret));
 }
 
+static inline int __devinit
+check_for_mtinfo_index(void)
+{
+	int i;
+	
+	for_each_cpu(i)
+		if (!mt_info[i].valid)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Search the mt_info to find out if this socket's cid/tid information is
+ * cached or not. If the socket exists, fill in the core_id and thread_id 
+ * in cpuinfo
+ */
+static int __devinit
+check_for_new_socket(__u16 logical_address, struct cpuinfo_ia64 *c)
+{
+	int i;
+	__u32 sid = c->socket_id;
+
+	for_each_cpu(i) {
+		if (mt_info[i].valid && mt_info[i].proc_fixed_addr == logical_address
+		    && mt_info[i].socket_id == sid) {
+			c->core_id = mt_info[i].core_id;
+			c->thread_id = mt_info[i].thread_id;
+			return 1; /* not a new socket */
+		}
+	}
+	return 0;
+}
+
+/*
+ * identify_siblings(cpu) gets called from identify_cpu. This populates the 
+ * information related to logical execution units in per_cpu_data structure.
+ */
+void __devinit
+identify_siblings(struct cpuinfo_ia64 *c)
+{
+	s64 status;
+	u16 pltid;
+	u64 proc_fixed_addr;
+	int count, i;
+	pal_logical_to_physical_t info;
+
+	if (smp_num_cpucores == 1 && smp_num_siblings == 1)
+		return;
+
+	if ((status = ia64_pal_logical_to_phys(0, &info)) != PAL_STATUS_SUCCESS) {
+		printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
+		       status);
+		return;
+	}
+	if ((status = ia64_sal_physical_id_info(&pltid)) != PAL_STATUS_SUCCESS) {
+		printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status);
+		return;
+	}
+	if ((status = ia64_pal_fixed_addr(&proc_fixed_addr)) != PAL_STATUS_SUCCESS) {
+		printk(KERN_ERR "ia64_pal_fixed_addr failed with %ld\n", status);
+		return;
+	}
+
+	c->socket_id =  (pltid << 8) | info.overview_ppid;
+	c->cores_per_socket = info.overview_cpp;
+	c->threads_per_core = info.overview_tpc;
+	count = c->num_log = info.overview_num_log;
+
+	/* If the thread and core id information is already cached, then
+	 * we will simply update cpu_info and return. Otherwise, we will
+	 * do the PAL calls and cache core and thread id's of all the siblings.
+	 */
+	if (check_for_new_socket(proc_fixed_addr, c))
+		return;
+
+	for (i = 0; i < count; i++) {
+		int index;
+
+		if (i && (status = ia64_pal_logical_to_phys(i, &info))
+			  != PAL_STATUS_SUCCESS) {
+                	printk(KERN_ERR "ia64_pal_logical_to_phys failed"
+					" with %ld\n", status);
+                	return;
+		}
+		if (info.log2_la == proc_fixed_addr) {
+			c->core_id = info.log1_cid;
+			c->thread_id = info.log1_tid;
+		}
+
+		index = check_for_mtinfo_index();
+		/* We will not do the mt_info caching optimization in this case.
+		 */
+		if (index < 0)
+			continue;
+
+		mt_info[index].valid = 1;
+		mt_info[index].socket_id = c->socket_id;
+		mt_info[index].core_id = info.log1_cid;
+		mt_info[index].thread_id = info.log1_tid;
+		mt_info[index].proc_fixed_addr = info.log2_la;
+	}
+}
diff -Nru a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
--- a/arch/ia64/kernel/unaligned.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/unaligned.c	2005-03-24 18:21:52 -08:00
@@ -1380,6 +1380,10 @@
 	 *		- ldX.spill
 	 *		- stX.spill
 	 *	Reason: RNATs are based on addresses
+	 *		- ld16
+	 *		- st16
+	 *	Reason: ld16 and st16 are supposed to occur in a single
+	 *		memory op
 	 *
 	 *	synchronization:
 	 *		- cmpxchg
@@ -1401,6 +1405,10 @@
 	switch (opcode) {
 	      case LDS_OP:
 	      case LDSA_OP:
+		if (u.insn.x)
+			/* oops, really a semaphore op (cmpxchg, etc) */
+			goto failure;
+		/* no break */
 	      case LDS_IMM_OP:
 	      case LDSA_IMM_OP:
 	      case LDFS_OP:
@@ -1425,6 +1433,10 @@
 	      case LDCCLR_OP:
 	      case LDCNC_OP:
 	      case LDCCLRACQ_OP:
+		if (u.insn.x)
+			/* oops, really a semaphore op (cmpxchg, etc) */
+			goto failure;
+		/* no break */
 	      case LD_IMM_OP:
 	      case LDA_IMM_OP:
 	      case LDBIAS_IMM_OP:
@@ -1437,6 +1449,10 @@
 
 	      case ST_OP:
 	      case STREL_OP:
+		if (u.insn.x)
+			/* oops, really a semaphore op (cmpxchg, etc) */
+			goto failure;
+		/* no break */
 	      case ST_IMM_OP:
 	      case STREL_IMM_OP:
 		ret = emulate_store_int(ifa, u.insn, regs);
diff -Nru a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
--- a/arch/ia64/kernel/unwind.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/kernel/unwind.c	2005-03-24 18:21:52 -08:00
@@ -1943,23 +1943,30 @@
 int
 unw_unwind_to_user (struct unw_frame_info *info)
 {
-	unsigned long ip, sp;
+	unsigned long ip, sp, pr = 0;
 
 	while (unw_unwind(info) >= 0) {
-		if (unw_get_rp(info, &ip) < 0) {
-			unw_get_ip(info, &ip);
-			UNW_DPRINT(0, "unwind.%s: failed to read return pointer (ip=0x%lx)\n",
-				   __FUNCTION__, ip);
-			return -1;
-		}
 		unw_get_sp(info, &sp);
-		if (sp >= (unsigned long)info->task + IA64_STK_OFFSET)
+		if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
+		    < IA64_PT_REGS_SIZE) {
+			UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n",
+				   __FUNCTION__);
 			break;
-		if (ip < FIXADDR_USER_END)
+		}
+		if (unw_is_intr_frame(info) &&
+		    (pr & (1UL << PRED_USER_STACK)))
 			return 0;
+		if (unw_get_pr (info, &pr) < 0) {
+			unw_get_rp(info, &ip);
+			UNW_DPRINT(0, "unwind.%s: failed to read "
+				   "predicate register (ip=0x%lx)\n",
+				__FUNCTION__, ip);
+			return -1;
+		}
 	}
 	unw_get_ip(info, &ip);
-	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip);
+	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
+		   __FUNCTION__, ip);
 	return -1;
 }
 EXPORT_SYMBOL(unw_unwind_to_user);
diff -Nru a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S
--- a/arch/ia64/lib/memcpy_mck.S	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/lib/memcpy_mck.S	2005-03-24 18:21:52 -08:00
@@ -300,7 +300,7 @@
 	add	src_pre_mem=0,src0	// prefetch src pointer
 	add	dst_pre_mem=0,dst0	// prefetch dest pointer
 	and	src0=-8,src0		// 1st src pointer
-(p7)	mov	ar.lc = r21
+(p7)	mov	ar.lc = cnt
 (p8)	mov	ar.lc = r0
 	;;
 	TEXT_ALIGN(32)
diff -Nru a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
--- a/arch/ia64/mm/contig.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/mm/contig.c	2005-03-24 18:21:52 -08:00
@@ -61,7 +61,8 @@
 	printk("%d reserved pages\n", reserved);
 	printk("%d pages shared\n", shared);
 	printk("%d pages swap cached\n", cached);
-	printk("%ld pages in page table cache\n", pgtable_cache_size);
+	printk("%ld pages in page table cache\n",
+		pgtable_quicklist_total_size());
 }
 
 /* physical address where the bootmem map is located */
diff -Nru a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
--- a/arch/ia64/mm/discontig.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/mm/discontig.c	2005-03-24 18:21:52 -08:00
@@ -582,7 +582,8 @@
 	printk("%d reserved pages\n", total_reserved);
 	printk("%d pages shared\n", total_shared);
 	printk("%d pages swap cached\n", total_cached);
-	printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
+	printk("Total of %ld pages in page table cache\n",
+		pgtable_quicklist_total_size());
 	printk("%d free buffer pages\n", nr_free_buffer_pages());
 }
 
diff -Nru a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
--- a/arch/ia64/mm/fault.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/mm/fault.c	2005-03-24 18:21:52 -08:00
@@ -209,10 +209,13 @@
 	}
 
   no_context:
-	if (isr & IA64_ISR_SP) {
+	if ((isr & IA64_ISR_SP)
+	    || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
+	{
 		/*
-		 * This fault was due to a speculative load set the "ed" bit in the psr to
-		 * ensure forward progress (target register will get a NaT).
+		 * This fault was due to a speculative load or lfetch.fault, set the "ed"
+		 * bit in the psr to ensure forward progress.  (Target register will get a
+		 * NaT for ld.s, lfetch will be canceled.)
 		 */
 		ia64_psr(regs)->ed = 1;
 		return;
diff -Nru a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
--- a/arch/ia64/mm/init.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/mm/init.c	2005-03-24 18:21:52 -08:00
@@ -39,6 +39,9 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
+DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
+DEFINE_PER_CPU(long, __pgtable_quicklist_size);
+
 extern void ia64_tlb_init (void);
 
 unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
@@ -50,27 +53,53 @@
 EXPORT_SYMBOL(vmem_map);
 #endif
 
-static int pgt_cache_water[2] = { 25, 50 };
-
-struct page *zero_page_memmap_ptr;		/* map entry for zero page */
+struct page *zero_page_memmap_ptr;	/* map entry for zero page */
 EXPORT_SYMBOL(zero_page_memmap_ptr);
 
+#define MIN_PGT_PAGES			25UL
+#define MAX_PGT_FREES_PER_PASS		16L
+#define PGT_FRACTION_OF_NODE_MEM	16
+
+static inline long
+max_pgt_pages(void)
+{
+	u64 node_free_pages, max_pgt_pages;
+
+#ifndef	CONFIG_NUMA
+	node_free_pages = nr_free_pages();
+#else
+	node_free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
+#endif
+	max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM;
+	max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
+	return max_pgt_pages;
+}
+
+static inline long
+min_pages_to_free(void)
+{
+	long pages_to_free;
+
+	pages_to_free = pgtable_quicklist_size - max_pgt_pages();
+	pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS);
+	return pages_to_free;
+}
+
 void
-check_pgt_cache (void)
+check_pgt_cache(void)
 {
-	int low, high;
+	long pages_to_free;
 
-	low = pgt_cache_water[0];
-	high = pgt_cache_water[1];
+	if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
+		return;
 
 	preempt_disable();
-	if (pgtable_cache_size > (u64) high) {
-		do {
-			if (pgd_quicklist)
-				free_page((unsigned long)pgd_alloc_one_fast(NULL));
-			if (pmd_quicklist)
-				free_page((unsigned long)pmd_alloc_one_fast(NULL, 0));
-		} while (pgtable_cache_size > (u64) low);
+	while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
+		while (pages_to_free--) {
+			free_page((unsigned long)pgtable_quicklist_alloc());
+		}
+		preempt_enable();
+		preempt_disable();
 	}
 	preempt_enable();
 }
@@ -524,11 +553,14 @@
 mem_init (void)
 {
 	long reserved_pages, codesize, datasize, initsize;
-	unsigned long num_pgt_pages;
 	pg_data_t *pgdat;
 	int i;
 	static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
 
+	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
+	BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
+	BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE);
+
 #ifdef CONFIG_PCI
 	/*
 	 * This needs to be called _after_ the command line has been parsed but _before_
@@ -565,18 +597,6 @@
 	       num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
 	       reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
 
-	/*
-	 * Allow for enough (cached) page table pages so that we can map the entire memory
-	 * at least once.  Each task also needs a couple of page tables pages, so add in a
-	 * fudge factor for that (don't use "threads-max" here; that would be wrong!).
-	 * Don't allow the cache to be more than 10% of total memory, though.
-	 */
-#	define NUM_TASKS	500	/* typical number of tasks */
-	num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
-	if (num_pgt_pages > nr_free_pages() / 10)
-		num_pgt_pages = nr_free_pages() / 10;
-	if (num_pgt_pages > (u64) pgt_cache_water[1])
-		pgt_cache_water[1] = num_pgt_pages;
 
 	/*
 	 * For fsyscall entrpoints with no light-weight handler, use the ordinary
diff -Nru a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
--- a/arch/ia64/pci/pci.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/pci/pci.c	2005-03-24 18:21:52 -08:00
@@ -178,30 +178,6 @@
 	return controller;
 }
 
-static int __devinit
-alloc_resource (char *name, struct resource *root, unsigned long start, unsigned long end,
-		unsigned long flags)
-{
-	struct resource *res;
-
-	res = kmalloc(sizeof(*res), GFP_KERNEL);
-	if (!res)
-		return -ENOMEM;
-
-	memset(res, 0, sizeof(*res));
-	res->name = name;
-	res->start = start;
-	res->end = end;
-	res->flags = flags;
-
-	if (insert_resource(root, res))	{
-		kfree(res);
-		return -EBUSY;
-	}
-
-	return 0;
-}
-
 static u64 __devinit
 add_io_space (struct acpi_resource_address64 *addr)
 {
@@ -254,10 +230,9 @@
 	char *name;
 };
 
-static acpi_status __devinit
-add_window (struct acpi_resource *res, void *data)
+static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
 {
-	struct pci_root_info *info = (struct pci_root_info *) data;
+	struct pci_root_info *info = data;
 	struct pci_window *window;
 	struct acpi_resource_address64 addr;
 	acpi_status status;
@@ -265,45 +240,71 @@
 	struct resource *root;
 
 	status = acpi_resource_to_address64(res, &addr);
-	if (ACPI_SUCCESS(status)) {
-		if (!addr.address_length)
-			return AE_OK;
+	if (!ACPI_SUCCESS(status))
+		return AE_OK;
 
-		if (addr.resource_type == ACPI_MEMORY_RANGE) {
-			flags = IORESOURCE_MEM;
-			root = &iomem_resource;
-			offset = addr.address_translation_offset;
-		} else if (addr.resource_type == ACPI_IO_RANGE) {
-			flags = IORESOURCE_IO;
-			root = &ioport_resource;
-			offset = add_io_space(&addr);
-			if (offset == ~0)
-				return AE_OK;
-		} else
+	if (!addr.address_length)
+		return AE_OK;
+
+	if (addr.resource_type == ACPI_MEMORY_RANGE) {
+		flags = IORESOURCE_MEM;
+		root = &iomem_resource;
+		offset = addr.address_translation_offset;
+	} else if (addr.resource_type == ACPI_IO_RANGE) {
+		flags = IORESOURCE_IO;
+		root = &ioport_resource;
+		offset = add_io_space(&addr);
+		if (offset == ~0)
 			return AE_OK;
+	} else
+		return AE_OK;
 
-		window = &info->controller->window[info->controller->windows++];
-		window->resource.flags	= flags;
-		window->resource.start  = addr.min_address_range;
-		window->resource.end    = addr.max_address_range;
-		window->offset		= offset;
-
-		if (alloc_resource(info->name, root, addr.min_address_range + offset,
-			addr.max_address_range + offset, flags))
-			printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n",
-				addr.min_address_range + offset, addr.max_address_range + offset,
-				root->name, info->name);
+	window = &info->controller->window[info->controller->windows++];
+	window->resource.name = info->name;
+	window->resource.flags = flags;
+	window->resource.start = addr.min_address_range + offset;
+	window->resource.end = addr.max_address_range + offset;
+	window->resource.child = NULL;
+	window->offset = offset;
+
+	if (insert_resource(root, &window->resource)) {
+		printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n",
+			window->resource.start, window->resource.end,
+			root->name, info->name);
 	}
 
 	return AE_OK;
 }
 
+static void __devinit
+pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
+{
+	int i, j;
+
+	j = 0;
+	for (i = 0; i < ctrl->windows; i++) {
+		struct resource *res = &ctrl->window[i].resource;
+		/* HP's firmware has a hack to work around a Windows bug.
+		 * Ignore these tiny memory ranges */
+		if ((res->flags & IORESOURCE_MEM) &&
+		    (res->end - res->start < 16))
+			continue;
+		if (j >= PCI_BUS_NUM_RESOURCES) {
+			printk("Ignoring range [%lx-%lx] (%lx)\n", res->start,
+					res->end, res->flags);
+			continue;
+		}
+		bus->resource[j++] = res;
+	}
+}
+
 struct pci_bus * __devinit
-pci_acpi_scan_root (struct acpi_device *device, int domain, int bus)
+pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
 {
 	struct pci_root_info info;
 	struct pci_controller *controller;
 	unsigned int windows = 0;
+	struct pci_bus *pbus;
 	char *name;
 
 	controller = alloc_pci_controller(domain);
@@ -312,8 +313,10 @@
 
 	controller->acpi_handle = device->handle;
 
-	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, &windows);
-	controller->window = kmalloc(sizeof(*controller->window) * windows, GFP_KERNEL);
+	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
+			&windows);
+	controller->window = kmalloc(sizeof(*controller->window) * windows,
+			GFP_KERNEL);
 	if (!controller->window)
 		goto out2;
 
@@ -324,9 +327,14 @@
 	sprintf(name, "PCI Bus %04x:%02x", domain, bus);
 	info.controller = controller;
 	info.name = name;
-	acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window, &info);
+	acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
+			&info);
+
+	pbus = pci_scan_bus(bus, &pci_root_ops, controller);
+	if (pbus)
+		pcibios_setup_root_windows(pbus, controller);
 
-	return pci_scan_bus(bus, &pci_root_ops, controller);
+	return pbus;
 
 out3:
 	kfree(controller->window);
@@ -347,9 +355,9 @@
 		struct pci_window *window = &controller->window[i];
 		if (!(window->resource.flags & res->flags))
 			continue;
-		if (window->resource.start > res->start - window->offset)
+		if (window->resource.start > res->start)
 			continue;
-		if (window->resource.end < res->end - window->offset)
+		if (window->resource.end < res->end)
 			continue;
 		offset = window->offset;
 		break;
@@ -371,9 +379,9 @@
 		struct pci_window *window = &controller->window[i];
 		if (!(window->resource.flags & res->flags))
 			continue;
-		if (window->resource.start > region->start)
+		if (window->resource.start - window->offset > region->start)
 			continue;
-		if (window->resource.end < region->end)
+		if (window->resource.end - window->offset < region->end)
 			continue;
 		offset = window->offset;
 		break;
diff -Nru a/arch/ia64/sn/include/pci/pcibr_provider.h b/arch/ia64/sn/include/pci/pcibr_provider.h
--- a/arch/ia64/sn/include/pci/pcibr_provider.h	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/include/pci/pcibr_provider.h	2005-03-24 18:21:52 -08:00
@@ -123,9 +123,11 @@
 }
 #define pcibr_unlock(pcibus_info, flag)  spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag)
 
+extern int  pcibr_init_provider(void);
 extern void *pcibr_bus_fixup(struct pcibus_bussoft *);
-extern uint64_t pcibr_dma_map(struct pcidev_info *, unsigned long, size_t, unsigned int);
-extern void pcibr_dma_unmap(struct pcidev_info *, dma_addr_t, int);
+extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t);
+extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t);
+extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int);
 
 /*
  * prototypes for the bridge asic register access routines in pcibr_reg.c
diff -Nru a/arch/ia64/sn/include/pci/pcibus_provider_defs.h b/arch/ia64/sn/include/pci/pcibus_provider_defs.h
--- a/arch/ia64/sn/include/pci/pcibus_provider_defs.h	2005-03-24 18:21:52 -08:00
+++ /dev/null	Wed Dec 31 16:00:00 196900
@@ -1,43 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
- */
-#ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
-#define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
-
-/*
- * SN pci asic types.  Do not ever renumber these or reuse values.  The
- * values must agree with what prom thinks they are.
- */
-
-#define PCIIO_ASIC_TYPE_UNKNOWN	0
-#define PCIIO_ASIC_TYPE_PPB	1
-#define PCIIO_ASIC_TYPE_PIC	2
-#define PCIIO_ASIC_TYPE_TIOCP	3
-
-/*
- * Common pciio bus provider data.  There should be one of these as the
- * first field in any pciio based provider soft structure (e.g. pcibr_soft
- * tioca_soft, etc).
- */
-
-struct pcibus_bussoft {
-	uint32_t		bs_asic_type;	/* chipset type */
-	uint32_t		bs_xid;		/* xwidget id */
-	uint64_t		bs_persist_busnum; /* Persistent Bus Number */
-	uint64_t		bs_legacy_io;	/* legacy io pio addr */
-	uint64_t		bs_legacy_mem;	/* legacy mem pio addr */
-	uint64_t		bs_base;	/* widget base */
-	struct xwidget_info	*bs_xwidget_info;
-};
-
-/*
- * DMA mapping flags
- */
-
-#define SN_PCIDMA_CONSISTENT    0x0001
-
-#endif				/* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */
diff -Nru a/arch/ia64/sn/include/pci/pcidev.h b/arch/ia64/sn/include/pci/pcidev.h
--- a/arch/ia64/sn/include/pci/pcidev.h	2005-03-24 18:21:52 -08:00
+++ /dev/null	Wed Dec 31 16:00:00 196900
@@ -1,54 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
- */
-#ifndef _ASM_IA64_SN_PCI_PCIDEV_H
-#define _ASM_IA64_SN_PCI_PCIDEV_H
-
-#include <linux/pci.h>
-
-extern struct sn_irq_info **sn_irq;
-
-#define SN_PCIDEV_INFO(pci_dev) \
-        ((struct pcidev_info *)(pci_dev)->sysdata)
-
-/*
- * Given a pci_bus, return the sn pcibus_bussoft struct.  Note that
- * this only works for root busses, not for busses represented by PPB's.
- */
-
-#define SN_PCIBUS_BUSSOFT(pci_bus) \
-        ((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data))
-
-/*
- * Given a struct pci_dev, return the sn pcibus_bussoft struct.  Note
- * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due
- * due to possible PPB's in the path.
- */
-
-#define SN_PCIDEV_BUSSOFT(pci_dev) \
-	(SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info)
-
-#define PCIIO_BUS_NONE	255      /* bus 255 reserved */
-#define PCIIO_SLOT_NONE 255
-#define PCIIO_FUNC_NONE 255
-#define PCIIO_VENDOR_ID_NONE	(-1)
-
-struct pcidev_info {
-	uint64_t		pdi_pio_mapped_addr[7]; /* 6 BARs PLUS 1 ROM */
-	uint64_t		pdi_slot_host_handle;	/* Bus and devfn Host pci_dev */
-
-	struct pcibus_bussoft	*pdi_pcibus_info;	/* Kernel common bus soft */
-	struct pcidev_info	*pdi_host_pcidev_info;	/* Kernel Host pci_dev */
-	struct pci_dev		*pdi_linux_pcidev;	/* Kernel pci_dev */
-
-	struct sn_irq_info	*pdi_sn_irq_info;
-};
-
-extern void sn_irq_fixup(struct pci_dev *pci_dev,
-			 struct sn_irq_info *sn_irq_info);
-
-#endif				/* _ASM_IA64_SN_PCI_PCIDEV_H */
diff -Nru a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
--- a/arch/ia64/sn/kernel/Makefile	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/kernel/Makefile	2005-03-24 18:21:52 -08:00
@@ -10,3 +10,4 @@
 obj-y				+= setup.o bte.o bte_error.o irq.o mca.o idle.o \
 				   huberror.o io_init.o iomv.o klconflib.o sn2/
 obj-$(CONFIG_IA64_GENERIC)      += machvec.o
+obj-$(CONFIG_SGI_TIOCX)		+= tiocx.o
diff -Nru a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
--- a/arch/ia64/sn/kernel/bte.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/kernel/bte.c	2005-03-24 18:21:52 -08:00
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/config.h>
@@ -170,10 +170,6 @@
 	/* Initialize the notification to a known value. */
 	*bte->most_rcnt_na = BTE_WORD_BUSY;
 
-	/* Set the status reg busy bit and transfer length */
-	BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size));
-	BTE_LNSTAT_STORE(bte, IBLS_BUSY | transfer_size);
-
 	/* Set the source and destination registers */
 	BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
 	BTE_SRC_STORE(bte, TO_PHYS(src));
@@ -188,7 +184,7 @@
 
 	/* Initiate the transfer */
 	BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
-	BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode));
+	BTE_START_TRANSFER(bte, transfer_size, BTE_VALID_MODE(mode));
 
 	itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);
 
@@ -429,10 +425,16 @@
 	mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;
 
 	for (i = 0; i < BTES_PER_NODE; i++) {
+		u64 *base_addr;
+
 		/* Which link status register should we use? */
-		unsigned long link_status = (i == 0 ? IIO_IBLS0 : IIO_IBLS1);
-		mynodepda->bte_if[i].bte_base_addr = (u64 *)
-		    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), link_status);
+		base_addr = (u64 *)
+		    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i));
+		mynodepda->bte_if[i].bte_base_addr = base_addr;
+		mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr);
+		mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr);
+		mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr);
+		mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr);
 
 		/*
 		 * Initialize the notification and spinlock
diff -Nru a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
--- a/arch/ia64/sn/kernel/bte_error.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/kernel/bte_error.c	2005-03-24 18:21:52 -08:00
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/types.h>
@@ -33,48 +33,28 @@
  * Wait until all BTE related CRBs are completed
  * and then reset the interfaces.
  */
-void bte_error_handler(unsigned long _nodepda)
+void shub1_bte_error_handler(unsigned long _nodepda)
 {
 	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
-	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
 	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
 	nasid_t nasid;
 	int i;
 	int valid_crbs;
-	unsigned long irq_flags;
-	volatile u64 *notify;
-	bte_result_t bh_error;
 	ii_imem_u_t imem;	/* II IMEM Register */
 	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
 	ii_ibcr_u_t ibcr;
 	ii_icmr_u_t icmr;
 	ii_ieclr_u_t ieclr;
 
-	BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
+	BTE_PRINTK(("shub1_bte_error_handler(%p) - %d\n", err_nodepda,
 		    smp_processor_id()));
 
-	spin_lock_irqsave(recovery_lock, irq_flags);
-
 	if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) &&
 	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
 		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
 			    smp_processor_id()));
-		spin_unlock_irqrestore(recovery_lock, irq_flags);
 		return;
 	}
-	/*
-	 * Lock all interfaces on this node to prevent new transfers
-	 * from being queued.
-	 */
-	for (i = 0; i < BTES_PER_NODE; i++) {
-		if (err_nodepda->bte_if[i].cleanup_active) {
-			continue;
-		}
-		spin_lock(&err_nodepda->bte_if[i].spinlock);
-		BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
-			    smp_processor_id(), i));
-		err_nodepda->bte_if[i].cleanup_active = 1;
-	}
 
 	/* Determine information about our hub */
 	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
@@ -101,7 +81,6 @@
 		mod_timer(recovery_timer, HZ * 5);
 		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
 			    smp_processor_id()));
-		spin_unlock_irqrestore(recovery_lock, irq_flags);
 		return;
 	}
 	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
@@ -120,8 +99,6 @@
 				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
 					    err_nodepda, smp_processor_id(),
 					    i));
-				spin_unlock_irqrestore(recovery_lock,
-						       irq_flags);
 				return;
 			}
 		}
@@ -146,6 +123,51 @@
 	ibcr.ii_ibcr_fld_s.i_soft_reset = 1;
 	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
 
+	del_timer(recovery_timer);
+}
+
+/*
+ * Wait until all BTE related CRBs are completed
+ * and then reset the interfaces.
+ */
+void bte_error_handler(unsigned long _nodepda)
+{
+	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
+	int i;
+	nasid_t nasid;
+	unsigned long irq_flags;
+	volatile u64 *notify;
+	bte_result_t bh_error;
+
+	BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
+		    smp_processor_id()));
+
+	spin_lock_irqsave(recovery_lock, irq_flags);
+
+	/*
+	 * Lock all interfaces on this node to prevent new transfers
+	 * from being queued.
+	 */
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		if (err_nodepda->bte_if[i].cleanup_active) {
+			continue;
+		}
+		spin_lock(&err_nodepda->bte_if[i].spinlock);
+		BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
+			    smp_processor_id(), i));
+		err_nodepda->bte_if[i].cleanup_active = 1;
+	}
+
+	if (is_shub1()) {
+		shub1_bte_error_handler(_nodepda);
+	} else {
+		nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
+
+		if (ia64_sn_bte_recovery(nasid))
+			panic("bte_error_handler(): Fatal BTE Error");
+	}
+
 	for (i = 0; i < BTES_PER_NODE; i++) {
 		bh_error = err_nodepda->bte_if[i].bh_error;
 		if (bh_error != BTE_SUCCESS) {
@@ -164,8 +186,6 @@
 			    smp_processor_id(), i));
 		spin_unlock(&err_nodepda->bte_if[i].spinlock);
 	}
-
-	del_timer(recovery_timer);
 
 	spin_unlock_irqrestore(recovery_lock, irq_flags);
 }
diff -Nru a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
--- a/arch/ia64/sn/kernel/huberror.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/kernel/huberror.c	2005-03-24 18:21:52 -08:00
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992 - 1997, 2000,2002-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992 - 1997, 2000,2002-2005 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/types.h>
@@ -38,8 +38,11 @@
 	if ((int)ret_stuff.v0)
 		panic("hubii_eint_handler(): Fatal TIO Error");
 
-	if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
-		(void)hubiio_crb_error_handler(hubdev_info);
+	if (is_shub1()) {
+		if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
+			(void)hubiio_crb_error_handler(hubdev_info);
+	} else 
+		bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid)));
 
 	return IRQ_HANDLED;
 }
diff -Nru a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
--- a/arch/ia64/sn/kernel/io_init.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/kernel/io_init.c	2005-03-24 18:21:52 -08:00
@@ -11,14 +11,15 @@
 #include <asm/sn/types.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/addrs.h>
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/pcibr_provider.h"
 #include "xtalk/xwidgetdev.h"
 #include <asm/sn/geo.h>
 #include "xtalk/hubdev.h"
 #include <asm/sn/io.h>
 #include <asm/sn/simulator.h>
+#include <asm/sn/tioca_provider.h>
 
 char master_baseio_wid;
 nasid_t master_nasid = INVALID_NASID;	/* Partition Master */
@@ -34,6 +35,37 @@
 
 int sn_ioif_inited = 0;		/* SN I/O infrastructure initialized? */
 
+struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];	/* indexed by asic type */
+
+/*
+ * Hooks and struct for unsupported pci providers
+ */
+
+static dma_addr_t
+sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size)
+{
+	return 0;
+}
+
+static void
+sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction)
+{
+	return;
+}
+
+static void *
+sn_default_pci_bus_fixup(struct pcibus_bussoft *soft)
+{
+	return NULL;
+}
+
+static struct sn_pcibus_provider sn_pci_default_provider = {
+	.dma_map = sn_default_pci_map,
+	.dma_map_consistent = sn_default_pci_map,
+	.dma_unmap = sn_default_pci_unmap,
+	.bus_fixup = sn_default_pci_bus_fixup,
+};
+
 /*
  * Retrieve the DMA Flush List given nasid.  This list is needed 
  * to implement the WAR - Flush DMA data on PIO Reads.
@@ -201,6 +233,7 @@
 	struct sn_irq_info *sn_irq_info;
 	struct pci_dev *host_pci_dev;
 	int status = 0;
+	struct pcibus_bussoft *bs;
 
 	dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL);
 	if (SN_PCIDEV_INFO(dev) <= 0)
@@ -241,6 +274,7 @@
 	}
 
 	/* set up host bus linkages */
+	bs = SN_PCIBUS_BUSSOFT(dev->bus);
 	host_pci_dev =
 	    pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32,
 			  SN_PCIDEV_INFO(dev)->
@@ -248,10 +282,16 @@
 	SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info =
 	    SN_PCIDEV_INFO(host_pci_dev);
 	SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev;
-	SN_PCIDEV_INFO(dev)->pdi_pcibus_info = SN_PCIBUS_BUSSOFT(dev->bus);
+	SN_PCIDEV_INFO(dev)->pdi_pcibus_info = bs;
+
+	if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) {
+		SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type];
+	} else {
+		SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider;
+	}
 
 	/* Only set up IRQ stuff if this device has a host bus context */
-	if (SN_PCIDEV_BUSSOFT(dev) && sn_irq_info->irq_irq) {
+	if (bs && sn_irq_info->irq_irq) {
 		SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info;
 		dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq;
 		sn_irq_fixup(dev, sn_irq_info);
@@ -271,6 +311,7 @@
 	struct pcibus_bussoft *prom_bussoft_ptr;
 	struct hubdev_info *hubdev_info;
 	void *provider_soft;
+	struct sn_pcibus_provider *provider;
 
 	status =
 	    sal_get_pcibus_info((u64) segment, (u64) busnum,
@@ -291,16 +332,22 @@
 	/*
 	 * Per-provider fixup.  Copies the contents from prom to local
 	 * area and links SN_PCIBUS_BUSSOFT().
-	 *
-	 * Note:  Provider is responsible for ensuring that prom_bussoft_ptr
-	 * represents an asic-type that it can handle.
 	 */
 
-	if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) {
-		return;		/* no further fixup necessary */
+	if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) {
+		return;		/* unsupported asic type */
+	}
+
+	provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type];
+	if (provider == NULL) {
+		return;		/* no provider registerd for this asic */
+	}
+
+	provider_soft = NULL;
+	if (provider->bus_fixup) {
+		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr);
 	}
 
-	provider_soft = pcibr_bus_fixup(prom_bussoft_ptr);
 	if (provider_soft == NULL) {
 		return;		/* fixup failed or not applicable */
 	}
@@ -337,6 +384,17 @@
 
 	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_SIMULATOR())
 		return 0;
+
+	/*
+	 * prime sn_pci_provider[].  Individial provider init routines will
+	 * override their respective default entries.
+	 */
+
+	for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++)
+		sn_pci_provider[i] = &sn_pci_default_provider;
+
+	pcibr_init_provider();
+	tioca_init_provider();
 
 	/*
 	 * This is needed to avoid bounce limit checks in the blk layer
diff -Nru a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
--- a/arch/ia64/sn/kernel/irq.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/kernel/irq.c	2005-03-24 18:21:52 -08:00
@@ -13,8 +13,8 @@
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include "xtalk/xwidgetdev.h"
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/pcibr_provider.h"
 #include <asm/sn/shub_mmr.h>
 #include <asm/sn/sn_sal.h>
@@ -82,20 +82,9 @@
 	nasid = get_nasid();
 	event_occurred =
 	    HUB_L((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED));
-	if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) {
-		mask |= (1 << SH_EVENT_OCCURRED_UART_INT_SHFT);
-	}
-	if (event_occurred & SH_EVENT_OCCURRED_IPI_INT_MASK) {
-		mask |= (1 << SH_EVENT_OCCURRED_IPI_INT_SHFT);
-	}
-	if (event_occurred & SH_EVENT_OCCURRED_II_INT0_MASK) {
-		mask |= (1 << SH_EVENT_OCCURRED_II_INT0_SHFT);
-	}
-	if (event_occurred & SH_EVENT_OCCURRED_II_INT1_MASK) {
-		mask |= (1 << SH_EVENT_OCCURRED_II_INT1_SHFT);
-	}
+	mask = event_occurred & SH_ALL_INT_MASK;
 	HUB_S((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED_ALIAS),
-	      mask);
+		 mask);
 	__set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
 
 	move_irq(irq);
diff -Nru a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
--- a/arch/ia64/sn/kernel/setup.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/kernel/setup.c	2005-03-24 18:21:52 -08:00
@@ -29,6 +29,7 @@
 #include <linux/sched.h>
 #include <linux/root_dev.h>
 #include <linux/nodemask.h>
+#include <linux/pm.h>
 
 #include <asm/io.h>
 #include <asm/sal.h>
@@ -67,15 +68,27 @@
 extern unsigned char acpi_kbd_controller_present;
 
 unsigned long sn_rtc_cycles_per_second;
-
 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
+DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
+EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
+
 partid_t sn_partid = -1;
 EXPORT_SYMBOL(sn_partid);
 char sn_system_serial_number_string[128];
 EXPORT_SYMBOL(sn_system_serial_number_string);
 u64 sn_partition_serial_number;
 EXPORT_SYMBOL(sn_partition_serial_number);
+u8 sn_partition_id;
+EXPORT_SYMBOL(sn_partition_id);
+u8 sn_system_size;
+EXPORT_SYMBOL(sn_system_size);
+u8 sn_sharing_domain_size;
+EXPORT_SYMBOL(sn_sharing_domain_size);
+u8 sn_coherency_id;
+EXPORT_SYMBOL(sn_coherency_id);
+u8 sn_region_size;
+EXPORT_SYMBOL(sn_region_size);
 
 short physical_node_map[MAX_PHYSNODE_ID];
 
@@ -232,7 +245,7 @@
 	} else {
 		for_each_online_node(cnode) {
 			if (is_shub_1_1(cnodeid_to_nasid(cnode)))
-				shub_1_1_found = 1;
+				sn_hub_info->shub_1_1_found = 1;
 		}
 	}
 }
@@ -341,6 +354,14 @@
 	screen_info = sn_screen_info;
 
 	sn_timer_init();
+
+	/*
+	 * set pm_power_off to a SAL call to allow
+	 * sn machines to power off. The SAL call can be replaced
+	 * by an ACPI interface call when ACPI is fully implemented
+	 * for sn.
+	 */
+	pm_power_off = ia64_sn_power_down;
 }
 
 /**
@@ -424,16 +445,14 @@
 	int slice;
 	int cnode;
 	int i;
-	u64 shubtype, nasid_bitmask, nasid_shift;
 	static int wars_have_been_checked;
 
 	memset(pda, 0, sizeof(pda));
-	if (ia64_sn_get_hub_info(0, &shubtype, &nasid_bitmask, &nasid_shift))
+	if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, &sn_hub_info->nasid_bitmask, &sn_hub_info->nasid_shift,
+				&sn_system_size, &sn_sharing_domain_size, &sn_partition_id,
+				&sn_coherency_id, &sn_region_size))
 		BUG();
-	pda->shub2 = (u8)shubtype;
-	pda->nasid_bitmask = (u16)nasid_bitmask;
-	pda->nasid_shift = (u8)nasid_shift;
-	pda->as_shift = pda->nasid_shift - 2;
+	sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
 
 	/*
 	 * The boot cpu makes this call again after platform initialization is
@@ -482,7 +501,7 @@
 		sn_check_for_wars();
 		wars_have_been_checked = 1;
 	}
-	pda->shub_1_1_found = shub_1_1_found;
+	sn_hub_info->shub_1_1_found = shub_1_1_found;
 
 	/*
 	 * Set up addresses of PIO/MEM write status registers.
diff -Nru a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c	2005-03-24 18:21:52 -08:00
@@ -28,6 +28,7 @@
 #include <linux/vmalloc.h>
 #include <linux/seq_file.h>
 #include <linux/miscdevice.h>
+#include <linux/utsname.h>
 #include <linux/cpumask.h>
 #include <linux/smp_lock.h>
 #include <linux/nodemask.h>
@@ -43,6 +44,7 @@
 #include <asm/sn/module.h>
 #include <asm/sn/geo.h>
 #include <asm/sn/sn2/sn_hwperf.h>
+#include <asm/sn/addrs.h>
 
 static void *sn_hwperf_salheap = NULL;
 static int sn_hwperf_obj_cnt = 0;
@@ -81,26 +83,45 @@
 	return e;
 }
 
+static int sn_hwperf_location_to_bpos(char *location,
+	int *rack, int *bay, int *slot, int *slab)
+{
+	char type;
+
+	/* first scan for an old style geoid string */
+	if (sscanf(location, "%03d%c%02d#%d",
+		rack, &type, bay, slab) == 4)
+		*slot = 0; 
+	else /* scan for a new bladed geoid string */
+	if (sscanf(location, "%03d%c%02d^%02d#%d",
+		rack, &type, bay, slot, slab) != 5)
+		return -1; 
+	/* success */
+	return 0;
+}
+
 static int sn_hwperf_geoid_to_cnode(char *location)
 {
 	int cnode;
 	geoid_t geoid;
 	moduleid_t module_id;
-	char type;
-	int rack, slot, slab;
-	int this_rack, this_slot, this_slab;
+	int rack, bay, slot, slab;
+	int this_rack, this_bay, this_slot, this_slab;
 
-	if (sscanf(location, "%03d%c%02d#%d", &rack, &type, &slot, &slab) != 4)
+	if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab))
 		return -1;
 
 	for (cnode = 0; cnode < numionodes; cnode++) {
 		geoid = cnodeid_get_geoid(cnode);
 		module_id = geo_module(geoid);
 		this_rack = MODULE_GET_RACK(module_id);
-		this_slot = MODULE_GET_BPOS(module_id);
+		this_bay = MODULE_GET_BPOS(module_id);
+		this_slot = geo_slot(geoid);
 		this_slab = geo_slab(geoid);
-		if (rack == this_rack && slot == this_slot && slab == this_slab)
+		if (rack == this_rack && bay == this_bay &&
+			slot == this_slot && slab == this_slab) {
 			break;
+		}
 	}
 
 	return cnode < numionodes ? cnode : -1;
@@ -153,11 +174,36 @@
 	return slabname;
 }
 
+static void print_pci_topology(struct seq_file *s,
+	struct sn_hwperf_object_info *obj, int *ordinal,
+	u64 rack, u64 bay, u64 slot, u64 slab)
+{
+	char *p1;
+	char *p2;
+	char *pg;
+
+	if (!(pg = (char *)get_zeroed_page(GFP_KERNEL)))
+		return; /* ignore */
+	if (ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab,
+		__pa(pg), PAGE_SIZE) == SN_HWPERF_OP_OK) {
+		for (p1=pg; *p1 && p1 < pg + PAGE_SIZE;) {
+			if (!(p2 = strchr(p1, '\n')))
+				break;
+			*p2 = '\0';
+			seq_printf(s, "pcibus %d %s-%s\n",
+				*ordinal, obj->location, p1);
+			(*ordinal)++;
+			p1 = p2 + 1;
+		}
+	}
+	free_page((unsigned long)pg);
+}
+
 static int sn_topology_show(struct seq_file *s, void *d)
 {
 	int sz;
 	int pt;
-	int e;
+	int e = 0;
 	int i;
 	int j;
 	const char *slabname;
@@ -169,11 +215,44 @@
 	struct sn_hwperf_object_info *p;
 	struct sn_hwperf_object_info *obj = d;	/* this object */
 	struct sn_hwperf_object_info *objs = s->private; /* all objects */
+	int rack, bay, slot, slab;
+	u8 shubtype;
+	u8 system_size;
+	u8 sharing_size;
+	u8 partid;
+	u8 coher;
+	u8 nasid_shift;
+	u8 region_size;
+	u16 nasid_mask;
+	int nasid_msb;
+	int pci_bus_ordinal = 0;
 
 	if (obj == objs) {
-		seq_printf(s, "# sn_topology version 1\n");
+		seq_printf(s, "# sn_topology version 2\n");
 		seq_printf(s, "# objtype ordinal location partition"
 			" [attribute value [, ...]]\n");
+
+		if (ia64_sn_get_sn_info(0,
+			&shubtype, &nasid_mask, &nasid_shift, &system_size,
+			&sharing_size, &partid, &coher, &region_size))
+			BUG();
+		for (nasid_msb=63; nasid_msb > 0; nasid_msb--) {
+			if (((u64)nasid_mask << nasid_shift) & (1ULL << nasid_msb))
+				break;
+		}
+		seq_printf(s, "partition %u %s local "
+			"shubtype %s, "
+			"nasid_mask 0x%016lx, "
+			"nasid_bits %d:%d, "
+			"system_size %d, "
+			"sharing_size %d, "
+			"coherency_domain %d, "
+			"region_size %d\n",
+
+			partid, system_utsname.nodename,
+			shubtype ? "shub2" : "shub1", 
+			(u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
+			system_size, sharing_size, coher, region_size);
 	}
 
 	if (SN_HWPERF_FOREIGN(obj)) {
@@ -181,7 +260,7 @@
 		return 0;
 	}
 
-	for (i = 0; obj->name[i]; i++) {
+	for (i = 0; i < SN_HWPERF_MAXSTRING && obj->name[i]; i++) {
 		if (obj->name[i] == ' ')
 			obj->name[i] = '_';
 	}
@@ -221,6 +300,17 @@
 				seq_putc(s, '\n');
 			}
 		}
+
+		/*
+		 * PCI busses attached to this node, if any
+		 */
+		if (sn_hwperf_location_to_bpos(obj->location,
+			&rack, &bay, &slot, &slab)) {
+			/* export pci bus info */
+			print_pci_topology(s, obj, &pci_bus_ordinal,
+				rack, bay, slot, slab);
+
+		}
 	}
 
 	if (obj->ports) {
@@ -397,6 +487,9 @@
 		break;
 
 	case SN_HWPERF_OP_BUSY:
+		e = -EBUSY;
+		break;
+
 	case SN_HWPERF_OP_RECONFIGURE:
 		e = -EAGAIN;
 		break;
@@ -549,6 +642,7 @@
 		r = sn_hwperf_op_cpu(&op_info);
 		if (r) {
 			r = sn_hwperf_map_err(r);
+			a.v0 = v0;
 			goto error;
 		}
 		break;
diff -Nru a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ia64/sn/kernel/tiocx.c	2005-03-24 18:21:52 -08:00
@@ -0,0 +1,548 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <asm/uaccess.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/io.h>
+#include <asm/sn/types.h>
+#include <asm/sn/shubio.h>
+#include <asm/sn/tiocx.h>
+#include "tio.h"
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+
+#define CX_DEV_NONE 0
+#define DEVICE_NAME "tiocx"
+#define WIDGET_ID 0
+#define TIOCX_DEBUG 0
+
+#if TIOCX_DEBUG
+#define DBG(fmt...)    printk(KERN_ALERT fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+struct device_attribute dev_attr_cxdev_control;
+
+/**
+ * tiocx_match - Try to match driver id list with device.
+ * @dev: device pointer
+ * @drv: driver pointer
+ *
+ * Returns 1 if match, 0 otherwise.
+ */
+static int tiocx_match(struct device *dev, struct device_driver *drv)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct cx_drv *cx_drv = to_cx_driver(drv);
+	const struct cx_device_id *ids = cx_drv->id_table;
+
+	if (!ids)
+		return 0;
+
+	while (ids->part_num) {
+		if (ids->part_num == cx_dev->cx_id.part_num)
+			return 1;
+		ids++;
+	}
+	return 0;
+
+}
+
+static int tiocx_hotplug(struct device *dev, char **envp, int num_envp,
+			 char *buffer, int buffer_size)
+{
+	return -ENODEV;
+}
+
+static void tiocx_bus_release(struct device *dev)
+{
+	kfree(to_cx_dev(dev));
+}
+
+struct bus_type tiocx_bus_type = {
+	.name = "tiocx",
+	.match = tiocx_match,
+	.hotplug = tiocx_hotplug,
+};
+
+/**
+ * cx_device_match - Find cx_device in the id table.
+ * @ids: id table from driver
+ * @cx_device: part/mfg id for the device
+ *
+ */
+static const struct cx_device_id *cx_device_match(const struct cx_device_id
+						  *ids,
+						  struct cx_dev *cx_device)
+{
+	/*
+	 * NOTES: We may want to check for CX_ANY_ID too.
+	 *        Do we want to match against nasid too?
+	 *        CX_DEV_NONE == 0, if the driver tries to register for
+	 *        part/mfg == 0 we should return no-match (NULL) here.
+	 */
+	while (ids->part_num && ids->mfg_num) {
+		if (ids->part_num == cx_device->cx_id.part_num &&
+		    ids->mfg_num == cx_device->cx_id.mfg_num)
+			return ids;
+		ids++;
+	}
+
+	return NULL;
+}
+
+/**
+ * cx_device_probe - Look for matching device.
+ *			Call driver probe routine if found.
+ * @cx_driver: driver table (cx_drv struct) from driver
+ * @cx_device: part/mfg id for the device
+ */
+static int cx_device_probe(struct device *dev)
+{
+	const struct cx_device_id *id;
+	struct cx_drv *cx_drv = to_cx_driver(dev->driver);
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	int error = 0;
+
+	if (!cx_dev->driver && cx_drv->probe) {
+		id = cx_device_match(cx_drv->id_table, cx_dev);
+		if (id) {
+			if ((error = cx_drv->probe(cx_dev, id)) < 0)
+				return error;
+			else
+				cx_dev->driver = cx_drv;
+		}
+	}
+
+	return error;
+}
+
+/**
+ * cx_driver_remove - Remove driver from device struct.
+ * @dev: device
+ */
+static int cx_driver_remove(struct device *dev)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct cx_drv *cx_drv = cx_dev->driver;
+	if (cx_drv->remove)
+		cx_drv->remove(cx_dev);
+	cx_dev->driver = NULL;
+	return 0;
+}
+
+/**
+ * cx_driver_register - Register the driver.
+ * @cx_driver: driver table (cx_drv struct) from driver
+ * 
+ * Called from the driver init routine to register a driver.
+ * The cx_drv struct contains the driver name, a pointer to
+ * a table of part/mfg numbers and a pointer to the driver's
+ * probe/attach routine.
+ */
+int cx_driver_register(struct cx_drv *cx_driver)
+{
+	cx_driver->driver.name = cx_driver->name;
+	cx_driver->driver.bus = &tiocx_bus_type;
+	cx_driver->driver.probe = cx_device_probe;
+	cx_driver->driver.remove = cx_driver_remove;
+
+	return driver_register(&cx_driver->driver);
+}
+
+/**
+ * cx_driver_unregister - Unregister the driver.
+ * @cx_driver: driver table (cx_drv struct) from driver
+ */
+int cx_driver_unregister(struct cx_drv *cx_driver)
+{
+	driver_unregister(&cx_driver->driver);
+	return 0;
+}
+
+/**
+ * cx_device_register - Register a device.
+ * @nasid: device's nasid
+ * @part_num: device's part number
+ * @mfg_num: device's manufacturer number
+ * @hubdev: hub info associated with this device
+ *
+ */
+int
+cx_device_register(nasid_t nasid, int part_num, int mfg_num,
+		   struct hubdev_info *hubdev)
+{
+	struct cx_dev *cx_dev;
+
+	cx_dev = kcalloc(1, sizeof(struct cx_dev), GFP_KERNEL);
+	DBG("cx_dev= 0x%p\n", cx_dev);
+	if (cx_dev == NULL)
+		return -ENOMEM;
+
+	cx_dev->cx_id.part_num = part_num;
+	cx_dev->cx_id.mfg_num = mfg_num;
+	cx_dev->cx_id.nasid = nasid;
+	cx_dev->hubdev = hubdev;
+
+	cx_dev->dev.parent = NULL;
+	cx_dev->dev.bus = &tiocx_bus_type;
+	cx_dev->dev.release = tiocx_bus_release;
+	snprintf(cx_dev->dev.bus_id, BUS_ID_SIZE, "%d.0x%x",
+		 cx_dev->cx_id.nasid, cx_dev->cx_id.part_num);
+	device_register(&cx_dev->dev);
+	get_device(&cx_dev->dev);
+
+	device_create_file(&cx_dev->dev, &dev_attr_cxdev_control);
+
+	return 0;
+}
+
+/**
+ * cx_device_unregister - Unregister a device.
+ * @cx_dev: part/mfg id for the device
+ */
+int cx_device_unregister(struct cx_dev *cx_dev)
+{
+	put_device(&cx_dev->dev);
+	device_unregister(&cx_dev->dev);
+	return 0;
+}
+
+/**
+ * cx_device_reload - Reload the device.
+ * @nasid: device's nasid
+ * @part_num: device's part number
+ * @mfg_num: device's manufacturer number
+ *
+ * Remove the device associated with 'nasid' from device list and then
+ * call device-register with the given part/mfg numbers.
+ */
+static int cx_device_reload(struct cx_dev *cx_dev)
+{
+	device_remove_file(&cx_dev->dev, &dev_attr_cxdev_control);
+	cx_device_unregister(cx_dev);
+	return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num,
+				  cx_dev->cx_id.mfg_num, cx_dev->hubdev);
+}
+
+static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget,
+					u64 sn_irq_info,
+					int req_irq, nasid_t req_nasid,
+					int req_slice)
+{
+	struct ia64_sal_retval rv;
+	rv.status = 0;
+	rv.v0 = 0;
+
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT,
+				SAL_INTR_ALLOC, nasid,
+				widget, sn_irq_info, req_irq,
+				req_nasid, req_slice);
+	return rv.status;
+}
+
+static inline void tiocx_intr_free(nasid_t nasid, int widget,
+				   struct sn_irq_info *sn_irq_info)
+{
+	struct ia64_sal_retval rv;
+	rv.status = 0;
+	rv.v0 = 0;
+
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT,
+				SAL_INTR_FREE, nasid,
+				widget, sn_irq_info->irq_irq,
+				sn_irq_info->irq_cookie, 0, 0);
+}
+
+struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq,
+				    nasid_t req_nasid, int slice)
+{
+	struct sn_irq_info *sn_irq_info;
+	int status;
+	int sn_irq_size = sizeof(struct sn_irq_info);
+
+	if ((nasid & 1) == 0)
+		return NULL;
+
+	sn_irq_info = kmalloc(sn_irq_size, GFP_KERNEL);
+	if (sn_irq_info == NULL)
+		return NULL;
+
+	memset(sn_irq_info, 0x0, sn_irq_size);
+
+	status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq,
+				  req_nasid, slice);
+	if (status) {
+		kfree(sn_irq_info);
+		return NULL;
+	} else {
+		return sn_irq_info;
+	}
+}
+
+void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
+{
+	uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
+	nasid_t nasid = NASID_GET(bridge);
+	int widget;
+
+	if (nasid & 1) {
+		widget = TIO_SWIN_WIDGETNUM(bridge);
+		tiocx_intr_free(nasid, widget, sn_irq_info);
+		kfree(sn_irq_info);
+	}
+}
+
+uint64_t
+tiocx_dma_addr(uint64_t addr)
+{
+	return PHYS_TO_TIODMA(addr);
+}
+
+uint64_t
+tiocx_swin_base(int nasid)
+{
+	return TIO_SWIN_BASE(nasid, TIOCX_CORELET);
+}
+
+EXPORT_SYMBOL(cx_driver_register);
+EXPORT_SYMBOL(cx_driver_unregister);
+EXPORT_SYMBOL(cx_device_register);
+EXPORT_SYMBOL(cx_device_unregister);
+EXPORT_SYMBOL(tiocx_irq_alloc);
+EXPORT_SYMBOL(tiocx_irq_free);
+EXPORT_SYMBOL(tiocx_bus_type);
+EXPORT_SYMBOL(tiocx_dma_addr);
+EXPORT_SYMBOL(tiocx_swin_base);
+
+static uint64_t tiocx_get_hubdev_info(u64 handle, u64 address)
+{
+
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	ia64_sal_oemcall_nolock(&ret_stuff,
+				SN_SAL_IOIF_GET_HUBDEV_INFO,
+				handle, address, 0, 0, 0, 0, 0);
+	return ret_stuff.v0;
+}
+
+static void tio_conveyor_set(nasid_t nasid, int enable_flag)
+{
+	uint64_t ice_frz;
+	uint64_t disable_cb = (1ull << 61);
+
+	if (!(nasid & 1))
+		return;
+
+	ice_frz = REMOTE_HUB_L(nasid, TIO_ICE_FRZ_CFG);
+	if (enable_flag) {
+		if (!(ice_frz & disable_cb))	/* already enabled */
+			return;
+		ice_frz &= ~disable_cb;
+	} else {
+		if (ice_frz & disable_cb)	/* already disabled */
+			return;
+		ice_frz |= disable_cb;
+	}
+	DBG(KERN_ALERT "TIO_ICE_FRZ_CFG= 0x%lx\n", ice_frz);
+	REMOTE_HUB_S(nasid, TIO_ICE_FRZ_CFG, ice_frz);
+}
+
+#define tio_conveyor_enable(nasid) tio_conveyor_set(nasid, 1)
+#define tio_conveyor_disable(nasid) tio_conveyor_set(nasid, 0)
+
+static void tio_corelet_reset(nasid_t nasid, int corelet)
+{
+	if (!(nasid & 1))
+		return;
+
+	REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 1 << corelet);
+	udelay(2000);
+	REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 0);
+	udelay(2000);
+}
+
+static int fpga_attached(nasid_t nasid)
+{
+	uint64_t cx_credits;
+
+	cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3);
+	cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK;
+	DBG("cx_credits= 0x%lx\n", cx_credits);
+
+	return (cx_credits == 0xf) ? 1 : 0;
+}
+
+static int tiocx_reload(struct cx_dev *cx_dev)
+{
+	int part_num = CX_DEV_NONE;
+	int mfg_num = CX_DEV_NONE;
+	nasid_t nasid = cx_dev->cx_id.nasid;
+
+	if (fpga_attached(nasid)) {
+		uint64_t cx_id;
+
+		cx_id =
+		    *(volatile int32_t *)(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
+					  WIDGET_ID);
+		part_num = XWIDGET_PART_NUM(cx_id);
+		mfg_num = XWIDGET_MFG_NUM(cx_id);
+		DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num);
+		/* just ignore it if it's a CE */
+		if (part_num == TIO_CE_ASIC_PARTNUM)
+			return 0;
+	}
+
+	cx_dev->cx_id.part_num = part_num;
+	cx_dev->cx_id.mfg_num = mfg_num;
+
+	/*
+	 * Delete old device and register the new one.  It's ok if
+	 * part_num/mfg_num == CX_DEV_NONE.  We want to register
+	 * devices in the table even if a bitstream isn't loaded.
+	 * That allows use to see that a bitstream isn't loaded via
+	 * TIOCX_IOCTL_DEV_LIST.
+	 */
+	return cx_device_reload(cx_dev);
+}
+
+static ssize_t show_cxdev_control(struct device *dev, char *buf)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+
+	return sprintf(buf, "0x%x 0x%x 0x%x\n",
+		       cx_dev->cx_id.nasid,
+		       cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num);
+}
+
+static ssize_t store_cxdev_control(struct device *dev, const char *buf,
+				   size_t count)
+{
+	int n;
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (count <= 0)
+		return 0;
+
+	n = simple_strtoul(buf, NULL, 0);
+
+	switch (n) {
+	case 1:
+		tiocx_reload(cx_dev);
+		break;
+	case 3:
+		tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET);
+		break;
+	default:
+		break;
+	}
+
+	return count;
+}
+
+DEVICE_ATTR(cxdev_control, 0644, show_cxdev_control, store_cxdev_control);
+
+static int __init tiocx_init(void)
+{
+	cnodeid_t cnodeid;
+	int found_tiocx_device = 0;
+
+	bus_register(&tiocx_bus_type);
+
+	for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) {
+		nasid_t nasid;
+
+		if ((nasid = cnodeid_to_nasid(cnodeid)) < 0)
+			break;	/* No more nasids .. bail out of loop */
+
+		if (nasid & 0x1) {	/* TIO's are always odd */
+			struct hubdev_info *hubdev;
+			uint64_t status;
+			struct xwidget_info *widgetp;
+
+			DBG("Found TIO at nasid 0x%x\n", nasid);
+
+			hubdev =
+			    (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo);
+			status =
+			    tiocx_get_hubdev_info(nasid,
+						  (uint64_t) __pa(hubdev));
+			if (status)
+				continue;
+
+			widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET];
+
+			/* The CE hangs off of the CX port but is not an FPGA */
+			if (widgetp->xwi_hwid.part_num == TIO_CE_ASIC_PARTNUM)
+				continue;
+
+			tio_corelet_reset(nasid, TIOCX_CORELET);
+			tio_conveyor_enable(nasid);
+
+			if (cx_device_register
+			    (nasid, widgetp->xwi_hwid.part_num,
+			     widgetp->xwi_hwid.mfg_num, hubdev) < 0)
+				return -ENXIO;
+			else
+				found_tiocx_device++;
+		}
+	}
+
+	/* It's ok if we find zero devices. */
+	DBG("found_tiocx_device= %d\n", found_tiocx_device);
+
+	return 0;
+}
+
+static void __exit tiocx_exit(void)
+{
+	struct device *dev;
+	struct device *tdev;
+
+	DBG("tiocx_exit\n");
+
+	/*
+	 * Unregister devices.
+	 */
+	list_for_each_entry_safe(dev, tdev, &tiocx_bus_type.devices.list,
+				 bus_list) {
+		if (dev) {
+			struct cx_dev *cx_dev = to_cx_dev(dev);
+			device_remove_file(dev, &dev_attr_cxdev_control);
+			cx_device_unregister(cx_dev);
+		}
+	}
+
+	bus_unregister(&tiocx_bus_type);
+}
+
+module_init(tiocx_init);
+module_exit(tiocx_exit);
+
+/************************************************************************
+ * Module licensing and description
+ ************************************************************************/
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bruce Losure <blosure@sgi.com>");
+MODULE_DESCRIPTION("TIOCX module");
+MODULE_SUPPORTED_DEVICE(DEVICE_NAME);
diff -Nru a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile
--- a/arch/ia64/sn/pci/Makefile	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/pci/Makefile	2005-03-24 18:21:52 -08:00
@@ -7,4 +7,4 @@
 #
 # Makefile for the sn pci general routines.
 
-obj-y := pci_dma.o pcibr/ 
+obj-y := pci_dma.o tioca_provider.o pcibr/ 
diff -Nru a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
--- a/arch/ia64/sn/pci/pci_dma.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/pci/pci_dma.c	2005-03-24 18:21:52 -08:00
@@ -12,9 +12,8 @@
 #include <linux/module.h>
 #include <asm/dma.h>
 #include <asm/sn/sn_sal.h>
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
-#include "pci/pcibr_provider.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 
 #define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
 #define SG_ENT_PHYS_ADDRESS(SG)	virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
@@ -79,7 +78,8 @@
 {
 	void *cpuaddr;
 	unsigned long phys_addr;
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
@@ -102,8 +102,7 @@
 	 * resources.
 	 */
 
-	*dma_handle = pcibr_dma_map(pcidev_info, phys_addr, size,
-				    SN_PCIDMA_CONSISTENT);
+	*dma_handle = provider->dma_map_consistent(pdev, phys_addr, size);
 	if (!*dma_handle) {
 		printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
 		free_pages((unsigned long)cpuaddr, get_order(size));
@@ -127,11 +126,12 @@
 void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
 			  dma_addr_t dma_handle)
 {
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
-	pcibr_dma_unmap(pcidev_info, dma_handle, 0);
+	provider->dma_unmap(pdev, dma_handle, 0);
 	free_pages((unsigned long)cpu_addr, get_order(size));
 }
 EXPORT_SYMBOL(sn_dma_free_coherent);
@@ -159,12 +159,13 @@
 {
 	dma_addr_t dma_addr;
 	unsigned long phys_addr;
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
 	phys_addr = __pa(cpu_addr);
-	dma_addr = pcibr_dma_map(pcidev_info, phys_addr, size, 0);
+	dma_addr = provider->dma_map(pdev, phys_addr, size);
 	if (!dma_addr) {
 		printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
 		return 0;
@@ -187,10 +188,12 @@
 void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 			 int direction)
 {
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
-	pcibr_dma_unmap(pcidev_info, dma_addr, direction);
+
+	provider->dma_unmap(pdev, dma_addr, direction);
 }
 EXPORT_SYMBOL(sn_dma_unmap_single);
 
@@ -207,12 +210,13 @@
 		     int nhwentries, int direction)
 {
 	int i;
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
 	for (i = 0; i < nhwentries; i++, sg++) {
-		pcibr_dma_unmap(pcidev_info, sg->dma_address, direction);
+		provider->dma_unmap(pdev, sg->dma_address, direction);
 		sg->dma_address = (dma_addr_t) NULL;
 		sg->dma_length = 0;
 	}
@@ -233,7 +237,8 @@
 {
 	unsigned long phys_addr;
 	struct scatterlist *saved_sg = sg;
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 	int i;
 
 	BUG_ON(dev->bus != &pci_bus_type);
@@ -243,8 +248,8 @@
 	 */
 	for (i = 0; i < nhwentries; i++, sg++) {
 		phys_addr = SG_ENT_PHYS_ADDRESS(sg);
-		sg->dma_address = pcibr_dma_map(pcidev_info, phys_addr,
-						sg->length, 0);
+		sg->dma_address = provider->dma_map(pdev,
+						    phys_addr, sg->length);
 
 		if (!sg->dma_address) {
 			printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
diff -Nru a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
--- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c	2005-03-24 18:21:52 -08:00
@@ -8,8 +8,8 @@
 
 #include <linux/types.h>
 #include <asm/sn/sn_sal.h>
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/pcibr_provider.h"
 
 int pcibr_invalidate_ate = 0;	/* by default don't invalidate ATE on free */
diff -Nru a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c	2005-03-24 18:21:52 -08:00
@@ -12,8 +12,8 @@
 #include <asm/sn/geo.h>
 #include "xtalk/xwidgetdev.h"
 #include "xtalk/hubdev.h"
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/tiocp.h"
 #include "pci/pic.h"
 #include "pci/pcibr_provider.h"
@@ -40,7 +40,7 @@
  *      we do not have to allocate entries in the PMU.
  */
 
-static uint64_t
+static dma_addr_t
 pcibr_dmamap_ate32(struct pcidev_info *info,
 		   uint64_t paddr, size_t req_size, uint64_t flags)
 {
@@ -109,7 +109,7 @@
 	return pci_addr;
 }
 
-static uint64_t
+static dma_addr_t
 pcibr_dmatrans_direct64(struct pcidev_info * info, uint64_t paddr,
 			uint64_t dma_attributes)
 {
@@ -141,7 +141,7 @@
 
 }
 
-static uint64_t
+static dma_addr_t
 pcibr_dmatrans_direct32(struct pcidev_info * info,
 			uint64_t paddr, size_t req_size, uint64_t flags)
 {
@@ -180,11 +180,11 @@
  * DMA mappings for Direct 64 and 32 do not have any DMA maps.
  */
 void
-pcibr_dma_unmap(struct pcidev_info *pcidev_info, dma_addr_t dma_handle,
-		int direction)
+pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle, int direction)
 {
-	struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
-	    pdi_pcibus_info;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
+	struct pcibus_info *pcibus_info =
+	    (struct pcibus_info *)pcidev_info->pdi_pcibus_info;
 
 	if (IS_PCI32_MAPPED(dma_handle)) {
 		int ate_index;
@@ -316,61 +316,60 @@
 }
 
 /*
- * Wrapper DMA interface.  Called from pci_dma.c routines.
+ * DMA interfaces.  Called from pci_dma.c routines.
  */
 
-uint64_t
-pcibr_dma_map(struct pcidev_info * pcidev_info, unsigned long phys_addr,
-	      size_t size, unsigned int flags)
+dma_addr_t
+pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size)
 {
 	dma_addr_t dma_handle;
-	struct pci_dev *pcidev = pcidev_info->pdi_linux_pcidev;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
 
-	if (flags & SN_PCIDMA_CONSISTENT) {
-		/* sn_pci_alloc_consistent interfaces */
-		if (pcidev->dev.coherent_dma_mask == ~0UL) {
-			dma_handle =
-			    pcibr_dmatrans_direct64(pcidev_info, phys_addr,
-						    PCI64_ATTR_BAR);
-		} else {
-			dma_handle =
-			    (dma_addr_t) pcibr_dmamap_ate32(pcidev_info,
-							    phys_addr, size,
-							    PCI32_ATE_BAR);
-		}
-	} else {
-		/* map_sg/map_single interfaces */
+	/* SN cannot support DMA addresses smaller than 32 bits. */
+	if (hwdev->dma_mask < 0x7fffffff) {
+		return 0;
+	}
 
-		/* SN cannot support DMA addresses smaller than 32 bits. */
-		if (pcidev->dma_mask < 0x7fffffff) {
-			return 0;
-		}
+	if (hwdev->dma_mask == ~0UL) {
+		/*
+		 * Handle the most common case: 64 bit cards.  This
+		 * call should always succeed.
+		 */
 
-		if (pcidev->dma_mask == ~0UL) {
+		dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
+						     PCI64_ATTR_PREF);
+	} else {
+		/* Handle 32-63 bit cards via direct mapping */
+		dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr,
+						     size, 0);
+		if (!dma_handle) {
 			/*
-			 * Handle the most common case: 64 bit cards.  This
-			 * call should always succeed.
+			 * It is a 32 bit card and we cannot do direct mapping,
+			 * so we use an ATE.
 			 */
 
-			dma_handle =
-			    pcibr_dmatrans_direct64(pcidev_info, phys_addr,
-						    PCI64_ATTR_PREF);
-		} else {
-			/* Handle 32-63 bit cards via direct mapping */
-			dma_handle =
-			    pcibr_dmatrans_direct32(pcidev_info, phys_addr,
-						    size, 0);
-			if (!dma_handle) {
-				/*
-				 * It is a 32 bit card and we cannot do direct mapping,
-				 * so we use an ATE.
-				 */
-
-				dma_handle =
-				    pcibr_dmamap_ate32(pcidev_info, phys_addr,
-						       size, PCI32_ATE_PREF);
-			}
+			dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr,
+							size, PCI32_ATE_PREF);
 		}
+	}
+
+	return dma_handle;
+}
+
+dma_addr_t
+pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr,
+			 size_t size)
+{
+	dma_addr_t dma_handle;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
+
+	if (hwdev->dev.coherent_dma_mask == ~0UL) {
+		dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
+					    PCI64_ATTR_BAR);
+	} else {
+		dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info,
+						    phys_addr, size,
+						    PCI32_ATE_BAR);
 	}
 
 	return dma_handle;
diff -Nru a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c	2005-03-24 18:21:52 -08:00
@@ -13,8 +13,8 @@
 #include "xtalk/xwidgetdev.h"
 #include <asm/sn/geo.h>
 #include "xtalk/hubdev.h"
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/pcibr_provider.h"
 #include <asm/sn/addrs.h>
 
@@ -167,4 +167,24 @@
 
 		pcibr_force_interrupt(sn_irq_info);
 	}
+}
+
+/*
+ * Provider entries for PIC/CP
+ */
+
+struct sn_pcibus_provider pcibr_provider = {
+	.dma_map = pcibr_dma_map,
+	.dma_map_consistent = pcibr_dma_map_consistent,
+	.dma_unmap = pcibr_dma_unmap,
+	.bus_fixup = pcibr_bus_fixup,
+};
+
+int
+pcibr_init_provider(void)
+{
+	sn_pci_provider[PCIIO_ASIC_TYPE_PIC] = &pcibr_provider;
+	sn_pci_provider[PCIIO_ASIC_TYPE_TIOCP] = &pcibr_provider;
+
+	return 0;
 }
diff -Nru a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
--- a/arch/ia64/sn/pci/pcibr/pcibr_reg.c	2005-03-24 18:21:52 -08:00
+++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c	2005-03-24 18:21:52 -08:00
@@ -8,8 +8,8 @@
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
-#include "pci/pcibus_provider_defs.h"
-#include "pci/pcidev.h"
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include "pci/tiocp.h"
 #include "pci/pic.h"
 #include "pci/pcibr_provider.h"
diff -Nru a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/arch/ia64/sn/pci/tioca_provider.c	2005-03-24 18:21:52 -08:00
@@ -0,0 +1,668 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/tioca_provider.h>
+
+uint32_t tioca_gart_found;
+EXPORT_SYMBOL(tioca_gart_found);	/* used by agp-sgi */
+
+LIST_HEAD(tioca_list);
+EXPORT_SYMBOL(tioca_list);	/* used by agp-sgi */
+
+static int tioca_gart_init(struct tioca_kernel *);
+
+/**
+ * tioca_gart_init - Initialize SGI TIOCA GART
+ * @tioca_common: ptr to common prom/kernel struct identifying the 
+ *
+ * If the indicated tioca has devices present, initialize its associated
+ * GART MMR's and kernel memory.
+ */
+static int
+tioca_gart_init(struct tioca_kernel *tioca_kern)
+{
+	uint64_t ap_reg;
+	uint64_t offset;
+	struct page *tmp;
+	struct tioca_common *tioca_common;
+	volatile struct tioca *ca_base;
+
+	tioca_common = tioca_kern->ca_common;
+	ca_base = (struct tioca *)tioca_common->ca_common.bs_base;
+
+	if (list_empty(tioca_kern->ca_devices))
+		return 0;
+
+	ap_reg = 0;
+
+	/*
+	 * Validate aperature size
+	 */
+
+	switch (CA_APERATURE_SIZE >> 20) {
+	case 4:
+		ap_reg |= (0x3ff << CA_GART_AP_SIZE_SHFT);	/* 4MB */
+		break;
+	case 8:
+		ap_reg |= (0x3fe << CA_GART_AP_SIZE_SHFT);	/* 8MB */
+		break;
+	case 16:
+		ap_reg |= (0x3fc << CA_GART_AP_SIZE_SHFT);	/* 16MB */
+		break;
+	case 32:
+		ap_reg |= (0x3f8 << CA_GART_AP_SIZE_SHFT);	/* 32 MB */
+		break;
+	case 64:
+		ap_reg |= (0x3f0 << CA_GART_AP_SIZE_SHFT);	/* 64 MB */
+		break;
+	case 128:
+		ap_reg |= (0x3e0 << CA_GART_AP_SIZE_SHFT);	/* 128 MB */
+		break;
+	case 256:
+		ap_reg |= (0x3c0 << CA_GART_AP_SIZE_SHFT);	/* 256 MB */
+		break;
+	case 512:
+		ap_reg |= (0x380 << CA_GART_AP_SIZE_SHFT);	/* 512 MB */
+		break;
+	case 1024:
+		ap_reg |= (0x300 << CA_GART_AP_SIZE_SHFT);	/* 1GB */
+		break;
+	case 2048:
+		ap_reg |= (0x200 << CA_GART_AP_SIZE_SHFT);	/* 2GB */
+		break;
+	case 4096:
+		ap_reg |= (0x000 << CA_GART_AP_SIZE_SHFT);	/* 4 GB */
+		break;
+	default:
+		printk(KERN_ERR "%s:  Invalid CA_APERATURE_SIZE "
+		       "0x%lx\n", __FUNCTION__, (ulong) CA_APERATURE_SIZE);
+		return -1;
+	}
+
+	/*
+	 * Set up other aperature parameters
+	 */
+
+	if (PAGE_SIZE >= 16384) {
+		tioca_kern->ca_ap_pagesize = 16384;
+		ap_reg |= CA_GART_PAGE_SIZE;
+	} else {
+		tioca_kern->ca_ap_pagesize = 4096;
+	}
+
+	tioca_kern->ca_ap_size = CA_APERATURE_SIZE;
+	tioca_kern->ca_ap_bus_base = CA_APERATURE_BASE;
+	tioca_kern->ca_gart_entries =
+	    tioca_kern->ca_ap_size / tioca_kern->ca_ap_pagesize;
+
+	ap_reg |= (CA_GART_AP_ENB_AGP | CA_GART_AP_ENB_PCI);
+	ap_reg |= tioca_kern->ca_ap_bus_base;
+
+	/*
+	 * Allocate and set up the GART
+	 */
+
+	tioca_kern->ca_gart_size = tioca_kern->ca_gart_entries * sizeof(u64);
+	tmp =
+	    alloc_pages_node(tioca_kern->ca_closest_node,
+			     GFP_KERNEL | __GFP_ZERO,
+			     get_order(tioca_kern->ca_gart_size));
+
+	if (!tmp) {
+		printk(KERN_ERR "%s:  Could not allocate "
+		       "%lu bytes (order %d) for GART\n",
+		       __FUNCTION__,
+		       tioca_kern->ca_gart_size,
+		       get_order(tioca_kern->ca_gart_size));
+		return -ENOMEM;
+	}
+
+	tioca_kern->ca_gart = page_address(tmp);
+	tioca_kern->ca_gart_coretalk_addr =
+	    PHYS_TO_TIODMA(virt_to_phys(tioca_kern->ca_gart));
+
+	/*
+	 * Compute PCI/AGP convenience fields 
+	 */
+
+	offset = CA_PCI32_MAPPED_BASE - CA_APERATURE_BASE;
+	tioca_kern->ca_pciap_base = CA_PCI32_MAPPED_BASE;
+	tioca_kern->ca_pciap_size = CA_PCI32_MAPPED_SIZE;
+	tioca_kern->ca_pcigart_start = offset / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_pcigart_base =
+	    tioca_kern->ca_gart_coretalk_addr + offset;
+	tioca_kern->ca_pcigart =
+	    &tioca_kern->ca_gart[tioca_kern->ca_pcigart_start];
+	tioca_kern->ca_pcigart_entries =
+	    tioca_kern->ca_pciap_size / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_pcigart_pagemap =
+	    kcalloc(1, tioca_kern->ca_pcigart_entries / 8, GFP_KERNEL);
+	if (!tioca_kern->ca_pcigart_pagemap) {
+		free_pages((unsigned long)tioca_kern->ca_gart,
+			   get_order(tioca_kern->ca_gart_size));
+		return -1;
+	}
+
+	offset = CA_AGP_MAPPED_BASE - CA_APERATURE_BASE;
+	tioca_kern->ca_gfxap_base = CA_AGP_MAPPED_BASE;
+	tioca_kern->ca_gfxap_size = CA_AGP_MAPPED_SIZE;
+	tioca_kern->ca_gfxgart_start = offset / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_gfxgart_base =
+	    tioca_kern->ca_gart_coretalk_addr + offset;
+	tioca_kern->ca_gfxgart =
+	    &tioca_kern->ca_gart[tioca_kern->ca_gfxgart_start];
+	tioca_kern->ca_gfxgart_entries =
+	    tioca_kern->ca_gfxap_size / tioca_kern->ca_ap_pagesize;
+
+	/*
+	 * various control settings:
+	 *      use agp op-combining
+	 *      use GET semantics to fetch memory
+	 *      participate in coherency domain
+	 * 	DISABLE GART PREFETCHING due to hw bug tracked in SGI PV930029
+	 */
+
+	ca_base->ca_control1 |= CA_AGPDMA_OP_ENB_COMBDELAY;	/* PV895469 ? */
+	ca_base->ca_control2 &= ~(CA_GART_MEM_PARAM);
+	ca_base->ca_control2 |= (0x2ull << CA_GART_MEM_PARAM_SHFT);
+	tioca_kern->ca_gart_iscoherent = 1;
+	ca_base->ca_control2 &=
+	    ~(CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB);
+
+	/*
+	 * Unmask GART fetch error interrupts.  Clear residual errors first.
+	 */
+
+	ca_base->ca_int_status_alias = CA_GART_FETCH_ERR;
+	ca_base->ca_mult_error_alias = CA_GART_FETCH_ERR;
+	ca_base->ca_int_mask &= ~CA_GART_FETCH_ERR;
+
+	/*
+	 * Program the aperature and gart registers in TIOCA
+	 */
+
+	ca_base->ca_gart_aperature = ap_reg;
+	ca_base->ca_gart_ptr_table = tioca_kern->ca_gart_coretalk_addr | 1;
+
+	return 0;
+}
+
+/**
+ * tioca_fastwrite_enable - enable AGP FW for a tioca and its functions
+ * @tioca_kernel: structure representing the CA
+ *
+ * Given a CA, scan all attached functions making sure they all support
+ * FastWrite.  If so, enable FastWrite for all functions and the CA itself.
+ */
+
+void
+tioca_fastwrite_enable(struct tioca_kernel *tioca_kern)
+{
+	int cap_ptr;
+	uint64_t ca_control1;
+	uint32_t reg;
+	struct tioca *tioca_base;
+	struct pci_dev *pdev;
+	struct tioca_common *common;
+
+	common = tioca_kern->ca_common;
+
+	/*
+	 * Scan all vga controllers on this bus making sure they all
+	 * suport FW.  If not, return.
+	 */
+
+	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
+		if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
+			continue;
+
+		cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
+		if (!cap_ptr)
+			return;	/* no AGP CAP means no FW */
+
+		pci_read_config_dword(pdev, cap_ptr + PCI_AGP_STATUS, &reg);
+		if (!(reg & PCI_AGP_STATUS_FW))
+			return;	/* function doesn't support FW */
+	}
+
+	/*
+	 * Set fw for all vga fn's
+	 */
+
+	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
+		if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
+			continue;
+
+		cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
+		pci_read_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, &reg);
+		reg |= PCI_AGP_COMMAND_FW;
+		pci_write_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, reg);
+	}
+
+	/*
+	 * Set ca's fw to match
+	 */
+
+	tioca_base = (struct tioca *)common->ca_common.bs_base;
+	ca_control1 = tioca_base->ca_control1;
+	ca_control1 |= CA_AGP_FW_ENABLE;
+	tioca_base->ca_control1 = ca_control1;
+}
+
+EXPORT_SYMBOL(tioca_fastwrite_enable);	/* used by agp-sgi */
+
+/**
+ * tioca_dma_d64 - create a DMA mapping using 64-bit direct mode
+ * @paddr: system physical address
+ *
+ * Map @paddr into 64-bit CA bus space.  No device context is necessary.
+ * Bits 53:0 come from the coretalk address.  We just need to mask in the
+ * following optional bits of the 64-bit pci address:
+ *
+ * 63:60 - Coretalk Packet Type -  0x1 for Mem Get/Put (coherent)
+ *                                 0x2 for PIO (non-coherent)
+ *                                 We will always use 0x1
+ * 55:55 - Swap bytes		   Currently unused
+ */
+static uint64_t
+tioca_dma_d64(unsigned long paddr)
+{
+	dma_addr_t bus_addr;
+
+	bus_addr = PHYS_TO_TIODMA(paddr);
+
+	BUG_ON(!bus_addr);
+	BUG_ON(bus_addr >> 54);
+
+	/* Set upper nibble to Cache Coherent Memory op */
+	bus_addr |= (1UL << 60);
+
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_d48 - create a DMA mapping using 48-bit direct mode
+ * @pdev: linux pci_dev representing the function
+ * @paddr: system physical address
+ *
+ * Map @paddr into 64-bit bus space of the CA associated with @pcidev_info.
+ *
+ * The CA agp 48 bit direct address falls out as follows:
+ *
+ * When direct mapping AGP addresses, the 48 bit AGP address is
+ * constructed as follows:
+ *
+ * [47:40] - Low 8 bits of the page Node ID extracted from coretalk
+ *              address [47:40].  The upper 8 node bits are fixed
+ *              and come from the xxx register bits [5:0]
+ * [39:38] - Chiplet ID extracted from coretalk address [39:38]
+ * [37:00] - node offset extracted from coretalk address [37:00]
+ * 
+ * Since the node id in general will be non-zero, and the chiplet id
+ * will always be non-zero, it follows that the device must support
+ * a dma mask of at least 0xffffffffff (40 bits) to target node 0
+ * and in general should be 0xffffffffffff (48 bits) to target nodes
+ * up to 255.  Nodes above 255 need the support of the xxx register,
+ * and so a given CA can only directly target nodes in the range
+ * xxx - xxx+255.
+ */
+static uint64_t
+tioca_dma_d48(struct pci_dev *pdev, uint64_t paddr)
+{
+	struct tioca_common *tioca_common;
+	struct tioca *ca_base;
+	uint64_t ct_addr;
+	dma_addr_t bus_addr;
+	uint32_t node_upper;
+	uint64_t agp_dma_extn;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	ca_base = (struct tioca *)tioca_common->ca_common.bs_base;
+
+	ct_addr = PHYS_TO_TIODMA(paddr);
+	if (!ct_addr)
+		return 0;
+
+	bus_addr = (dma_addr_t) (ct_addr & 0xffffffffffff);
+	node_upper = ct_addr >> 48;
+
+	if (node_upper > 64) {
+		printk(KERN_ERR "%s:  coretalk addr 0x%p node id out "
+		       "of range\n", __FUNCTION__, (void *)ct_addr);
+		return 0;
+	}
+
+	agp_dma_extn = ca_base->ca_agp_dma_addr_extn;
+	if (node_upper != (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)) {
+		printk(KERN_ERR "%s:  coretalk upper node (%u) "
+		       "mismatch with ca_agp_dma_addr_extn (%lu)\n",
+		       __FUNCTION__,
+		       node_upper, (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT));
+		return 0;
+	}
+
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_mapped - create a DMA mapping using a CA GART 
+ * @pdev: linux pci_dev representing the function
+ * @paddr: host physical address to map
+ * @req_size: len (bytes) to map
+ *
+ * Map @paddr into CA address space using the GART mechanism.  The mapped
+ * dma_addr_t is guarenteed to be contiguous in CA bus space.
+ */
+static dma_addr_t
+tioca_dma_mapped(struct pci_dev *pdev, uint64_t paddr, size_t req_size)
+{
+	int i, ps, ps_shift, entry, entries, mapsize, last_entry;
+	uint64_t xio_addr, end_xio_addr;
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	dma_addr_t bus_addr = 0;
+	struct tioca_dmamap *ca_dmamap;
+	void *map;
+	unsigned long flags;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);;
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
+
+	xio_addr = PHYS_TO_TIODMA(paddr);
+	if (!xio_addr)
+		return 0;
+
+	spin_lock_irqsave(&tioca_kern->ca_lock, flags);
+
+	/*
+	 * allocate a map struct
+	 */
+
+	ca_dmamap = kcalloc(1, sizeof(struct tioca_dmamap), GFP_ATOMIC);
+	if (!ca_dmamap)
+		goto map_return;
+
+	/*
+	 * Locate free entries that can hold req_size.  Account for
+	 * unaligned start/length when allocating.
+	 */
+
+	ps = tioca_kern->ca_ap_pagesize;	/* will be power of 2 */
+	ps_shift = ffs(ps) - 1;
+	end_xio_addr = xio_addr + req_size - 1;
+
+	entries = (end_xio_addr >> ps_shift) - (xio_addr >> ps_shift) + 1;
+
+	map = tioca_kern->ca_pcigart_pagemap;
+	mapsize = tioca_kern->ca_pcigart_entries;
+
+	entry = find_first_zero_bit(map, mapsize);
+	while (entry < mapsize) {
+		last_entry = find_next_bit(map, mapsize, entry);
+
+		if (last_entry - entry >= entries)
+			break;
+
+		entry = find_next_zero_bit(map, mapsize, last_entry);
+	}
+
+	if (entry > mapsize)
+		goto map_return;
+
+	for (i = 0; i < entries; i++)
+		set_bit(entry + i, map);
+
+	bus_addr = tioca_kern->ca_pciap_base + (entry * ps);
+
+	ca_dmamap->cad_dma_addr = bus_addr;
+	ca_dmamap->cad_gart_size = entries;
+	ca_dmamap->cad_gart_entry = entry;
+	list_add(&ca_dmamap->cad_list, &tioca_kern->ca_list);
+
+	if (xio_addr % ps) {
+		tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
+		bus_addr += xio_addr & (ps - 1);
+		xio_addr &= ~(ps - 1);
+		xio_addr += ps;
+		entry++;
+	}
+
+	while (xio_addr < end_xio_addr) {
+		tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
+		xio_addr += ps;
+		entry++;
+	}
+
+	tioca_tlbflush(tioca_kern);
+
+map_return:
+	spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_unmap - release CA mapping resources
+ * @pdev: linux pci_dev representing the function
+ * @bus_addr: bus address returned by an earlier tioca_dma_map
+ * @dir: mapping direction (unused)
+ *
+ * Locate mapping resources associated with @bus_addr and release them.
+ * For mappings created using the direct modes (64 or 48) there are no
+ * resources to release.
+ */
+void
+tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
+{
+	int i, entry;
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	struct tioca_dmamap *map;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
+	unsigned long flags;
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
+
+	/* return straight away if this isn't be a mapped address */
+
+	if (bus_addr < tioca_kern->ca_pciap_base ||
+	    bus_addr >= (tioca_kern->ca_pciap_base + tioca_kern->ca_pciap_size))
+		return;
+
+	spin_lock_irqsave(&tioca_kern->ca_lock, flags);
+
+	list_for_each_entry(map, &tioca_kern->ca_dmamaps, cad_list)
+	    if (map->cad_dma_addr == bus_addr)
+		break;
+
+	BUG_ON(map == NULL);
+
+	entry = map->cad_gart_entry;
+
+	for (i = 0; i < map->cad_gart_size; i++, entry++) {
+		clear_bit(entry, tioca_kern->ca_pcigart_pagemap);
+		tioca_kern->ca_pcigart[entry] = 0;
+	}
+	tioca_tlbflush(tioca_kern);
+
+	list_del(&map->cad_list);
+	spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
+	kfree(map);
+}
+
+/**
+ * tioca_dma_map - map pages for PCI DMA
+ * @pdev: linux pci_dev representing the function
+ * @paddr: host physical address to map
+ * @byte_count: bytes to map
+ *
+ * This is the main wrapper for mapping host physical pages to CA PCI space.
+ * The mapping mode used is based on the devices dma_mask.  As a last resort
+ * use the GART mapped mode.
+ */
+uint64_t
+tioca_dma_map(struct pci_dev *pdev, uint64_t paddr, size_t byte_count)
+{
+	uint64_t mapaddr;
+
+	/*
+	 * If card is 64 or 48 bit addresable, use a direct mapping.  32
+	 * bit direct is so restrictive w.r.t. where the memory resides that
+	 * we don't use it even though CA has some support.
+	 */
+
+	if (pdev->dma_mask == ~0UL)
+		mapaddr = tioca_dma_d64(paddr);
+	else if (pdev->dma_mask == 0xffffffffffffUL)
+		mapaddr = tioca_dma_d48(pdev, paddr);
+	else
+		mapaddr = 0;
+
+	/* Last resort ... use PCI portion of CA GART */
+
+	if (mapaddr == 0)
+		mapaddr = tioca_dma_mapped(pdev, paddr, byte_count);
+
+	return mapaddr;
+}
+
+/**
+ * tioca_error_intr_handler - SGI TIO CA error interrupt handler
+ * @irq: unused
+ * @arg: pointer to tioca_common struct for the given CA
+ * @pt: unused
+ *
+ * Handle a CA error interrupt.  Simply a wrapper around a SAL call which
+ * defers processing to the SGI prom.
+ */
+static irqreturn_t
+tioca_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
+{
+	struct tioca_common *soft = arg;
+	struct ia64_sal_retval ret_stuff;
+	uint64_t segment;
+	uint64_t busnum;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	segment = 0;
+	busnum = soft->ca_common.bs_persist_busnum;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_ERROR_INTERRUPT,
+			segment, busnum, 0, 0, 0, 0, 0);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * tioca_bus_fixup - perform final PCI fixup for a TIO CA bus
+ * @prom_bussoft: Common prom/kernel struct representing the bus
+ *
+ * Replicates the tioca_common pointed to by @prom_bussoft in kernel
+ * space.  Allocates and initializes a kernel-only area for a given CA,
+ * and sets up an irq for handling CA error interrupts.
+ *
+ * On successful setup, returns the kernel version of tioca_common back to
+ * the caller.
+ */
+void *
+tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft)
+{
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	struct pci_bus *bus;
+
+	/* sanity check prom rev */
+
+	if (sn_sal_rev_major() < 4 ||
+	    (sn_sal_rev_major() == 4 && sn_sal_rev_minor() < 6)) {
+		printk
+		    (KERN_ERR "%s:  SGI prom rev 4.06 or greater required "
+		     "for tioca support\n", __FUNCTION__);
+		return NULL;
+	}
+
+	/*
+	 * Allocate kernel bus soft and copy from prom.
+	 */
+
+	tioca_common = kcalloc(1, sizeof(struct tioca_common), GFP_KERNEL);
+	if (!tioca_common)
+		return NULL;
+
+	memcpy(tioca_common, prom_bussoft, sizeof(struct tioca_common));
+	tioca_common->ca_common.bs_base |= __IA64_UNCACHED_OFFSET;
+
+	/* init kernel-private area */
+
+	tioca_kern = kcalloc(1, sizeof(struct tioca_kernel), GFP_KERNEL);
+	if (!tioca_kern) {
+		kfree(tioca_common);
+		return NULL;
+	}
+
+	tioca_kern->ca_common = tioca_common;
+	spin_lock_init(&tioca_kern->ca_lock);
+	INIT_LIST_HEAD(&tioca_kern->ca_dmamaps);
+	tioca_kern->ca_closest_node =
+	    nasid_to_cnodeid(tioca_common->ca_closest_nasid);
+	tioca_common->ca_kernel_private = (uint64_t) tioca_kern;
+
+	bus = pci_find_bus(0, tioca_common->ca_common.bs_persist_busnum);
+	BUG_ON(!bus);
+	tioca_kern->ca_devices = &bus->devices;
+
+	/* init GART */
+
+	if (tioca_gart_init(tioca_kern) < 0) {
+		kfree(tioca_kern);
+		kfree(tioca_common);
+		return NULL;
+	}
+
+	tioca_gart_found++;
+	list_add(&tioca_kern->ca_list, &tioca_list);
+
+	if (request_irq(SGI_TIOCA_ERROR,
+			tioca_error_intr_handler,
+			SA_SHIRQ, "TIOCA error", (void *)tioca_common))
+		printk(KERN_WARNING
+		       "%s:  Unable to get irq %d.  "
+		       "Error interrupts won't be routed for TIOCA bus %d\n",
+		       __FUNCTION__, SGI_TIOCA_ERROR,
+		       (int)tioca_common->ca_common.bs_persist_busnum);
+
+	return tioca_common;
+}
+
+static struct sn_pcibus_provider tioca_pci_interfaces = {
+	.dma_map = tioca_dma_map,
+	.dma_map_consistent = tioca_dma_map,
+	.dma_unmap = tioca_dma_unmap,
+	.bus_fixup = tioca_bus_fixup,
+};
+
+/**
+ * tioca_init_provider - init SN PCI provider ops for TIO CA
+ */
+int
+tioca_init_provider(void)
+{
+	sn_pci_provider[PCIIO_ASIC_TYPE_TIOCA] = &tioca_pci_interfaces;
+	return 0;
+}
diff -Nru a/drivers/char/Kconfig b/drivers/char/Kconfig
--- a/drivers/char/Kconfig	2005-03-24 18:21:52 -08:00
+++ b/drivers/char/Kconfig	2005-03-24 18:21:52 -08:00
@@ -399,6 +399,20 @@
 	  controller communication from user space (you want this!),
 	  say Y.  Otherwise, say N.
 
+config SGI_TIOCX
+       bool "SGI TIO CX driver support"
+       depends on (IA64_SGI_SN2 || IA64_GENERIC)
+       help
+         If you have an SGI Altix and you have fpga devices attached
+         to your TIO, say Y here, otherwise say N.
+
+config SGI_MBCS
+       tristate "SGI FPGA Core Services driver support"
+       depends on (IA64_SGI_SN2 || IA64_GENERIC)
+       help
+         If you have an SGI Altix with an attached SABrick
+         say Y or M here, otherwise say N.
+
 source "drivers/serial/Kconfig"
 
 config UNIX98_PTYS
diff -Nru a/drivers/char/Makefile b/drivers/char/Makefile
--- a/drivers/char/Makefile	2005-03-24 18:21:52 -08:00
+++ b/drivers/char/Makefile	2005-03-24 18:21:52 -08:00
@@ -42,11 +42,12 @@
 obj-$(CONFIG_RIO)		+= rio/ generic_serial.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvc_console.o hvsi.o
 obj-$(CONFIG_RAW_DRIVER)	+= raw.o
-obj-$(CONFIG_SGI_SNSC)		+= snsc.o
+obj-$(CONFIG_SGI_SNSC)		+= snsc.o snsc_event.o
 obj-$(CONFIG_MMTIMER)		+= mmtimer.o
 obj-$(CONFIG_VIOCONS) += viocons.o
 obj-$(CONFIG_VIOTAPE)		+= viotape.o
 obj-$(CONFIG_HVCS)		+= hvcs.o
+obj-$(CONFIG_SGI_MBCS)		+= mbcs.o
 
 obj-$(CONFIG_PRINTER) += lp.o
 obj-$(CONFIG_TIPAR) += tipar.o
diff -Nru a/drivers/char/mbcs.c b/drivers/char/mbcs.c
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/drivers/char/mbcs.c	2005-03-24 18:21:52 -08:00
@@ -0,0 +1,849 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+/*
+ *	MOATB Core Services driver.
+ */
+
+#include <linux/config.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/uio.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/tiocx.h>
+#include "mbcs.h"
+
+#define MBCS_DEBUG 0
+#if MBCS_DEBUG
+#define DBG(fmt...)    printk(KERN_ALERT fmt)
+#else
+#define DBG(fmt...)
+#endif
+int mbcs_major;
+
+LIST_HEAD(soft_list);
+
+/*
+ * file operations
+ */
+struct file_operations mbcs_ops = {
+	.open = mbcs_open,
+	.llseek = mbcs_sram_llseek,
+	.read = mbcs_sram_read,
+	.write = mbcs_sram_write,
+	.mmap = mbcs_gscr_mmap,
+};
+
+struct mbcs_callback_arg {
+	int minor;
+	struct cx_dev *cx_dev;
+};
+
+static inline void mbcs_getdma_init(struct getdma *gdma)
+{
+	memset(gdma, 0, sizeof(struct getdma));
+	gdma->DoneIntEnable = 1;
+}
+
+static inline void mbcs_putdma_init(struct putdma *pdma)
+{
+	memset(pdma, 0, sizeof(struct putdma));
+	pdma->DoneIntEnable = 1;
+}
+
+static inline void mbcs_algo_init(struct algoblock *algo_soft)
+{
+	memset(algo_soft, 0, sizeof(struct algoblock));
+}
+
+static inline void mbcs_getdma_set(void *mmr,
+		       uint64_t hostAddr,
+		       uint64_t localAddr,
+		       uint64_t localRamSel,
+		       uint64_t numPkts,
+		       uint64_t amoEnable,
+		       uint64_t intrEnable,
+		       uint64_t peerIO,
+		       uint64_t amoHostDest,
+		       uint64_t amoModType, uint64_t intrHostDest,
+		       uint64_t intrVector)
+{
+	union dma_control rdma_control;
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union dma_localaddr local_addr;
+	union dma_hostaddr host_addr;
+
+	rdma_control.dma_control_reg = 0;
+	amo_dest.dma_amo_dest_reg = 0;
+	intr_dest.intr_dest_reg = 0;
+	local_addr.dma_localaddr_reg = 0;
+	host_addr.dma_hostaddr_reg = 0;
+
+	host_addr.dma_sys_addr = hostAddr;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_SYS_ADDR, host_addr.dma_hostaddr_reg);
+
+	local_addr.dma_ram_addr = localAddr;
+	local_addr.dma_ram_sel = localRamSel;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_LOC_ADDR, local_addr.dma_localaddr_reg);
+
+	rdma_control.dma_op_length = numPkts;
+	rdma_control.done_amo_en = amoEnable;
+	rdma_control.done_int_en = intrEnable;
+	rdma_control.pio_mem_n = peerIO;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_CTRL, rdma_control.dma_control_reg);
+
+	amo_dest.dma_amo_sys_addr = amoHostDest;
+	amo_dest.dma_amo_mod_type = amoModType;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_AMO_DEST, amo_dest.dma_amo_dest_reg);
+
+	intr_dest.address = intrHostDest;
+	intr_dest.int_vector = intrVector;
+	MBCS_MMR_SET(mmr, MBCS_RD_DMA_INT_DEST, intr_dest.intr_dest_reg);
+
+}
+
+static inline void mbcs_putdma_set(void *mmr,
+		       uint64_t hostAddr,
+		       uint64_t localAddr,
+		       uint64_t localRamSel,
+		       uint64_t numPkts,
+		       uint64_t amoEnable,
+		       uint64_t intrEnable,
+		       uint64_t peerIO,
+		       uint64_t amoHostDest,
+		       uint64_t amoModType,
+		       uint64_t intrHostDest, uint64_t intrVector)
+{
+	union dma_control wdma_control;
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union dma_localaddr local_addr;
+	union dma_hostaddr host_addr;
+
+	wdma_control.dma_control_reg = 0;
+	amo_dest.dma_amo_dest_reg = 0;
+	intr_dest.intr_dest_reg = 0;
+	local_addr.dma_localaddr_reg = 0;
+	host_addr.dma_hostaddr_reg = 0;
+
+	host_addr.dma_sys_addr = hostAddr;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_SYS_ADDR, host_addr.dma_hostaddr_reg);
+
+	local_addr.dma_ram_addr = localAddr;
+	local_addr.dma_ram_sel = localRamSel;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_LOC_ADDR, local_addr.dma_localaddr_reg);
+
+	wdma_control.dma_op_length = numPkts;
+	wdma_control.done_amo_en = amoEnable;
+	wdma_control.done_int_en = intrEnable;
+	wdma_control.pio_mem_n = peerIO;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_CTRL, wdma_control.dma_control_reg);
+
+	amo_dest.dma_amo_sys_addr = amoHostDest;
+	amo_dest.dma_amo_mod_type = amoModType;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_AMO_DEST, amo_dest.dma_amo_dest_reg);
+
+	intr_dest.address = intrHostDest;
+	intr_dest.int_vector = intrVector;
+	MBCS_MMR_SET(mmr, MBCS_WR_DMA_INT_DEST, intr_dest.intr_dest_reg);
+
+}
+
+static inline void mbcs_algo_set(void *mmr,
+		     uint64_t amoHostDest,
+		     uint64_t amoModType,
+		     uint64_t intrHostDest,
+		     uint64_t intrVector, uint64_t algoStepCount)
+{
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union algo_step step;
+
+	step.algo_step_reg = 0;
+	intr_dest.intr_dest_reg = 0;
+	amo_dest.dma_amo_dest_reg = 0;
+
+	amo_dest.dma_amo_sys_addr = amoHostDest;
+	amo_dest.dma_amo_mod_type = amoModType;
+	MBCS_MMR_SET(mmr, MBCS_ALG_AMO_DEST, amo_dest.dma_amo_dest_reg);
+
+	intr_dest.address = intrHostDest;
+	intr_dest.int_vector = intrVector;
+	MBCS_MMR_SET(mmr, MBCS_ALG_INT_DEST, intr_dest.intr_dest_reg);
+
+	step.alg_step_cnt = algoStepCount;
+	MBCS_MMR_SET(mmr, MBCS_ALG_STEP, step.algo_step_reg);
+}
+
+static inline int mbcs_getdma_start(struct mbcs_soft *soft)
+{
+	void *mmr_base;
+	struct getdma *gdma;
+	uint64_t numPkts;
+	union cm_control cm_control;
+
+	mmr_base = soft->mmr_base;
+	gdma = &soft->getdma;
+
+	/* check that host address got setup */
+	if (!gdma->hostAddr)
+		return -1;
+
+	numPkts =
+	    (gdma->bytes + (MBCS_CACHELINE_SIZE - 1)) / MBCS_CACHELINE_SIZE;
+
+	/* program engine */
+	mbcs_getdma_set(mmr_base, tiocx_dma_addr(gdma->hostAddr),
+		   gdma->localAddr,
+		   (gdma->localAddr < MB2) ? 0 :
+		   (gdma->localAddr < MB4) ? 1 :
+		   (gdma->localAddr < MB6) ? 2 : 3,
+		   numPkts,
+		   gdma->DoneAmoEnable,
+		   gdma->DoneIntEnable,
+		   gdma->peerIO,
+		   gdma->amoHostDest,
+		   gdma->amoModType,
+		   gdma->intrHostDest, gdma->intrVector);
+
+	/* start engine */
+	cm_control.cm_control_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+	cm_control.rd_dma_go = 1;
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL, cm_control.cm_control_reg);
+
+	return 0;
+
+}
+
+static inline int mbcs_putdma_start(struct mbcs_soft *soft)
+{
+	void *mmr_base;
+	struct putdma *pdma;
+	uint64_t numPkts;
+	union cm_control cm_control;
+
+	mmr_base = soft->mmr_base;
+	pdma = &soft->putdma;
+
+	/* check that host address got setup */
+	if (!pdma->hostAddr)
+		return -1;
+
+	numPkts =
+	    (pdma->bytes + (MBCS_CACHELINE_SIZE - 1)) / MBCS_CACHELINE_SIZE;
+
+	/* program engine */
+	mbcs_putdma_set(mmr_base, tiocx_dma_addr(pdma->hostAddr),
+		   pdma->localAddr,
+		   (pdma->localAddr < MB2) ? 0 :
+		   (pdma->localAddr < MB4) ? 1 :
+		   (pdma->localAddr < MB6) ? 2 : 3,
+		   numPkts,
+		   pdma->DoneAmoEnable,
+		   pdma->DoneIntEnable,
+		   pdma->peerIO,
+		   pdma->amoHostDest,
+		   pdma->amoModType,
+		   pdma->intrHostDest, pdma->intrVector);
+
+	/* start engine */
+	cm_control.cm_control_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+	cm_control.wr_dma_go = 1;
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL, cm_control.cm_control_reg);
+
+	return 0;
+
+}
+
+static inline int mbcs_algo_start(struct mbcs_soft *soft)
+{
+	struct algoblock *algo_soft = &soft->algo;
+	void *mmr_base = soft->mmr_base;
+	union cm_control cm_control;
+
+	if (down_interruptible(&soft->algolock))
+		return -ERESTARTSYS;
+
+	atomic_set(&soft->algo_done, 0);
+
+	mbcs_algo_set(mmr_base,
+		 algo_soft->amoHostDest,
+		 algo_soft->amoModType,
+		 algo_soft->intrHostDest,
+		 algo_soft->intrVector, algo_soft->algoStepCount);
+
+	/* start algorithm */
+	cm_control.cm_control_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+	cm_control.alg_done_int_en = 1;
+	cm_control.alg_go = 1;
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL, cm_control.cm_control_reg);
+
+	up(&soft->algolock);
+
+	return 0;
+}
+
+static inline ssize_t
+do_mbcs_sram_dmawrite(struct mbcs_soft *soft, uint64_t hostAddr,
+		      size_t len, loff_t * off)
+{
+	int rv = 0;
+
+	if (down_interruptible(&soft->dmawritelock))
+		return -ERESTARTSYS;
+
+	atomic_set(&soft->dmawrite_done, 0);
+
+	soft->putdma.hostAddr = hostAddr;
+	soft->putdma.localAddr = *off;
+	soft->putdma.bytes = len;
+
+	if (mbcs_putdma_start(soft) < 0) {
+		DBG(KERN_ALERT "do_mbcs_sram_dmawrite: "
+					"mbcs_putdma_start failed\n");
+		rv = -EAGAIN;
+		goto dmawrite_exit;
+	}
+
+	if (wait_event_interruptible(soft->dmawrite_queue,
+					atomic_read(&soft->dmawrite_done))) {
+		rv = -ERESTARTSYS;
+		goto dmawrite_exit;
+	}
+
+	rv = len;
+	*off += len;
+
+dmawrite_exit:
+	up(&soft->dmawritelock);
+
+	return rv;
+}
+
+static inline ssize_t
+do_mbcs_sram_dmaread(struct mbcs_soft *soft, uint64_t hostAddr,
+		     size_t len, loff_t * off)
+{
+	int rv = 0;
+
+	if (down_interruptible(&soft->dmareadlock))
+		return -ERESTARTSYS;
+
+	atomic_set(&soft->dmawrite_done, 0);
+
+	soft->getdma.hostAddr = hostAddr;
+	soft->getdma.localAddr = *off;
+	soft->getdma.bytes = len;
+
+	if (mbcs_getdma_start(soft) < 0) {
+		DBG(KERN_ALERT "mbcs_strategy: mbcs_getdma_start failed\n");
+		rv = -EAGAIN;
+		goto dmaread_exit;
+	}
+
+	if (wait_event_interruptible(soft->dmaread_queue,
+					atomic_read(&soft->dmaread_done))) {
+		rv = -ERESTARTSYS;
+		goto dmaread_exit;
+	}
+
+	rv = len;
+	*off += len;
+
+dmaread_exit:
+	up(&soft->dmareadlock);
+
+	return rv;
+}
+
+int mbcs_open(struct inode *ip, struct file *fp)
+{
+	struct mbcs_soft *soft;
+	int minor;
+
+	minor = iminor(ip);
+
+	list_for_each_entry(soft, &soft_list, list) {
+		if (soft->nasid == minor) {
+			fp->private_data = soft->cxdev;
+			return 0;
+		}
+	}
+
+	return -ENODEV;
+}
+
+ssize_t mbcs_sram_read(struct file * fp, char *buf, size_t len, loff_t * off)
+{
+	struct cx_dev *cx_dev = fp->private_data;
+	struct mbcs_soft *soft = cx_dev->soft;
+	uint64_t hostAddr;
+	int rv = 0;
+
+	hostAddr = __get_dma_pages(GFP_KERNEL, get_order(len));
+	if (hostAddr == 0)
+		return -ENOMEM;
+
+	rv = do_mbcs_sram_dmawrite(soft, hostAddr, len, off);
+	if (rv < 0)
+		goto exit;
+
+	if (copy_to_user(buf, (void *)hostAddr, len))
+		rv = -EFAULT;
+
+      exit:
+	free_pages(hostAddr, get_order(len));
+
+	return rv;
+}
+
+ssize_t
+mbcs_sram_write(struct file * fp, const char *buf, size_t len, loff_t * off)
+{
+	struct cx_dev *cx_dev = fp->private_data;
+	struct mbcs_soft *soft = cx_dev->soft;
+	uint64_t hostAddr;
+	int rv = 0;
+
+	hostAddr = __get_dma_pages(GFP_KERNEL, get_order(len));
+	if (hostAddr == 0)
+		return -ENOMEM;
+
+	if (copy_from_user((void *)hostAddr, buf, len)) {
+		rv = -EFAULT;
+		goto exit;
+	}
+
+	rv = do_mbcs_sram_dmaread(soft, hostAddr, len, off);
+
+      exit:
+	free_pages(hostAddr, get_order(len));
+
+	return rv;
+}
+
+loff_t mbcs_sram_llseek(struct file * filp, loff_t off, int whence)
+{
+	loff_t newpos;
+
+	switch (whence) {
+	case 0:		/* SEEK_SET */
+		newpos = off;
+		break;
+
+	case 1:		/* SEEK_CUR */
+		newpos = filp->f_pos + off;
+		break;
+
+	case 2:		/* SEEK_END */
+		newpos = MBCS_SRAM_SIZE + off;
+		break;
+
+	default:		/* can't happen */
+		return -EINVAL;
+	}
+
+	if (newpos < 0)
+		return -EINVAL;
+
+	filp->f_pos = newpos;
+
+	return newpos;
+}
+
+static uint64_t mbcs_pioaddr(struct mbcs_soft *soft, uint64_t offset)
+{
+	uint64_t mmr_base;
+
+	mmr_base = (uint64_t) (soft->mmr_base + offset);
+
+	return mmr_base;
+}
+
+static void mbcs_debug_pioaddr_set(struct mbcs_soft *soft)
+{
+	soft->debug_addr = mbcs_pioaddr(soft, MBCS_DEBUG_START);
+}
+
+static void mbcs_gscr_pioaddr_set(struct mbcs_soft *soft)
+{
+	soft->gscr_addr = mbcs_pioaddr(soft, MBCS_GSCR_START);
+}
+
+int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+	struct cx_dev *cx_dev = fp->private_data;
+	struct mbcs_soft *soft = cx_dev->soft;
+
+	if (vma->vm_pgoff != 0)
+		return -EINVAL;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
+	if (remap_pfn_range(vma,
+			    vma->vm_start,
+			    __pa(soft->gscr_addr) >> PAGE_SHIFT,
+			    PAGE_SIZE,
+			    vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * mbcs_completion_intr_handler - Primary completion handler.
+ * @irq: irq
+ * @arg: soft struct for device
+ * @ep: regs
+ *
+ */
+static irqreturn_t
+mbcs_completion_intr_handler(int irq, void *arg, struct pt_regs *ep)
+{
+	struct mbcs_soft *soft = (struct mbcs_soft *)arg;
+	void *mmr_base;
+	union cm_status cm_status;
+	union cm_control cm_control;
+
+	mmr_base = soft->mmr_base;
+	cm_status.cm_status_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_STATUS);
+
+	if (cm_status.rd_dma_done) {
+		/* stop dma-read engine, clear status */
+		cm_control.cm_control_reg =
+		    MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+		cm_control.rd_dma_clr = 1;
+		MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL,
+			     cm_control.cm_control_reg);
+		atomic_set(&soft->dmaread_done, 1);
+		wake_up(&soft->dmaread_queue);
+	}
+	if (cm_status.wr_dma_done) {
+		/* stop dma-write engine, clear status */
+		cm_control.cm_control_reg =
+		    MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+		cm_control.wr_dma_clr = 1;
+		MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL,
+			     cm_control.cm_control_reg);
+		atomic_set(&soft->dmawrite_done, 1);
+		wake_up(&soft->dmawrite_queue);
+	}
+	if (cm_status.alg_done) {
+		/* clear status */
+		cm_control.cm_control_reg =
+		    MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+		cm_control.alg_done_clr = 1;
+		MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL,
+			     cm_control.cm_control_reg);
+		atomic_set(&soft->algo_done, 1);
+		wake_up(&soft->algo_queue);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * mbcs_intr_alloc - Allocate interrupts.
+ * @dev: device pointer
+ *
+ */
+static int mbcs_intr_alloc(struct cx_dev *dev)
+{
+	struct sn_irq_info *sn_irq;
+	struct mbcs_soft *soft;
+	struct getdma *getdma;
+	struct putdma *putdma;
+	struct algoblock *algo;
+
+	soft = dev->soft;
+	getdma = &soft->getdma;
+	putdma = &soft->putdma;
+	algo = &soft->algo;
+
+	soft->get_sn_irq = NULL;
+	soft->put_sn_irq = NULL;
+	soft->algo_sn_irq = NULL;
+
+	sn_irq = tiocx_irq_alloc(dev->cx_id.nasid, TIOCX_CORELET, -1, -1, -1);
+	if (sn_irq == NULL)
+		return -EAGAIN;
+	soft->get_sn_irq = sn_irq;
+	getdma->intrHostDest = sn_irq->irq_xtalkaddr;
+	getdma->intrVector = sn_irq->irq_irq;
+	if (request_irq(sn_irq->irq_irq,
+			(void *)mbcs_completion_intr_handler, SA_SHIRQ,
+			"MBCS get intr", (void *)soft)) {
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+
+	sn_irq = tiocx_irq_alloc(dev->cx_id.nasid, TIOCX_CORELET, -1, -1, -1);
+	if (sn_irq == NULL) {
+		free_irq(soft->get_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+	soft->put_sn_irq = sn_irq;
+	putdma->intrHostDest = sn_irq->irq_xtalkaddr;
+	putdma->intrVector = sn_irq->irq_irq;
+	if (request_irq(sn_irq->irq_irq,
+			(void *)mbcs_completion_intr_handler, SA_SHIRQ,
+			"MBCS put intr", (void *)soft)) {
+		tiocx_irq_free(soft->put_sn_irq);
+		free_irq(soft->get_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+
+	sn_irq = tiocx_irq_alloc(dev->cx_id.nasid, TIOCX_CORELET, -1, -1, -1);
+	if (sn_irq == NULL) {
+		free_irq(soft->put_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->put_sn_irq);
+		free_irq(soft->get_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+	soft->algo_sn_irq = sn_irq;
+	algo->intrHostDest = sn_irq->irq_xtalkaddr;
+	algo->intrVector = sn_irq->irq_irq;
+	if (request_irq(sn_irq->irq_irq,
+			(void *)mbcs_completion_intr_handler, SA_SHIRQ,
+			"MBCS algo intr", (void *)soft)) {
+		tiocx_irq_free(soft->algo_sn_irq);
+		free_irq(soft->put_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->put_sn_irq);
+		free_irq(soft->get_sn_irq->irq_irq, soft);
+		tiocx_irq_free(soft->get_sn_irq);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/**
+ * mbcs_intr_dealloc - Remove interrupts.
+ * @dev: device pointer
+ *
+ */
+static void mbcs_intr_dealloc(struct cx_dev *dev)
+{
+	struct mbcs_soft *soft;
+
+	soft = dev->soft;
+
+	free_irq(soft->get_sn_irq->irq_irq, soft);
+	tiocx_irq_free(soft->get_sn_irq);
+	free_irq(soft->put_sn_irq->irq_irq, soft);
+	tiocx_irq_free(soft->put_sn_irq);
+	free_irq(soft->algo_sn_irq->irq_irq, soft);
+	tiocx_irq_free(soft->algo_sn_irq);
+}
+
+static inline int mbcs_hw_init(struct mbcs_soft *soft)
+{
+	void *mmr_base = soft->mmr_base;
+	union cm_control cm_control;
+	union cm_req_timeout cm_req_timeout;
+	uint64_t err_stat;
+
+	cm_req_timeout.cm_req_timeout_reg =
+	    MBCS_MMR_GET(mmr_base, MBCS_CM_REQ_TOUT);
+
+	cm_req_timeout.time_out = MBCS_CM_CONTROL_REQ_TOUT_MASK;
+	MBCS_MMR_SET(mmr_base, MBCS_CM_REQ_TOUT,
+		     cm_req_timeout.cm_req_timeout_reg);
+
+	mbcs_gscr_pioaddr_set(soft);
+	mbcs_debug_pioaddr_set(soft);
+
+	/* clear errors */
+	err_stat = MBCS_MMR_GET(mmr_base, MBCS_CM_ERR_STAT);
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CLR_ERR_STAT, err_stat);
+	MBCS_MMR_ZERO(mmr_base, MBCS_CM_ERROR_DETAIL1);
+
+	/* enable interrupts */
+	/* turn off 2^23 (INT_EN_PIO_REQ_ADDR_INV) */
+	MBCS_MMR_SET(mmr_base, MBCS_CM_ERR_INT_EN, 0x3ffffff7e00ffUL);
+
+	/* arm status regs and clear engines */
+	cm_control.cm_control_reg = MBCS_MMR_GET(mmr_base, MBCS_CM_CONTROL);
+	cm_control.rearm_stat_regs = 1;
+	cm_control.alg_clr = 1;
+	cm_control.wr_dma_clr = 1;
+	cm_control.rd_dma_clr = 1;
+
+	MBCS_MMR_SET(mmr_base, MBCS_CM_CONTROL, cm_control.cm_control_reg);
+
+	return 0;
+}
+
+static ssize_t show_algo(struct device *dev, char *buf)
+{
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct mbcs_soft *soft = cx_dev->soft;
+	uint64_t debug0;
+
+	/*
+	 * By convention, the first debug register contains the
+	 * algorithm number and revision.
+	 */
+	debug0 = *(uint64_t *) soft->debug_addr;
+
+	return sprintf(buf, "0x%lx 0x%lx\n",
+		       (debug0 >> 32), (debug0 & 0xffffffff));
+}
+
+static ssize_t store_algo(struct device *dev, const char *buf, size_t count)
+{
+	int n;
+	struct cx_dev *cx_dev = to_cx_dev(dev);
+	struct mbcs_soft *soft = cx_dev->soft;
+
+	if (count <= 0)
+		return 0;
+
+	n = simple_strtoul(buf, NULL, 0);
+
+	if (n == 1) {
+		mbcs_algo_start(soft);
+		if (wait_event_interruptible(soft->algo_queue,
+					atomic_read(&soft->algo_done)))
+			return -ERESTARTSYS;
+	}
+
+	return count;
+}
+
+DEVICE_ATTR(algo, 0644, show_algo, store_algo);
+
+/**
+ * mbcs_probe - Initialize for device
+ * @dev: device pointer
+ * @device_id: id table pointer
+ *
+ */
+static int mbcs_probe(struct cx_dev *dev, const struct cx_device_id *id)
+{
+	struct mbcs_soft *soft;
+
+	dev->soft = NULL;
+
+	soft = kcalloc(1, sizeof(struct mbcs_soft), GFP_KERNEL);
+	if (soft == NULL)
+		return -ENOMEM;
+
+	soft->nasid = dev->cx_id.nasid;
+	list_add(&soft->list, &soft_list);
+	soft->mmr_base = (void *)tiocx_swin_base(dev->cx_id.nasid);
+	dev->soft = soft;
+	soft->cxdev = dev;
+
+	init_waitqueue_head(&soft->dmawrite_queue);
+	init_waitqueue_head(&soft->dmaread_queue);
+	init_waitqueue_head(&soft->algo_queue);
+
+	init_MUTEX(&soft->dmawritelock);
+	init_MUTEX(&soft->dmareadlock);
+	init_MUTEX(&soft->algolock);
+
+	mbcs_getdma_init(&soft->getdma);
+	mbcs_putdma_init(&soft->putdma);
+	mbcs_algo_init(&soft->algo);
+
+	mbcs_hw_init(soft);
+
+	/* Allocate interrupts */
+	mbcs_intr_alloc(dev);
+
+	device_create_file(&dev->dev, &dev_attr_algo);
+
+	return 0;
+}
+
+static int mbcs_remove(struct cx_dev *dev)
+{
+	if (dev->soft) {
+		mbcs_intr_dealloc(dev);
+		kfree(dev->soft);
+	}
+
+	device_remove_file(&dev->dev, &dev_attr_algo);
+
+	return 0;
+}
+
+const struct cx_device_id __devinitdata mbcs_id_table[] = {
+	{
+	 .part_num = MBCS_PART_NUM,
+	 .mfg_num = MBCS_MFG_NUM,
+	 },
+	{
+	 .part_num = MBCS_PART_NUM_ALG0,
+	 .mfg_num = MBCS_MFG_NUM,
+	 },
+	{0, 0}
+};
+
+MODULE_DEVICE_TABLE(cx, mbcs_id_table);
+
+struct cx_drv mbcs_driver = {
+	.name = DEVICE_NAME,
+	.id_table = mbcs_id_table,
+	.probe = mbcs_probe,
+	.remove = mbcs_remove,
+};
+
+static void __exit mbcs_exit(void)
+{
+	int rv;
+
+	rv = unregister_chrdev(mbcs_major, DEVICE_NAME);
+	if (rv < 0)
+		DBG(KERN_ALERT "Error in unregister_chrdev: %d\n", rv);
+
+	cx_driver_unregister(&mbcs_driver);
+}
+
+static int __init mbcs_init(void)
+{
+	int rv;
+
+	// Put driver into chrdevs[].  Get major number.
+	rv = register_chrdev(mbcs_major, DEVICE_NAME, &mbcs_ops);
+	if (rv < 0) {
+		DBG(KERN_ALERT "mbcs_init: can't get major number. %d\n", rv);
+		return rv;
+	}
+	mbcs_major = rv;
+
+	return cx_driver_register(&mbcs_driver);
+}
+
+module_init(mbcs_init);
+module_exit(mbcs_exit);
+
+MODULE_AUTHOR("Bruce Losure <blosure@sgi.com>");
+MODULE_DESCRIPTION("Driver for MOATB Core Services");
+MODULE_LICENSE("GPL");
diff -Nru a/drivers/char/mbcs.h b/drivers/char/mbcs.h
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/drivers/char/mbcs.h	2005-03-24 18:21:52 -08:00
@@ -0,0 +1,553 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#ifndef __MBCS_H__
+#define __MBCS_H__
+
+/*
+ * General macros
+ */
+#define MB	(1024*1024)
+#define MB2	(2*MB)
+#define MB4	(4*MB)
+#define MB6	(6*MB)
+
+/*
+ * Offsets and masks
+ */
+#define MBCS_CM_ID		0x0000	/* Identification */
+#define MBCS_CM_STATUS		0x0008	/* Status */
+#define MBCS_CM_ERROR_DETAIL1	0x0010	/* Error Detail1 */
+#define MBCS_CM_ERROR_DETAIL2	0x0018	/* Error Detail2 */
+#define MBCS_CM_CONTROL		0x0020	/* Control */
+#define MBCS_CM_REQ_TOUT	0x0028	/* Request Time-out */
+#define MBCS_CM_ERR_INT_DEST	0x0038	/* Error Interrupt Destination */
+#define MBCS_CM_TARG_FL		0x0050	/* Target Flush */
+#define MBCS_CM_ERR_STAT	0x0060	/* Error Status */
+#define MBCS_CM_CLR_ERR_STAT	0x0068	/* Clear Error Status */
+#define MBCS_CM_ERR_INT_EN	0x0070	/* Error Interrupt Enable */
+#define MBCS_RD_DMA_SYS_ADDR	0x0100	/* Read DMA System Address */
+#define MBCS_RD_DMA_LOC_ADDR	0x0108	/* Read DMA Local Address */
+#define MBCS_RD_DMA_CTRL	0x0110	/* Read DMA Control */
+#define MBCS_RD_DMA_AMO_DEST	0x0118	/* Read DMA AMO Destination */
+#define MBCS_RD_DMA_INT_DEST	0x0120	/* Read DMA Interrupt Destination */
+#define MBCS_RD_DMA_AUX_STAT	0x0130	/* Read DMA Auxillary Status */
+#define MBCS_WR_DMA_SYS_ADDR	0x0200	/* Write DMA System Address */
+#define MBCS_WR_DMA_LOC_ADDR	0x0208	/* Write DMA Local Address */
+#define MBCS_WR_DMA_CTRL	0x0210	/* Write DMA Control */
+#define MBCS_WR_DMA_AMO_DEST	0x0218	/* Write DMA AMO Destination */
+#define MBCS_WR_DMA_INT_DEST	0x0220	/* Write DMA Interrupt Destination */
+#define MBCS_WR_DMA_AUX_STAT	0x0230	/* Write DMA Auxillary Status */
+#define MBCS_ALG_AMO_DEST	0x0300	/* Algorithm AMO Destination */
+#define MBCS_ALG_INT_DEST	0x0308	/* Algorithm Interrupt Destination */
+#define MBCS_ALG_OFFSETS	0x0310
+#define MBCS_ALG_STEP		0x0318	/* Algorithm Step */
+
+#define MBCS_GSCR_START		0x0000000
+#define MBCS_DEBUG_START	0x0100000
+#define MBCS_RAM0_START		0x0200000
+#define MBCS_RAM1_START		0x0400000
+#define MBCS_RAM2_START		0x0600000
+
+#define MBCS_CM_CONTROL_REQ_TOUT_MASK 0x0000000000ffffffUL
+//#define PIO_BASE_ADDR_BASE_OFFSET_MASK 0x00fffffffff00000UL
+
+#define MBCS_SRAM_SIZE		(1024*1024)
+#define MBCS_CACHELINE_SIZE	128
+
+/*
+ * MMR get's and put's
+ */
+#define MBCS_MMR_ADDR(mmr_base, offset)((uint64_t *)(mmr_base + offset))
+#define MBCS_MMR_SET(mmr_base, offset, value) {			\
+	uint64_t *mbcs_mmr_set_u64p, readback;				\
+	mbcs_mmr_set_u64p = (uint64_t *)(mmr_base + offset);	\
+	*mbcs_mmr_set_u64p = value;					\
+	readback = *mbcs_mmr_set_u64p; \
+}
+#define MBCS_MMR_GET(mmr_base, offset) *(uint64_t *)(mmr_base + offset)
+#define MBCS_MMR_ZERO(mmr_base, offset) MBCS_MMR_SET(mmr_base, offset, 0)
+
+/*
+ * MBCS mmr structures
+ */
+union cm_id {
+	uint64_t cm_id_reg;
+	struct {
+		uint64_t always_one:1,	// 0
+		 mfg_id:11,	// 11:1
+		 part_num:16,	// 27:12
+		 bitstream_rev:8,	// 35:28
+		:28;		// 63:36
+	};
+};
+
+union cm_status {
+	uint64_t cm_status_reg;
+	struct {
+		uint64_t pending_reads:8,	// 7:0
+		 pending_writes:8,	// 15:8
+		 ice_rsp_credits:8,	// 23:16
+		 ice_req_credits:8,	// 31:24
+		 cm_req_credits:8,	// 39:32
+		:1,		// 40
+		 rd_dma_in_progress:1,	// 41
+		 rd_dma_done:1,	// 42
+		:1,		// 43
+		 wr_dma_in_progress:1,	// 44
+		 wr_dma_done:1,	// 45
+		 alg_waiting:1,	// 46
+		 alg_pipe_running:1,	// 47
+		 alg_done:1,	// 48
+		:3,		// 51:49
+		 pending_int_reqs:8,	// 59:52
+		:3,		// 62:60
+		 alg_half_speed_sel:1;	// 63
+	};
+};
+
+union cm_error_detail1 {
+	uint64_t cm_error_detail1_reg;
+	struct {
+		uint64_t packet_type:4,	// 3:0
+		 source_id:2,	// 5:4
+		 data_size:2,	// 7:6
+		 tnum:8,	// 15:8
+		 byte_enable:8,	// 23:16
+		 gfx_cred:8,	// 31:24
+		 read_type:2,	// 33:32
+		 pio_or_memory:1,	// 34
+		 head_cw_error:1,	// 35
+		:12,		// 47:36
+		 head_error_bit:1,	// 48
+		 data_error_bit:1,	// 49
+		:13,		// 62:50
+		 valid:1;	// 63
+	};
+};
+
+union cm_error_detail2 {
+	uint64_t cm_error_detail2_reg;
+	struct {
+		uint64_t address:56,	// 55:0
+		:8;		// 63:56
+	};
+};
+
+union cm_control {
+	uint64_t cm_control_reg;
+	struct {
+		uint64_t cm_id:2,	// 1:0
+		:2,		// 3:2
+		 max_trans:5,	// 8:4
+		:3,		// 11:9
+		 address_mode:1,	// 12
+		:7,		// 19:13
+		 credit_limit:8,	// 27:20
+		:5,		// 32:28
+		 rearm_stat_regs:1,	// 33
+		 prescalar_byp:1,	// 34
+		 force_gap_war:1,	// 35
+		 rd_dma_go:1,	// 36
+		 wr_dma_go:1,	// 37
+		 alg_go:1,	// 38
+		 rd_dma_clr:1,	// 39
+		 wr_dma_clr:1,	// 40
+		 alg_clr:1,	// 41
+		:2,		// 43:42
+		 alg_wait_step:1,	// 44
+		 alg_done_amo_en:1,	// 45
+		 alg_done_int_en:1,	// 46
+		:1,		// 47
+		 alg_sram0_locked:1,	// 48
+		 alg_sram1_locked:1,	// 49
+		 alg_sram2_locked:1,	// 50
+		 alg_done_clr:1,	// 51
+		:12;		// 63:52
+	};
+};
+
+union cm_req_timeout {
+	uint64_t cm_req_timeout_reg;
+	struct {
+		uint64_t time_out:24,	// 23:0
+		:40;		// 63:24
+	};
+};
+
+union intr_dest {
+	uint64_t intr_dest_reg;
+	struct {
+		uint64_t address:56,	// 55:0
+		 int_vector:8;	// 63:56
+	};
+};
+
+union cm_error_status {
+	uint64_t cm_error_status_reg;
+	struct {
+		uint64_t ecc_sbe:1,	// 0
+		 ecc_mbe:1,	// 1
+		 unsupported_req:1,	// 2
+		 unexpected_rsp:1,	// 3
+		 bad_length:1,	// 4
+		 bad_datavalid:1,	// 5
+		 buffer_overflow:1,	// 6
+		 request_timeout:1,	// 7
+		:8,		// 15:8
+		 head_inv_data_size:1,	// 16
+		 rsp_pactype_inv:1,	// 17
+		 head_sb_err:1,	// 18
+		 missing_head:1,	// 19
+		 head_inv_rd_type:1,	// 20
+		 head_cmd_err_bit:1,	// 21
+		 req_addr_align_inv:1,	// 22
+		 pio_req_addr_inv:1,	// 23
+		 req_range_dsize_inv:1,	// 24
+		 early_term:1,	// 25
+		 early_tail:1,	// 26
+		 missing_tail:1,	// 27
+		 data_flit_sb_err:1,	// 28
+		 cm2hcm_req_cred_of:1,	// 29
+		 cm2hcm_rsp_cred_of:1,	// 30
+		 rx_bad_didn:1,	// 31
+		 rd_dma_err_rsp:1,	// 32
+		 rd_dma_tnum_tout:1,	// 33
+		 rd_dma_multi_tnum_tou:1,	// 34
+		 wr_dma_err_rsp:1,	// 35
+		 wr_dma_tnum_tout:1,	// 36
+		 wr_dma_multi_tnum_tou:1,	// 37
+		 alg_data_overflow:1,	// 38
+		 alg_data_underflow:1,	// 39
+		 ram0_access_conflict:1,	// 40
+		 ram1_access_conflict:1,	// 41
+		 ram2_access_conflict:1,	// 42
+		 ram0_perr:1,	// 43
+		 ram1_perr:1,	// 44
+		 ram2_perr:1,	// 45
+		 int_gen_rsp_err:1,	// 46
+		 int_gen_tnum_tout:1,	// 47
+		 rd_dma_prog_err:1,	// 48
+		 wr_dma_prog_err:1,	// 49
+		:14;		// 63:50
+	};
+};
+
+union cm_clr_error_status {
+	uint64_t cm_clr_error_status_reg;
+	struct {
+		uint64_t clr_ecc_sbe:1,	// 0
+		 clr_ecc_mbe:1,	// 1
+		 clr_unsupported_req:1,	// 2
+		 clr_unexpected_rsp:1,	// 3
+		 clr_bad_length:1,	// 4
+		 clr_bad_datavalid:1,	// 5
+		 clr_buffer_overflow:1,	// 6
+		 clr_request_timeout:1,	// 7
+		:8,		// 15:8
+		 clr_head_inv_data_siz:1,	// 16
+		 clr_rsp_pactype_inv:1,	// 17
+		 clr_head_sb_err:1,	// 18
+		 clr_missing_head:1,	// 19
+		 clr_head_inv_rd_type:1,	// 20
+		 clr_head_cmd_err_bit:1,	// 21
+		 clr_req_addr_align_in:1,	// 22
+		 clr_pio_req_addr_inv:1,	// 23
+		 clr_req_range_dsize_i:1,	// 24
+		 clr_early_term:1,	// 25
+		 clr_early_tail:1,	// 26
+		 clr_missing_tail:1,	// 27
+		 clr_data_flit_sb_err:1,	// 28
+		 clr_cm2hcm_req_cred_o:1,	// 29
+		 clr_cm2hcm_rsp_cred_o:1,	// 30
+		 clr_rx_bad_didn:1,	// 31
+		 clr_rd_dma_err_rsp:1,	// 32
+		 clr_rd_dma_tnum_tout:1,	// 33
+		 clr_rd_dma_multi_tnum:1,	// 34
+		 clr_wr_dma_err_rsp:1,	// 35
+		 clr_wr_dma_tnum_tout:1,	// 36
+		 clr_wr_dma_multi_tnum:1,	// 37
+		 clr_alg_data_overflow:1,	// 38
+		 clr_alg_data_underflo:1,	// 39
+		 clr_ram0_access_confl:1,	// 40
+		 clr_ram1_access_confl:1,	// 41
+		 clr_ram2_access_confl:1,	// 42
+		 clr_ram0_perr:1,	// 43
+		 clr_ram1_perr:1,	// 44
+		 clr_ram2_perr:1,	// 45
+		 clr_int_gen_rsp_err:1,	// 46
+		 clr_int_gen_tnum_tout:1,	// 47
+		 clr_rd_dma_prog_err:1,	// 48
+		 clr_wr_dma_prog_err:1,	// 49
+		:14;		// 63:50
+	};
+};
+
+union cm_error_intr_enable {
+	uint64_t cm_error_intr_enable_reg;
+	struct {
+		uint64_t int_en_ecc_sbe:1,	// 0
+		 int_en_ecc_mbe:1,	// 1
+		 int_en_unsupported_re:1,	// 2
+		 int_en_unexpected_rsp:1,	// 3
+		 int_en_bad_length:1,	// 4
+		 int_en_bad_datavalid:1,	// 5
+		 int_en_buffer_overflo:1,	// 6
+		 int_en_request_timeou:1,	// 7
+		:8,		// 15:8
+		 int_en_head_inv_data_:1,	// 16
+		 int_en_rsp_pactype_in:1,	// 17
+		 int_en_head_sb_err:1,	// 18
+		 int_en_missing_head:1,	// 19
+		 int_en_head_inv_rd_ty:1,	// 20
+		 int_en_head_cmd_err_b:1,	// 21
+		 int_en_req_addr_align:1,	// 22
+		 int_en_pio_req_addr_i:1,	// 23
+		 int_en_req_range_dsiz:1,	// 24
+		 int_en_early_term:1,	// 25
+		 int_en_early_tail:1,	// 26
+		 int_en_missing_tail:1,	// 27
+		 int_en_data_flit_sb_e:1,	// 28
+		 int_en_cm2hcm_req_cre:1,	// 29
+		 int_en_cm2hcm_rsp_cre:1,	// 30
+		 int_en_rx_bad_didn:1,	// 31
+		 int_en_rd_dma_err_rsp:1,	// 32
+		 int_en_rd_dma_tnum_to:1,	// 33
+		 int_en_rd_dma_multi_t:1,	// 34
+		 int_en_wr_dma_err_rsp:1,	// 35
+		 int_en_wr_dma_tnum_to:1,	// 36
+		 int_en_wr_dma_multi_t:1,	// 37
+		 int_en_alg_data_overf:1,	// 38
+		 int_en_alg_data_under:1,	// 39
+		 int_en_ram0_access_co:1,	// 40
+		 int_en_ram1_access_co:1,	// 41
+		 int_en_ram2_access_co:1,	// 42
+		 int_en_ram0_perr:1,	// 43
+		 int_en_ram1_perr:1,	// 44
+		 int_en_ram2_perr:1,	// 45
+		 int_en_int_gen_rsp_er:1,	// 46
+		 int_en_int_gen_tnum_t:1,	// 47
+		 int_en_rd_dma_prog_er:1,	// 48
+		 int_en_wr_dma_prog_er:1,	// 49
+		:14;		// 63:50
+	};
+};
+
+struct cm_mmr {
+	union cm_id id;
+	union cm_status status;
+	union cm_error_detail1 err_detail1;
+	union cm_error_detail2 err_detail2;
+	union cm_control control;
+	union cm_req_timeout req_timeout;
+	uint64_t reserved1[1];
+	union intr_dest int_dest;
+	uint64_t reserved2[2];
+	uint64_t targ_flush;
+	uint64_t reserved3[1];
+	union cm_error_status err_status;
+	union cm_clr_error_status clr_err_status;
+	union cm_error_intr_enable int_enable;
+};
+
+union dma_hostaddr {
+	uint64_t dma_hostaddr_reg;
+	struct {
+		uint64_t dma_sys_addr:56,	// 55:0
+		:8;		// 63:56
+	};
+};
+
+union dma_localaddr {
+	uint64_t dma_localaddr_reg;
+	struct {
+		uint64_t dma_ram_addr:21,	// 20:0
+		 dma_ram_sel:2,	// 22:21
+		:41;		// 63:23
+	};
+};
+
+union dma_control {
+	uint64_t dma_control_reg;
+	struct {
+		uint64_t dma_op_length:16,	// 15:0
+		:18,		// 33:16
+		 done_amo_en:1,	// 34
+		 done_int_en:1,	// 35
+		:1,		// 36
+		 pio_mem_n:1,	// 37
+		:26;		// 63:38
+	};
+};
+
+union dma_amo_dest {
+	uint64_t dma_amo_dest_reg;
+	struct {
+		uint64_t dma_amo_sys_addr:56,	// 55:0
+		 dma_amo_mod_type:3,	// 58:56
+		:5;		// 63:59
+	};
+};
+
+union rdma_aux_status {
+	uint64_t rdma_aux_status_reg;
+	struct {
+		uint64_t op_num_pacs_left:17,	// 16:0
+		:5,		// 21:17
+		 lrsp_buff_empty:1,	// 22
+		:17,		// 39:23
+		 pending_reqs_left:6,	// 45:40
+		:18;		// 63:46
+	};
+};
+
+struct rdma_mmr {
+	union dma_hostaddr host_addr;
+	union dma_localaddr local_addr;
+	union dma_control control;
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union rdma_aux_status aux_status;
+};
+
+union wdma_aux_status {
+	uint64_t wdma_aux_status_reg;
+	struct {
+		uint64_t op_num_pacs_left:17,	// 16:0
+		:4,		// 20:17
+		 lreq_buff_empty:1,	// 21
+		:18,		// 39:22
+		 pending_reqs_left:6,	// 45:40
+		:18;		// 63:46
+	};
+};
+
+struct wdma_mmr {
+	union dma_hostaddr host_addr;
+	union dma_localaddr local_addr;
+	union dma_control control;
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union wdma_aux_status aux_status;
+};
+
+union algo_step {
+	uint64_t algo_step_reg;
+	struct {
+		uint64_t alg_step_cnt:16,	// 15:0
+		:48;		// 63:16
+	};
+};
+
+struct algo_mmr {
+	union dma_amo_dest amo_dest;
+	union intr_dest intr_dest;
+	union {
+		uint64_t algo_offset_reg;
+		struct {
+			uint64_t sram0_offset:7,	// 6:0
+			reserved0:1,	// 7
+			sram1_offset:7,	// 14:8
+			reserved1:1,	// 15
+			sram2_offset:7,	// 22:16
+			reserved2:14;	// 63:23
+		};
+	} sram_offset;
+	union algo_step step;
+};
+
+struct mbcs_mmr {
+	struct cm_mmr cm;
+	uint64_t reserved1[17];
+	struct rdma_mmr rdDma;
+	uint64_t reserved2[25];
+	struct wdma_mmr wrDma;
+	uint64_t reserved3[25];
+	struct algo_mmr algo;
+	uint64_t reserved4[156];
+};
+
+/*
+ * defines
+ */
+#define DEVICE_NAME "mbcs"
+#define MBCS_PART_NUM 0xfff0
+#define MBCS_PART_NUM_ALG0 0xf001
+#define MBCS_MFG_NUM  0x1
+
+struct algoblock {
+	uint64_t amoHostDest;
+	uint64_t amoModType;
+	uint64_t intrHostDest;
+	uint64_t intrVector;
+	uint64_t algoStepCount;
+};
+
+struct getdma {
+	uint64_t hostAddr;
+	uint64_t localAddr;
+	uint64_t bytes;
+	uint64_t DoneAmoEnable;
+	uint64_t DoneIntEnable;
+	uint64_t peerIO;
+	uint64_t amoHostDest;
+	uint64_t amoModType;
+	uint64_t intrHostDest;
+	uint64_t intrVector;
+};
+
+struct putdma {
+	uint64_t hostAddr;
+	uint64_t localAddr;
+	uint64_t bytes;
+	uint64_t DoneAmoEnable;
+	uint64_t DoneIntEnable;
+	uint64_t peerIO;
+	uint64_t amoHostDest;
+	uint64_t amoModType;
+	uint64_t intrHostDest;
+	uint64_t intrVector;
+};
+
+struct mbcs_soft {
+	struct list_head list;
+	struct cx_dev *cxdev;
+	int major;
+	int nasid;
+	void *mmr_base;
+	wait_queue_head_t dmawrite_queue;
+	wait_queue_head_t dmaread_queue;
+	wait_queue_head_t algo_queue;
+	struct sn_irq_info *get_sn_irq;
+	struct sn_irq_info *put_sn_irq;
+	struct sn_irq_info *algo_sn_irq;
+	struct getdma getdma;
+	struct putdma putdma;
+	struct algoblock algo;
+	uint64_t gscr_addr;	// pio addr
+	uint64_t ram0_addr;	// pio addr
+	uint64_t ram1_addr;	// pio addr
+	uint64_t ram2_addr;	// pio addr
+	uint64_t debug_addr;	// pio addr
+	atomic_t dmawrite_done;
+	atomic_t dmaread_done;
+	atomic_t algo_done;
+	struct semaphore dmawritelock;
+	struct semaphore dmareadlock;
+	struct semaphore algolock;
+};
+
+extern int mbcs_open(struct inode *ip, struct file *fp);
+extern ssize_t mbcs_sram_read(struct file *fp, char *buf, size_t len,
+			      loff_t * off);
+extern ssize_t mbcs_sram_write(struct file *fp, const char *buf, size_t len,
+			       loff_t * off);
+extern loff_t mbcs_sram_llseek(struct file *filp, loff_t off, int whence);
+extern int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma);
+
+#endif				// __MBCS_H__
diff -Nru a/drivers/char/snsc.c b/drivers/char/snsc.c
--- a/drivers/char/snsc.c	2005-03-24 18:21:52 -08:00
+++ b/drivers/char/snsc.c	2005-03-24 18:21:52 -08:00
@@ -374,6 +374,7 @@
 	void *salbuf;
 	struct class_simple *snsc_class;
 	dev_t first_dev, dev;
+	nasid_t event_nasid = ia64_sn_get_console_nasid();
 
 	if (alloc_chrdev_region(&first_dev, 0, numionodes,
 				SYSCTL_BASENAME) < 0) {
@@ -441,6 +442,13 @@
 			ia64_sn_irtr_intr_enable(scd->scd_nasid,
 						 0 /*ignored */ ,
 						 SAL_IROUTER_INTR_RECV);
+
+                        /* on the console nasid, prepare to receive
+                         * system controller environmental events
+                         */
+                        if(scd->scd_nasid == event_nasid) {
+                                scdrv_event_init(scd);
+                        }
 	}
 	return 0;
 }
diff -Nru a/drivers/char/snsc.h b/drivers/char/snsc.h
--- a/drivers/char/snsc.h	2005-03-24 18:21:52 -08:00
+++ b/drivers/char/snsc.h	2005-03-24 18:21:52 -08:00
@@ -47,4 +47,44 @@
 	nasid_t scd_nasid;	/* Node on which subchannels are opened. */
 };
 
+
+/* argument types */
+#define IR_ARG_INT              0x00    /* 4-byte integer (big-endian)  */
+#define IR_ARG_ASCII            0x01    /* null-terminated ASCII string */
+#define IR_ARG_UNKNOWN          0x80    /* unknown data type.  The low
+                                         * 7 bits will contain the data
+                                         * length.                      */
+#define IR_ARG_UNKNOWN_LENGTH_MASK	0x7f
+
+
+/* system controller event codes */
+#define EV_CLASS_MASK		0xf000ul
+#define EV_SEVERITY_MASK	0x0f00ul
+#define EV_COMPONENT_MASK	0x00fful
+
+#define EV_CLASS_POWER		0x1000ul
+#define EV_CLASS_FAN		0x2000ul
+#define EV_CLASS_TEMP		0x3000ul
+#define EV_CLASS_ENV		0x4000ul
+#define EV_CLASS_TEST_FAULT	0x5000ul
+#define EV_CLASS_TEST_WARNING	0x6000ul
+#define EV_CLASS_PWRD_NOTIFY	0x8000ul
+
+#define EV_SEVERITY_POWER_STABLE	0x0000ul
+#define EV_SEVERITY_POWER_LOW_WARNING	0x0100ul
+#define EV_SEVERITY_POWER_HIGH_WARNING	0x0200ul
+#define EV_SEVERITY_POWER_HIGH_FAULT	0x0300ul
+#define EV_SEVERITY_POWER_LOW_FAULT	0x0400ul
+
+#define EV_SEVERITY_FAN_STABLE		0x0000ul
+#define EV_SEVERITY_FAN_WARNING		0x0100ul
+#define EV_SEVERITY_FAN_FAULT		0x0200ul
+
+#define EV_SEVERITY_TEMP_STABLE		0x0000ul
+#define EV_SEVERITY_TEMP_ADVISORY	0x0100ul
+#define EV_SEVERITY_TEMP_CRITICAL	0x0200ul
+#define EV_SEVERITY_TEMP_FAULT		0x0300ul
+
+void scdrv_event_init(struct sysctl_data_s *);
+
 #endif /* _SN_SYSCTL_H_ */
diff -Nru a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h
--- a/include/asm-ia64/hw_irq.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/hw_irq.h	2005-03-24 18:21:52 -08:00
@@ -81,6 +81,7 @@
 
 extern struct hw_interrupt_type irq_type_ia64_lsapic;	/* CPU-internal interrupt controller */
 
+extern int assign_irq_vector_nopanic (int irq); /* allocate a free vector without panic */
 extern int assign_irq_vector (int irq);	/* allocate a free vector */
 extern void free_irq_vector (int vector);
 extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect);
diff -Nru a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h
--- a/include/asm-ia64/pal.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/pal.h	2005-03-24 18:21:52 -08:00
@@ -67,6 +67,7 @@
 #define PAL_REGISTER_INFO	39	/* return AR and CR register information*/
 #define PAL_SHUTDOWN		40	/* enter processor shutdown state */
 #define PAL_PREFETCH_VISIBILITY	41	/* Make Processor Prefetches Visible */
+#define PAL_LOGICAL_TO_PHYSICAL 42	/* returns information on logical to physical processor mapping */
 
 #define PAL_COPY_PAL		256	/* relocate PAL procedures and PAL PMI */
 #define PAL_HALT_INFO		257	/* return the low power capabilities of processor */
@@ -1559,6 +1560,73 @@
 	return iprv.status;
 }
 
+/* data structure for getting information on logical to physical mappings */
+typedef union pal_log_overview_u {
+	struct {
+		u64	num_log		:16,	/* Total number of logical
+						 * processors on this die
+						 */
+			tpc		:8,	/* Threads per core */
+			reserved3	:8,	/* Reserved */
+			cpp		:8,	/* Cores per processor */
+			reserved2	:8,	/* Reserved */
+			ppid		:8,	/* Physical processor ID */
+			reserved1	:8;	/* Reserved */
+	} overview_bits;
+	u64 overview_data;
+} pal_log_overview_t;
+
+typedef union pal_proc_n_log_info1_u{
+	struct {
+		u64	tid		:16,	/* Thread id */
+			reserved2	:16,	/* Reserved */
+			cid		:16,	/* Core id */
+			reserved1	:16;	/* Reserved */
+	} ppli1_bits;
+	u64	ppli1_data;
+} pal_proc_n_log_info1_t;
+
+typedef union pal_proc_n_log_info2_u {
+	struct {
+		u64	la		:16,	/* Logical address */
+			reserved	:48;	/* Reserved */
+	} ppli2_bits;
+	u64	ppli2_data;
+} pal_proc_n_log_info2_t;
+
+typedef struct pal_logical_to_physical_s
+{
+	pal_log_overview_t overview;
+	pal_proc_n_log_info1_t ppli1;
+	pal_proc_n_log_info2_t ppli2;
+} pal_logical_to_physical_t;
+
+#define overview_num_log	overview.overview_bits.num_log
+#define overview_tpc		overview.overview_bits.tpc
+#define overview_cpp		overview.overview_bits.cpp
+#define overview_ppid		overview.overview_bits.ppid
+#define log1_tid		ppli1.ppli1_bits.tid
+#define log1_cid		ppli1.ppli1_bits.cid
+#define log2_la			ppli2.ppli2_bits.la
+
+/* Get information on logical to physical processor mappings. */
+static inline s64
+ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_LOGICAL_TO_PHYSICAL, proc_number, 0, 0);
+
+	if (iprv.status == PAL_STATUS_SUCCESS)
+	{
+		if (proc_number == 0)
+			mapping->overview.overview_data = iprv.v0;
+		mapping->ppli1.ppli1_data = iprv.v1;
+		mapping->ppli2.ppli2_data = iprv.v2;
+	}
+
+	return iprv.status;
+}
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_IA64_PAL_H */
diff -Nru a/include/asm-ia64/perfmon.h b/include/asm-ia64/perfmon.h
--- a/include/asm-ia64/perfmon.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/perfmon.h	2005-03-24 18:21:52 -08:00
@@ -254,6 +254,18 @@
 #define PFM_CPUINFO_DCR_PP	0x2	/* if set the system wide session has started */
 #define PFM_CPUINFO_EXCL_IDLE	0x4	/* the system wide session excludes the idle task */
 
+/*
+ * sysctl control structure. visible to sampling formats
+ */
+typedef struct {
+	int	debug;		/* turn on/off debugging via syslog */
+	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
+	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
+	int	expert_mode;	/* turn on/off value checking */
+} pfm_sysctl_t;
+extern pfm_sysctl_t pfm_sysctl;
+
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_IA64_PERFMON_H */
diff -Nru a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h
--- a/include/asm-ia64/pgalloc.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/pgalloc.h	2005-03-24 18:21:52 -08:00
@@ -23,146 +23,124 @@
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
 
-/*
- * Very stupidly, we used to get new pgd's and pmd's, init their contents
- * to point to the NULL versions of the next level page table, later on
- * completely re-init them the same way, then free them up.  This wasted
- * a lot of work and caused unnecessary memory traffic.  How broken...
- * We fix this by caching them.
- */
-#define pgd_quicklist		(local_cpu_data->pgd_quick)
-#define pmd_quicklist		(local_cpu_data->pmd_quick)
-#define pgtable_cache_size	(local_cpu_data->pgtable_cache_sz)
+DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist);
+#define pgtable_quicklist __ia64_per_cpu_var(__pgtable_quicklist)
+DECLARE_PER_CPU(long, __pgtable_quicklist_size);
+#define pgtable_quicklist_size __ia64_per_cpu_var(__pgtable_quicklist_size)
 
-static inline pgd_t*
-pgd_alloc_one_fast (struct mm_struct *mm)
+static inline long pgtable_quicklist_total_size(void)
+{
+	long ql_size = 0;
+	int cpuid;
+
+	for_each_online_cpu(cpuid) {
+		ql_size += per_cpu(__pgtable_quicklist_size, cpuid);
+	}
+	return ql_size;
+}
+
+static inline void *pgtable_quicklist_alloc(void)
 {
 	unsigned long *ret = NULL;
 
 	preempt_disable();
 
-	ret = pgd_quicklist;
+	ret = pgtable_quicklist;
 	if (likely(ret != NULL)) {
-		pgd_quicklist = (unsigned long *)(*ret);
+		pgtable_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
-		--pgtable_cache_size;
-	} else
-		ret = NULL;
+		--pgtable_quicklist_size;
+	} else {
+		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	}
 
 	preempt_enable();
 
-	return (pgd_t *) ret;
+	return ret;
 }
 
-static inline pgd_t*
-pgd_alloc (struct mm_struct *mm)
+static inline void pgtable_quicklist_free(void *pgtable_entry)
 {
-	/* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
-	pgd_t *pgd = pgd_alloc_one_fast(mm);
+#ifdef CONFIG_NUMA
+	unsigned long nid = page_to_nid(virt_to_page(pgtable_entry));
 
-	if (unlikely(pgd == NULL)) {
-		pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+	if (unlikely(nid != numa_node_id())) {
+		free_page((unsigned long)pgtable_entry);
+		return;
 	}
-	return pgd;
-}
+#endif
 
-static inline void
-pgd_free (pgd_t *pgd)
-{
 	preempt_disable();
-	*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
-	pgd_quicklist = (unsigned long *) pgd;
-	++pgtable_cache_size;
+	*(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist;
+	pgtable_quicklist = (unsigned long *)pgtable_entry;
+	++pgtable_quicklist_size;
 	preempt_enable();
 }
 
-static inline void
-pud_populate (struct mm_struct *mm, pud_t *pud_entry, pmd_t *pmd)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pud_val(*pud_entry) = __pa(pmd);
+	return pgtable_quicklist_alloc();
 }
 
-static inline pmd_t*
-pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
+static inline void pgd_free(pgd_t * pgd)
 {
-	unsigned long *ret = NULL;
-
-	preempt_disable();
-
-	ret = (unsigned long *)pmd_quicklist;
-	if (likely(ret != NULL)) {
-		pmd_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		--pgtable_cache_size;
-	}
-
-	preempt_enable();
-
-	return (pmd_t *)ret;
+	pgtable_quicklist_free(pgd);
 }
 
-static inline pmd_t*
-pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
+static inline void
+pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
 {
-	pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pud_val(*pud_entry) = __pa(pmd);
+}
 
-	return pmd;
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return pgtable_quicklist_alloc();
 }
 
-static inline void
-pmd_free (pmd_t *pmd)
+static inline void pmd_free(pmd_t * pmd)
 {
-	preempt_disable();
-	*(unsigned long *)pmd = (unsigned long) pmd_quicklist;
-	pmd_quicklist = (unsigned long *) pmd;
-	++pgtable_cache_size;
-	preempt_enable();
+	pgtable_quicklist_free(pmd);
 }
 
 #define __pmd_free_tlb(tlb, pmd)	pmd_free(pmd)
 
 static inline void
-pmd_populate (struct mm_struct *mm, pmd_t *pmd_entry, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte)
 {
 	pmd_val(*pmd_entry) = page_to_phys(pte);
 }
 
 static inline void
-pmd_populate_kernel (struct mm_struct *mm, pmd_t *pmd_entry, pte_t *pte)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 {
 	pmd_val(*pmd_entry) = __pa(pte);
 }
 
-static inline struct page *
-pte_alloc_one (struct mm_struct *mm, unsigned long addr)
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+					 unsigned long addr)
 {
-	struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-
-	return pte;
+	return virt_to_page(pgtable_quicklist_alloc());
 }
 
-static inline pte_t *
-pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long addr)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-
-	return pte;
+	return pgtable_quicklist_alloc();
 }
 
-static inline void
-pte_free (struct page *pte)
+static inline void pte_free(struct page *pte)
 {
-	__free_page(pte);
+	pgtable_quicklist_free(page_address(pte));
 }
 
-static inline void
-pte_free_kernel (pte_t *pte)
+static inline void pte_free_kernel(pte_t * pte)
 {
-	free_page((unsigned long) pte);
+	pgtable_quicklist_free(pte);
 }
 
-#define __pte_free_tlb(tlb, pte)	tlb_remove_page((tlb), (pte))
+#define __pte_free_tlb(tlb, pte)	pte_free(pte)
 
-extern void check_pgt_cache (void);
+extern void check_pgt_cache(void);
 
-#endif /* _ASM_IA64_PGALLOC_H */
+#endif				/* _ASM_IA64_PGALLOC_H */
diff -Nru a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
--- a/include/asm-ia64/processor.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/processor.h	2005-03-24 18:21:52 -08:00
@@ -145,9 +145,6 @@
 	__u64 nsec_per_cyc;	/* (1000000000<<IA64_NSEC_PER_CYC_SHIFT)/itc_freq */
 	__u64 unimpl_va_mask;	/* mask of unimplemented virtual address bits (from PAL) */
 	__u64 unimpl_pa_mask;	/* mask of unimplemented physical address bits (from PAL) */
-	__u64 *pgd_quick;
-	__u64 *pmd_quick;
-	__u64 pgtable_cache_sz;
 	__u64 itc_freq;		/* frequency of ITC counter */
 	__u64 proc_freq;	/* frequency of processor */
 	__u64 cyc_per_usec;	/* itc_freq/1000000 */
@@ -159,6 +156,13 @@
 #ifdef CONFIG_SMP
 	__u64 loops_per_jiffy;
 	int cpu;
+	__u32 socket_id;	/* physical processor socket id */
+	__u16 core_id;		/* core id */
+	__u16 thread_id;	/* thread id */
+	__u16 num_log;		/* Total number of logical processors on
+				 * this socket that were successfully booted */
+	__u8  cores_per_socket;	/* Cores per processor socket */
+	__u8  threads_per_core;	/* Threads per core */
 #endif
 
 	/* CPUID-derived information: */
diff -Nru a/include/asm-ia64/sal.h b/include/asm-ia64/sal.h
--- a/include/asm-ia64/sal.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sal.h	2005-03-24 18:21:52 -08:00
@@ -91,6 +91,7 @@
 #define SAL_PCI_CONFIG_READ		0x01000010
 #define SAL_PCI_CONFIG_WRITE		0x01000011
 #define SAL_FREQ_BASE			0x01000012
+#define SAL_PHYSICAL_ID_INFO		0x01000013
 
 #define SAL_UPDATE_PAL			0x01000020
 
@@ -815,6 +816,17 @@
 	return isrv.status;
 }
 
+/* Get physical processor die mapping in the platform. */
+static inline s64
+ia64_sal_physical_id_info(u16 *splid)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_PHYSICAL_ID_INFO, 0, 0, 0, 0, 0, 0, 0);
+	if (splid)
+		*splid = isrv.v0;
+	return isrv.status;
+}
+
 extern unsigned long sal_platform_features;
 
 extern int (*salinfo_platform_oemdata)(const u8 *, u8 **, u64 *);
@@ -832,6 +844,44 @@
 				   u64, u64, u64, u64, u64);
 extern int ia64_sal_oemcall_reentrant(struct ia64_sal_retval *, u64, u64, u64,
 				      u64, u64, u64, u64, u64);
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * System Abstraction Layer Specification
+ * Section 3.2.5.1: OS_BOOT_RENDEZ to SAL return State.
+ * Note: region regs are stored first in head.S _start. Hence they must
+ * stay up front.
+ */
+struct sal_to_os_boot {
+	u64 rr[8];		/* Region Registers */
+	u64	br[6];		/* br0: return addr into SAL boot rendez routine */
+	u64 gr1;		/* SAL:GP */
+	u64 gr12;		/* SAL:SP */
+	u64 gr13;		/* SAL: Task Pointer */
+	u64 fpsr;
+	u64	pfs;
+	u64 rnat;
+	u64 unat;
+	u64 bspstore;
+	u64 dcr;		/* Default Control Register */
+	u64 iva;
+	u64 pta;
+	u64 itv;
+	u64 pmv;
+	u64 cmcv;
+	u64 lrr[2];
+	u64 gr[4];
+	u64 pr;			/* Predicate registers */
+	u64 lc;			/* Loop Count */
+	struct ia64_fpreg fp[20];
+};
+
+/*
+ * Global array allocated for NR_CPUS at boot time
+ */
+extern struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS];
+
+extern void ia64_jump_to_sal(struct sal_to_os_boot *);
+#endif
 
 extern void ia64_sal_handler_init(void *entry_point, void *gpval);
 
diff -Nru a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
--- a/include/asm-ia64/smp.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/smp.h	2005-03-24 18:21:52 -08:00
@@ -3,16 +3,14 @@
  *
  * Copyright (C) 1999 VA Linux Systems
  * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- * Copyright (C) 2001-2003 Hewlett-Packard Co
+ * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P.
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  */
 #ifndef _ASM_IA64_SMP_H
 #define _ASM_IA64_SMP_H
 
 #include <linux/config.h>
-
-#ifdef CONFIG_SMP
-
 #include <linux/init.h>
 #include <linux/threads.h>
 #include <linux/kernel.h>
@@ -24,6 +22,25 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 
+static inline unsigned int
+ia64_get_lid (void)
+{
+	union {
+		struct {
+			unsigned long reserved : 16;
+			unsigned long eid : 8;
+			unsigned long id : 8;
+			unsigned long ignored : 32;
+		} f;
+		unsigned long bits;
+	} lid;
+
+	lid.bits = ia64_getreg(_IA64_REG_CR_LID);
+	return lid.f.id << 8 | lid.f.eid;
+}
+
+#ifdef CONFIG_SMP
+
 #define XTP_OFFSET		0x1e0008
 
 #define SMP_IRQ_REDIRECTION	(1 << 0)
@@ -39,6 +56,10 @@
 extern char no_int_routing __devinitdata;
 
 extern cpumask_t cpu_online_map;
+extern cpumask_t cpu_core_map[NR_CPUS];
+extern cpumask_t cpu_sibling_map[NR_CPUS];
+extern int smp_num_siblings;
+extern int smp_num_cpucores;
 extern void __iomem *ipi_base_addr;
 extern unsigned char smp_int_redirect;
 
@@ -90,22 +111,7 @@
 		writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */
 }
 
-static inline unsigned int
-hard_smp_processor_id (void)
-{
-	union {
-		struct {
-			unsigned long reserved : 16;
-			unsigned long eid : 8;
-			unsigned long id : 8;
-			unsigned long ignored : 32;
-		} f;
-		unsigned long bits;
-	} lid;
-
-	lid.bits = ia64_getreg(_IA64_REG_CR_LID);
-	return lid.f.id << 8 | lid.f.eid;
-}
+#define hard_smp_processor_id()		ia64_get_lid()
 
 /* Upping and downing of CPUs */
 extern int __cpu_disable (void);
@@ -122,10 +128,12 @@
 extern void smp_send_reschedule (int cpu);
 extern void lock_ipi_calllock(void);
 extern void unlock_ipi_calllock(void);
+extern void identify_siblings (struct cpuinfo_ia64 *);
 
 #else
 
-#define cpu_logical_id(cpuid)		0
+#define cpu_logical_id(i)		0
+#define cpu_physical_id(i)		ia64_get_lid()
 
 #endif /* CONFIG_SMP */
 #endif /* _ASM_IA64_SMP_H */
diff -Nru a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h
--- a/include/asm-ia64/sn/addrs.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/addrs.h	2005-03-24 18:21:52 -08:00
@@ -11,6 +11,7 @@
 
 #include <asm/percpu.h>
 #include <asm/sn/types.h>
+#include <asm/sn/arch.h>
 #include <asm/sn/pda.h>
 
 /*
@@ -57,9 +58,9 @@
 /*
  * Define basic shift & mask constants for manipulating NASIDs and AS values.
  */
-#define NASID_BITMASK		(pda->nasid_bitmask)
-#define NASID_SHIFT		(pda->nasid_shift)
-#define AS_SHIFT		(pda->as_shift)
+#define NASID_BITMASK		(sn_hub_info->nasid_bitmask)
+#define NASID_SHIFT		(sn_hub_info->nasid_shift)
+#define AS_SHIFT		(sn_hub_info->as_shift)
 #define AS_BITMASK		0x3UL
 
 #define NASID_MASK              ((u64)NASID_BITMASK << NASID_SHIFT)
@@ -153,8 +154,9 @@
  *           the chiplet id is zero.  If we implement TIO-TIO dma, we might need
  *           to insert a chiplet id into this macro.  However, it is our belief
  *           right now that this chiplet id will be ICE, which is also zero.
+ *           Nasid starts on bit 40.
  */
-#define PHYS_TO_TIODMA(x)	( (((u64)(x) & NASID_MASK) << 2) | NODE_OFFSET(x))
+#define PHYS_TO_TIODMA(x)	( (((u64)(NASID_GET(x))) << 40) | NODE_OFFSET(x))
 #define PHYS_TO_DMA(x)          ( (((u64)(x) & NASID_MASK) >> 2) | NODE_OFFSET(x))
 
 
@@ -167,7 +169,10 @@
 #define TIO_BWIN_SIZE_BITS		30	/* big window size: 1G */
 #define NODE_SWIN_BASE(n, w)		((w == 0) ? NODE_BWIN_BASE((n), SWIN0_BIGWIN) \
 		: RAW_NODE_SWIN_BASE(n, w))
+#define TIO_SWIN_BASE(n, w) 		(TIO_IO_BASE(n) + \
+					    ((u64) (w) << TIO_SWIN_SIZE_BITS))
 #define NODE_IO_BASE(n)			(GLOBAL_MMR_SPACE | NASID_SPACE(n))
+#define TIO_IO_BASE(n)                  (UNCACHED | NASID_SPACE(n))
 #define BWIN_SIZE			(1UL << BWIN_SIZE_BITS)
 #define NODE_BWIN_BASE0(n)		(NODE_IO_BASE(n) + BWIN_SIZE)
 #define NODE_BWIN_BASE(n, w)		(NODE_BWIN_BASE0(n) + ((u64) (w) << BWIN_SIZE_BITS))
diff -Nru a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h
--- a/include/asm-ia64/sn/arch.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/arch.h	2005-03-24 18:21:52 -08:00
@@ -12,8 +12,32 @@
 #define _ASM_IA64_SN_ARCH_H
 
 #include <asm/types.h>
+#include <asm/percpu.h>
 #include <asm/sn/types.h>
 #include <asm/sn/sn_cpuid.h>
+
+/*
+ * The following defines attributes of the HUB chip. These attributes are
+ * frequently referenced. They are kept in the per-cpu data areas of each cpu.
+ * They are kept together in a struct to minimize cache misses.
+ */
+struct sn_hub_info_s {
+	u8 shub2;
+	u8 nasid_shift;
+	u8 as_shift;
+	u8 shub_1_1_found;
+	u16 nasid_bitmask;
+};
+DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
+#define sn_hub_info 	(&__get_cpu_var(__sn_hub_info))
+#define is_shub2()	(sn_hub_info->shub2)
+#define is_shub1()	(sn_hub_info->shub2 == 0)
+
+/*
+ * Use this macro to test if shub 1.1 wars should be enabled
+ */
+#define enable_shub_wars_1_1()	(sn_hub_info->shub_1_1_found)
+
 
 /*
  * This is the maximum number of nodes that can be part of a kernel.
diff -Nru a/include/asm-ia64/sn/bte.h b/include/asm-ia64/sn/bte.h
--- a/include/asm-ia64/sn/bte.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/bte.h	2005-03-24 18:21:52 -08:00
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
@@ -13,8 +13,12 @@
 #include <linux/timer.h>
 #include <linux/spinlock.h>
 #include <linux/cache.h>
+#include <asm/sn/pda.h>
 #include <asm/sn/types.h>
+#include <asm/sn/shub_mmr.h>
 
+#define IBCT_NOTIFY             (0x1UL << 4)
+#define IBCT_ZFIL_MODE          (0x1UL << 0)
 
 /* #define BTE_DEBUG */
 /* #define BTE_DEBUG_VERBOSE */
@@ -39,8 +43,36 @@
 
 
 /* Define hardware */
-#define BTES_PER_NODE 2
+#define BTES_PER_NODE (is_shub2() ? 4 : 2)
+#define MAX_BTES_PER_NODE 4
 
+#define BTE2OFF_CTRL	(0)
+#define BTE2OFF_SRC	(SH2_BT_ENG_SRC_ADDR_0 - SH2_BT_ENG_CSR_0)
+#define BTE2OFF_DEST	(SH2_BT_ENG_DEST_ADDR_0 - SH2_BT_ENG_CSR_0)
+#define BTE2OFF_NOTIFY	(SH2_BT_ENG_NOTIF_ADDR_0 - SH2_BT_ENG_CSR_0)
+
+#define BTE_BASE_ADDR(interface) 				\
+    (is_shub2() ? (interface == 0) ? SH2_BT_ENG_CSR_0 :		\
+		  (interface == 1) ? SH2_BT_ENG_CSR_1 :		\
+		  (interface == 2) ? SH2_BT_ENG_CSR_2 :		\
+		  		     SH2_BT_ENG_CSR_3 		\
+		: (interface == 0) ? IIO_IBLS0 : IIO_IBLS1)
+
+#define BTE_SOURCE_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_SRC/8) 			\
+		: base + (BTEOFF_SRC/8))
+
+#define BTE_DEST_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_DEST/8) 			\
+		: base + (BTEOFF_DEST/8))
+
+#define BTE_CTRL_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_CTRL/8) 			\
+		: base + (BTEOFF_CTRL/8))
+
+#define BTE_NOTIF_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_NOTIFY/8) 			\
+		: base + (BTEOFF_NOTIFY/8))
 
 /* Define hardware modes */
 #define BTE_NOTIFY (IBCT_NOTIFY)
@@ -68,14 +100,18 @@
 #define BTE_LNSTAT_STORE(_bte, _x)					\
 			HUB_S(_bte->bte_base_addr, (_x))
 #define BTE_SRC_STORE(_bte, _x)						\
-			HUB_S(_bte->bte_base_addr + (BTEOFF_SRC/8), (_x))
+			HUB_S(_bte->bte_source_addr, (_x))
 #define BTE_DEST_STORE(_bte, _x)					\
-			HUB_S(_bte->bte_base_addr + (BTEOFF_DEST/8), (_x))
+			HUB_S(_bte->bte_destination_addr, (_x))
 #define BTE_CTRL_STORE(_bte, _x)					\
-			HUB_S(_bte->bte_base_addr + (BTEOFF_CTRL/8), (_x))
+			HUB_S(_bte->bte_control_addr, (_x))
 #define BTE_NOTIF_STORE(_bte, _x)					\
-			HUB_S(_bte->bte_base_addr + (BTEOFF_NOTIFY/8), (_x))
+			HUB_S(_bte->bte_notify_addr, (_x))
 
+#define BTE_START_TRANSFER(_bte, _len, _mode)				\
+	is_shub2() ? BTE_CTRL_STORE(_bte, IBLS_BUSY | (_mode << 24) | _len) \
+		: BTE_LNSTAT_STORE(_bte, _len);				\
+		  BTE_CTRL_STORE(_bte, _mode)
 
 /* Possible results from bte_copy and bte_unaligned_copy */
 /* The following error codes map into the BTE hardware codes
@@ -110,6 +146,10 @@
 struct bteinfo_s {
 	volatile u64 notify ____cacheline_aligned;
 	u64 *bte_base_addr ____cacheline_aligned;
+	u64 *bte_source_addr;
+	u64 *bte_destination_addr;
+	u64 *bte_control_addr;
+	u64 *bte_notify_addr;
 	spinlock_t spinlock;
 	cnodeid_t bte_cnode;	/* cnode                            */
 	int bte_error_count;	/* Number of errors encountered     */
@@ -117,6 +157,7 @@
 	int cleanup_active;	/* Interface is locked for cleanup  */
 	volatile bte_result_t bh_error;	/* error while processing   */
 	volatile u64 *most_rcnt_na;
+	struct bteinfo_s *btes_to_try[MAX_BTES_PER_NODE];
 };
 
 
diff -Nru a/include/asm-ia64/sn/geo.h b/include/asm-ia64/sn/geo.h
--- a/include/asm-ia64/sn/geo.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/geo.h	2005-03-24 18:21:52 -08:00
@@ -18,32 +18,34 @@
 #define GEOID_SIZE	8	/* Would 16 be better?  The size can
 				   be different on different platforms. */
 
-#define MAX_SLABS	0xe	/* slabs per module */
+#define MAX_SLOTS	0xf	/* slots per module */
+#define MAX_SLABS	0xf	/* slabs per slot */
 
 typedef unsigned char	geo_type_t;
 
 /* Fields common to all substructures */
-typedef struct geo_any_s {
+typedef struct geo_common_s {
     moduleid_t	module;		/* The module (box) this h/w lives in */
     geo_type_t	type;		/* What type of h/w is named by this geoid_t */
-    slabid_t	slab;		/* The logical assembly within the module */
-} geo_any_t;
+    slabid_t	slab:4;		/* slab (ASIC), 0 .. 15 within slot */
+    slotid_t	slot:4;		/* slot (Blade), 0 .. 15 within module */
+} geo_common_t;
 
 /* Additional fields for particular types of hardware */
 typedef struct geo_node_s {
-    geo_any_t	any;		/* No additional fields needed */
+    geo_common_t	common;		/* No additional fields needed */
 } geo_node_t;
 
 typedef struct geo_rtr_s {
-    geo_any_t	any;		/* No additional fields needed */
+    geo_common_t	common;		/* No additional fields needed */
 } geo_rtr_t;
 
 typedef struct geo_iocntl_s {
-    geo_any_t	any;		/* No additional fields needed */
+    geo_common_t	common;		/* No additional fields needed */
 } geo_iocntl_t;
 
 typedef struct geo_pcicard_s {
-    geo_iocntl_t	any;
+    geo_iocntl_t	common;
     char		bus;	/* Bus/widget number */
     char		slot;	/* PCI slot number */
 } geo_pcicard_t;
@@ -62,14 +64,14 @@
 
 
 typedef union geoid_u {
-    geo_any_t	any;
-    geo_node_t	node;
+    geo_common_t	common;
+    geo_node_t		node;
     geo_iocntl_t	iocntl;
     geo_pcicard_t	pcicard;
-    geo_rtr_t	rtr;
-    geo_cpu_t	cpu;
-    geo_mem_t	mem;
-    char	padsize[GEOID_SIZE];
+    geo_rtr_t		rtr;
+    geo_cpu_t		cpu;
+    geo_mem_t		mem;
+    char		padsize[GEOID_SIZE];
 } geoid_t;
 
 
@@ -104,19 +106,26 @@
 #define INVALID_CNODEID         ((cnodeid_t)-1)
 #define INVALID_PNODEID         ((pnodeid_t)-1)
 #define INVALID_SLAB            (slabid_t)-1
+#define INVALID_SLOT            (slotid_t)-1
 #define INVALID_MODULE          ((moduleid_t)-1)
 #define INVALID_PARTID          ((partid_t)-1)
 
 static inline slabid_t geo_slab(geoid_t g)
 {
-	return (g.any.type == GEO_TYPE_INVALID) ?
-		INVALID_SLAB : g.any.slab;
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_SLAB : g.common.slab;
+}
+
+static inline slotid_t geo_slot(geoid_t g)
+{
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_SLOT : g.common.slot;
 }
 
 static inline moduleid_t geo_module(geoid_t g)
 {
-	return (g.any.type == GEO_TYPE_INVALID) ?
-		INVALID_MODULE : g.any.module;
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_MODULE : g.common.module;
 }
 
 extern geoid_t cnodeid_get_geoid(cnodeid_t cnode);
diff -Nru a/include/asm-ia64/sn/nodepda.h b/include/asm-ia64/sn/nodepda.h
--- a/include/asm-ia64/sn/nodepda.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/nodepda.h	2005-03-24 18:21:52 -08:00
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
 #ifndef _ASM_IA64_SN_NODEPDA_H
 #define _ASM_IA64_SN_NODEPDA_H
@@ -43,7 +43,7 @@
 	/*
 	 * The BTEs on this node are shared by the local cpus
 	 */
-	struct bteinfo_s	bte_if[BTES_PER_NODE];	/* Virtual Interface */
+	struct bteinfo_s	bte_if[MAX_BTES_PER_NODE];	/* Virtual Interface */
 	struct timer_list	bte_recovery_timer;
 	spinlock_t		bte_recovery_lock;
 
diff -Nru a/include/asm-ia64/sn/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/include/asm-ia64/sn/pcibus_provider_defs.h	2005-03-24 18:21:52 -08:00
@@ -0,0 +1,52 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
+#define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
+
+/*
+ * SN pci asic types.  Do not ever renumber these or reuse values.  The
+ * values must agree with what prom thinks they are.
+ */
+
+#define PCIIO_ASIC_TYPE_UNKNOWN	0
+#define PCIIO_ASIC_TYPE_PPB	1
+#define PCIIO_ASIC_TYPE_PIC	2
+#define PCIIO_ASIC_TYPE_TIOCP	3
+#define PCIIO_ASIC_TYPE_TIOCA	4
+
+#define PCIIO_ASIC_MAX_TYPES	5
+
+/*
+ * Common pciio bus provider data.  There should be one of these as the
+ * first field in any pciio based provider soft structure (e.g. pcibr_soft
+ * tioca_soft, etc).
+ */
+
+struct pcibus_bussoft {
+	uint32_t		bs_asic_type;	/* chipset type */
+	uint32_t		bs_xid;		/* xwidget id */
+	uint64_t		bs_persist_busnum; /* Persistent Bus Number */
+	uint64_t		bs_legacy_io;	/* legacy io pio addr */
+	uint64_t		bs_legacy_mem;	/* legacy mem pio addr */
+	uint64_t		bs_base;	/* widget base */
+	struct xwidget_info	*bs_xwidget_info;
+};
+
+/*
+ * SN pci bus indirection
+ */
+
+struct sn_pcibus_provider {
+	dma_addr_t	(*dma_map)(struct pci_dev *, unsigned long, size_t);
+	dma_addr_t	(*dma_map_consistent)(struct pci_dev *, unsigned long, size_t);
+	void		(*dma_unmap)(struct pci_dev *, dma_addr_t, int);
+	void *		(*bus_fixup)(struct pcibus_bussoft *);
+};
+
+extern struct sn_pcibus_provider *sn_pci_provider[];
+#endif				/* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */
diff -Nru a/include/asm-ia64/sn/pcidev.h b/include/asm-ia64/sn/pcidev.h
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/include/asm-ia64/sn/pcidev.h	2005-03-24 18:21:52 -08:00
@@ -0,0 +1,58 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIDEV_H
+#define _ASM_IA64_SN_PCI_PCIDEV_H
+
+#include <linux/pci.h>
+
+extern struct sn_irq_info **sn_irq;
+
+#define SN_PCIDEV_INFO(pci_dev) \
+        ((struct pcidev_info *)(pci_dev)->sysdata)
+
+/*
+ * Given a pci_bus, return the sn pcibus_bussoft struct.  Note that
+ * this only works for root busses, not for busses represented by PPB's.
+ */
+
+#define SN_PCIBUS_BUSSOFT(pci_bus) \
+        ((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data))
+
+/*
+ * Given a struct pci_dev, return the sn pcibus_bussoft struct.  Note
+ * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due
+ * due to possible PPB's in the path.
+ */
+
+#define SN_PCIDEV_BUSSOFT(pci_dev) \
+	(SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info)
+
+#define SN_PCIDEV_BUSPROVIDER(pci_dev) \
+	(SN_PCIDEV_INFO(pci_dev)->pdi_provider)
+
+#define PCIIO_BUS_NONE	255      /* bus 255 reserved */
+#define PCIIO_SLOT_NONE 255
+#define PCIIO_FUNC_NONE 255
+#define PCIIO_VENDOR_ID_NONE	(-1)
+
+struct pcidev_info {
+	uint64_t		pdi_pio_mapped_addr[7]; /* 6 BARs PLUS 1 ROM */
+	uint64_t		pdi_slot_host_handle;	/* Bus and devfn Host pci_dev */
+
+	struct pcibus_bussoft	*pdi_pcibus_info;	/* Kernel common bus soft */
+	struct pcidev_info	*pdi_host_pcidev_info;	/* Kernel Host pci_dev */
+	struct pci_dev		*pdi_linux_pcidev;	/* Kernel pci_dev */
+
+	struct sn_irq_info	*pdi_sn_irq_info;
+	struct sn_pcibus_provider *pdi_provider;	/* sn pci ops */
+};
+
+extern void sn_irq_fixup(struct pci_dev *pci_dev,
+			 struct sn_irq_info *sn_irq_info);
+
+#endif				/* _ASM_IA64_SN_PCI_PCIDEV_H */
diff -Nru a/include/asm-ia64/sn/pda.h b/include/asm-ia64/sn/pda.h
--- a/include/asm-ia64/sn/pda.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/pda.h	2005-03-24 18:21:52 -08:00
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
 #ifndef _ASM_IA64_SN_PDA_H
 #define _ASM_IA64_SN_PDA_H
@@ -11,7 +11,6 @@
 #include <linux/cache.h>
 #include <asm/percpu.h>
 #include <asm/system.h>
-#include <asm/sn/bte.h>
 
 
 /*
@@ -37,11 +36,6 @@
 	 * Support for SN LEDs
 	 */
 	volatile short	*led_address;
-	u16		nasid_bitmask;
-	u8		shub2;
-	u8		nasid_shift;
-	u8		as_shift;
-	u8		shub_1_1_found;
 	u8		led_state;
 	u8		hb_state;	/* supports blinking heartbeat leds */
 	unsigned int	hb_count;
@@ -53,8 +47,6 @@
 	unsigned long pio_write_status_val;
 	volatile unsigned long *pio_shub_war_cam_addr;
 
-	struct bteinfo_s *cpu_bte_if[BTES_PER_NODE];	/* cpu interface order */
-
 	unsigned long	sn_soft_irr[4];
 	unsigned long	sn_in_service_ivecs[4];
 	short		cnodeid_to_nasid_table[MAX_NUMNODES];
@@ -83,13 +75,5 @@
 #define pda		(&__ia64_per_cpu_var(pda_percpu))
 
 #define pdacpu(cpu)	(&per_cpu(pda_percpu, cpu))
-
-/*
- * Use this macro to test if shub 1.1 wars should be enabled
- */
-#define enable_shub_wars_1_1()	(pda->shub_1_1_found)
-
-#define is_shub2()	(pda->shub2)
-#define is_shub1()	(pda->shub2 == 0)
 
 #endif /* _ASM_IA64_SN_PDA_H */
diff -Nru a/include/asm-ia64/sn/shub_mmr.h b/include/asm-ia64/sn/shub_mmr.h
--- a/include/asm-ia64/sn/shub_mmr.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/shub_mmr.h	2005-03-24 18:21:52 -08:00
@@ -4,7 +4,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2001-2004 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.  All rights reserved.
  */
 
 #ifndef _ASM_IA64_SN_SHUB_MMR_H
@@ -129,6 +129,23 @@
 #define SH_EVENT_OCCURRED_II_INT1_SHFT           30
 #define SH_EVENT_OCCURRED_II_INT1_MASK           0x0000000040000000
 
+/*   SH2_EVENT_OCCURRED_EXTIO_INT2                                      */
+/*   Description:  Pending SHUB 2 EXT IO INT2                           */
+#define SH2_EVENT_OCCURRED_EXTIO_INT2_SHFT       33
+#define SH2_EVENT_OCCURRED_EXTIO_INT2_MASK       0x0000000200000000
+
+/*   SH2_EVENT_OCCURRED_EXTIO_INT3                                      */
+/*   Description:  Pending SHUB 2 EXT IO INT3                           */
+#define SH2_EVENT_OCCURRED_EXTIO_INT3_SHFT       34
+#define SH2_EVENT_OCCURRED_EXTIO_INT3_MASK       0x0000000400000000
+
+#define SH_ALL_INT_MASK \
+	(SH_EVENT_OCCURRED_UART_INT_MASK | SH_EVENT_OCCURRED_IPI_INT_MASK | \
+	 SH_EVENT_OCCURRED_II_INT0_MASK | SH_EVENT_OCCURRED_II_INT1_MASK | \
+	 SH_EVENT_OCCURRED_II_INT1_MASK | SH2_EVENT_OCCURRED_EXTIO_INT2_MASK | \
+	 SH2_EVENT_OCCURRED_EXTIO_INT3_MASK)
+
+
 /* ==================================================================== */
 /*                         LEDS                                         */
 /* ==================================================================== */
@@ -437,5 +454,23 @@
 #define SH_INT_CMPB		shubmmr(SH, INT_CMPB)
 #define SH_INT_CMPC		shubmmr(SH, INT_CMPC)
 #define SH_INT_CMPD		shubmmr(SH, INT_CMPD)
+
+/* ========================================================================== */
+/*                        Register "SH2_BT_ENG_CSR_0"                         */
+/*                    Engine 0 Control and Status Register                    */
+/* ========================================================================== */
+
+#define SH2_BT_ENG_CSR_0                         0x0000000030040000
+#define SH2_BT_ENG_SRC_ADDR_0                    0x0000000030040080
+#define SH2_BT_ENG_DEST_ADDR_0                   0x0000000030040100
+#define SH2_BT_ENG_NOTIF_ADDR_0                  0x0000000030040180
+
+/* ========================================================================== */
+/*                       BTE interfaces 1-3                                   */
+/* ========================================================================== */
+
+#define SH2_BT_ENG_CSR_1                         0x0000000030050000
+#define SH2_BT_ENG_CSR_2                         0x0000000030060000
+#define SH2_BT_ENG_CSR_3                         0x0000000030070000
 
 #endif /* _ASM_IA64_SN_SHUB_MMR_H */
diff -Nru a/include/asm-ia64/sn/sn_cpuid.h b/include/asm-ia64/sn/sn_cpuid.h
--- a/include/asm-ia64/sn/sn_cpuid.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/sn_cpuid.h	2005-03-24 18:21:52 -08:00
@@ -135,9 +135,10 @@
 #define nasid_to_cnodeid(nasid)		(physical_node_map[nasid])
 
 /*
- * partition_coherence_id - cget the coherence ID of the current partition
+ * partition_coherence_id - get the coherence ID of the current partition
  */
-#define partition_coherence_id()	(get_nasid() >> 9)
+extern u8 sn_coherency_id;
+#define partition_coherence_id()	(sn_coherency_id)
 
 #endif /* _ASM_IA64_SN_SN_CPUID_H */
 
diff -Nru a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
--- a/include/asm-ia64/sn/sn_sal.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/sn_sal.h	2005-03-24 18:21:52 -08:00
@@ -8,7 +8,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All rights reserved.
  */
 
 
@@ -18,6 +18,7 @@
 #include <asm/sn/arch.h>
 #include <asm/sn/geo.h>
 #include <asm/sn/nodepda.h>
+#include <asm/sn/shub_mmr.h>
 
 // SGI Specific Calls
 #define  SN_SAL_POD_MODE                           0x02000001
@@ -34,8 +35,8 @@
 #define  SN_SAL_PRINT_ERROR			   0x02000012
 #define  SN_SAL_SET_ERROR_HANDLING_FEATURES	   0x0200001a	// reentrant
 #define  SN_SAL_GET_FIT_COMPT			   0x0200001b	// reentrant
-#define  SN_SAL_GET_HUB_INFO                       0x0200001c
 #define  SN_SAL_GET_SAPIC_INFO                     0x0200001d
+#define  SN_SAL_GET_SN_INFO                        0x0200001e
 #define  SN_SAL_CONSOLE_PUTC                       0x02000021
 #define  SN_SAL_CONSOLE_GETC                       0x02000022
 #define  SN_SAL_CONSOLE_PUTS                       0x02000023
@@ -63,6 +64,7 @@
 
 #define  SN_SAL_SYSCTL_IOBRICK_PCI_OP		   0x02000042	// reentrant
 #define	 SN_SAL_IROUTER_OP			   0x02000043
+#define  SN_SAL_SYSCTL_EVENT                       0x02000044
 #define  SN_SAL_IOIF_INTERRUPT			   0x0200004a
 #define  SN_SAL_HWPERF_OP			   0x02000050   // lock
 #define  SN_SAL_IOIF_ERROR_INTERRUPT		   0x02000051
@@ -75,7 +77,8 @@
 #define  SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST	   0x02000058
 
 #define SN_SAL_HUB_ERROR_INTERRUPT		   0x02000060
-
+#define SN_SAL_BTE_RECOVER			   0x02000061
+#define SN_SAL_IOIF_GET_PCI_TOPOLOGY	           0x02000062
 
 /*
  * Service-specific constants
@@ -848,6 +851,19 @@
 	return (int) rv.v0;
 }
 
+/*
+ * Set up a node as the point of contact for system controller
+ * environmental event delivery.
+ */
+static inline int
+ia64_sn_sysctl_event_init(nasid_t nasid)
+{
+        struct ia64_sal_retval rv;
+        SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_EVENT, (u64) nasid,
+			   0, 0, 0, 0, 0, 0);
+        return (int) rv.v0;
+}
+
 /**
  * ia64_sn_get_fit_compt - read a FIT entry from the PROM header
  * @nasid: NASID of node to read
@@ -935,15 +951,24 @@
 /*
  * Returns information about the HUB/SHUB.
  *  In:
- *	arg0 - SN_SAL_GET_HUB_INFO
+ *	arg0 - SN_SAL_GET_SN_INFO
  * 	arg1 - 0 (other values reserved for future use)
  *  Out:
- *	v0 - shub type (0=shub1, 1=shub2)
- *	v1 - masid mask (ex., 0x7ff for 11 bit nasid)
- *	v2 - bit position of low nasid bit
+ *	v0 
+ *		[7:0]   - shub type (0=shub1, 1=shub2)
+ *		[15:8]  - Log2 max number of nodes in entire system (includes
+ *			  C-bricks, I-bricks, etc)
+ *		[23:16] - Log2 of nodes per sharing domain			 
+ * 		[31:24] - partition ID
+ * 		[39:32] - coherency_id
+ * 		[47:40] - regionsize
+ *	v1 
+ *		[15:0]  - nasid mask (ex., 0x7ff for 11 bit nasid)
+ *	 	[23:15] - bit position of low nasid bit
  */
 static inline u64
-ia64_sn_get_hub_info(int fc, u64 *arg1, u64 *arg2, u64 *arg3)
+ia64_sn_get_sn_info(int fc, u8 *shubtype, u16 *nasid_bitmask, u8 *nasid_shift, 
+		u8 *systemsize, u8 *sharing_domain_size, u8 *partid, u8 *coher, u8 *reg)
 {
 	struct ia64_sal_retval ret_stuff;
 
@@ -951,13 +976,22 @@
 	ret_stuff.v0 = 0;
 	ret_stuff.v1 = 0;
 	ret_stuff.v2 = 0;
-	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_GET_HUB_INFO, fc, 0, 0, 0, 0, 0, 0);
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_GET_SN_INFO, fc, 0, 0, 0, 0, 0, 0);
 
 /***** BEGIN HACK - temp til old proms no longer supported ********/
 	if (ret_stuff.status == SALRET_NOT_IMPLEMENTED) {
-		if (arg1) *arg1 = 0;
-		if (arg2) *arg2 = 0x7ff;
-		if (arg3) *arg3 = 38;
+		int nasid = get_sapicid() & 0xfff;;
+#define SH_SHUB_ID_NODES_PER_BIT_MASK 0x001f000000000000UL                                               
+#define SH_SHUB_ID_NODES_PER_BIT_SHFT 48                                                               
+		if (shubtype) *shubtype = 0;
+		if (nasid_bitmask) *nasid_bitmask = 0x7ff;
+		if (nasid_shift) *nasid_shift = 38;
+		if (systemsize) *systemsize = 11;
+		if (sharing_domain_size) *sharing_domain_size = 9;
+		if (partid) *partid = ia64_sn_sysctl_partition_get(nasid);
+		if (coher) *coher = nasid >> 9;
+		if (reg) *reg = (HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_SHUB_ID)) & SH_SHUB_ID_NODES_PER_BIT_MASK) >>
+			SH_SHUB_ID_NODES_PER_BIT_SHFT;
 		return 0;
 	}
 /***** END HACK *******/
@@ -965,9 +999,14 @@
 	if (ret_stuff.status < 0)
 		return ret_stuff.status;
 
-	if (arg1) *arg1 = ret_stuff.v0;
-	if (arg2) *arg2 = ret_stuff.v1;
-	if (arg3) *arg3 = ret_stuff.v2;
+	if (shubtype) *shubtype = ret_stuff.v0 & 0xff;
+	if (systemsize) *systemsize = (ret_stuff.v0 >> 8) & 0xff;
+	if (sharing_domain_size) *sharing_domain_size = (ret_stuff.v0 >> 16) & 0xff;
+	if (partid) *partid = (ret_stuff.v0 >> 24) & 0xff;
+	if (coher) *coher = (ret_stuff.v0 >> 32) & 0xff;
+	if (reg) *reg = (ret_stuff.v0 >> 40) & 0xff;
+	if (nasid_bitmask) *nasid_bitmask = (ret_stuff.v1 & 0xffff);
+	if (nasid_shift) *nasid_shift = (ret_stuff.v1 >> 16) & 0xff;
 	return 0;
 }
  
@@ -985,6 +1024,31 @@
 		opcode, a0, a1, a2, a3, a4);
 	if (v0)
 		*v0 = (int) rv.v0;
+	return (int) rv.status;
+}
+
+static inline int
+ia64_sn_ioif_get_pci_topology(u64 rack, u64 bay, u64 slot, u64 slab,
+			      u64 buf, u64 len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_IOIF_GET_PCI_TOPOLOGY,
+		rack, bay, slot, slab, buf, len, 0);
+	return (int) rv.status;
+}
+
+/*
+ * BTE error recovery is implemented in SAL
+ */
+static inline int
+ia64_sn_bte_recovery(nasid_t nasid)
+{
+	struct ia64_sal_retval rv;
+
+	rv.status = 0;
+	SAL_CALL_NOLOCK(rv, SN_SAL_BTE_RECOVER, 0, 0, 0, 0, 0, 0, 0);
+	if (rv.status == SALRET_NOT_IMPLEMENTED)
+		return 0;
 	return (int) rv.status;
 }
 
diff -Nru a/include/asm-ia64/sn/tioca.h b/include/asm-ia64/sn/tioca.h
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/include/asm-ia64/sn/tioca.h	2005-03-24 18:21:52 -08:00
@@ -0,0 +1,596 @@
+#ifndef _ASM_IA64_SN_TIO_TIOCA_H
+#define _ASM_IA64_SN_TIO_TIOCA_H
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+#define TIOCA_PART_NUM	0xE020
+#define TIOCA_MFGR_NUM	0x24
+#define TIOCA_REV_A	0x1
+
+/*
+ * Register layout for TIO:CA.  See below for bitmasks for each register.
+ */
+
+struct tioca {
+	uint64_t	ca_id;				/* 0x000000 */
+	uint64_t	ca_control1;			/* 0x000008 */
+	uint64_t	ca_control2;			/* 0x000010 */
+	uint64_t	ca_status1;			/* 0x000018 */
+	uint64_t	ca_status2;			/* 0x000020 */
+	uint64_t	ca_gart_aperature;		/* 0x000028 */
+	uint64_t	ca_gfx_detach;			/* 0x000030 */
+	uint64_t	ca_inta_dest_addr;		/* 0x000038 */
+	uint64_t	ca_intb_dest_addr;		/* 0x000040 */
+	uint64_t	ca_err_int_dest_addr;		/* 0x000048 */
+	uint64_t	ca_int_status;			/* 0x000050 */
+	uint64_t	ca_int_status_alias;		/* 0x000058 */
+	uint64_t	ca_mult_error;			/* 0x000060 */
+	uint64_t	ca_mult_error_alias;		/* 0x000068 */
+	uint64_t	ca_first_error;			/* 0x000070 */
+	uint64_t	ca_int_mask;			/* 0x000078 */
+	uint64_t	ca_crm_pkterr_type;		/* 0x000080 */
+	uint64_t	ca_crm_pkterr_type_alias;	/* 0x000088 */
+	uint64_t	ca_crm_ct_error_detail_1;	/* 0x000090 */
+	uint64_t	ca_crm_ct_error_detail_2;	/* 0x000098 */
+	uint64_t	ca_crm_tnumto;			/* 0x0000A0 */
+	uint64_t	ca_gart_err;			/* 0x0000A8 */
+	uint64_t	ca_pcierr_type;			/* 0x0000B0 */
+	uint64_t	ca_pcierr_addr;			/* 0x0000B8 */
+
+	uint64_t	ca_pad_0000C0[3];		/* 0x0000{C0..D0} */
+
+	uint64_t	ca_pci_rd_buf_flush;		/* 0x0000D8 */
+	uint64_t	ca_pci_dma_addr_extn;		/* 0x0000E0 */
+	uint64_t	ca_agp_dma_addr_extn;		/* 0x0000E8 */
+	uint64_t	ca_force_inta;			/* 0x0000F0 */
+	uint64_t	ca_force_intb;			/* 0x0000F8 */
+	uint64_t	ca_debug_vector_sel;		/* 0x000100 */
+	uint64_t	ca_debug_mux_core_sel;		/* 0x000108 */
+	uint64_t	ca_debug_mux_pci_sel;		/* 0x000110 */
+	uint64_t	ca_debug_domain_sel;		/* 0x000118 */
+
+	uint64_t	ca_pad_000120[28];		/* 0x0001{20..F8} */
+
+	uint64_t	ca_gart_ptr_table;		/* 0x200 */
+	uint64_t	ca_gart_tlb_addr[8];		/* 0x2{08..40} */
+};
+
+/*
+ * Mask/shift definitions for TIO:CA registers.  The convention here is
+ * to mainly use the names as they appear in the "TIO AEGIS Programmers'
+ * Reference" with a CA_ prefix added.  Some exceptions were made to fix
+ * duplicate field names or to generalize fields that are common to
+ * different registers (ca_debug_mux_core_sel and ca_debug_mux_pci_sel for
+ * example).
+ *
+ * Fields consisting of a single bit have a single #define have a single
+ * macro declaration to mask the bit.  Fields consisting of multiple bits
+ * have two declarations: one to mask the proper bits in a register, and 
+ * a second with the suffix "_SHFT" to identify how far the mask needs to
+ * be shifted right to get its base value.
+ */
+
+/* ==== ca_control1 */
+#define CA_SYS_BIG_END			(1ull << 0)
+#define CA_DMA_AGP_SWAP			(1ull << 1)
+#define CA_DMA_PCI_SWAP			(1ull << 2)
+#define CA_PIO_IO_SWAP			(1ull << 3)
+#define CA_PIO_MEM_SWAP			(1ull << 4)
+#define CA_GFX_WR_SWAP			(1ull << 5)
+#define CA_AGP_FW_ENABLE		(1ull << 6)
+#define CA_AGP_CAL_CYCLE		(0x7ull << 7)
+#define CA_AGP_CAL_CYCLE_SHFT		7
+#define CA_AGP_CAL_PRSCL_BYP		(1ull << 10)
+#define CA_AGP_INIT_CAL_ENB		(1ull << 11)
+#define CA_INJ_ADDR_PERR		(1ull << 12)
+#define CA_INJ_DATA_PERR		(1ull << 13)
+	/* bits 15:14 unused */
+#define CA_PCIM_IO_NBE_AD		(0x7ull << 16)
+#define CA_PCIM_IO_NBE_AD_SHFT		16
+#define CA_PCIM_FAST_BTB_ENB		(1ull << 19)
+	/* bits 23:20 unused */
+#define CA_PIO_ADDR_OFFSET		(0xffull << 24)
+#define CA_PIO_ADDR_OFFSET_SHFT		24
+	/* bits 35:32 unused */
+#define CA_AGPDMA_OP_COMBDELAY		(0x1full << 36)
+#define CA_AGPDMA_OP_COMBDELAY_SHFT	36
+	/* bit 41 unused */
+#define CA_AGPDMA_OP_ENB_COMBDELAY	(1ull << 42)
+#define	CA_PCI_INT_LPCNT		(0xffull << 44)
+#define CA_PCI_INT_LPCNT_SHFT		44
+	/* bits 63:52 unused */
+
+/* ==== ca_control2 */
+#define CA_AGP_LATENCY_TO		(0xffull << 0)
+#define CA_AGP_LATENCY_TO_SHFT		0
+#define CA_PCI_LATENCY_TO		(0xffull << 8)
+#define CA_PCI_LATENCY_TO_SHFT		8
+#define CA_PCI_MAX_RETRY		(0x3ffull << 16)
+#define CA_PCI_MAX_RETRY_SHFT		16
+	/* bits 27:26 unused */
+#define CA_RT_INT_EN			(0x3ull << 28)
+#define CA_RT_INT_EN_SHFT			28
+#define CA_MSI_INT_ENB			(1ull << 30)
+#define CA_PCI_ARB_ERR_ENB		(1ull << 31)
+#define CA_GART_MEM_PARAM		(0x3ull << 32)
+#define CA_GART_MEM_PARAM_SHFT		32
+#define CA_GART_RD_PREFETCH_ENB		(1ull << 34)
+#define CA_GART_WR_PREFETCH_ENB		(1ull << 35)
+#define CA_GART_FLUSH_TLB		(1ull << 36)
+	/* bits 39:37 unused */
+#define CA_CRM_TNUMTO_PERIOD		(0x1fffull << 40)
+#define CA_CRM_TNUMTO_PERIOD_SHFT	40
+	/* bits 55:53 unused */
+#define CA_CRM_TNUMTO_ENB		(1ull << 56)
+#define CA_CRM_PRESCALER_BYP		(1ull << 57)
+	/* bits 59:58 unused */
+#define CA_CRM_MAX_CREDIT		(0x7ull << 60)
+#define CA_CRM_MAX_CREDIT_SHFT		60
+	/* bit 63 unused */
+
+/* ==== ca_status1 */
+#define CA_CORELET_ID			(0x3ull << 0)
+#define CA_CORELET_ID_SHFT		0
+#define CA_INTA_N			(1ull << 2)
+#define CA_INTB_N			(1ull << 3)
+#define CA_CRM_CREDIT_AVAIL		(0x7ull << 4)
+#define CA_CRM_CREDIT_AVAIL_SHFT	4
+	/* bit 7 unused */
+#define CA_CRM_SPACE_AVAIL		(0x7full << 8)
+#define CA_CRM_SPACE_AVAIL_SHFT		8
+	/* bit 15 unused */
+#define CA_GART_TLB_VAL			(0xffull << 16)
+#define CA_GART_TLB_VAL_SHFT		16
+	/* bits 63:24 unused */
+
+/* ==== ca_status2 */
+#define CA_GFX_CREDIT_AVAIL		(0xffull << 0)
+#define CA_GFX_CREDIT_AVAIL_SHFT	0
+#define CA_GFX_OPQ_AVAIL		(0xffull << 8)
+#define CA_GFX_OPQ_AVAIL_SHFT		8
+#define CA_GFX_WRBUFF_AVAIL		(0xffull << 16)
+#define CA_GFX_WRBUFF_AVAIL_SHFT	16
+#define CA_ADMA_OPQ_AVAIL		(0xffull << 24)
+#define CA_ADMA_OPQ_AVAIL_SHFT		24
+#define CA_ADMA_WRBUFF_AVAIL		(0xffull << 32)
+#define CA_ADMA_WRBUFF_AVAIL_SHFT	32
+#define CA_ADMA_RDBUFF_AVAIL		(0x7full << 40)
+#define CA_ADMA_RDBUFF_AVAIL_SHFT	40
+#define CA_PCI_PIO_OP_STAT		(1ull << 47)
+#define CA_PDMA_OPQ_AVAIL		(0xfull << 48)
+#define CA_PDMA_OPQ_AVAIL_SHFT		48
+#define CA_PDMA_WRBUFF_AVAIL		(0xfull << 52)
+#define CA_PDMA_WRBUFF_AVAIL_SHFT	52
+#define CA_PDMA_RDBUFF_AVAIL		(0x3ull << 56)
+#define CA_PDMA_RDBUFF_AVAIL_SHFT	56
+	/* bits 63:58 unused */
+
+/* ==== ca_gart_aperature */
+#define CA_GART_AP_ENB_AGP		(1ull << 0)
+#define CA_GART_PAGE_SIZE		(1ull << 1)
+#define CA_GART_AP_ENB_PCI		(1ull << 2)
+	/* bits 11:3 unused */
+#define CA_GART_AP_SIZE			(0x3ffull << 12)
+#define CA_GART_AP_SIZE_SHFT		12
+#define CA_GART_AP_BASE			(0x3ffffffffffull << 22)
+#define CA_GART_AP_BASE_SHFT		22
+
+/* ==== ca_inta_dest_addr
+   ==== ca_intb_dest_addr 
+   ==== ca_err_int_dest_addr */
+	/* bits 2:0 unused */
+#define CA_INT_DEST_ADDR		(0x7ffffffffffffull << 3)
+#define CA_INT_DEST_ADDR_SHFT		3
+	/* bits 55:54 unused */
+#define CA_INT_DEST_VECT		(0xffull << 56)
+#define CA_INT_DEST_VECT_SHFT		56
+
+/* ==== ca_int_status */
+/* ==== ca_int_status_alias */
+/* ==== ca_mult_error */
+/* ==== ca_mult_error_alias */
+/* ==== ca_first_error */
+/* ==== ca_int_mask */
+#define CA_PCI_ERR			(1ull << 0)
+	/* bits 3:1 unused */
+#define CA_GART_FETCH_ERR		(1ull << 4)
+#define CA_GFX_WR_OVFLW			(1ull << 5)
+#define CA_PIO_REQ_OVFLW		(1ull << 6)
+#define CA_CRM_PKTERR			(1ull << 7)
+#define CA_CRM_DVERR			(1ull << 8)
+#define CA_TNUMTO			(1ull << 9)
+#define CA_CXM_RSP_CRED_OVFLW		(1ull << 10)
+#define CA_CXM_REQ_CRED_OVFLW		(1ull << 11)
+#define CA_PIO_INVALID_ADDR		(1ull << 12)
+#define CA_PCI_ARB_TO			(1ull << 13)
+#define CA_AGP_REQ_OFLOW		(1ull << 14)
+#define CA_SBA_TYPE1_ERR		(1ull << 15)
+	/* bit 16 unused */
+#define CA_INTA				(1ull << 17)
+#define CA_INTB				(1ull << 18)
+#define CA_MULT_INTA			(1ull << 19)
+#define CA_MULT_INTB			(1ull << 20)
+#define CA_GFX_CREDIT_OVFLW		(1ull << 21)
+	/* bits 63:22 unused */
+
+/* ==== ca_crm_pkterr_type */
+/* ==== ca_crm_pkterr_type_alias */
+#define CA_CRM_PKTERR_SBERR_HDR		(1ull << 0)
+#define CA_CRM_PKTERR_DIDN		(1ull << 1)
+#define CA_CRM_PKTERR_PACTYPE		(1ull << 2)
+#define CA_CRM_PKTERR_INV_TNUM		(1ull << 3)
+#define CA_CRM_PKTERR_ADDR_RNG		(1ull << 4)
+#define CA_CRM_PKTERR_ADDR_ALGN		(1ull << 5)
+#define CA_CRM_PKTERR_HDR_PARAM		(1ull << 6)
+#define CA_CRM_PKTERR_CW_ERR		(1ull << 7)
+#define CA_CRM_PKTERR_SBERR_NH		(1ull << 8)
+#define CA_CRM_PKTERR_EARLY_TERM	(1ull << 9)
+#define CA_CRM_PKTERR_EARLY_TAIL	(1ull << 10)
+#define CA_CRM_PKTERR_MSSNG_TAIL	(1ull << 11)
+#define CA_CRM_PKTERR_MSSNG_HDR		(1ull << 12)
+	/* bits 15:13 unused */
+#define CA_FIRST_CRM_PKTERR_SBERR_HDR	(1ull << 16)
+#define CA_FIRST_CRM_PKTERR_DIDN	(1ull << 17)
+#define CA_FIRST_CRM_PKTERR_PACTYPE	(1ull << 18)
+#define CA_FIRST_CRM_PKTERR_INV_TNUM	(1ull << 19)
+#define CA_FIRST_CRM_PKTERR_ADDR_RNG	(1ull << 20)
+#define CA_FIRST_CRM_PKTERR_ADDR_ALGN	(1ull << 21)
+#define CA_FIRST_CRM_PKTERR_HDR_PARAM	(1ull << 22)
+#define CA_FIRST_CRM_PKTERR_CW_ERR	(1ull << 23)
+#define CA_FIRST_CRM_PKTERR_SBERR_NH	(1ull << 24)
+#define CA_FIRST_CRM_PKTERR_EARLY_TERM	(1ull << 25)
+#define CA_FIRST_CRM_PKTERR_EARLY_TAIL	(1ull << 26)
+#define CA_FIRST_CRM_PKTERR_MSSNG_TAIL	(1ull << 27)
+#define CA_FIRST_CRM_PKTERR_MSSNG_HDR	(1ull << 28)
+	/* bits 63:29 unused */
+
+/* ==== ca_crm_ct_error_detail_1 */
+#define CA_PKT_TYPE			(0xfull << 0)
+#define CA_PKT_TYPE_SHFT		0
+#define CA_SRC_ID			(0x3ull << 4)
+#define CA_SRC_ID_SHFT			4
+#define CA_DATA_SZ			(0x3ull << 6)
+#define CA_DATA_SZ_SHFT			6
+#define CA_TNUM				(0xffull << 8)
+#define CA_TNUM_SHFT			8
+#define CA_DW_DATA_EN			(0xffull << 16)
+#define CA_DW_DATA_EN_SHFT		16
+#define CA_GFX_CRED			(0xffull << 24)
+#define CA_GFX_CRED_SHFT		24
+#define CA_MEM_RD_PARAM			(0x3ull << 32)
+#define CA_MEM_RD_PARAM_SHFT		32
+#define CA_PIO_OP			(1ull << 34)
+#define CA_CW_ERR			(1ull << 35)
+	/* bits 62:36 unused */
+#define CA_VALID			(1ull << 63)
+
+/* ==== ca_crm_ct_error_detail_2 */
+	/* bits 2:0 unused */
+#define CA_PKT_ADDR			(0x1fffffffffffffull << 3)
+#define CA_PKT_ADDR_SHFT		3
+	/* bits 63:56 unused */
+
+/* ==== ca_crm_tnumto */
+#define CA_CRM_TNUMTO_VAL		(0xffull << 0)
+#define CA_CRM_TNUMTO_VAL_SHFT		0
+#define CA_CRM_TNUMTO_WR		(1ull << 8)
+	/* bits 63:9 unused */
+
+/* ==== ca_gart_err */
+#define CA_GART_ERR_SOURCE		(0x3ull << 0)
+#define CA_GART_ERR_SOURCE_SHFT		0
+	/* bits 3:2 unused */
+#define CA_GART_ERR_ADDR		(0xfffffffffull << 4)
+#define CA_GART_ERR_ADDR_SHFT		4
+	/* bits 63:40 unused */
+
+/* ==== ca_pcierr_type */
+#define CA_PCIERR_DATA			(0xffffffffull << 0)
+#define CA_PCIERR_DATA_SHFT		0
+#define CA_PCIERR_ENB			(0xfull << 32)
+#define CA_PCIERR_ENB_SHFT		32
+#define CA_PCIERR_CMD			(0xfull << 36)
+#define CA_PCIERR_CMD_SHFT		36
+#define CA_PCIERR_A64			(1ull << 40)
+#define CA_PCIERR_SLV_SERR		(1ull << 41)
+#define CA_PCIERR_SLV_WR_PERR		(1ull << 42)
+#define CA_PCIERR_SLV_RD_PERR		(1ull << 43)
+#define CA_PCIERR_MST_SERR		(1ull << 44)
+#define CA_PCIERR_MST_WR_PERR		(1ull << 45)
+#define CA_PCIERR_MST_RD_PERR		(1ull << 46)
+#define CA_PCIERR_MST_MABT		(1ull << 47)
+#define CA_PCIERR_MST_TABT		(1ull << 48)
+#define CA_PCIERR_MST_RETRY_TOUT	(1ull << 49)
+
+#define CA_PCIERR_TYPES \
+	(CA_PCIERR_A64|CA_PCIERR_SLV_SERR| \
+	 CA_PCIERR_SLV_WR_PERR|CA_PCIERR_SLV_RD_PERR| \
+	 CA_PCIERR_MST_SERR|CA_PCIERR_MST_WR_PERR|CA_PCIERR_MST_RD_PERR| \
+	 CA_PCIERR_MST_MABT|CA_PCIERR_MST_TABT|CA_PCIERR_MST_RETRY_TOUT)
+
+	/* bits 63:50 unused */
+
+/* ==== ca_pci_dma_addr_extn */
+#define CA_UPPER_NODE_OFFSET		(0x3full << 0)
+#define CA_UPPER_NODE_OFFSET_SHFT	0
+	/* bits 7:6 unused */
+#define CA_CHIPLET_ID			(0x3ull << 8)
+#define CA_CHIPLET_ID_SHFT		8
+	/* bits 11:10 unused */
+#define CA_PCI_DMA_NODE_ID		(0xffffull << 12)
+#define CA_PCI_DMA_NODE_ID_SHFT		12
+	/* bits 27:26 unused */
+#define CA_PCI_DMA_PIO_MEM_TYPE		(1ull << 28)
+	/* bits 63:29 unused */
+
+
+/* ==== ca_agp_dma_addr_extn */
+	/* bits 19:0 unused */
+#define CA_AGP_DMA_NODE_ID		(0xffffull << 20)
+#define CA_AGP_DMA_NODE_ID_SHFT		20
+	/* bits 27:26 unused */
+#define CA_AGP_DMA_PIO_MEM_TYPE		(1ull << 28)
+	/* bits 63:29 unused */
+
+/* ==== ca_debug_vector_sel */
+#define CA_DEBUG_MN_VSEL		(0xfull << 0)
+#define CA_DEBUG_MN_VSEL_SHFT		0
+#define CA_DEBUG_PP_VSEL		(0xfull << 4)
+#define CA_DEBUG_PP_VSEL_SHFT		4
+#define CA_DEBUG_GW_VSEL		(0xfull << 8)
+#define CA_DEBUG_GW_VSEL_SHFT		8
+#define CA_DEBUG_GT_VSEL		(0xfull << 12)
+#define CA_DEBUG_GT_VSEL_SHFT		12
+#define CA_DEBUG_PD_VSEL		(0xfull << 16)
+#define CA_DEBUG_PD_VSEL_SHFT		16
+#define CA_DEBUG_AD_VSEL		(0xfull << 20)
+#define CA_DEBUG_AD_VSEL_SHFT		20
+#define CA_DEBUG_CX_VSEL		(0xfull << 24)
+#define CA_DEBUG_CX_VSEL_SHFT		24
+#define CA_DEBUG_CR_VSEL		(0xfull << 28)
+#define CA_DEBUG_CR_VSEL_SHFT		28
+#define CA_DEBUG_BA_VSEL		(0xfull << 32)
+#define CA_DEBUG_BA_VSEL_SHFT		32
+#define CA_DEBUG_PE_VSEL		(0xfull << 36)
+#define CA_DEBUG_PE_VSEL_SHFT		36
+#define CA_DEBUG_BO_VSEL		(0xfull << 40)
+#define CA_DEBUG_BO_VSEL_SHFT		40
+#define CA_DEBUG_BI_VSEL		(0xfull << 44)
+#define CA_DEBUG_BI_VSEL_SHFT		44
+#define CA_DEBUG_AS_VSEL		(0xfull << 48)
+#define CA_DEBUG_AS_VSEL_SHFT		48
+#define CA_DEBUG_PS_VSEL		(0xfull << 52)
+#define CA_DEBUG_PS_VSEL_SHFT		52
+#define CA_DEBUG_PM_VSEL		(0xfull << 56)
+#define CA_DEBUG_PM_VSEL_SHFT		56
+	/* bits 63:60 unused */
+
+/* ==== ca_debug_mux_core_sel */
+/* ==== ca_debug_mux_pci_sel */
+#define CA_DEBUG_MSEL0			(0x7ull << 0)
+#define CA_DEBUG_MSEL0_SHFT		0
+	/* bit 3 unused */
+#define CA_DEBUG_NSEL0			(0x7ull << 4)
+#define CA_DEBUG_NSEL0_SHFT		4
+	/* bit 7 unused */
+#define CA_DEBUG_MSEL1			(0x7ull << 8)
+#define CA_DEBUG_MSEL1_SHFT		8
+	/* bit 11 unused */
+#define CA_DEBUG_NSEL1			(0x7ull << 12)
+#define CA_DEBUG_NSEL1_SHFT		12
+	/* bit 15 unused */
+#define CA_DEBUG_MSEL2			(0x7ull << 16)
+#define CA_DEBUG_MSEL2_SHFT		16
+	/* bit 19 unused */
+#define CA_DEBUG_NSEL2			(0x7ull << 20)
+#define CA_DEBUG_NSEL2_SHFT		20
+	/* bit 23 unused */
+#define CA_DEBUG_MSEL3			(0x7ull << 24)
+#define CA_DEBUG_MSEL3_SHFT		24
+	/* bit 27 unused */
+#define CA_DEBUG_NSEL3			(0x7ull << 28)
+#define CA_DEBUG_NSEL3_SHFT		28
+	/* bit 31 unused */
+#define CA_DEBUG_MSEL4			(0x7ull << 32)
+#define CA_DEBUG_MSEL4_SHFT		32
+	/* bit 35 unused */
+#define CA_DEBUG_NSEL4			(0x7ull << 36)
+#define CA_DEBUG_NSEL4_SHFT		36
+	/* bit 39 unused */
+#define CA_DEBUG_MSEL5			(0x7ull << 40)
+#define CA_DEBUG_MSEL5_SHFT		40
+	/* bit 43 unused */
+#define CA_DEBUG_NSEL5			(0x7ull << 44)
+#define CA_DEBUG_NSEL5_SHFT		44
+	/* bit 47 unused */
+#define CA_DEBUG_MSEL6			(0x7ull << 48)
+#define CA_DEBUG_MSEL6_SHFT		48
+	/* bit 51 unused */
+#define CA_DEBUG_NSEL6			(0x7ull << 52)
+#define CA_DEBUG_NSEL6_SHFT		52
+	/* bit 55 unused */
+#define CA_DEBUG_MSEL7			(0x7ull << 56)
+#define CA_DEBUG_MSEL7_SHFT		56
+	/* bit 59 unused */
+#define CA_DEBUG_NSEL7			(0x7ull << 60)
+#define CA_DEBUG_NSEL7_SHFT		60
+	/* bit 63 unused */
+
+
+/* ==== ca_debug_domain_sel */
+#define CA_DEBUG_DOMAIN_L		(1ull << 0)
+#define CA_DEBUG_DOMAIN_H		(1ull << 1)
+	/* bits 63:2 unused */
+
+/* ==== ca_gart_ptr_table */
+#define CA_GART_PTR_VAL			(1ull << 0)
+	/* bits 11:1 unused */
+#define CA_GART_PTR_ADDR		(0xfffffffffffull << 12)
+#define CA_GART_PTR_ADDR_SHFT		12
+	/* bits 63:56 unused */
+
+/* ==== ca_gart_tlb_addr[0-7] */
+#define CA_GART_TLB_ADDR		(0xffffffffffffffull << 0)
+#define CA_GART_TLB_ADDR_SHFT		0
+	/* bits 62:56 unused */
+#define CA_GART_TLB_ENTRY_VAL		(1ull << 63)
+
+/*
+ * PIO address space ranges for TIO:CA
+ */
+
+/* CA internal registers */
+#define CA_PIO_ADMIN			0x00000000
+#define CA_PIO_ADMIN_LEN		0x00010000
+
+/* GFX Write Buffer - Diagnostics */
+#define CA_PIO_GFX			0x00010000
+#define CA_PIO_GFX_LEN			0x00010000
+
+/* AGP DMA Write Buffer - Diagnostics */
+#define CA_PIO_AGP_DMAWRITE		0x00020000
+#define CA_PIO_AGP_DMAWRITE_LEN		0x00010000
+
+/* AGP DMA READ Buffer - Diagnostics */
+#define CA_PIO_AGP_DMAREAD		0x00030000
+#define CA_PIO_AGP_DMAREAD_LEN		0x00010000
+
+/* PCI Config Type 0 */
+#define CA_PIO_PCI_TYPE0_CONFIG		0x01000000
+#define CA_PIO_PCI_TYPE0_CONFIG_LEN	0x01000000
+
+/* PCI Config Type 1 */
+#define CA_PIO_PCI_TYPE1_CONFIG		0x02000000
+#define CA_PIO_PCI_TYPE1_CONFIG_LEN	0x01000000
+
+/* PCI I/O Cycles - mapped to PCI Address 0x00000000-0x04ffffff */
+#define CA_PIO_PCI_IO			0x03000000
+#define CA_PIO_PCI_IO_LEN		0x05000000
+
+/* PCI MEM Cycles - mapped to PCI with CA_PIO_ADDR_OFFSET of ca_control1 */
+/*	use Fast Write if enabled and coretalk packet type is a GFX request */
+#define CA_PIO_PCI_MEM_OFFSET		0x08000000
+#define CA_PIO_PCI_MEM_OFFSET_LEN	0x08000000
+
+/* PCI MEM Cycles - mapped to PCI Address 0x00000000-0xbfffffff */
+/*	use Fast Write if enabled and coretalk packet type is a GFX request */
+#define CA_PIO_PCI_MEM			0x40000000
+#define CA_PIO_PCI_MEM_LEN		0xc0000000
+
+/*
+ * DMA space
+ *
+ * The CA aperature (ie. bus address range) mapped by the GART is segmented into
+ * two parts.  The lower portion of the aperature is used for mapping 32 bit
+ * PCI addresses which are managed by the dma interfaces in this file.  The
+ * upper poprtion of the aperature is used for mapping 48 bit AGP addresses.
+ * The AGP portion of the aperature is managed by the agpgart_be.c driver
+ * in drivers/linux/agp.  There are ca-specific hooks in that driver to
+ * manipulate the gart, but management of the AGP portion of the aperature
+ * is the responsibility of that driver.
+ *
+ * CA allows three main types of DMA mapping:
+ *
+ * PCI 64-bit	Managed by this driver
+ * PCI 32-bit 	Managed by this driver
+ * AGP 48-bit	Managed by hooks in the /dev/agpgart driver
+ *
+ * All of the above can optionally be remapped through the GART.  The following
+ * table lists the combinations of addressing types and GART remapping that
+ * is currently supported by the driver (h/w supports all, s/w limits this):
+ *
+ *		PCI64		PCI32		AGP48
+ * GART		no		yes		yes
+ * Direct	yes		yes		no
+ *
+ * GART remapping of PCI64 is not done because there is no need to.  The
+ * 64 bit PCI address holds all of the information necessary to target any
+ * memory in the system.
+ *
+ * AGP48 is always mapped through the GART.  Management of the AGP48 portion
+ * of the aperature is the responsibility of code in the agpgart_be driver.
+ *
+ * The non-64 bit bus address space will currently be partitioned like this:
+ *
+ *	0xffff_ffff_ffff	+--------
+ *				| AGP48 direct
+ *				| Space managed by this driver
+ *	CA_AGP_DIRECT_BASE	+--------
+ *				| AGP GART mapped (gfx aperature)
+ *				| Space managed by /dev/agpgart driver
+ *				| This range is exposed to the agpgart
+ * 				| driver as the "graphics aperature"
+ *	CA_AGP_MAPPED_BASE	+-----
+ *				| PCI GART mapped
+ *				| Space managed by this driver		
+ *	CA_PCI32_MAPPED_BASE	+----
+ *				| PCI32 direct
+ *				| Space managed by this driver
+ *	0xC000_0000		+--------
+ *	(CA_PCI32_DIRECT_BASE)
+ *
+ * The bus address range CA_PCI32_MAPPED_BASE through CA_AGP_DIRECT_BASE
+ * is what we call the CA aperature.  Addresses falling in this range will
+ * be remapped using the GART.
+ *
+ * The bus address range CA_AGP_MAPPED_BASE through CA_AGP_DIRECT_BASE
+ * is what we call the graphics aperature.  This is a subset of the CA
+ * aperature and is under the control of the agpgart_be driver.
+ *
+ * CA_PCI32_MAPPED_BASE, CA_AGP_MAPPED_BASE, and CA_AGP_DIRECT_BASE are
+ * somewhat arbitrary values.  The known constraints on choosing these is:
+ *
+ * 1)  CA_AGP_DIRECT_BASE-CA_PCI32_MAPPED_BASE+1 (the CA aperature size)
+ *     must be one of the values supported by the ca_gart_aperature register.
+ *     Currently valid values are: 4MB through 4096MB in powers of 2 increments
+ *
+ * 2)  CA_AGP_DIRECT_BASE-CA_AGP_MAPPED_BASE+1 (the gfx aperature size)
+ *     must be in MB units since that's what the agpgart driver assumes.
+ */
+
+/*
+ * Define Bus DMA ranges.  These are configurable (see constraints above)
+ * and will probably need tuning based on experience.
+ */
+
+
+/*
+ * 11/24/03
+ * CA has an addressing glitch w.r.t. PCI direct 32 bit DMA that makes it
+ * generally unusable.  The problem is that for PCI direct 32 
+ * DMA's, all 32 bits of the bus address are used to form the lower 32 bits
+ * of the coretalk address, and coretalk bits 38:32 come from a register.
+ * Since only PCI bus addresses 0xC0000000-0xFFFFFFFF (1GB) are available
+ * for DMA (the rest is allocated to PIO), host node addresses need to be
+ * such that their lower 32 bits fall in the 0xC0000000-0xffffffff range
+ * as well.  So there can be no PCI32 direct DMA below 3GB!!  For this
+ * reason we set the CA_PCI32_DIRECT_SIZE to 0 which essentially makes
+ * tioca_dma_direct32() a noop but preserves the code flow should this issue
+ * be fixed in a respin.
+ *
+ * For now, all PCI32 DMA's must be mapped through the GART.
+ */
+
+#define CA_PCI32_DIRECT_BASE	0xC0000000UL	/* BASE not configurable */
+#define CA_PCI32_DIRECT_SIZE	0x00000000UL	/* 0 MB */
+
+#define CA_PCI32_MAPPED_BASE	0xC0000000UL
+#define CA_PCI32_MAPPED_SIZE	0x40000000UL	/* 2GB */
+
+#define CA_AGP_MAPPED_BASE	0x80000000UL
+#define CA_AGP_MAPPED_SIZE	0x40000000UL	/* 2GB */
+
+#define CA_AGP_DIRECT_BASE	0x40000000UL	/* 2GB */
+#define CA_AGP_DIRECT_SIZE	0x40000000UL
+
+#define CA_APERATURE_BASE	(CA_AGP_MAPPED_BASE)
+#define CA_APERATURE_SIZE	(CA_AGP_MAPPED_SIZE+CA_PCI32_MAPPED_SIZE)
+
+#endif  /* _ASM_IA64_SN_TIO_TIOCA_H */
diff -Nru a/include/asm-ia64/sn/tioca_provider.h b/include/asm-ia64/sn/tioca_provider.h
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/include/asm-ia64/sn/tioca_provider.h	2005-03-24 18:21:52 -08:00
@@ -0,0 +1,206 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H
+#define _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H
+
+#include <asm/sn/tioca.h>
+
+/*
+ * WAR enables
+ * Defines for individual WARs. Each is a bitmask of applicable
+ * part revision numbers. (1 << 1) == rev A, (1 << 2) == rev B,
+ * (3 << 1) == (rev A or rev B), etc
+ */
+
+#define TIOCA_WAR_ENABLED(pv, tioca_common) \
+	((1 << tioca_common->ca_rev) & pv)
+
+  /* TIO:ICE:FRZ:Freezer loses a PIO data ucred on PIO RD RSP with CW error */
+#define PV907908 (1 << 1)
+  /* ATI config space problems after BIOS execution starts */
+#define PV908234 (1 << 1)
+  /* CA:AGPDMA write request data mismatch with ABC1CL merge */
+#define PV895469 (1 << 1)
+  /* TIO:CA TLB invalidate of written GART entries possibly not occuring in CA*/
+#define PV910244 (1 << 1)
+
+struct tioca_dmamap{
+	struct list_head	cad_list;	/* headed by ca_list */
+
+	dma_addr_t		cad_dma_addr;	/* Linux dma handle */
+	uint			cad_gart_entry; /* start entry in ca_gart_pagemap */
+	uint			cad_gart_size;	/* #entries for this map */
+};
+
+/*
+ * Kernel only fields.  Prom may look at this stuff for debugging only.
+ * Access this structure through the ca_kernel_private ptr.
+ */
+
+struct tioca_common ;
+
+struct tioca_kernel {
+	struct tioca_common	*ca_common;	/* tioca this belongs to */
+	struct list_head	ca_list;	/* list of all ca's */
+	struct list_head	ca_dmamaps;
+	spinlock_t		ca_lock;	/* Kernel lock */
+	cnodeid_t		ca_closest_node;
+	struct list_head	*ca_devices;	/* bus->devices */
+
+	/*
+	 * General GART stuff
+	 */
+	uint64_t	ca_ap_size;		/* size of aperature in bytes */
+	uint32_t	ca_gart_entries;	/* # uint64_t entries in gart */
+	uint32_t	ca_ap_pagesize; 	/* aperature page size in bytes */
+	uint64_t	ca_ap_bus_base; 	/* bus address of CA aperature */
+	uint64_t	ca_gart_size;		/* gart size in bytes */
+	uint64_t	*ca_gart;		/* gart table vaddr */
+	uint64_t	ca_gart_coretalk_addr;	/* gart coretalk addr */
+	uint8_t		ca_gart_iscoherent;	/* used in tioca_tlbflush */
+
+	/* PCI GART convenience values */
+	uint64_t	ca_pciap_base;		/* pci aperature bus base address */
+	uint64_t	ca_pciap_size;		/* pci aperature size (bytes) */
+	uint64_t	ca_pcigart_base;	/* gfx GART bus base address */
+	uint64_t	*ca_pcigart;		/* gfx GART vm address */
+	uint32_t	ca_pcigart_entries;
+	uint32_t	ca_pcigart_start;	/* PCI start index in ca_gart */
+	void		*ca_pcigart_pagemap;
+
+	/* AGP GART convenience values */
+	uint64_t	ca_gfxap_base;		/* gfx aperature bus base address */
+	uint64_t	ca_gfxap_size;		/* gfx aperature size (bytes) */
+	uint64_t	ca_gfxgart_base;	/* gfx GART bus base address */
+	uint64_t	*ca_gfxgart;		/* gfx GART vm address */
+	uint32_t	ca_gfxgart_entries;
+	uint32_t	ca_gfxgart_start;	/* agpgart start index in ca_gart */
+};
+
+/*
+ * Common tioca info shared between kernel and prom
+ *
+ * DO NOT CHANGE THIS STRUCT WITHOUT MAKING CORRESPONDING CHANGES
+ * TO THE PROM VERSION.
+ */
+
+struct tioca_common {
+	struct pcibus_bussoft	ca_common;	/* common pciio header */
+
+	uint32_t		ca_rev;
+	uint32_t		ca_closest_nasid;
+
+	uint64_t		ca_prom_private;
+	uint64_t		ca_kernel_private;
+};
+
+/**
+ * tioca_paddr_to_gart - Convert an SGI coretalk address to a CA GART entry
+ * @paddr: page address to convert
+ *
+ * Convert a system [coretalk] address to a GART entry.  GART entries are
+ * formed using the following:
+ *
+ *     data = ( (1<<63) |  ( (REMAP_NODE_ID << 40) | (MD_CHIPLET_ID << 38) | 
+ * (REMAP_SYS_ADDR) ) >> 12 )
+ *
+ * DATA written to 1 GART TABLE Entry in system memory is remapped system
+ * addr for 1 page 
+ *
+ * The data is for coretalk address format right shifted 12 bits with a
+ * valid bit.
+ *
+ *	GART_TABLE_ENTRY [ 25:0 ]  -- REMAP_SYS_ADDRESS[37:12].
+ *	GART_TABLE_ENTRY [ 27:26 ] -- SHUB MD chiplet id.
+ *	GART_TABLE_ENTRY [ 41:28 ] -- REMAP_NODE_ID.
+ *	GART_TABLE_ENTRY [ 63 ]    -- Valid Bit 
+ */
+static inline u64
+tioca_paddr_to_gart(unsigned long paddr)
+{
+	/*
+	 * We are assuming right now that paddr already has the correct
+	 * format since the address from xtalk_dmaXXX should already have
+	 * NODE_ID, CHIPLET_ID, and SYS_ADDR in the correct locations.
+	 */
+
+	return ((paddr) >> 12) | (1UL << 63);
+}
+
+/**
+ * tioca_physpage_to_gart - Map a host physical page for SGI CA based DMA
+ * @page_addr: system page address to map
+ */
+
+static inline unsigned long
+tioca_physpage_to_gart(uint64_t page_addr)
+{
+	uint64_t coretalk_addr;
+
+	coretalk_addr = PHYS_TO_TIODMA(page_addr);
+	if (!coretalk_addr) {
+		return 0;
+	}
+
+	return tioca_paddr_to_gart(coretalk_addr);
+}
+
+/**
+ * tioca_tlbflush - invalidate cached SGI CA GART TLB entries
+ * @tioca_kernel: CA context 
+ *
+ * Invalidate tlb entries for a given CA GART.  Main complexity is to account
+ * for revA bug.
+ */
+static inline void
+tioca_tlbflush(struct tioca_kernel *tioca_kernel)
+{
+	volatile uint64_t tmp;
+	volatile struct tioca *ca_base;
+	struct tioca_common *tioca_common;
+
+	tioca_common = tioca_kernel->ca_common;
+	ca_base = (struct tioca *)tioca_common->ca_common.bs_base;
+
+	/*
+	 * Explicit flushes not needed if GART is in cached mode
+	 */
+	if (tioca_kernel->ca_gart_iscoherent) {
+		if (TIOCA_WAR_ENABLED(PV910244, tioca_common)) {
+			/*
+			 * PV910244:  RevA CA needs explicit flushes.
+			 * Need to put GART into uncached mode before
+			 * flushing otherwise the explicit flush is ignored.
+			 *
+			 * Alternate WAR would be to leave GART cached and
+			 * touch every CL aligned GART entry.
+			 */
+
+			ca_base->ca_control2 &= ~(CA_GART_MEM_PARAM);
+			ca_base->ca_control2 |= CA_GART_FLUSH_TLB;
+			ca_base->ca_control2 |=
+			    (0x2ull << CA_GART_MEM_PARAM_SHFT);
+			tmp = ca_base->ca_control2;
+		}
+
+		return;
+	}
+
+	/*
+	 * Gart in uncached mode ... need an explicit flush.
+	 */
+
+	ca_base->ca_control2 |= CA_GART_FLUSH_TLB;
+	tmp = ca_base->ca_control2;
+}
+
+extern uint32_t	tioca_gart_found;
+extern int tioca_init_provider(void);
+extern void tioca_fastwrite_enable(struct tioca_kernel *tioca_kern);
+#endif /* _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H */
diff -Nru a/include/asm-ia64/sn/tiocx.h b/include/asm-ia64/sn/tiocx.h
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/include/asm-ia64/sn/tiocx.h	2005-03-24 18:21:52 -08:00
@@ -0,0 +1,71 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_TIOCX_H
+#define _ASM_IA64_SN_TIO_TIOCX_H
+
+#ifdef __KERNEL__
+
+struct cx_id_s {
+	unsigned int part_num;
+	unsigned int mfg_num;
+	int nasid;
+};
+
+struct cx_dev {
+	struct cx_id_s cx_id;
+	void *soft;			/* driver specific */
+	struct hubdev_info *hubdev;
+	struct device dev;
+	struct cx_drv *driver;
+};
+
+struct cx_device_id {
+	unsigned int part_num;
+	unsigned int mfg_num;
+};
+
+struct cx_drv {
+	char *name;
+	const struct cx_device_id *id_table;
+	struct device_driver driver;
+	int (*probe) (struct cx_dev * dev, const struct cx_device_id * id);
+	int (*remove) (struct cx_dev * dev);
+};
+
+/* create DMA address by stripping AS bits */
+#define TIOCX_DMA_ADDR(a) (uint64_t)((uint64_t)(a) & 0xffffcfffffffffUL)
+
+#define TIOCX_TO_TIOCX_DMA_ADDR(a) (uint64_t)(((uint64_t)(a) & 0xfffffffff) |  \
+                                  ((((uint64_t)(a)) & 0xffffc000000000UL) <<2))
+
+#define TIO_CE_ASIC_PARTNUM 0xce00
+#define TIOCX_CORELET 3
+
+/* These are taken from tio_mmr_as.h */
+#define TIO_ICE_FRZ_CFG               TIO_MMR_ADDR_MOD(0x00000000b0008100UL)
+#define TIO_ICE_PMI_TX_CFG            TIO_MMR_ADDR_MOD(0x00000000b000b100UL)
+#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3 TIO_MMR_ADDR_MOD(0x00000000b000be18UL)
+#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK 0x000000000000000fUL
+
+#define to_cx_dev(n) container_of(n, struct cx_dev, dev)
+#define to_cx_driver(drv) container_of(drv, struct cx_drv, driver)
+
+extern struct sn_irq_info *tiocx_irq_alloc(nasid_t, int, int, nasid_t, int);
+extern void tiocx_irq_free(struct sn_irq_info *);
+extern int cx_device_unregister(struct cx_dev *);
+extern int cx_device_register(nasid_t, int, int, struct hubdev_info *);
+extern int cx_driver_unregister(struct cx_drv *);
+extern int cx_driver_register(struct cx_drv *);
+extern uint64_t tiocx_dma_addr(uint64_t addr);
+extern uint64_t tiocx_swin_base(int nasid);
+extern void tiocx_mmr_store(int nasid, uint64_t offset, uint64_t value);
+extern uint64_t tiocx_mmr_load(int nasid, uint64_t offset);
+
+#endif				//  __KERNEL__
+#endif				// _ASM_IA64_SN_TIO_TIOCX__
diff -Nru a/include/asm-ia64/sn/types.h b/include/asm-ia64/sn/types.h
--- a/include/asm-ia64/sn/types.h	2005-03-24 18:21:52 -08:00
+++ b/include/asm-ia64/sn/types.h	2005-03-24 18:21:52 -08:00
@@ -16,7 +16,8 @@
 typedef signed char	partid_t;	/* partition ID type */
 typedef unsigned int    moduleid_t;     /* user-visible module number type */
 typedef unsigned int    cmoduleid_t;    /* kernel compact module id type */
-typedef signed char     slabid_t;
+typedef unsigned char	slotid_t;	/* slot (blade) within module */
+typedef unsigned char	slabid_t;	/* slab (asic) within slot */
 typedef u64 nic_t;
 typedef unsigned long iopaddr_t;
 typedef unsigned long paddr_t;