From: "Seth, Rohit" <rohit.seth@intel.com>

 Recently on IA-64, we have found an issue where old data could be used by
 apps.  The sequence of operations, which includes a few mprotects from user
 space (glibc), goes like this:

 1- The text region of an executable is mmaped using
    PROT_READ|PROT_EXEC.  As a result, a shared page is allocated to the
    user.

 2- The user then requests the text region to be mprotected with
    PROT_READ|PROT_WRITE.  The kernel removes the execute permission and
    leaves the read permission on the text region.

 3- A subsequent write operation by the user results in a page fault,
    eventually breaking the COW.  The user gets a new private copy of the
    page.  At this point the kernel marks the new page for deferred flush.

 4- The user then requests the text region to be mprotected back with
    PROT_READ|PROT_EXEC.  The mprotect support code in the kernel flushes
    the caches, updates the PTEs and then flushes the TLBs.  However, after
    updating the PTEs with the new permissions, we don't let the
    arch-specific code know about the new mappings (through an
    update_mmu_cache-like routine).  IA-64 typically uses update_mmu_cache
    to check for the deferred-flush flag (which got set in step 3) to
    maintain cache coherency lazily (the local I and D caches on IA-64 are
    incoherent).  A user-space sketch of this sequence follows the list.
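
For illustration only, a minimal user-space program along these lines would
exercise the sequence above (hypothetical test code, not part of this patch;
error handling elided):

	#include <sys/mman.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		long psz = sysconf(_SC_PAGESIZE);
		int fd = open("/bin/true", O_RDONLY);	/* any executable */

		/* 1- map a text page, as the dynamic loader would */
		char *p = mmap(NULL, psz, PROT_READ|PROT_EXEC,
			       MAP_PRIVATE, fd, 0);

		/* 2- drop exec, gain write */
		mprotect(p, psz, PROT_READ|PROT_WRITE);

		/* 3- the write breaks COW; the new private page is
		 *    marked for deferred i-cache flush */
		p[0] = p[0];

		/* 4- restore exec: without the new hook, the i-cache may
		 *    still hold stale lines for the private copy */
		mprotect(p, psz, PROT_READ|PROT_EXEC);

		close(fd);
		return 0;
	}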

DavidM suggested that we add a hook in the function change_pte_range in
mm/mprotect.c.  This lets the architecture-specific code look at the new
PTEs to decide whether it needs to update any other architectural/kernel
state based on the updated (new-permission) PTE values.

We have added a new hook, lazy_mmu_prot_update(pte_t), that gets called
whenever the protection bits in PTEs change.  This hook gives arch-specific
code an opportunity to do whatever is needed.  On IA-64 it will be used to
lazily make the I and D caches coherent; a sketch of the resulting IA-64
implementation follows.
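
For reference, the IA-64 side after this patch reads roughly as follows.
This is reconstructed from the arch/ia64/mm/init.c hunk below; the
flush/mark-clean tail is not visible in that hunk and is shown here as our
best reading of the existing code, so treat it as a sketch:

	void
	lazy_mmu_prot_update (pte_t pte)
	{
		unsigned long addr;
		struct page *page;

		if (!pte_exec(pte))
			return;		/* not an executable page... */

		page = pte_page(pte);
		addr = (unsigned long) page_address(page);

		if (test_bit(PG_arch_1, &page->flags))
			return;		/* i-cache already coherent with d-cache */

		flush_icache_range(addr, addr + PAGE_SIZE);
		set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
	}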

Signed-off-by: David Mosberger <davidm@hpl.hp.com> 
Signed-off-by: Rohit Seth <rohit.seth@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/Documentation/cachetlb.txt      |    5 +++++
 25-akpm/arch/ia64/hp/common/sba_iommu.c |    2 +-
 25-akpm/arch/ia64/lib/swiotlb.c         |    2 +-
 25-akpm/arch/ia64/mm/init.c             |    3 +--
 25-akpm/include/asm-generic/pgtable.h   |    4 ++++
 25-akpm/include/asm-ia64/pgtable.h      |    5 ++++-
 25-akpm/mm/memory.c                     |    6 ++++++
 25-akpm/mm/mprotect.c                   |    5 +++--
 8 files changed, 25 insertions(+), 7 deletions(-)

diff -puN arch/ia64/hp/common/sba_iommu.c~arch-hook-for-notifying-changes-in-pte-protections-bits arch/ia64/hp/common/sba_iommu.c
--- 25/arch/ia64/hp/common/sba_iommu.c~arch-hook-for-notifying-changes-in-pte-protections-bits	2005-03-21 23:14:21.000000000 -0800
+++ 25-akpm/arch/ia64/hp/common/sba_iommu.c	2005-03-21 23:14:21.000000000 -0800
@@ -774,7 +774,7 @@ sba_io_pdir_entry(u64 *pdir_ptr, unsigne
 #ifdef ENABLE_MARK_CLEAN
 /**
  * Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
  * flush them when they get mapped into an executable vm-area.
  */
 static void
diff -puN arch/ia64/lib/swiotlb.c~arch-hook-for-notifying-changes-in-pte-protections-bits arch/ia64/lib/swiotlb.c
--- 25/arch/ia64/lib/swiotlb.c~arch-hook-for-notifying-changes-in-pte-protections-bits	2005-03-21 23:14:21.000000000 -0800
+++ 25-akpm/arch/ia64/lib/swiotlb.c	2005-03-21 23:14:21.000000000 -0800
@@ -444,7 +444,7 @@ swiotlb_map_single(struct device *hwdev,
 
 /*
  * Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
  * flush them when they get mapped into an executable vm-area.
  */
 static void
diff -puN arch/ia64/mm/init.c~arch-hook-for-notifying-changes-in-pte-protections-bits arch/ia64/mm/init.c
--- 25/arch/ia64/mm/init.c~arch-hook-for-notifying-changes-in-pte-protections-bits	2005-03-21 23:14:21.000000000 -0800
+++ 25-akpm/arch/ia64/mm/init.c	2005-03-21 23:14:21.000000000 -0800
@@ -105,7 +105,7 @@ check_pgt_cache(void)
 }
 
 void
-update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
+lazy_mmu_prot_update (pte_t pte)
 {
 	unsigned long addr;
 	struct page *page;
@@ -114,7 +114,6 @@ update_mmu_cache (struct vm_area_struct 
 		return;				/* not an executable page... */
 
 	page = pte_page(pte);
-	/* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */
 	addr = (unsigned long) page_address(page);
 
 	if (test_bit(PG_arch_1, &page->flags))
diff -puN Documentation/cachetlb.txt~arch-hook-for-notifying-changes-in-pte-protections-bits Documentation/cachetlb.txt
--- 25/Documentation/cachetlb.txt~arch-hook-for-notifying-changes-in-pte-protections-bits	2005-03-21 23:14:21.000000000 -0800
+++ 25-akpm/Documentation/cachetlb.txt	2005-03-21 23:14:21.000000000 -0800
@@ -142,6 +142,11 @@ changes occur:
 	The ia64 sn2 platform is one example of a platform
 	that uses this interface.
 
+8) void lazy_mmu_prot_update(pte_t pte)
+	This interface is called whenever the protection bits on
+	any user PTE change.  This interface provides a notification
+	to architecture-specific code to take appropriate action.
+
 
 Next, we have the cache flushing interfaces.  In general, when Linux
 is changing an existing virtual-->physical mapping to a new value,
diff -puN include/asm-generic/pgtable.h~arch-hook-for-notifying-changes-in-pte-protections-bits include/asm-generic/pgtable.h
--- 25/include/asm-generic/pgtable.h~arch-hook-for-notifying-changes-in-pte-protections-bits	2005-03-21 23:14:21.000000000 -0800
+++ 25-akpm/include/asm-generic/pgtable.h	2005-03-21 23:14:21.000000000 -0800
@@ -135,6 +135,10 @@ static inline void ptep_set_wrprotect(st
 #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
 #endif
 
+#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+#define lazy_mmu_prot_update(pte)	do { } while (0)
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
diff -puN include/asm-ia64/pgtable.h~arch-hook-for-notifying-changes-in-pte-protections-bits include/asm-ia64/pgtable.h
--- 25/include/asm-ia64/pgtable.h~arch-hook-for-notifying-changes-in-pte-protections-bits	2005-03-21 23:14:21.000000000 -0800
+++ 25-akpm/include/asm-ia64/pgtable.h	2005-03-21 23:14:21.000000000 -0800
@@ -411,6 +411,8 @@ pte_same (pte_t a, pte_t b)
 	return pte_val(a) == pte_val(b);
 }
 
+#define update_mmu_cache(vma, address, pte) do { } while (0)
+
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern void paging_init (void);
 
@@ -472,7 +474,7 @@ void hugetlb_free_pgd_range(struct mmu_g
  * information.  However, we use this routine to take care of any (delayed) i-cache
  * flushing that may be necessary.
  */
-extern void update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte);
+extern void lazy_mmu_prot_update (pte_t pte);
 
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 /*
@@ -550,6 +552,7 @@ do {											\
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTE_SAME
 #define __HAVE_ARCH_PGD_OFFSET_GATE
+#define __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
 
 #include <asm-generic/pgtable-nopud.h>
 #include <asm-generic/pgtable.h>
diff -puN mm/memory.c~arch-hook-for-notifying-changes-in-pte-protections-bits mm/memory.c
--- 25/mm/memory.c~arch-hook-for-notifying-changes-in-pte-protections-bits	2005-03-21 23:14:21.000000000 -0800
+++ 25-akpm/mm/memory.c	2005-03-21 23:14:21.000000000 -0800
@@ -1183,6 +1183,7 @@ static inline void break_cow(struct vm_a
 			      vma);
 	ptep_establish(vma, address, page_table, entry);
 	update_mmu_cache(vma, address, entry);
+	lazy_mmu_prot_update(entry);
 }
 
 /*
@@ -1235,6 +1236,7 @@ static int do_wp_page(struct mm_struct *
 					      vma);
 			ptep_set_access_flags(vma, address, page_table, entry, 1);
 			update_mmu_cache(vma, address, entry);
+			lazy_mmu_prot_update(entry);
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
 			return VM_FAULT_MINOR;
@@ -1697,6 +1699,7 @@ static int do_swap_page(struct mm_struct
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
+	lazy_mmu_prot_update(pte);
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 out:
@@ -1754,6 +1757,7 @@ do_anonymous_page(struct mm_struct *mm, 
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, entry);
+	lazy_mmu_prot_update(entry);
 	spin_unlock(&mm->page_table_lock);
 out:
 	return VM_FAULT_MINOR;
@@ -1879,6 +1883,7 @@ retry:
 
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
+	lazy_mmu_prot_update(entry);
 	spin_unlock(&mm->page_table_lock);
 out:
 	return ret;
@@ -1973,6 +1978,7 @@ static inline int handle_pte_fault(struc
 	entry = pte_mkyoung(entry);
 	ptep_set_access_flags(vma, address, pte, entry, write_access);
 	update_mmu_cache(vma, address, entry);
+	lazy_mmu_prot_update(entry);
 	pte_unmap(pte);
 	spin_unlock(&mm->page_table_lock);
 	return VM_FAULT_MINOR;
diff -puN mm/mprotect.c~arch-hook-for-notifying-changes-in-pte-protections-bits mm/mprotect.c
--- 25/mm/mprotect.c~arch-hook-for-notifying-changes-in-pte-protections-bits	2005-03-21 23:14:21.000000000 -0800
+++ 25-akpm/mm/mprotect.c	2005-03-21 23:14:21.000000000 -0800
@@ -39,8 +39,9 @@ static void change_pte_range(struct mm_s
 			 * bits by wiping the pte and then setting the new pte
 			 * into place.
 			 */
-			ptent = ptep_get_and_clear(mm, addr, pte);
-			set_pte_at(mm, addr, pte, pte_modify(ptent, newprot));
+			ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot);
+			set_pte_at(mm, addr, pte, ptent);
+			lazy_mmu_prot_update(ptent);
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(pte - 1);
_