File: /Users/paulross/dev/linux/linux-3.13/include/asm-generic/pgtable.h

       1: #ifndef _ASM_GENERIC_PGTABLE_H
       2: #define _ASM_GENERIC_PGTABLE_H
       3: 
       4: #ifndef __ASSEMBLY__
       5: #ifdef CONFIG_MMU
       6: 
       7: #include <linux/mm_types.h>
       8: #include <linux/bug.h>
       9: 
      10: /*
      11:  * On almost all architectures and configurations, 0 can be used as the
      12:  * upper ceiling to free_pgtables(): on many architectures it has the same
      13:  * effect as using TASK_SIZE.  However, there is one configuration which
      14:  * must impose a more careful limit, to avoid freeing kernel pgtables.
      15:  */
      16: #ifndef USER_PGTABLES_CEILING
      17: #define USER_PGTABLES_CEILING    0UL
      18: #endif
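
A minimal sketch of how the ceiling is consumed, in the spirit of the exit_mmap()/free_pgtables() teardown path in mm/; the wrapper name is hypothetical and the snippet is an illustration only, not part of this header:

/*
 * Illustration: free the page tables backing a dying address space,
 * never freeing anything at or above USER_PGTABLES_CEILING so that
 * kernel page tables are left untouched.
 */
static void example_teardown_pgtables(struct mmu_gather *tlb,
                                      struct vm_area_struct *vma)
{
    free_pgtables(tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
}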
      19: 
      20: #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
      21: extern int ptep_set_access_flags(struct vm_area_struct *vma,
      22:                  unsigned long address, pte_t *ptep,
      23:                  pte_t entry, int dirty);
      24: #endif
      25: 
      26: #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
      27: extern int pmdp_set_access_flags(struct vm_area_struct *vma,
      28:                  unsigned long address, pmd_t *pmdp,
      29:                  pmd_t entry, int dirty);
      30: #endif
      31: 
      32: #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
      33: static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
      34:                         unsigned long address,
      35:                         pte_t *ptep)
      36: {
      37:     pte_t pte = *ptep;
      38:     int r = 1;
      39:     if (!pte_young(pte))
      40:         r = 0;
      41:     else
      42:         set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
      43:     return r;
      44: }
      45: #endif
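
As a usage sketch (the helper name is hypothetical; real reclaim code in mm/rmap.c goes through the clear-flush/notifier variants rather than calling this directly), the accessed bit is typically sampled and rearmed like this:

/* Illustration: report and clear the "recently used" state of one pte. */
static int example_pte_was_referenced(struct vm_area_struct *vma,
                                      unsigned long address, pte_t *ptep)
{
    /* Clearing the young bit rearms the check for the next scan. */
    return ptep_test_and_clear_young(vma, address, ptep);
}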
      46: 
      47: #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
      48: #ifdef CONFIG_TRANSPARENT_HUGEPAGE
      49: static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
      50:                         unsigned long address,
      51:                         pmd_t *pmdp)
      52: {
      53:     pmd_t pmd = *pmdp;
      54:     int r = 1;
      55:     if (!pmd_young(pmd))
      56:         r = 0;
      57:     else
      58:         set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
      59:     return r;
      60: }
      61: #else /* CONFIG_TRANSPARENT_HUGEPAGE */
      62: static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
      63:                         unsigned long address,
      64:                         pmd_t *pmdp)
      65: {
      66:     BUG();
      67:     return 0;
      68: }
      69: #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
      70: #endif
      71: 
      72: #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
      73: int ptep_clear_flush_young(struct vm_area_struct *vma,
      74:                unsigned long address, pte_t *ptep);
      75: #endif
      76: 
      77: #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
      78: int pmdp_clear_flush_young(struct vm_area_struct *vma,
      79:                unsigned long address, pmd_t *pmdp);
      80: #endif
      81: 
      82: #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
      83: static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
      84:                        unsigned long address,
      85:                        pte_t *ptep)
      86: {
      87:     pte_t pte = *ptep;
      88:     pte_clear(mm, address, ptep);
      89:     return pte;
      90: }
      91: #endif
      92: 
      93: #ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR
      94: #ifdef CONFIG_TRANSPARENT_HUGEPAGE
      95: static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
      96:                        unsigned long address,
      97:                        pmd_t *pmdp)
      98: {
      99:     pmd_t pmd = *pmdp;
     100:     pmd_clear(pmdp);
     101:     return pmd;
     102: }
     103: #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
     104: #endif
     105: 
     106: #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
     107: static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
     108:                         unsigned long address, pte_t *ptep,
     109:                         int full)
     110: {
     111:     pte_t pte;
     112:     pte = ptep_get_and_clear(mm, address, ptep);
     113:     return pte;
     114: }
     115: #endif
     116: 
     117: /*
     118:  * Some architectures may be able to avoid expensive synchronization
      119:  * primitives when modifications are made to PTEs which are already
     120:  * not present, or in the process of an address space destruction.
     121:  */
     122: #ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
     123: static inline void pte_clear_not_present_full(struct mm_struct *mm,
     124:                           unsigned long address,
     125:                           pte_t *ptep,
     126:                           int full)
     127: {
     128:     pte_clear(mm, address, ptep);
     129: }
     130: #endif
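
A sketch of how the "full" hint is normally passed down, loosely following the zap_pte_range() shape in mm/memory.c (the function name is hypothetical and it assumes struct mmu_gather exposes a fullmm flag, as the generic mmu_gather does):

/*
 * Illustration: tlb->fullmm is non-zero only when the whole address
 * space is being torn down, which lets the architecture skip
 * per-pte synchronization and flushing.
 */
static void example_zap_one_pte(struct mmu_gather *tlb, struct mm_struct *mm,
                                unsigned long addr, pte_t *pte)
{
    pte_t ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);

    /* ... page accounting and freeing for 'ptent' would go here ... */
    (void)ptent;
}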
     131: 
     132: #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
     133: extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
     134:                   unsigned long address,
     135:                   pte_t *ptep);
     136: #endif
     137: 
     138: #ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH
     139: extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
     140:                   unsigned long address,
     141:                   pmd_t *pmdp);
     142: #endif
     143: 
     144: #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
     145: struct mm_struct;
     146: static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
     147: {
     148:     pte_t old_pte = *ptep;
     149:     set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
     150: }
     151: #endif
     152: 
     153: #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
     154: #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     155: static inline void pmdp_set_wrprotect(struct mm_struct *mm,
     156:                       unsigned long address, pmd_t *pmdp)
     157: {
     158:     pmd_t old_pmd = *pmdp;
     159:     set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
     160: }
     161: #else /* CONFIG_TRANSPARENT_HUGEPAGE */
     162: static inline void pmdp_set_wrprotect(struct mm_struct *mm,
     163:                       unsigned long address, pmd_t *pmdp)
     164: {
     165:     BUG();
     166: }
     167: #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
     168: #endif
     169: 
     170: #ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
     171: extern void pmdp_splitting_flush(struct vm_area_struct *vma,
     172:                  unsigned long address, pmd_t *pmdp);
     173: #endif
     174: 
     175: #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
     176: extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
     177:                        pgtable_t pgtable);
     178: #endif
     179: 
     180: #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
     181: extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
     182: #endif
     183: 
     184: #ifndef __HAVE_ARCH_PMDP_INVALIDATE
     185: extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
     186:                 pmd_t *pmdp);
     187: #endif
     188: 
     189: #ifndef __HAVE_ARCH_PTE_SAME
     190: static inline int pte_same(pte_t pte_a, pte_t pte_b)
     191: {
     192:     return pte_val(pte_a) == pte_val(pte_b);
     193: }
     194: #endif
     195: 
     196: #ifndef __HAVE_ARCH_PMD_SAME
     197: #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     198: static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
     199: {
     200:     return pmd_val(pmd_a) == pmd_val(pmd_b);
     201: }
     202: #else /* CONFIG_TRANSPARENT_HUGEPAGE */
     203: static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
     204: {
     205:     BUG();
     206:     return 0;
     207: }
     208: #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
     209: #endif
     210: 
     211: #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
     212: #define pgd_offset_gate(mm, addr)    pgd_offset(mm, addr)
     213: #endif
     214: 
     215: #ifndef __HAVE_ARCH_MOVE_PTE
     216: #define move_pte(pte, prot, old_addr, new_addr)    (pte)
     217: #endif
     218: 
     219: #ifndef pte_accessible
     220: # define pte_accessible(mm, pte)    ((void)(pte), 1)
     221: #endif
     222: 
     223: #ifndef flush_tlb_fix_spurious_fault
     224: #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
     225: #endif
     226: 
     227: #ifndef pgprot_noncached
     228: #define pgprot_noncached(prot)    (prot)
     229: #endif
     230: 
     231: #ifndef pgprot_writecombine
     232: #define pgprot_writecombine pgprot_noncached
     233: #endif
     234: 
     235: /*
     236:  * When walking page tables, get the address of the next boundary,
     237:  * or the end address of the range if that comes earlier.  Although no
     238:  * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
     239:  */
     240: 
     241: #define pgd_addr_end(addr, end)                        \
     242: ({    unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;    \
     243:     (__boundary - 1 < (end) - 1)? __boundary: (end);        \
     244: })
     245: 
     246: #ifndef pud_addr_end
     247: #define pud_addr_end(addr, end)                        \
     248: ({    unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;    \
     249:     (__boundary - 1 < (end) - 1)? __boundary: (end);        \
     250: })
     251: #endif
     252: 
     253: #ifndef pmd_addr_end
     254: #define pmd_addr_end(addr, end)                        \
     255: ({    unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;    \
     256:     (__boundary - 1 < (end) - 1)? __boundary: (end);        \
     257: })
     258: #endif
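
A worked example of why the comparison subtracts 1 from both sides (illustrative numbers only, assuming a 32-bit unsigned long and a 4 MB PGDIR_SIZE):

/*
 * addr       = 0xffc00000, end = 0xfff00000
 * __boundary = (0xffc00000 + 0x00400000) & 0xffc00000 = 0x00000000 (wrapped)
 *
 * Comparing __boundary < end directly would wrongly select the wrapped 0.
 * Subtracting 1 maps the wrapped 0 to 0xffffffff, so
 *     __boundary - 1 (0xffffffff) < end - 1 (0xffefffff)   is false
 * and the macro correctly returns 'end'.
 */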
     259: 
     260: /*
     261:  * When walking page tables, we usually want to skip any p?d_none entries;
     262:  * and any p?d_bad entries - reporting the error before resetting to none.
     263:  * Do the tests inline, but report and clear the bad entry in mm/memory.c.
     264:  */
     265: void pgd_clear_bad(pgd_t *);
     266: void pud_clear_bad(pud_t *);
     267: void pmd_clear_bad(pmd_t *);
     268: 
     269: static inline int pgd_none_or_clear_bad(pgd_t *pgd)
     270: {
     271:     if (pgd_none(*pgd))
     272:         return 1;
     273:     if (unlikely(pgd_bad(*pgd))) {
     274:         pgd_clear_bad(pgd);
     275:         return 1;
     276:     }
     277:     return 0;
     278: }
     279: 
     280: static inline int pud_none_or_clear_bad(pud_t *pud)
     281: {
     282:     if (pud_none(*pud))
     283:         return 1;
     284:     if (unlikely(pud_bad(*pud))) {
     285:         pud_clear_bad(pud);
     286:         return 1;
     287:     }
     288:     return 0;
     289: }
     290: 
     291: static inline int pmd_none_or_clear_bad(pmd_t *pmd)
     292: {
     293:     if (pmd_none(*pmd))
     294:         return 1;
     295:     if (unlikely(pmd_bad(*pmd))) {
     296:         pmd_clear_bad(pmd);
     297:         return 1;
     298:     }
     299:     return 0;
     300: }
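
These helpers are meant to be used inline by the walkers themselves; a minimal sketch of the usual top-level loop (the pud/pmd descent is elided and the function name is hypothetical):

/* Illustration: walk [addr, end) one pgd entry at a time. */
static void example_walk_page_range(struct mm_struct *mm,
                                    unsigned long addr, unsigned long end)
{
    pgd_t *pgd = pgd_offset(mm, addr);
    unsigned long next;

    do {
        next = pgd_addr_end(addr, end);
        if (pgd_none_or_clear_bad(pgd))
            continue;   /* empty, or the bad entry was reported and reset */
        /* ... descend into the puds covering [addr, next) ... */
    } while (pgd++, addr = next, addr != end);
}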
     301: 
     302: static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
     303:                          unsigned long addr,
     304:                          pte_t *ptep)
     305: {
     306:     /*
     307:      * Get the current pte state, but zero it out to make it
     308:      * non-present, preventing the hardware from asynchronously
     309:      * updating it.
     310:      */
     311:     return ptep_get_and_clear(mm, addr, ptep);
     312: }
     313: 
     314: static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
     315:                          unsigned long addr,
     316:                          pte_t *ptep, pte_t pte)
     317: {
     318:     /*
     319:      * The pte is non-present, so there's no hardware state to
     320:      * preserve.
     321:      */
     322:     set_pte_at(mm, addr, ptep, pte);
     323: }
     324: 
     325: #ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
     326: /*
     327:  * Start a pte protection read-modify-write transaction, which
     328:  * protects against asynchronous hardware modifications to the pte.
     329:  * The intention is not to prevent the hardware from making pte
     330:  * updates, but to prevent any updates it may make from being lost.
     331:  *
     332:  * This does not protect against other software modifications of the
      333:  * pte; the appropriate pte lock must be held over the transaction.
     334:  *
     335:  * Note that this interface is intended to be batchable, meaning that
     336:  * ptep_modify_prot_commit may not actually update the pte, but merely
     337:  * queue the update to be done at some later time.  The update must be
     338:  * actually committed before the pte lock is released, however.
     339:  */
     340: static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
     341:                        unsigned long addr,
     342:                        pte_t *ptep)
     343: {
     344:     return __ptep_modify_prot_start(mm, addr, ptep);
     345: }
     346: 
     347: /*
     348:  * Commit an update to a pte, leaving any hardware-controlled bits in
     349:  * the PTE unmodified.
     350:  */
     351: static inline void ptep_modify_prot_commit(struct mm_struct *mm,
     352:                        unsigned long addr,
     353:                        pte_t *ptep, pte_t pte)
     354: {
     355:     __ptep_modify_prot_commit(mm, addr, ptep, pte);
     356: }
     357: #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
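
A sketch of the intended calling pattern, in the style of the mprotect pte loop (the pte lock is assumed held by the caller, and the function name is hypothetical):

/*
 * Illustration: change the protections of one present pte without
 * losing accessed/dirty updates the hardware may make concurrently.
 */
static void example_change_pte_prot(struct mm_struct *mm, unsigned long addr,
                                    pte_t *ptep, pgprot_t newprot)
{
    pte_t ptent;

    ptent = ptep_modify_prot_start(mm, addr, ptep); /* pte now non-present */
    ptent = pte_modify(ptent, newprot);             /* apply new protections */
    ptep_modify_prot_commit(mm, addr, ptep, ptent); /* may be batched */
}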
     358: #endif /* CONFIG_MMU */
     359: 
     360: /*
     361:  * A facility to provide lazy MMU batching.  This allows PTE updates and
     362:  * page invalidations to be delayed until a call to leave lazy MMU mode
     363:  * is issued.  Some architectures may benefit from doing this, and it is
     364:  * beneficial for both shadow and direct mode hypervisors, which may batch
     365:  * the PTE updates which happen during this window.  Note that using this
     366:  * interface requires that read hazards be removed from the code.  A read
     367:  * hazard could result in the direct mode hypervisor case, since the actual
      368:  * write to the page tables may not yet have taken place, so reads through
     369:  * a raw PTE pointer after it has been modified are not guaranteed to be
     370:  * up to date.  This mode can only be entered and left under the protection of
     371:  * the page table locks for all page tables which may be modified.  In the UP
     372:  * case, this is required so that preemption is disabled, and in the SMP case,
     373:  * it must synchronize the delayed page table writes properly on other CPUs.
     374:  */
     375: #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
     376: #define arch_enter_lazy_mmu_mode()    do {} while (0)
     377: #define arch_leave_lazy_mmu_mode()    do {} while (0)
     378: #define arch_flush_lazy_mmu_mode()    do {} while (0)
     379: #endif
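
A sketch of how a run of pte updates is bracketed by the lazy-MMU hooks; per the comment above, the page table lock is assumed held around the whole region and the updated ptes are not read back inside it (the function name is hypothetical):

/*
 * Illustration: every set_pte_at() between enter and leave may be
 * queued by the architecture or hypervisor and only hit the real
 * page tables when lazy MMU mode is left.
 */
static void example_batch_pte_updates(struct mm_struct *mm, unsigned long addr,
                                      pte_t *ptep, const pte_t *newptes, int nr)
{
    int i;

    arch_enter_lazy_mmu_mode();
    for (i = 0; i < nr; i++, addr += PAGE_SIZE)
        set_pte_at(mm, addr, ptep + i, newptes[i]);
    arch_leave_lazy_mmu_mode();
}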
     380: 
     381: /*
     382:  * A facility to provide batching of the reload of page tables and
     383:  * other process state with the actual context switch code for
     384:  * paravirtualized guests.  By convention, only one of the batched
     385:  * update (lazy) modes (CPU, MMU) should be active at any given time,
     386:  * entry should never be nested, and entry and exits should always be
     387:  * paired.  This is for sanity of maintaining and reasoning about the
     388:  * kernel code.  In this case, the exit (end of the context switch) is
     389:  * in architecture-specific code, and so doesn't need a generic
     390:  * definition.
     391:  */
     392: #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
     393: #define arch_start_context_switch(prev)    do {} while (0)
     394: #endif
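
For orientation, a comment-form sketch of the single expected call site, inside the scheduler's context-switch path (details are architecture- and version-specific):

/*
 * Illustration only:
 *
 *     arch_start_context_switch(prev);
 *     ... switch_mm() / switch_to() ...
 *
 * The matching end of the batched region is issued from
 * architecture-specific code at the end of the context switch,
 * which is why no generic "end" hook is defined here.
 */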
     395: 
     396: #ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY
     397: static inline int pte_soft_dirty(pte_t pte)
     398: {
     399:     return 0;
     400: }
     401: 
     402: static inline int pmd_soft_dirty(pmd_t pmd)
     403: {
     404:     return 0;
     405: }
     406: 
     407: static inline pte_t pte_mksoft_dirty(pte_t pte)
     408: {
     409:     return pte;
     410: }
     411: 
     412: static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
     413: {
     414:     return pmd;
     415: }
     416: 
     417: static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
     418: {
     419:     return pte;
     420: }
     421: 
     422: static inline int pte_swp_soft_dirty(pte_t pte)
     423: {
     424:     return 0;
     425: }
     426: 
     427: static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
     428: {
     429:     return pte;
     430: }
     431: 
     432: static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
     433: {
     434:        return pte;
     435: }
     436: 
     437: static inline pte_t pte_file_mksoft_dirty(pte_t pte)
     438: {
     439:        return pte;
     440: }
     441: 
     442: static inline int pte_file_soft_dirty(pte_t pte)
     443: {
     444:        return 0;
     445: }
     446: #endif
     447: 
     448: #ifndef __HAVE_PFNMAP_TRACKING
     449: /*
     450:  * Interfaces that can be used by architecture code to keep track of
     451:  * memory type of pfn mappings specified by the remap_pfn_range,
     452:  * vm_insert_pfn.
     453:  */
     454: 
     455: /*
     456:  * track_pfn_remap is called when a _new_ pfn mapping is being established
     457:  * by remap_pfn_range() for physical range indicated by pfn and size.
     458:  */
     459: static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
     460:                   unsigned long pfn, unsigned long addr,
     461:                   unsigned long size)
     462: {
     463:     return 0;
     464: }
     465: 
     466: /*
     467:  * track_pfn_insert is called when a _new_ single pfn is established
     468:  * by vm_insert_pfn().
     469:  */
     470: static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
     471:                    unsigned long pfn)
     472: {
     473:     return 0;
     474: }
     475: 
     476: /*
     477:  * track_pfn_copy is called when vma that is covering the pfnmap gets
     478:  * copied through copy_page_range().
     479:  */
     480: static inline int track_pfn_copy(struct vm_area_struct *vma)
     481: {
     482:     return 0;
     483: }
     484: 
     485: /*
     486:  * untrack_pfn_vma is called while unmapping a pfnmap for a region.
     487:  * untrack can be called for a specific region indicated by pfn and size or
     488:  * can be for the entire vma (in which case pfn, size are zero).
     489:  */
     490: static inline void untrack_pfn(struct vm_area_struct *vma,
     491:                    unsigned long pfn, unsigned long size)
     492: {
     493: }
     494: #else
     495: extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
     496:                unsigned long pfn, unsigned long addr,
     497:                unsigned long size);
     498: extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
     499:                 unsigned long pfn);
     500: extern int track_pfn_copy(struct vm_area_struct *vma);
     501: extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
     502:             unsigned long size);
     503: #endif
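
A sketch of where the remap-side hook fires, loosely following the remap_pfn_range() path (the function name is hypothetical and error handling is trimmed); note that the hook may adjust the passed-in pgprot, e.g. its caching attributes:

/* Illustration: map a physical range into a vma, tracking its memory type. */
static int example_map_phys_range(struct vm_area_struct *vma, unsigned long addr,
                                  unsigned long pfn, unsigned long size,
                                  pgprot_t prot)
{
    int err;

    err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
    if (err)
        return err;

    /* ... install ptes for [addr, addr + size) using 'prot' ... */
    return 0;
}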
     504: 
     505: #ifdef __HAVE_COLOR_ZERO_PAGE
     506: static inline int is_zero_pfn(unsigned long pfn)
     507: {
     508:     extern unsigned long zero_pfn;
     509:     unsigned long offset_from_zero_pfn = pfn - zero_pfn;
     510:     return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
     511: }
     512: 
     513: #define my_zero_pfn(addr)    page_to_pfn(ZERO_PAGE(addr))
     514: 
     515: #else
     516: static inline int is_zero_pfn(unsigned long pfn)
     517: {
     518:     extern unsigned long zero_pfn;
     519:     return pfn == zero_pfn;
     520: }
     521: 
     522: static inline unsigned long my_zero_pfn(unsigned long addr)
     523: {
     524:     extern unsigned long zero_pfn;
     525:     return zero_pfn;
     526: }
     527: #endif
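
A sketch of the typical consumer: a read fault on an anonymous mapping can be backed by the (possibly colored) zero page instead of a freshly allocated page, roughly as do_anonymous_page() does (the helper name is hypothetical and pte_mkspecial() is assumed available on the architecture):

/* Illustration: build a read-only pte mapping the zero page for 'address'. */
static pte_t example_zero_page_pte(struct vm_area_struct *vma,
                                   unsigned long address)
{
    return pte_mkspecial(pfn_pte(my_zero_pfn(address), vma->vm_page_prot));
}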
     528: 
     529: #ifdef CONFIG_MMU
     530: 
     531: #ifndef CONFIG_TRANSPARENT_HUGEPAGE
     532: static inline int pmd_trans_huge(pmd_t pmd)
     533: {
     534:     return 0;
     535: }
     536: static inline int pmd_trans_splitting(pmd_t pmd)
     537: {
     538:     return 0;
     539: }
     540: #ifndef __HAVE_ARCH_PMD_WRITE
     541: static inline int pmd_write(pmd_t pmd)
     542: {
     543:     BUG();
     544:     return 0;
     545: }
     546: #endif /* __HAVE_ARCH_PMD_WRITE */
     547: #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
     548: 
     549: #ifndef pmd_read_atomic
     550: static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
     551: {
     552:     /*
     553:      * Depend on compiler for an atomic pmd read. NOTE: this is
     554:      * only going to work, if the pmdval_t isn't larger than
     555:      * an unsigned long.
     556:      */
     557:     return *pmdp;
     558: }
     559: #endif
     560: 
     561: /*
     562:  * This function is meant to be used by sites walking pagetables with
      563:  * the mmap_sem held in read mode to protect against MADV_DONTNEED and
     564:  * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
     565:  * into a null pmd and the transhuge page fault can convert a null pmd
      566:  * into a huge pmd or into a regular pmd (if the hugepage allocation
      567:  * fails). While holding the mmap_sem in read mode the pmd becomes
      568:  * stable and stops changing under us only if it's not null and not a
      569:  * transhuge pmd. When those races occur and this function makes a
      570:  * difference vs the standard pmd_none_or_clear_bad, the result is
      571:  * undefined, so behaving as if the pmd was none is safe (because it
     572:  * can return none anyway). The compiler level barrier() is critically
     573:  * important to compute the two checks atomically on the same pmdval.
     574:  *
     575:  * For 32bit kernels with a 64bit large pmd_t this automatically takes
     576:  * care of reading the pmd atomically to avoid SMP race conditions
      577:  * against pmd_populate() when the mmap_sem is held for reading by the
     578:  * caller (a special atomic read not done by "gcc" as in the generic
     579:  * version above, is also needed when THP is disabled because the page
     580:  * fault can populate the pmd from under us).
     581:  */
     582: static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
     583: {
     584:     pmd_t pmdval = pmd_read_atomic(pmd);
     585:     /*
     586:      * The barrier will stabilize the pmdval in a register or on
     587:      * the stack so that it will stop changing under the code.
     588:      *
     589:      * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
     590:      * pmd_read_atomic is allowed to return a not atomic pmdval
      591:      * (for example pointing to a hugepage that has never been
     592:      * mapped in the pmd). The below checks will only care about
     593:      * the low part of the pmd with 32bit PAE x86 anyway, with the
     594:      * exception of pmd_none(). So the important thing is that if
     595:      * the low part of the pmd is found null, the high part will
     596:      * be also null or the pmd_none() check below would be
     597:      * confused.
     598:      */
     599: #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     600:     barrier();
     601: #endif
     602:     if (pmd_none(pmdval) || pmd_trans_huge(pmdval))
     603:         return 1;
     604:     if (unlikely(pmd_bad(pmdval))) {
     605:         pmd_clear_bad(pmd);
     606:         return 1;
     607:     }
     608:     return 0;
     609: }
     610: 
     611: /*
     612:  * This is a noop if Transparent Hugepage Support is not built into
     613:  * the kernel. Otherwise it is equivalent to
     614:  * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
     615:  * places that already verified the pmd is not none and they want to
      616:  * walk ptes while holding the mmap sem in read mode (write mode doesn't
     617:  * need this). If THP is not enabled, the pmd can't go away under the
     618:  * code even if MADV_DONTNEED runs, but if THP is enabled we need to
     619:  * run a pmd_trans_unstable before walking the ptes after
     620:  * split_huge_page_pmd returns (because it may have run when the pmd
      621:  * became null, but then a page fault can map in a THP and not a
     622:  * regular page).
     623:  */
     624: static inline int pmd_trans_unstable(pmd_t *pmd)
     625: {
     626: #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     627:     return pmd_none_or_trans_huge_or_clear_bad(pmd);
     628: #else
     629:     return 0;
     630: #endif
     631: }
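
A sketch of the intended call pattern for a pte-level walker running under mmap_sem held for read, in the style of the pagemap/mincore walkers (the function name is hypothetical):

/*
 * Illustration: treat an unstable pmd (none, huge, splitting or bad)
 * as an empty range; only after the check is it safe to map and walk
 * the pte page below it.
 */
static int example_walk_pmd_range(struct vm_area_struct *vma, pmd_t *pmd,
                                  unsigned long addr, unsigned long end)
{
    pte_t *pte, *orig_pte;
    spinlock_t *ptl;

    if (pmd_trans_unstable(pmd))
        return 0;

    orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
    for (; addr != end; addr += PAGE_SIZE, pte++) {
        /* ... examine *pte ... */
    }
    pte_unmap_unlock(orig_pte, ptl);
    return 0;
}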
     632: 
     633: #ifdef CONFIG_NUMA_BALANCING
     634: #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
     635: /*
     636:  * _PAGE_NUMA works identical to _PAGE_PROTNONE (it's actually the
      637:  * same bit too). It's set only when _PAGE_PRESENT is not set and it's
     638:  * never set if _PAGE_PRESENT is set.
     639:  *
     640:  * pte/pmd_present() returns true if pte/pmd_numa returns true. Page
     641:  * fault triggers on those regions if pte/pmd_numa returns true
     642:  * (because _PAGE_PRESENT is not set).
     643:  */
     644: #ifndef pte_numa
     645: static inline int pte_numa(pte_t pte)
     646: {
     647:     return (pte_flags(pte) &
     648:         (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
     649: }
     650: #endif
     651: 
     652: #ifndef pmd_numa
     653: static inline int pmd_numa(pmd_t pmd)
     654: {
     655:     return (pmd_flags(pmd) &
     656:         (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
     657: }
     658: #endif
     659: 
     660: /*
     661:  * pte/pmd_mknuma sets the _PAGE_ACCESSED bitflag automatically
     662:  * because they're called by the NUMA hinting minor page fault. If we
     663:  * wouldn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler
     664:  * would be forced to set it later while filling the TLB after we
     665:  * return to userland. That would trigger a second write to memory
     666:  * that we optimize away by setting _PAGE_ACCESSED here.
     667:  */
     668: #ifndef pte_mknonnuma
     669: static inline pte_t pte_mknonnuma(pte_t pte)
     670: {
     671:     pte = pte_clear_flags(pte, _PAGE_NUMA);
     672:     return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED);
     673: }
     674: #endif
     675: 
     676: #ifndef pmd_mknonnuma
     677: static inline pmd_t pmd_mknonnuma(pmd_t pmd)
     678: {
     679:     pmd = pmd_clear_flags(pmd, _PAGE_NUMA);
     680:     return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED);
     681: }
     682: #endif
     683: 
     684: #ifndef pte_mknuma
     685: static inline pte_t pte_mknuma(pte_t pte)
     686: {
     687:     pte = pte_set_flags(pte, _PAGE_NUMA);
     688:     return pte_clear_flags(pte, _PAGE_PRESENT);
     689: }
     690: #endif
     691: 
     692: #ifndef pmd_mknuma
     693: static inline pmd_t pmd_mknuma(pmd_t pmd)
     694: {
     695:     pmd = pmd_set_flags(pmd, _PAGE_NUMA);
     696:     return pmd_clear_flags(pmd, _PAGE_PRESENT);
     697: }
     698: #endif
     699: #else
     700: extern int pte_numa(pte_t pte);
     701: extern int pmd_numa(pmd_t pmd);
     702: extern pte_t pte_mknonnuma(pte_t pte);
     703: extern pmd_t pmd_mknonnuma(pmd_t pmd);
     704: extern pte_t pte_mknuma(pte_t pte);
     705: extern pmd_t pmd_mknuma(pmd_t pmd);
     706: #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
     707: #else
     708: static inline int pmd_numa(pmd_t pmd)
     709: {
     710:     return 0;
     711: }
     712: 
     713: static inline int pte_numa(pte_t pte)
     714: {
     715:     return 0;
     716: }
     717: 
     718: static inline pte_t pte_mknonnuma(pte_t pte)
     719: {
     720:     return pte;
     721: }
     722: 
     723: static inline pmd_t pmd_mknonnuma(pmd_t pmd)
     724: {
     725:     return pmd;
     726: }
     727: 
     728: static inline pte_t pte_mknuma(pte_t pte)
     729: {
     730:     return pte;
     731: }
     732: 
     733: static inline pmd_t pmd_mknuma(pmd_t pmd)
     734: {
     735:     return pmd;
     736: }
     737: #endif /* CONFIG_NUMA_BALANCING */
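
A sketch of the round trip these helpers implement when NUMA balancing is active: the hinting scan makes a pte inaccessible with pte_mknuma(), and the later hinting fault restores access with pte_mknonnuma(), roughly in the shape of do_numa_page() (migration decisions omitted; the function name is hypothetical):

/* Illustration: resolve a NUMA hinting fault on one pte. */
static void example_numa_hinting_fault(struct mm_struct *mm,
                                       struct vm_area_struct *vma,
                                       unsigned long addr, pte_t *ptep)
{
    pte_t pte = *ptep;

    if (!pte_numa(pte))
        return;

    /* Make the page accessible again; _PAGE_ACCESSED is set for us. */
    pte = pte_mknonnuma(pte);
    set_pte_at(mm, addr, ptep, pte);
    update_mmu_cache(vma, addr, ptep);
}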
     738: 
     739: #endif /* CONFIG_MMU */
     740: 
     741: #endif /* !__ASSEMBLY__ */
     742: 
     743: #ifndef io_remap_pfn_range
     744: #define io_remap_pfn_range remap_pfn_range
     745: #endif
     746: 
     747: #endif /* _ASM_GENERIC_PGTABLE_H */
     748: