
Analysis of the ARMv7 cache-operation source files

2016-02-23 14:21
I have recently been working with TI's DRA726 chip. The A15 side needs to access image data that lives in peripheral address space, and the images are copied by DMA into cacheable memory.

This raises the DMA cache-coherency problem: after the DMA completes, the data-cache lines covering the image buffer have to be cleaned/invalidated before the A15 reads them.

Taking this A15 core as the example, this post walks through ARM's cache-maintenance code. The files involved are: cacheflush.h, cache-v7.S, proc-macros.S and proc-v7.S.

Memory management is one of the most powerful and most complex subsystems in an OS, rigorously engineered; each part of it takes several books' worth of study to understand thoroughly.

The most basic point about a cache is that it is an accelerator, and its justification is the principle of locality in programs.

The actual implementation details are far more involved, for example how the caches are brought up during boot and how the system moves from direct memory access to cached access.

This time the focus is the cache-coherency problem in our project, which I took as an opportunity to share with my teammates.
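Before diving into the sources, here is a minimal sketch, assuming a generic device pointer and buffer, of how a driver would normally let the streaming DMA API perform this maintenance instead of calling the cache functions directly (dev, buf, len and grab_frame are placeholder names, not from the original project):

#include <linux/dma-mapping.h>
#include <linux/errno.h>

/* Sketch only: receive one image frame from a device via DMA. */
static int grab_frame(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* For DMA_FROM_DEVICE this invalidates the buffer's cache lines. */
	handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;

	/* ... program the DMA engine with 'handle' and wait for completion ... */

	/* Drops any lines speculatively refilled during the transfer. */
	dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
	return 0;
}

Underneath, dma_map_single()/dma_unmap_single() reach the dmac_map_area/dmac_unmap_area hooks declared in cacheflush.h below, which for this core resolve to the v7_dma_* routines in cache-v7.S.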

/*
* arch/arm/include/asm/cacheflush.h
*
* Copyright (C) 1999-2002 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef _ASMARM_CACHEFLUSH_H
#define _ASMARM_CACHEFLUSH_H

#include <linux/mm.h>

#include <asm/glue-cache.h>
#include <asm/shmparam.h>
#include <asm/cachetype.h>
#include <asm/outercache.h>

#define CACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)

/*
* This flag is used to indicate that the page pointed to by a pte is clean
* and does not require cleaning before returning it to the user.
*/
#define PG_dcache_clean PG_arch_1

/*
* MM Cache Management
* ===================
*
* The arch/arm/mm/cache-*.S and arch/arm/mm/proc-*.S files
* implement these methods.
*
* Start addresses are inclusive and end addresses are exclusive;
* start addresses should be rounded down, end addresses up.
*
* See Documentation/cachetlb.txt for more information.
* Please note that the implementation of these, and the required
* effects are cache-type (VIVT/VIPT/PIPT) specific.
*
* flush_icache_all()
*
* Unconditionally clean and invalidate the entire icache.
* Currently only needed for cache-v6.S and cache-v7.S, see
* __flush_icache_all for the generic implementation.
*
* flush_kern_all()
*
* Unconditionally clean and invalidate the entire cache.
*
* flush_kern_louis()
*
* Flush data cache levels up to the level of unification
* inner shareable and invalidate the I-cache.
* Only needed from v7 onwards, falls back to flush_cache_all()
* for all other processor versions.
*
* flush_user_all()
*
* Clean and invalidate all user space cache entries
* before a change of page tables.
*
* flush_user_range(start, end, flags)
*
* Clean and invalidate a range of cache entries in the
* specified address space before a change of page tables.
* - start - user start address (inclusive, page aligned)
* - end - user end address (exclusive, page aligned)
* - flags - vma->vm_flags field
*
* coherent_kern_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
* - start - virtual start address
* - end - virtual end address
*
* coherent_user_range(start, end)
*
* Ensure coherency between the Icache and the Dcache in the
* region described by start, end. If you have non-snooping
* Harvard caches, you need to implement this function.
* - start - virtual start address
* - end - virtual end address
*
* flush_kern_dcache_area(kaddr, size)
*
* Ensure that the data held in page is written back.
* - kaddr - page address
* - size - region size
*
* DMA Cache Coherency
* ===================
*
* dma_flush_range(start, end)
*
* Clean and invalidate the specified virtual address range.
* - start - virtual start address
* - end - virtual end address
*/

struct cpu_cache_fns {
void (*flush_icache_all)(void);
void (*flush_kern_all)(void);
void (*flush_kern_louis)(void);
void (*flush_user_all)(void);
void (*flush_user_range)(unsigned long, unsigned long, unsigned int);

void (*coherent_kern_range)(unsigned long, unsigned long);
int (*coherent_user_range)(unsigned long, unsigned long);
void (*flush_kern_dcache_area)(void *, size_t);

void (*dma_map_area)(const void *, size_t, int);
void (*dma_unmap_area)(const void *, size_t, int);

void (*dma_flush_range)(const void *, const void *);
};

/*
* Select the calling method
*/
#ifdef MULTI_CACHE

extern struct cpu_cache_fns cpu_cache;

#define __cpuc_flush_icache_all cpu_cache.flush_icache_all
#define __cpuc_flush_kern_all cpu_cache.flush_kern_all
#define __cpuc_flush_kern_louis cpu_cache.flush_kern_louis
#define __cpuc_flush_user_all cpu_cache.flush_user_all
#define __cpuc_flush_user_range cpu_cache.flush_user_range
#define __cpuc_coherent_kern_range cpu_cache.coherent_kern_range
#define __cpuc_coherent_user_range cpu_cache.coherent_user_range
#define __cpuc_flush_dcache_area cpu_cache.flush_kern_dcache_area

/*
* These are private to the dma-mapping API. Do not use directly.
* Their sole purpose is to ensure that data held in the cache
* is visible to DMA, or data written by DMA to system memory is
* visible to the CPU.
*/
#define dmac_map_area cpu_cache.dma_map_area
#define dmac_unmap_area cpu_cache.dma_unmap_area
#define dmac_flush_range cpu_cache.dma_flush_range

#else

extern void __cpuc_flush_icache_all(void);
extern void __cpuc_flush_kern_all(void);
extern void __cpuc_flush_kern_louis(void);
extern void __cpuc_flush_user_all(void);
extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int);
extern void __cpuc_coherent_kern_range(unsigned long, unsigned long);
extern int __cpuc_coherent_user_range(unsigned long, unsigned long);
extern void __cpuc_flush_dcache_area(void *, size_t);

/*
* These are private to the dma-mapping API. Do not use directly.
* Their sole purpose is to ensure that data held in the cache
* is visible to DMA, or data written by DMA to system memory is
* visible to the CPU.
*/
extern void dmac_map_area(const void *, size_t, int);
extern void dmac_unmap_area(const void *, size_t, int);
extern void dmac_flush_range(const void *, const void *);

#endif

/*
* Copy user data from/to a page which is mapped into a different
* processes address space. Really, we want to allow our "user
* space" model to handle this.
*/
extern void copy_to_user_page(struct vm_area_struct *, struct page *,
unsigned long, void *, const void *, unsigned long);
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
} while (0)

/*
* Convert calls to our calling convention.
*/

/* Invalidate I-cache */
#define __flush_icache_all_generic() \
asm("mcr p15, 0, %0, c7, c5, 0" \
: : "r" (0));

/* Invalidate I-cache inner shareable */
#define __flush_icache_all_v7_smp() \
asm("mcr p15, 0, %0, c7, c1, 0" \
: : "r" (0));

/*
* Optimized __flush_icache_all for the common cases. Note that UP ARMv7
* will fall through to use __flush_icache_all_generic.
*/
#if (defined(CONFIG_CPU_V7) && \
(defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K))) || \
defined(CONFIG_SMP_ON_UP)
#define __flush_icache_preferred __cpuc_flush_icache_all
#elif __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
#define __flush_icache_preferred __flush_icache_all_v7_smp
#elif __LINUX_ARM_ARCH__ == 6 && defined(CONFIG_ARM_ERRATA_411920)
#define __flush_icache_preferred __cpuc_flush_icache_all
#else
#define __flush_icache_preferred __flush_icache_all_generic
#endif

static inline void __flush_icache_all(void)
{
__flush_icache_preferred();
dsb();
}

/*
* Flush caches up to Level of Unification Inner Shareable
*/
#define flush_cache_louis() __cpuc_flush_kern_louis()

#define flush_cache_all() __cpuc_flush_kern_all()

static inline void vivt_flush_cache_mm(struct mm_struct *mm)
{
if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
__cpuc_flush_user_all();
}

static inline void
vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;

if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
vma->vm_flags);
}

static inline void
vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
{
struct mm_struct *mm = vma->vm_mm;

if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
unsigned long addr = user_addr & PAGE_MASK;
__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
}
}

#ifndef CONFIG_CPU_CACHE_VIPT
#define flush_cache_mm(mm) \
vivt_flush_cache_mm(mm)
#define flush_cache_range(vma,start,end) \
vivt_flush_cache_range(vma,start,end)
#define flush_cache_page(vma,addr,pfn) \
vivt_flush_cache_page(vma,addr,pfn)
#else
extern void flush_cache_mm(struct mm_struct *mm);
extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn);
#endif

#define flush_cache_dup_mm(mm) flush_cache_mm(mm)

/*
* flush_cache_user_range is used when we want to ensure that the
* Harvard caches are synchronised for the user space address range.
* This is used for the ARM private sys_cacheflush system call.
*/
#define flush_cache_user_range(s,e) __cpuc_coherent_user_range(s,e)

/*
* Perform necessary cache operations to ensure that data previously
* stored within this range of addresses can be executed by the CPU.
*/
#define flush_icache_range(s,e) __cpuc_coherent_kern_range(s,e)

/*
* Perform necessary cache operations to ensure that the TLB will
* see data written in the specified area.
*/
#define clean_dcache_area(start,size) cpu_dcache_clean_area(start, size)

/*
* flush_dcache_page is used when the kernel has written to the page
* cache page at virtual address page->virtual.
*
* If this page isn't mapped (ie, page_mapping == NULL), or it might
* have userspace mappings, then we _must_ always clean + invalidate
* the dcache entries associated with the kernel mapping.
*
* Otherwise we can defer the operation, and clean the cache when we are
* about to change to user space. This is the same method as used on SPARC64.
* See update_mmu_cache for the user space part.
*/
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);

static inline void flush_kernel_vmap_range(void *addr, int size)
{
if ((cache_is_vivt() || cache_is_vipt_aliasing()))
__cpuc_flush_dcache_area(addr, (size_t)size);
}
static inline void invalidate_kernel_vmap_range(void *addr, int size)
{
if ((cache_is_vivt() || cache_is_vipt_aliasing()))
__cpuc_flush_dcache_area(addr, (size_t)size);
}

#define ARCH_HAS_FLUSH_ANON_PAGE
static inline void flush_anon_page(struct vm_area_struct *vma,
struct page *page, unsigned long vmaddr)
{
extern void __flush_anon_page(struct vm_area_struct *vma,
struct page *, unsigned long);
if (PageAnon(page))
__flush_anon_page(vma, page, vmaddr);
}

#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
extern void flush_kernel_dcache_page(struct page *);

#define flush_dcache_mmap_lock(mapping) \
spin_lock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)

#define flush_icache_user_range(vma,page,addr,len) \
flush_dcache_page(page)

/*
* We don't appear to need to do anything here. In fact, if we did, we'd
* duplicate cache flushing elsewhere performed by flush_dcache_page().
*/
#define flush_icache_page(vma,page) do { } while (0)

/*
* flush_cache_vmap() is used when creating mappings (eg, via vmap,
* vmalloc, ioremap etc) in kernel space for pages. On non-VIPT
* caches, since the direct-mappings of these pages may contain cached
* data, we need to do a full cache flush to ensure that writebacks
* don't corrupt data placed into these pages via the new mappings.
*/
static inline void flush_cache_vmap(unsigned long start, unsigned long end)
{
if (!cache_is_vipt_nonaliasing())
flush_cache_all();
else
/*
* set_pte_at() called from vmap_pte_range() does not
* have a DSB after cleaning the cache line.
*/
dsb(ishst);
}

static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
{
if (!cache_is_vipt_nonaliasing())
flush_cache_all();
}

/*
* Memory synchronization helpers for mixed cached vs non cached accesses.
*
* Some synchronization algorithms have to set states in memory with the
* cache enabled or disabled depending on the code path. It is crucial
* to always ensure proper cache maintenance to update main memory right
* away in that case.
*
* Any cached write must be followed by a cache clean operation.
* Any cached read must be preceded by a cache invalidate operation.
* Yet, in the read case, a cache flush i.e. atomic clean+invalidate
* operation is needed to avoid discarding possible concurrent writes to the
* accessed memory.
*
* Also, in order to prevent a cached writer from interfering with an
* adjacent non-cached writer, each state variable must be located to
* a separate cache line.
*/

/*
* This needs to be >= the max cache writeback size of all
* supported platforms included in the current kernel configuration.
* This is used to align state variables to their own cache lines.
*/
#define __CACHE_WRITEBACK_ORDER 6 /* guessed from existing platforms */
#define __CACHE_WRITEBACK_GRANULE (1 << __CACHE_WRITEBACK_ORDER)

/*
* There is no __cpuc_clean_dcache_area but we use it anyway for
* code intent clarity, and alias it to __cpuc_flush_dcache_area.
*/
#define __cpuc_clean_dcache_area __cpuc_flush_dcache_area

/*
* Ensure preceding writes to *p by this CPU are visible to
* subsequent reads by other CPUs:
*/
static inline void __sync_cache_range_w(volatile void *p, size_t size)
{
char *_p = (char *)p;

__cpuc_clean_dcache_area(_p, size);
outer_clean_range(__pa(_p), __pa(_p + size));
}

/*
* Ensure preceding writes to *p by other CPUs are visible to
* subsequent reads by this CPU. We must be careful not to
* discard data simultaneously written by another CPU, hence the
* usage of flush rather than invalidate operations.
*/
static inline void __sync_cache_range_r(volatile void *p, size_t size)
{
char *_p = (char *)p;

#ifdef CONFIG_OUTER_CACHE
if (outer_cache.flush_range) {
/*
* Ensure dirty data migrated from other CPUs into our cache
* are cleaned out safely before the outer cache is cleaned:
*/
__cpuc_clean_dcache_area(_p, size);

/* Clean and invalidate stale data for *p from outer ... */
outer_flush_range(__pa(_p), __pa(_p + size));
}
#endif

/* ... and inner cache: */
__cpuc_flush_dcache_area(_p, size);
}

#define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr))
#define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr))

/*
* Disabling cache access for one CPU in an ARMv7 SMP system is tricky.
* To do so we must:
*
* - Clear the SCTLR.C bit to prevent further cache allocations
* - Flush the desired level of cache
* - Clear the ACTLR "SMP" bit to disable local coherency
*
* ... and so without any intervening memory access in between those steps,
* not even to the stack.
*
* WARNING -- After this has been called:
*
* - No ldrex/strex (and similar) instructions must be used.
* - The CPU is obviously no longer coherent with the other CPUs.
* - This is unlikely to work as expected if Linux is running non-secure.
*
* Note:
*
* - This is known to apply to several ARMv7 processor implementations,
* however some exceptions may exist. Caveat emptor.
*
* - The clobber list is dictated by the call to v7_flush_dcache_*.
* fp is preserved to the stack explicitly prior disabling the cache
* since adding it to the clobber list is incompatible with having
* CONFIG_FRAME_POINTER=y. ip is saved as well if ever r12-clobbering
* trampoline are inserted by the linker and to keep sp 64-bit aligned.
*/
#define v7_exit_coherency_flush(level) \
asm volatile( \
"stmfd sp!, {fp, ip} \n\t" \
"mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \
"bic r0, r0, #"__stringify(CR_C)" \n\t" \
"mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \
"isb \n\t" \
"bl v7_flush_dcache_"__stringify(level)" \n\t" \
"clrex \n\t" \
"mrc p15, 0, r0, c1, c0, 1 @ get ACTLR \n\t" \
"bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \
"mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \
"isb \n\t" \
"dsb \n\t" \
"ldmfd sp!, {fp, ip}" \
: : : "r0","r1","r2","r3","r4","r5","r6","r7", \
"r9","r10","lr","memory" )

int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);

#endif
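To make the coherent_kern_range/flush_icache_range part of the API above concrete, here is a hedged sketch of the usual pattern when code is written into memory and must then be executed (install_trampoline, buf and insns are invented names for the example):

#include <linux/types.h>
#include <linux/string.h>
#include <asm/cacheflush.h>

static void install_trampoline(void *buf, const u32 *insns, size_t len)
{
	memcpy(buf, insns, len);			/* store the new instructions */
	flush_icache_range((unsigned long)buf,		/* clean D-cache, invalidate  */
			   (unsigned long)buf + len);	/* I-cache over the range     */
}

On this core flush_icache_range() expands to __cpuc_coherent_kern_range(), i.e. v7_coherent_kern_range in cache-v7.S below.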

/*
*  linux/arch/arm/mm/cache-v7.S
*
*  Copyright (C) 2001 Deep Blue Solutions Ltd.
*  Copyright (C) 2005 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*  This is the "shell" of the ARMv7 processor support.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>

#include "proc-macros.S"

/*
* The secondary kernel init calls v7_flush_dcache_all before it enables
* the L1; however, the L1 comes out of reset in an undefined state, so
* the clean + invalidate performed by v7_flush_dcache_all causes a bunch
* of cache lines with uninitialized data and uninitialized tags to get
* written out to memory, which does really unpleasant things to the main
* processor.  We fix this by performing an invalidate, rather than a
* clean + invalidate, before jumping into the kernel.
*
* This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
* to be called for both secondary cores startup and primary core resume
* procedures.
*/
ENTRY(v7_invalidate_l1)
mov     r0, #0
mcr     p15, 2, r0, c0, c0, 0
mrc     p15, 1, r0, c0, c0, 0

ldr     r1, =0x7fff
and     r2, r1, r0, lsr #13

ldr     r1, =0x3ff

and     r3, r1, r0, lsr #3      @ NumWays - 1
add     r2, r2, #1              @ NumSets

and     r0, r0, #0x7
add     r0, r0, #4      @ SetShift

clz     r1, r3          @ WayShift
add     r4, r3, #1      @ NumWays
1:     sub     r2, r2, #1      @ NumSets--
mov     r3, r4          @ Temp = NumWays
2:     subs    r3, r3, #1      @ Temp--
mov     r5, r3, lsl r1
mov     r6, r2, lsl r0
orr     r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
mcr     p15, 0, r5, c7, c6, 2
bgt     2b
cmp     r2, #0
bgt     1b
dsb
isb
mov     pc, lr
ENDPROC(v7_invalidate_l1)

/*
*	v7_flush_icache_all()
*
*	Flush the whole I-cache.
*
*	Registers:
*	r0 - set to 0
*/
ENTRY(v7_flush_icache_all)
mov	r0, #0
ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)		@ invalidate I-cache inner shareable
ALT_UP(mcr	p15, 0, r0, c7, c5, 0)		@ I+BTB cache invalidate
mov	pc, lr
ENDPROC(v7_flush_icache_all)

/*
*     v7_flush_dcache_louis()
*
*     Flush the D-cache up to the Level of Unification Inner Shareable
*
*     Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
*/

ENTRY(v7_flush_dcache_louis)
dmb					@ ensure ordering with previous memory accesses
mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
ALT_SMP(ands	r3, r0, #(7 << 21))	@ extract LoUIS from clidr
ALT_UP(ands	r3, r0, #(7 << 27))	@ extract LoUU from clidr
#ifdef CONFIG_ARM_ERRATA_643719
ALT_SMP(mrceq	p15, 0, r2, c0, c0, 0)	@ read main ID register
ALT_UP(moveq	pc, lr)			@ LoUU is zero, so nothing to do
ldreq	r1, =0x410fc090                 @ ID of ARM Cortex A9 r0p?
biceq	r2, r2, #0x0000000f             @ clear minor revision number
teqeq	r2, r1                          @ test for errata affected core and if so...
orreqs	r3, #(1 << 21)			@   fix LoUIS value (and set flags state to 'ne')
#endif
ALT_SMP(mov	r3, r3, lsr #20)	@ r3 = LoUIS * 2
ALT_UP(mov	r3, r3, lsr #26)	@ r3 = LoUU * 2
moveq	pc, lr				@ return if level == 0
mov	r10, #0				@ r10 (starting level) = 0
b	flush_levels			@ start flushing cache levels
ENDPROC(v7_flush_dcache_louis)

/*
*	v7_flush_dcache_all()
*
*	Flush the whole D-cache.
*
*	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
*
*	- mm    - mm_struct describing address space
*/
ENTRY(v7_flush_dcache_all)
dmb					@ ensure ordering with previous memory accesses
mrc	p15, 1, r0, c0, c0, 1		@ read clidr
ands	r3, r0, #0x7000000		@ extract loc from clidr
mov	r3, r3, lsr #23			@ left align loc bit field
beq	finished			@ if loc is 0, then no need to clean
mov	r10, #0				@ start clean at cache level 0
flush_levels:
add	r2, r10, r10, lsr #1		@ work out 3x current cache level
mov	r1, r0, lsr r2			@ extract cache type bits from clidr
and	r1, r1, #7			@ mask of the bits for current cache only
cmp	r1, #2				@ see what cache we have at this level
blt	skip				@ skip if no cache, or just i-cache
#ifdef CONFIG_PREEMPT
save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
#endif
mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
isb					@ isb to sync the new cssr&csidr
mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
#ifdef CONFIG_PREEMPT
restore_irqs_notrace r9
#endif
and	r2, r1, #7			@ extract the length of the cache lines
add	r2, r2, #4			@ add 4 (line length offset)
ldr	r4, =0x3ff
ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
clz	r5, r4				@ find bit position of way size increment
ldr	r7, =0x7fff
ands	r7, r7, r1, lsr #13		@ extract max number of the index size
loop1:
mov	r9, r7				@ create working copy of max index
loop2:
ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
THUMB(	lsl	r6, r4, r5		)
THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
THUMB(	lsl	r6, r9, r2		)
THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
subs	r9, r9, #1			@ decrement the index
bge	loop2
subs	r4, r4, #1			@ decrement the way
bge	loop1
skip:
add	r10, r10, #2			@ increment cache number
cmp	r3, r10
bgt	flush_levels
finished:
mov	r10, #0				@ switch back to cache level 0
mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
dsb
isb
mov	pc, lr
ENDPROC(v7_flush_dcache_all)
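As an aid to reading the set/way loop above, the following is an illustrative C rendering of v7_flush_dcache_all (my own sketch with ARMv7-only inline assembly, not taken from the kernel): walk CLIDR up to the Level of Coherency, decode each data/unified level's CCSIDR into line size, way count and set count, and issue a DCCISW clean+invalidate for every set/way:

static void v7_flush_dcache_all_c(void)
{
	unsigned int clidr, loc, level;

	asm volatile("dmb" : : : "memory");
	asm volatile("mrc p15, 1, %0, c0, c0, 1" : "=r" (clidr));	/* CLIDR */
	loc = (clidr >> 24) & 0x7;					/* LoC   */

	for (level = 0; level < loc; level++) {
		unsigned int ctype = (clidr >> (level * 3)) & 0x7;
		unsigned int ccsidr, line_shift, way_shift, sets, ways, set, way;

		if (ctype < 2)			/* no cache, or I-cache only */
			continue;

		/* Select the level in CSSELR, then read its CCSIDR. */
		asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (level << 1));
		asm volatile("isb" : : : "memory");
		asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (ccsidr));

		line_shift = (ccsidr & 0x7) + 4;	/* log2(line size in bytes) */
		ways       = (ccsidr >> 3) & 0x3ff;	/* number of ways - 1       */
		sets       = (ccsidr >> 13) & 0x7fff;	/* number of sets - 1       */
		way_shift  = ways ? __builtin_clz(ways) : 0;

		for (way = 0; way <= ways; way++)
			for (set = 0; set <= sets; set++) {
				unsigned int sw = (way << way_shift) |
						  (set << line_shift) |
						  (level << 1);
				/* DCCISW: clean & invalidate by set/way */
				asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (sw));
			}
	}

	asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (0));	/* back to level 0 */
	asm volatile("dsb" : : : "memory");
	asm volatile("isb" : : : "memory");
}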

/*
*	v7_flush_cache_all()
*
*	Flush the entire cache system.
*  The data cache flush is now achieved using atomic clean / invalidates
*  working outwards from L1 cache. This is done using Set/Way based cache
*  maintenance instructions.
*  The instruction cache can still be invalidated back to the point of
*  unification in a single instruction.
*
*/
ENTRY(v7_flush_kern_cache_all)
ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
bl	v7_flush_dcache_all
mov	r0, #0
ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
mov	pc, lr
ENDPROC(v7_flush_kern_cache_all)

/*
*     v7_flush_kern_cache_louis(void)
*
*     Flush the data cache up to Level of Unification Inner Shareable.
*     Invalidate the I-cache to the point of unification.
*/
ENTRY(v7_flush_kern_cache_louis)
ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
bl	v7_flush_dcache_louis
mov	r0, #0
ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
mov	pc, lr
ENDPROC(v7_flush_kern_cache_louis)

/*
*	v7_flush_cache_all()
*
*	Flush all TLB entries in a particular address space
*
*	- mm    - mm_struct describing address space
*/
ENTRY(v7_flush_user_cache_all)
/*FALLTHROUGH*/

/*
*	v7_flush_cache_range(start, end, flags)
*
*	Flush a range of TLB entries in the specified address space.
*
*	- start - start address (may not be aligned)
*	- end   - end address (exclusive, may not be aligned)
*	- flags	- vm_area_struct flags describing address space
*
*	It is assumed that:
*	- we have a VIPT cache.
*/
ENTRY(v7_flush_user_cache_range)
mov	pc, lr
ENDPROC(v7_flush_user_cache_all)
ENDPROC(v7_flush_user_cache_range)

/*
*	v7_coherent_kern_range(start,end)
*
*	Ensure that the I and D caches are coherent within specified
*	region.  This is typically used when code has been written to
*	a memory region, and will be executed.
*
*	- start   - virtual start address of region
*	- end     - virtual end address of region
*
*	It is assumed that:
*	- the Icache does not read data from the write buffer
*/
ENTRY(v7_coherent_kern_range)
/* FALLTHROUGH */

/*
*	v7_coherent_user_range(start,end)
*
*	Ensure that the I and D caches are coherent within specified
*	region.  This is typically used when code has been written to
*	a memory region, and will be executed.
*
*	- start   - virtual start address of region
*	- end     - virtual end address of region
*
*	It is assumed that:
*	- the Icache does not read data from the write buffer
*/
ENTRY(v7_coherent_user_range)
UNWIND(.fnstart		)
dcache_line_size r2, r3
sub	r3, r2, #1
bic	r12, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
ALT_SMP(W(dsb))
ALT_UP(W(nop))
#endif
1:
USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
add	r12, r12, r2
cmp	r12, r1
blo	1b
dsb	ishst
icache_line_size r2, r3
sub	r3, r2, #1
bic	r12, r0, r3
2:
USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
add	r12, r12, r2
cmp	r12, r1
blo	2b
mov	r0, #0
ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
dsb	ishst
isb
mov	pc, lr

/*
* Fault handling for the cache operation above. If the virtual address in r0
* isn't mapped, fail with -EFAULT.
*/
9001:
#ifdef CONFIG_ARM_ERRATA_775420
dsb
#endif
mov	r0, #-EFAULT
mov	pc, lr
UNWIND(.fnend		)
ENDPROC(v7_coherent_kern_range)
ENDPROC(v7_coherent_user_range)

/*
*	v7_flush_kern_dcache_area(void *addr, size_t size)
*
*	Ensure that the data held in the page kaddr is written back
*	to the page in question.
*
*	- addr	- kernel address
*	- size	- region size
*/
ENTRY(v7_flush_kern_dcache_area)
dcache_line_size r2, r3
add	r1, r0, r1
sub	r3, r2, #1
bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
ALT_SMP(W(dsb))
ALT_UP(W(nop))
#endif
1:
mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line / unified line
add	r0, r0, r2
cmp	r0, r1
blo	1b
dsb
mov	pc, lr
ENDPROC(v7_flush_kern_dcache_area)

/*
*	v7_dma_inv_range(start,end)
*
*	Invalidate the data cache within the specified region; we will
*	be performing a DMA operation in this region and we want to
*	purge old data in the cache.
*
*	- start   - virtual start address of region
*	- end     - virtual end address of region
*/
v7_dma_inv_range:
dcache_line_size r2, r3
sub	r3, r2, #1
tst	r0, r3
bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
ALT_SMP(W(dsb))
ALT_UP(W(nop))
#endif
mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line

tst	r1, r3
bic	r1, r1, r3
mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
1:
mcr	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
add	r0, r0, r2
cmp	r0, r1
blo	1b
dsb
mov	pc, lr
ENDPROC(v7_dma_inv_range)

/*
*	v7_dma_clean_range(start,end)
*	- start   - virtual start address of region
*	- end     - virtual end address of region
*/
v7_dma_clean_range:
dcache_line_size r2, r3
sub	r3, r2, #1
bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
ALT_SMP(W(dsb))
ALT_UP(W(nop))
#endif
1:
mcr	p15, 0, r0, c7, c10, 1		@ clean D / U line
add	r0, r0, r2
cmp	r0, r1
blo	1b
dsb
mov	pc, lr
ENDPROC(v7_dma_clean_range)

/*
*	v7_dma_flush_range(start,end)
*	- start   - virtual start address of region
*	- end     - virtual end address of region
*/
ENTRY(v7_dma_flush_range)
dcache_line_size r2, r3
sub	r3, r2, #1
bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
ALT_SMP(W(dsb))
ALT_UP(W(nop))
#endif
1:
mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
add	r0, r0, r2
cmp	r0, r1
blo	1b
dsb
mov	pc, lr
ENDPROC(v7_dma_flush_range)

/*
*	dma_map_area(start, size, dir)
*	- start	- kernel virtual start address
*	- size	- size of region
*	- dir	- DMA direction
*/
ENTRY(v7_dma_map_area)
add	r1, r1, r0
teq	r2, #DMA_FROM_DEVICE
beq	v7_dma_inv_range
b	v7_dma_clean_range
ENDPROC(v7_dma_map_area)

/*
*	dma_unmap_area(start, size, dir)
*	- start	- kernel virtual start address
*	- size	- size of region
*	- dir	- DMA direction
*/
ENTRY(v7_dma_unmap_area)
add	r1, r1, r0
teq	r2, #DMA_TO_DEVICE
bne	v7_dma_inv_range
mov	pc, lr
ENDPROC(v7_dma_unmap_area)

__INITDATA

@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions v7
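The direction handling in v7_dma_map_area/v7_dma_unmap_area above is terse; spelled out in C (a sketch only, with assumed C prototypes for the local assembly routines), the rule is: invalidate before a device-to-memory transfer, clean before a memory-to-device transfer, and invalidate again on unmap unless the transfer went to the device:

#include <linux/types.h>
#include <linux/dma-direction.h>

/* Assumed C prototypes for the assembly routines defined above. */
extern void v7_dma_inv_range(const void *start, const void *end);
extern void v7_dma_clean_range(const void *start, const void *end);

static void v7_dma_map_area_c(const void *start, size_t size, int dir)
{
	if (dir == DMA_FROM_DEVICE)
		v7_dma_inv_range(start, start + size);	 /* device will write: drop stale lines */
	else
		v7_dma_clean_range(start, start + size); /* device will read: push dirty lines  */
}

static void v7_dma_unmap_area_c(const void *start, size_t size, int dir)
{
	if (dir != DMA_TO_DEVICE)			 /* data may have been DMA'd in */
		v7_dma_inv_range(start, start + size);
}

This is exactly the maintenance that the earlier streaming-DMA example relies on through dmac_map_area/dmac_unmap_area.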

/*
* We need constants.h for:
*  VMA_VM_MM
*  VMA_VM_FLAGS
*  VM_EXEC
*/
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>

/*
* vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
*/
.macro	vma_vm_mm, rd, rn
ldr	\rd, [\rn, #VMA_VM_MM]
.endm

/*
* vma_vm_flags - get vma->vm_flags
*/
.macro	vma_vm_flags, rd, rn
ldr	\rd, [\rn, #VMA_VM_FLAGS]
.endm

.macro	tsk_mm, rd, rn
ldr	\rd, [\rn, #TI_TASK]
ldr	\rd, [\rd, #TSK_ACTIVE_MM]
.endm

/*
* act_mm - get current->active_mm
*/
.macro	act_mm, rd
bic	\rd, sp, #8128
bic	\rd, \rd, #63
ldr	\rd, [\rd, #TI_TASK]
ldr	\rd, [\rd, #TSK_ACTIVE_MM]
.endm

/*
* mmid - get context id from mm pointer (mm->context.id)
* note, this field is 64bit, so in big-endian the two words are swapped too.
*/
.macro	mmid, rd, rn
#ifdef __ARMEB__
ldr	\rd, [\rn, #MM_CONTEXT_ID + 4 ]
#else
ldr	\rd, [\rn, #MM_CONTEXT_ID]
#endif
.endm

/*
* mask_asid - mask the ASID from the context ID
*/
.macro	asid, rd, rn
and	\rd, \rn, #255
.endm

.macro	crval, clear, mmuset, ucset
#ifdef CONFIG_MMU
.word	\clear
.word	\mmuset
#else
.word	\clear
.word	\ucset
#endif
.endm

/*
* dcache_line_size - get the minimum D-cache line size from the CTR register
* on ARMv7.
*/
.macro	dcache_line_size, reg, tmp
mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
lsr	\tmp, \tmp, #16
and	\tmp, \tmp, #0xf		@ cache line size encoding
mov	\reg, #4			@ bytes per word
mov	\reg, \reg, lsl \tmp		@ actual cache line size
.endm

/*
* icache_line_size - get the minimum I-cache line size from the CTR register
* on ARMv7.
*/
.macro	icache_line_size, reg, tmp
mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
and	\tmp, \tmp, #0xf		@ cache line size encoding
mov	\reg, #4			@ bytes per word
mov	\reg, \reg, lsl \tmp		@ actual cache line size
.endm
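For reference, a C equivalent of the two macros above (a sketch, ARMv7 only): CTR[19:16] gives the smallest D-cache line and CTR[3:0] the smallest I-cache line, each encoded as log2 of the size in words:

static inline unsigned int v7_dcache_line_size(void)
{
	unsigned int ctr;

	asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));	/* read CTR */
	return 4U << ((ctr >> 16) & 0xf);	/* DminLine: words -> bytes */
}

static inline unsigned int v7_icache_line_size(void)
{
	unsigned int ctr;

	asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));	/* read CTR */
	return 4U << (ctr & 0xf);		/* IminLine: words -> bytes */
}

On a Cortex-A15 this typically yields 64-byte D-cache lines, which is why the v7 range operations step through addresses in dcache_line_size increments.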

/*
* Sanity check the PTE configuration for the code below - which makes
* certain assumptions about how these bits are laid out.
*/
#ifdef CONFIG_MMU
#if L_PTE_SHARED != PTE_EXT_SHARED
#error PTE shared bit mismatch
#endif
#if !defined (CONFIG_ARM_LPAE) && \
(L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\
L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED
#error Invalid Linux PTE bit settings
#endif
#endif	/* CONFIG_MMU */

/*
* The ARMv6 and ARMv7 set_pte_ext translation function.
*
* Permission translation:
*  YUWD  APX AP1 AP0	SVC	User
*  0xxx   0   0   0	no acc	no acc
*  100x   1   0   1	r/o	no acc
*  10x0   1   0   1	r/o	no acc
*  1011   0   0   1	r/w	no acc
*  110x   1   1   1	r/o	r/o
*  11x0   1   1   1	r/o	r/o
*  1111   0   1   1	r/w	r/w
*/
.macro	armv6_mt_table pfx
\pfx\()_mt_table:
.long	0x00						@ L_PTE_MT_UNCACHED
.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
.long	PTE_CACHEABLE					@ L_PTE_MT_WRITETHROUGH
.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
.long	PTE_BUFFERABLE					@ L_PTE_MT_DEV_SHARED
.long	0x00						@ unused
.long	0x00						@ L_PTE_MT_MINICACHE (not present)
.long	PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC
.long	0x00						@ unused
.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
.long	0x00						@ unused
.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
.long	0x00						@ unused
.long	0x00						@ unused
.long	PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX	@ L_PTE_MT_VECTORS
.endm

.macro	armv6_set_pte_ext pfx
str	r1, [r0], #2048			@ linux version

bic	r3, r1, #0x000003fc
bic	r3, r3, #PTE_TYPE_MASK
orr	r3, r3, r2
orr	r3, r3, #PTE_EXT_AP0 | 2

adr	ip, \pfx\()_mt_table
and	r2, r1, #L_PTE_MT_MASK
ldr	r2, [ip, r2]

eor	r1, r1, #L_PTE_DIRTY
tst	r1, #L_PTE_DIRTY|L_PTE_RDONLY
orrne	r3, r3, #PTE_EXT_APX

tst	r1, #L_PTE_USER
orrne	r3, r3, #PTE_EXT_AP1
tstne	r3, #PTE_EXT_APX

@ user read-only -> kernel read-only
bicne	r3, r3, #PTE_EXT_AP0

tst	r1, #L_PTE_XN
orrne	r3, r3, #PTE_EXT_XN

eor	r3, r3, r2

tst	r1, #L_PTE_YOUNG
tstne	r1, #L_PTE_PRESENT
moveq	r3, #0
tstne	r1, #L_PTE_NONE
movne	r3, #0

str	r3, [r0]
mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
.endm

/*
* The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function,
* covering most CPUs except Xscale and Xscale 3.
*
* Permission translation:
*  YUWD   AP	SVC	User
*  0xxx  0x00	no acc	no acc
*  100x  0x00	r/o	no acc
*  10x0  0x00	r/o	no acc
*  1011  0x55	r/w	no acc
*  110x  0xaa	r/w	r/o
*  11x0  0xaa	r/w	r/o
*  1111  0xff	r/w	r/w
*/
.macro	armv3_set_pte_ext wc_disable=1
str	r1, [r0], #2048			@ linux version

eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY

bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
bic	r2, r2, #PTE_TYPE_MASK
orr	r2, r2, #PTE_TYPE_SMALL

tst	r3, #L_PTE_USER			@ user?
orrne	r2, r2, #PTE_SMALL_AP_URO_SRW

tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW

tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
movne	r2, #0

.if	\wc_disable
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
tst	r2, #PTE_CACHEABLE
bicne	r2, r2, #PTE_BUFFERABLE
#endif
.endif
str	r2, [r0]		@ hardware version
.endm

/*
* Xscale set_pte_ext translation, split into two halves to cope
* with work-arounds.  r3 must be preserved by code between these
* two macros.
*
* Permission translation:
*  YUWD  AP	SVC	User
*  0xxx  00	no acc	no acc
*  100x  00	r/o	no acc
*  10x0  00	r/o	no acc
*  1011  01	r/w	no acc
*  110x  10	r/w	r/o
*  11x0  10	r/w	r/o
*  1111  11	r/w	r/w
*/
.macro	xscale_set_pte_ext_prologue
str	r1, [r0]			@ linux version

eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY

bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
orr	r2, r2, #PTE_TYPE_EXT		@ extended page

tst	r3, #L_PTE_USER			@ user?
orrne	r2, r2, #PTE_EXT_AP_URO_SRW	@ yes -> user r/o, system r/w

tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
orreq	r2, r2, #PTE_EXT_AP_UNO_SRW	@ yes -> user n/a, system r/w
@ combined with user -> user r/w
.endm

.macro	xscale_set_pte_ext_epilogue
tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
movne	r2, #0				@ no -> fault

str	r2, [r0, #2048]!		@ hardware version
mov	ip, #0
mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
.endm

.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0
.type	\name\()_processor_functions, #object
.align 2
ENTRY(\name\()_processor_functions)
.word	\dabort
.word	\pabort
.word	cpu_\name\()_proc_init
.word	cpu_\name\()_proc_fin
.word	cpu_\name\()_reset
.word	cpu_\name\()_do_idle
.word	cpu_\name\()_dcache_clean_area
.word	cpu_\name\()_switch_mm

.if \nommu
.word	0
.else
.word	cpu_\name\()_set_pte_ext
.endif

.if \suspend
.word	cpu_\name\()_suspend_size
#ifdef CONFIG_PM_SLEEP
.word	cpu_\name\()_do_suspend
.word	cpu_\name\()_do_resume
#else
.word	0
.word	0
#endif
.else
.word	0
.word	0
.word	0
.endif

.size	\name\()_processor_functions, . - \name\()_processor_functions
.endm

.macro define_cache_functions name:req
.align 2
.type	\name\()_cache_fns, #object
ENTRY(\name\()_cache_fns)
.long	\name\()_flush_icache_all
.long	\name\()_flush_kern_cache_all
.long   \name\()_flush_kern_cache_louis
.long	\name\()_flush_user_cache_all
.long	\name\()_flush_user_cache_range
.long	\name\()_coherent_kern_range
.long	\name\()_coherent_user_range
.long	\name\()_flush_kern_dcache_area
.long	\name\()_dma_map_area
.long	\name\()_dma_unmap_area
.long	\name\()_dma_flush_range
.size	\name\()_cache_fns, . - \name\()_cache_fns
.endm

.macro define_tlb_functions name:req, flags_up:req, flags_smp
.type	\name\()_tlb_fns, #object
ENTRY(\name\()_tlb_fns)
.long	\name\()_flush_user_tlb_range
.long	\name\()_flush_kern_tlb_range
.ifnb \flags_smp
ALT_SMP(.long	\flags_smp )
ALT_UP(.long	\flags_up )
.else
.long	\flags_up
.endif
.size	\name\()_tlb_fns, . - \name\()_tlb_fns
.endm

.macro globl_equ x, y
.globl	\x
.equ	\x, \y
.endm


/*
* linux/arch/arm/mm/proc-v7.S
*
* Copyright (C) 2001 Deep Blue Solutions Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This is the "shell" of the ARMv7 processor support.
*/
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/hwcap.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>

#include "proc-macros.S"

#ifdef CONFIG_ARM_LPAE
#include "proc-v7-3level.S"
#else
#include "proc-v7-2level.S"
#endif

ENTRY(cpu_v7_proc_init)
mov pc, lr
ENDPROC(cpu_v7_proc_init)

ENTRY(cpu_v7_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
mov pc, lr
ENDPROC(cpu_v7_proc_fin)

/*
* cpu_v7_reset(loc)
*
* Perform a soft reset of the system. Put the CPU into the
* same state as it would be if it had been reset, and branch
* to what would be the reset vector.
*
* - loc - location to jump to for soft reset
*
* This code must be executed using a flat identity mapping with
* caches disabled.
*/
.align 5
.pushsection .idmap.text, "ax"
ENTRY(cpu_v7_reset)
mrc p15, 0, r1, c1, c0, 0 @ ctrl register
bic r1, r1, #0x1 @ ...............m
THUMB( bic r1, r1, #1 << 30 ) @ SCTLR.TE (Thumb exceptions)
mcr p15, 0, r1, c1, c0, 0 @ disable MMU
isb
bx r0
ENDPROC(cpu_v7_reset)
.popsection

/*
* cpu_v7_do_idle()
*
* Idle the processor (eg, wait for interrupt).
*
* IRQs are already disabled.
*/
ENTRY(cpu_v7_do_idle)
dsb @ WFI may enter a low-power mode
wfi
mov pc, lr
ENDPROC(cpu_v7_do_idle)

ENTRY(cpu_v7_dcache_clean_area)
ALT_SMP(W(nop)) @ MP extensions imply L1 PTW
ALT_UP_B(1f)
mov pc, lr
1: dcache_line_size r2, r3
2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, r2
subs r1, r1, r2
bhi 2b
dsb ishst
mov pc, lr
ENDPROC(cpu_v7_dcache_clean_area)

string cpu_v7_name, "ARMv7 Processor"
.align

/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */
.globl cpu_v7_suspend_size
.equ cpu_v7_suspend_size, 4 * 9
#ifdef CONFIG_ARM_CPU_SUSPEND
ENTRY(cpu_v7_do_suspend)
stmfd sp!, {r4 - r10, lr}
mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID
mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID
stmia r0!, {r4 - r5}
#ifdef CONFIG_MMU
mrc p15, 0, r6, c3, c0, 0 @ Domain ID
#ifdef CONFIG_ARM_LPAE
mrrc p15, 1, r5, r7, c2 @ TTB 1
#else
mrc p15, 0, r7, c2, c0, 1 @ TTB 1
#endif
mrc p15, 0, r11, c2, c0, 2 @ TTB control register
#endif
mrc p15, 0, r8, c1, c0, 0 @ Control register
mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register
mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control
stmia r0, {r5 - r11}
ldmfd sp!, {r4 - r10, pc}
ENDPROC(cpu_v7_do_suspend)

ENTRY(cpu_v7_do_resume)
mov ip, #0
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID
ldmia r0!, {r4 - r5}
mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID
mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID
ldmia r0, {r5 - r11}
#ifdef CONFIG_MMU
mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs
mcr p15, 0, r6, c3, c0, 0 @ Domain ID
#ifdef CONFIG_ARM_LPAE
mcrr p15, 0, r1, ip, c2 @ TTB 0
mcrr p15, 1, r5, r7, c2 @ TTB 1
#else
ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP)
ALT_UP(orr r1, r1, #TTB_FLAGS_UP)
mcr p15, 0, r1, c2, c0, 0 @ TTB 0
mcr p15, 0, r7, c2, c0, 1 @ TTB 1
#endif
mcr p15, 0, r11, c2, c0, 2 @ TTB control register
ldr r4, =PRRR @ PRRR
ldr r5, =NMRR @ NMRR
mcr p15, 0, r4, c10, c2, 0 @ write PRRR
mcr p15, 0, r5, c10, c2, 1 @ write NMRR
#endif /* CONFIG_MMU */
mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register
teq r4, r9 @ Is it already set?
mcrne p15, 0, r9, c1, c0, 1 @ No, so write it
mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control
isb
dsb
mov r0, r8 @ control register
b cpu_resume_mmu
ENDPROC(cpu_v7_do_resume)
#endif

#ifdef CONFIG_CPU_PJ4B
globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm
globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext
globl_equ cpu_pj4b_proc_init, cpu_v7_proc_init
globl_equ cpu_pj4b_proc_fin, cpu_v7_proc_fin
globl_equ cpu_pj4b_reset, cpu_v7_reset
#ifdef CONFIG_PJ4B_ERRATA_4742
ENTRY(cpu_pj4b_do_idle)
dsb @ WFI may enter a low-power mode
wfi
dsb @barrier
mov pc, lr
ENDPROC(cpu_pj4b_do_idle)
#else
globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle
#endif
globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area
globl_equ cpu_pj4b_do_suspend, cpu_v7_do_suspend
globl_equ cpu_pj4b_do_resume, cpu_v7_do_resume
globl_equ cpu_pj4b_suspend_size, cpu_v7_suspend_size

#endif

/*
* __v7_setup
*
* Initialise TLB, Caches, and MMU state ready to switch the MMU
* on. Return in r0 the new CP15 C1 control register setting.
*
* This should be able to cover all ARMv7 cores.
*
* It is assumed that:
* - cache type register is implemented
*/
__v7_ca5mp_setup:
__v7_ca9mp_setup:
__v7_cr7mp_setup:
mov r10, #(1 << 0) @ Cache/TLB ops broadcasting
b 1f
__v7_ca7mp_setup:
__v7_ca15mp_setup:
mov r10, #0
1:
#ifdef CONFIG_SMP
ALT_SMP(mrc p15, 0, r0, c1, c0, 1)
ALT_UP(mov r0, #(1 << 6)) @ fake it for UP
tst r0, #(1 << 6) @ SMP/nAMP mode enabled?
orreq r0, r0, #(1 << 6) @ Enable SMP/nAMP mode
orreq r0, r0, r10 @ Enable CPU-specific SMP bits
mcreq p15, 0, r0, c1, c0, 1
#endif
b __v7_setup

__v7_pj4b_setup:
#ifdef CONFIG_CPU_PJ4B

/* Auxiliary Debug Modes Control 1 Register */
#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */
#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */
#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */

/* Auxiliary Debug Modes Control 2 Register */
#define PJ4B_FAST_LDR (1 << 23) /* Disable fast LDR */
#define PJ4B_SNOOP_DATA (1 << 25) /* Do not interleave write and snoop data */
#define PJ4B_CWF (1 << 27) /* Disable Critical Word First feature */
#define PJ4B_OUTSDNG_NC (1 << 29) /* Disable outstanding non cacheable rqst */
#define PJ4B_L1_REP_RR (1 << 30) /* L1 replacement - Strict round robin */
#define PJ4B_AUX_DBG_CTRL2 (PJ4B_SNOOP_DATA | PJ4B_CWF |\
PJ4B_OUTSDNG_NC | PJ4B_L1_REP_RR)

/* Auxiliary Functional Modes Control Register 0 */
#define PJ4B_SMP_CFB (1 << 1) /* Set SMP mode. Join the coherency fabric */
#define PJ4B_L1_PAR_CHK (1 << 2) /* Support L1 parity checking */
#define PJ4B_BROADCAST_CACHE (1 << 8) /* Broadcast Cache and TLB maintenance */

/* Auxiliary Debug Modes Control 0 Register */
#define PJ4B_WFI_WFE (1 << 22) /* WFI/WFE - serve the DVM and back to idle */

/* Auxiliary Debug Modes Control 1 Register */
mrc p15, 1, r0, c15, c1, 1
orr r0, r0, #PJ4B_CLEAN_LINE
orr r0, r0, #PJ4B_INTER_PARITY
bic r0, r0, #PJ4B_STATIC_BP
mcr p15, 1, r0, c15, c1, 1

/* Auxiliary Debug Modes Control 2 Register */
mrc p15, 1, r0, c15, c1, 2
bic r0, r0, #PJ4B_FAST_LDR
orr r0, r0, #PJ4B_AUX_DBG_CTRL2
mcr p15, 1, r0, c15, c1, 2

/* Auxiliary Functional Modes Control Register 0 */
mrc p15, 1, r0, c15, c2, 0
#ifdef CONFIG_SMP
orr r0, r0, #PJ4B_SMP_CFB
#endif
orr r0, r0, #PJ4B_L1_PAR_CHK
orr r0, r0, #PJ4B_BROADCAST_CACHE
mcr p15, 1, r0, c15, c2, 0

/* Auxiliary Debug Modes Control 0 Register */
mrc p15, 1, r0, c15, c1, 0
orr r0, r0, #PJ4B_WFI_WFE
mcr p15, 1, r0, c15, c1, 0

#endif /* CONFIG_CPU_PJ4B */

__v7_setup:
adr r12, __v7_setup_stack @ the local stack
stmia r12, {r0-r5, r7, r9, r11, lr}
bl v7_flush_dcache_louis
ldmia r12, {r0-r5, r7, r9, r11, lr}

mrc p15, 0, r0, c0, c0, 0 @ read main ID register
and r10, r0, #0xff000000 @ ARM?
teq r10, #0x41000000
bne 3f
and r5, r0, #0x00f00000 @ variant
and r6, r0, #0x0000000f @ revision
orr r6, r6, r5, lsr #20-4 @ combine variant and revision
ubfx r0, r0, #4, #12 @ primary part number

/* Cortex-A8 Errata */
ldr r10, =0x00000c08 @ Cortex-A8 primary part number
teq r0, r10
bne 2f
#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)

teq r5, #0x00100000 @ only present in r1p*
mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
orreq r10, r10, #(1 << 6) @ set IBE to 1
mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
#endif
#ifdef CONFIG_ARM_ERRATA_458693
teq r6, #0x20 @ only present in r2p0
mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
orreq r10, r10, #(1 << 5) @ set L1NEON to 1
orreq r10, r10, #(1 << 9) @ set PLDNOP to 1
mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
#endif
#ifdef CONFIG_ARM_ERRATA_460075
teq r6, #0x20 @ only present in r2p0
mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register
tsteq r10, #1 << 22
orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit
mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register
#endif
b 3f

/* Cortex-A9 Errata */
2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number
teq r0, r10
bne 3f
#ifdef CONFIG_ARM_ERRATA_742230
cmp r6, #0x22 @ only present up to r2p2
mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register
orrle r10, r10, #1 << 4 @ set bit #4
mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_742231
teq r6, #0x20 @ present in r2p0
teqne r6, #0x21 @ present in r2p1
teqne r6, #0x22 @ present in r2p2
mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
orreq r10, r10, #1 << 12 @ set bit #12
orreq r10, r10, #1 << 22 @ set bit #22
mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_743622
teq r5, #0x00200000 @ only present in r2p*
mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
orreq r10, r10, #1 << 6 @ set bit #6
mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
ALT_SMP(cmp r6, #0x30) @ present prior to r3p0
ALT_UP_B(1f)
mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register
orrlt r10, r10, #1 << 11 @ set bit #11
mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register
1:
#endif

/* Cortex-A15 Errata */
3: ldr r10, =0x00000c0f @ Cortex-A15 primary part number
teq r0, r10
bne 4f

#ifdef CONFIG_ARM_ERRATA_773022
cmp r6, #0x4 @ only present up to r0p4
mrcle p15, 0, r10, c1, c0, 1 @ read aux control register
orrle r10, r10, #1 << 1 @ disable loop buffer
mcrle p15, 0, r10, c1, c0, 1 @ write aux control register
#endif

4: mov r10, #0
mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate
#ifdef CONFIG_MMU
mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs
v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup
ldr r5, =PRRR @ PRRR
ldr r6, =NMRR @ NMRR
mcr p15, 0, r5, c10, c2, 0 @ write PRRR
mcr p15, 0, r6, c10, c2, 1 @ write NMRR
#endif
dsb @ Complete invalidations
#ifndef CONFIG_ARM_THUMBEE
mrc p15, 0, r0, c0, c1, 0 @ read ID_PFR0 for ThumbEE
and r0, r0, #(0xf << 12) @ ThumbEE enabled field
teq r0, #(1 << 12) @ check if ThumbEE is present
bne 1f
mov r5, #0
mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0
mrc p14, 6, r0, c0, c0, 0 @ load TEECR
orr r0, r0, #1 @ set the 1st bit in order to
mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access
1:
#endif
adr r5, v7_crval
ldmia r5, {r5, r6}
ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables
#ifdef CONFIG_SWP_EMULATE
orr r5, r5, #(1 << 10) @ set SW bit in "clear"
bic r6, r6, #(1 << 10) @ clear it in "mmuset"
#endif
mrc p15, 0, r0, c1, c0, 0 @ read control register
bic r0, r0, r5 @ clear the bits
orr r0, r0, r6 @ set them
THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions
mov pc, lr @ return to head.S:__ret
ENDPROC(__v7_setup)

.align 2
__v7_setup_stack:
.space 4 * 11 @ 11 registers

__INITDATA

@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
#ifdef CONFIG_CPU_PJ4B
define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
#endif

.section ".rodata"

string cpu_arch_name, "armv7"
string cpu_elf_name, "v7"
.align

.section ".proc.info.init", #alloc, #execinstr

/*
* Standard v7 proc info content
*/
.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions
ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags)
ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags)
.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags
W(b) \initfunc
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \
HWCAP_EDSP | HWCAP_TLS | \hwcaps
.long cpu_v7_name
.long \proc_fns
.long v7wbi_tlb_fns
.long v6_user_fns
.long v7_cache_fns
.endm

#ifndef CONFIG_ARM_LPAE
/*
* ARM Ltd. Cortex A5 processor.
*/
.type __v7_ca5mp_proc_info, #object
__v7_ca5mp_proc_info:
.long 0x410fc050
.long 0xff0ffff0
__v7_proc __v7_ca5mp_setup
.size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info

/*
* ARM Ltd. Cortex A9 processor.
*/
.type __v7_ca9mp_proc_info, #object
__v7_ca9mp_proc_info:
.long 0x410fc090
.long 0xff0ffff0
__v7_proc __v7_ca9mp_setup
.size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info

#endif /* CONFIG_ARM_LPAE */

/*
* Marvell PJ4B processor.
*/
#ifdef CONFIG_CPU_PJ4B
.type __v7_pj4b_proc_info, #object
__v7_pj4b_proc_info:
.long 0x560f5800
.long 0xff0fff00
__v7_proc __v7_pj4b_setup, proc_fns = pj4b_processor_functions
.size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info
#endif

/*
* ARM Ltd. Cortex R7 processor.
*/
.type __v7_cr7mp_proc_info, #object
__v7_cr7mp_proc_info:
.long 0x410fc170
.long 0xff0ffff0
__v7_proc __v7_cr7mp_setup
.size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info

/*
* ARM Ltd. Cortex A7 processor.
*/
.type __v7_ca7mp_proc_info, #object
__v7_ca7mp_proc_info:
.long 0x410fc070
.long 0xff0ffff0
__v7_proc __v7_ca7mp_setup
.size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info

/*
* ARM Ltd. Cortex A15 processor.
*/
.type __v7_ca15mp_proc_info, #object
__v7_ca15mp_proc_info:
.long 0x410fc0f0
.long 0xff0ffff0
__v7_proc __v7_ca15mp_setup
.size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info

/*
* Qualcomm Inc. Krait processors.
*/
.type __krait_proc_info, #object
__krait_proc_info:
.long 0x510f0400 @ Required ID value
.long 0xff0ffc00 @ Mask for ID
/*
* Some Krait processors don't indicate support for SDIV and UDIV
* instructions in the ARM instruction set, even though they actually
* do support them.
*/
__v7_proc __v7_setup, hwcaps = HWCAP_IDIV
.size __krait_proc_info, . - __krait_proc_info

/*
* Match any ARMv7 processor core.
*/
.type __v7_proc_info, #object
__v7_proc_info:
.long 0x000f0000 @ Required ID value
.long 0x000f0000 @ Mask for ID
__v7_proc __v7_setup
.size __v7_proc_info, . - __v7_proc_info
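Finally, the way a .proc.info.init entry is selected at boot is simply a masked compare of the MIDR against the required value; a hedged sketch of the idea:

/* Sketch only: how the boot code conceptually matches a proc_info entry. */
static int proc_info_matches(unsigned int midr, unsigned int value, unsigned int mask)
{
	return (midr & mask) == value;
}

For example, any Cortex-A15 (such as the A15 cores in the DRA726) satisfies (midr & 0xff0ffff0) == 0x410fc0f0, so __v7_ca15mp_proc_info is chosen, which points at __v7_ca15mp_setup, v7_processor_functions and the v7_cache_fns table built by define_cache_functions.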