/* * arch/metag/mm/cache.c * * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies. * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License version 2 as published by the * Free Software Foundation. * * Cache control code */ #include #include #include #include #include #include #include #include #define DEFAULT_CACHE_WAYS_LOG2 2 /* * Size of a set in the caches. Initialised for default 16K stride, adjusted * according to values passed through TBI global heap segment via LDLK (on ATP) * or config registers (on HTP/MTP) */ static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2 - DEFAULT_CACHE_WAYS_LOG2; static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2 - DEFAULT_CACHE_WAYS_LOG2; /* * The number of sets in the caches. Initialised for HTP/ATP, adjusted * according to NOMMU setting in config registers */ static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2; static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2; #ifndef CONFIG_METAG_META12 /** * metag_lnkget_probe() - Probe whether lnkget/lnkset go around the cache */ static volatile u32 lnkget_testdata[16] __initdata __aligned(64); #define LNKGET_CONSTANT 0xdeadbeef static void __init metag_lnkget_probe(void) { int temp; long flags; /* * It's conceivable the user has configured a globally coherent cache * shared with non-Linux hardware threads, so use LOCK2 to prevent them * from executing and causing cache eviction during the test. */ __global_lock2(flags); /* read a value to bring it into the cache */ (void)lnkget_testdata[0]; lnkget_testdata[0] = 0; /* lnkget/lnkset it to modify it */ asm volatile( "1: LNKGETD %0, [%1]\n" " LNKSETD [%1], %2\n" " DEFR %0, TXSTAT\n" " ANDT %0, %0, #HI(0x3f000000)\n" " CMPT %0, #HI(0x02000000)\n" " BNZ 1b\n" : "=&d" (temp) : "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT) : "cc"); /* re-read it to see if the cached value changed */ temp = lnkget_testdata[0]; __global_unlock2(flags); /* flush the cache line to fix any incoherency */ __builtin_dcache_flush((void *)&lnkget_testdata[0]); #if defined(CONFIG_METAG_LNKGET_AROUND_CACHE) /* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */ if (temp == LNKGET_CONSTANT) pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n"); #elif defined(CONFIG_METAG_ATOMICITY_LNKGET) /* * if the cache is wrong, LNKGET_AROUND_CACHE is really necessary * because the kernel is configured to use LNKGET/SET for atomicity */ WARN(temp != LNKGET_CONSTANT, "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n" "Expect kernel failure as it's used for atomicity primitives\n"); #elif defined(CONFIG_SMP) /* * if the cache is wrong, LNKGET_AROUND_CACHE should be used or the * gateway page won't flush and userland could break. */ WARN(temp != LNKGET_CONSTANT, "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n" "Expect userland failure as it's used for user gateway page\n"); #else /* * if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it * doesn't actually matter as it doesn't have any effect on !SMP && * !ATOMICITY_LNKGET. */ if (temp != LNKGET_CONSTANT) pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"); #endif } #endif /* !CONFIG_METAG_META12 */ /** * metag_cache_probe() - Probe L1 cache configuration. * * Probe the L1 cache configuration to aid the L1 physical cache flushing * functions. */ void __init metag_cache_probe(void) { #ifndef CONFIG_METAG_META12 int coreid = metag_in32(METAC_CORE_ID); int config = metag_in32(METAC_CORE_CONFIG2); int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS; if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 || cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) { icache_sets_log2 = 1; dcache_sets_log2 = 1; } /* For normal size caches, the smallest size is 4Kb. For small caches, the smallest size is 64b */ icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT) ? 6 : 12; icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS) >> METAC_CORE_C2ICSZ_S; icache_set_shift -= icache_sets_log2; dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT) ? 6 : 12; dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS) >> METAC_CORECFG2_DCSZ_S; dcache_set_shift -= dcache_sets_log2; metag_lnkget_probe(); #else /* Extract cache sizes from global heap segment */ unsigned long val, u; int width, shift, addend; PTBISEG seg; seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL, TBID_SEGSCOPE_GLOBAL, TBID_SEGTYPE_HEAP)); if (seg != NULL) { val = seg->Data[1]; /* Work out width of I-cache size bit-field */ u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS) >> METAG_TBI_ICACHE_SIZE_S; width = 0; while (u & 1) { width++; u >>= 1; } /* Extract sign-extended size addend value */ shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width); addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS) << shift) >> (shift + METAG_TBI_ICACHE_SIZE_S); /* Now calculate I-cache set size */ icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2 - DEFAULT_CACHE_WAYS_LOG2) + addend; /* Similarly for D-cache */ u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS) >> METAG_TBI_DCACHE_SIZE_S; width = 0; while (u & 1) { width++; u >>= 1; } shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width); addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS) << shift) >> (shift + METAG_TBI_DCACHE_SIZE_S); dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2 - DEFAULT_CACHE_WAYS_LOG2) + addend; } #endif } static void metag_phys_data_cache_flush(const void *start) { unsigned long flush0, flush1, flush2, flush3; int loops, step; int thread; int part, offset; int set_shift; /* Use a sequence of writes to flush the cache region requested */ thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS) >> TXENABLE_THREAD_S; /* Cache is broken into sets which lie in contiguous RAMs */ set_shift = dcache_set_shift; /* Move to the base of the physical cache flush region */ flush0 = LINSYSCFLUSH_DCACHE_LINE; step = 64; /* Get partition data for this thread */ part = metag_in32(SYSC_DCPART0 + (SYSC_xCPARTn_STRIDE * thread)); if ((int)start < 0) /* Access Global vs Local partition */ part >>= SYSC_xCPARTG_AND_S - SYSC_xCPARTL_AND_S; /* Extract offset and move SetOff */ offset = (part & SYSC_xCPARTL_OR_BITS) >> SYSC_xCPARTL_OR_S; flush0 += (offset << (set_shift - 4)); /* Shrink size */ part = (part & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S; loops = ((part + 1) << (set_shift - 4)); /* Reduce loops by step of cache line size */ loops /= step; flush1 = flush0 + (1 << set_shift); flush2 = flush0 + (2 << set_shift); flush3 = flush0 + (3 << set_shift); if (dcache_sets_log2 == 1) { flush2 = flush1; flush3 = flush1 + step; flush1 = flush0 + step; step <<= 1; loops >>= 1; } /* Clear loops ways in cache */ while (loops-- != 0) { /* Clear the ways. */ #if 0 /* * GCC doesn't generate very good code for this so we * provide inline assembly instead. */ metag_out8(0, flush0); metag_out8(0, flush1); metag_out8(0, flush2); metag_out8(0, flush3); flush0 += step; flush1 += step; flush2 += step; flush3 += step; #else asm volatile ( "SETB\t[%0+%4++],%5\n" "SETB\t[%1+%4++],%5\n" "SETB\t[%2+%4++],%5\n" "SETB\t[%3+%4++],%5\n" : "+e" (flush0), "+e" (flush1), "+e" (flush2), "+e" (flush3) : "e" (step), "a" (0)); #endif } } void metag_data_cache_flush_all(const void *start) { if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0) /* No need to flush the data cache it's not actually enabled */ return; metag_phys_data_cache_flush(start); } void metag_data_cache_flush(const void *start, int bytes) { unsigned long flush0; int loops, step; if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0) /* No need to flush the data cache it's not actually enabled */ return; if (bytes >= 4096) { metag_phys_data_cache_flush(start); return; } /* Use linear cache flush mechanism on META IP */ flush0 = (int)start; loops = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes + (DCACHE_LINE_BYTES - 1); loops >>= DCACHE_LINE_S; #define PRIM_FLUSH(addr, offset) do { \ int __addr = ((int) (addr)) + ((offset) * 64); \ __builtin_dcache_flush((void *)(__addr)); \ } while (0) #define LOOP_INC (4*64) do { /* By default stop */ step = 0; switch (loops) { /* Drop Thru Cases! */ default: PRIM_FLUSH(flush0, 3); loops -= 4; step = 1; case 3: PRIM_FLUSH(flush0, 2); case 2: PRIM_FLUSH(flush0, 1); case 1: PRIM_FLUSH(flush0, 0); flush0 += LOOP_INC; case 0: break; } } while (step); } EXPORT_SYMBOL(metag_data_cache_flush); static void metag_phys_code_cache_flush(const void *start, int bytes) { unsigned long flush0, flush1, flush2, flush3, end_set; int loops, step; int thread; int set_shift, set_size; int part, offset; /* Use a sequence of writes to flush the cache region requested */ thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS) >> TXENABLE_THREAD_S; set_shift = icache_set_shift; /* Move to the base of the physical cache flush region */ flush0 = LINSYSCFLUSH_ICACHE_LINE; step = 64; /* Get partition code for this thread */ part = metag_in32(SYSC_ICPART0 + (SYSC_xCPARTn_STRIDE * thread)); if ((int)start < 0) /* Access Global vs Local partition */ part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S; /* Extract offset and move SetOff */ offset = (part & SYSC_xCPARTL_OR_BITS) >> SYSC_xCPARTL_OR_S; flush0 += (offset << (set_shift - 4)); /* Shrink size */ part = (part & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S; loops = ((part + 1) << (set_shift - 4)); /* Where does the Set end? */ end_set = flush0 + loops; set_size = loops; #ifdef CONFIG_METAG_META12 if ((bytes < 4096) && (bytes < loops)) { /* Unreachable on HTP/MTP */ /* Only target the sets that could be relavent */ flush0 += (loops - step) & ((int) start); loops = (((int) start) & (step-1)) + bytes + step - 1; } #endif /* Reduce loops by step of cache line size */ loops /= step; flush1 = flush0 + (1<>= 1; } /* Clear loops ways in cache */ while (loops-- != 0) { #if 0 /* * GCC doesn't generate very good code for this so we * provide inline assembly instead. */ /* Clear the ways */ metag_out8(0, flush0); metag_out8(0, flush1); metag_out8(0, flush2); metag_out8(0, flush3); flush0 += step; flush1 += step; flush2 += step; flush3 += step; #else asm volatile ( "SETB\t[%0+%4++],%5\n" "SETB\t[%1+%4++],%5\n" "SETB\t[%2+%4++],%5\n" "SETB\t[%3+%4++],%5\n" : "+e" (flush0), "+e" (flush1), "+e" (flush2), "+e" (flush3) : "e" (step), "a" (0)); #endif if (flush0 == end_set) { /* Wrap within Set 0 */ flush0 -= set_size; flush1 -= set_size; flush2 -= set_size; flush3 -= set_size; } } } void metag_code_cache_flush_all(const void *start) { if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0) /* No need to flush the code cache it's not actually enabled */ return; metag_phys_code_cache_flush(start, 4096); } EXPORT_SYMBOL(metag_code_cache_flush_all); void metag_code_cache_flush(const void *start, int bytes) { #ifndef CONFIG_METAG_META12 void *flush; int loops, step; #endif /* !CONFIG_METAG_META12 */ if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0) /* No need to flush the code cache it's not actually enabled */ return; #ifdef CONFIG_METAG_META12 /* CACHEWD isn't available on Meta1, so always do full cache flush */ metag_phys_code_cache_flush(start, bytes); #else /* CONFIG_METAG_META12 */ /* If large size do full physical cache flush */ if (bytes >= 4096) { metag_phys_code_cache_flush(start, bytes); return; } /* Use linear cache flush mechanism on META IP */ flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1)); loops = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes + (ICACHE_LINE_BYTES-1); loops >>= ICACHE_LINE_S; #define PRIM_IFLUSH(addr, offset) \ __builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT) #define LOOP_INC (4*64) do { /* By default stop */ step = 0; switch (loops) { /* Drop Thru Cases! */ default: PRIM_IFLUSH(flush, 3); loops -= 4; step = 1; case 3: PRIM_IFLUSH(flush, 2); case 2: PRIM_IFLUSH(flush, 1); case 1: PRIM_IFLUSH(flush, 0); flush += LOOP_INC; case 0: break; } } while (step); #endif /* !CONFIG_METAG_META12 */ } EXPORT_SYMBOL(metag_code_cache_flush);