arch/sh/mm/cache-sh4.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401

/*
 * arch/sh/mm/cache-sh4.c
 *
 * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
 * Copyright (C) 2001 - 2009  Paul Mundt
 * Copyright (C) 2003  Richard Curnow
 * Copyright (c) 2007 STMicroelectronics (R&D) Ltd.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>

/*
 * The maximum number of pages we support up to when doing ranged dcache
 * flushing. Anything exceeding this will simply flush the dcache in its
 * entirety.
 */
#define MAX_ICACHE_PAGES	32

static void __flush_cache_4096(unsigned long addr, unsigned long phys,
			       unsigned long exec_offset);

/*
 * Write back the range of D-cache, and purge the I-cache.
 *
 * Called from kernel/module.c:sys_init_module and routine for a.out format,
 * signal handler code and kprobes code
 */
static void __uses_jump_to_uncached sh4_flush_icache_range(void *args)
{
	struct flusher_data *data = args;
	unsigned long start, end;
	unsigned long flags, v;
	int i;

	start = data->addr1;
	end = data->addr2;

	/* If there are too many pages then just blow away the caches */
	if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
		local_flush_cache_all(NULL);
		return;
	}

	/*
	 * Selectively flush d-cache then invalidate the i-cache.
	 * This is inefficient, so only use this for small ranges.
	 */
	start &= ~(L1_CACHE_BYTES-1);
	end += L1_CACHE_BYTES-1;
	end &= ~(L1_CACHE_BYTES-1);

	local_irq_save(flags);
	jump_to_uncached();

	for (v = start; v < end; v += L1_CACHE_BYTES) {
		unsigned long icacheaddr;

		__ocbwb(v);

		icacheaddr = CACHE_IC_ADDRESS_ARRAY | (v &
				cpu_data->icache.entry_mask);

		/* Clear i-cache line valid-bit */
		for (i = 0; i < cpu_data->icache.ways; i++) {
			__raw_writel(0, icacheaddr);
			icacheaddr += cpu_data->icache.way_incr;
		}
	}

	back_to_cached();
	local_irq_restore(flags);
}

static inline void flush_cache_4096(unsigned long start,
				    unsigned long phys)
{
	unsigned long flags, exec_offset = 0;

	/*
	 * All types of SH-4 require PC to be uncached to operate on the I-cache.
	 * Some types of SH-4 require PC to be uncached to operate on the D-cache.
	 */
	if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) ||
	    (start < CACHE_OC_ADDRESS_ARRAY))
		exec_offset = cached_to_uncached;

	local_irq_save(flags);
	__flush_cache_4096(start | SH_CACHE_ASSOC,
			   virt_to_phys(phys), exec_offset);
	local_irq_restore(flags);
}

/*
 * Write back & invalidate the D-cache of the page.
 * (To avoid "alias" issues)
 */
static void sh4_flush_dcache_page(void *arg)
{
	struct page *page = arg;
#ifndef CONFIG_SMP
	struct address_space *mapping = page_mapping(page);

	if (mapping && !mapping_mapped(mapping))
		set_bit(PG_dcache_dirty, &page->flags);
	else
#endif
	{
		unsigned long phys = page_to_phys(page);
		unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
		int i, n;

		/* Loop all the D-cache */
		n = boot_cpu_data.dcache.way_incr >> 12;
		for (i = 0; i < n; i++, addr += 4096)
			flush_cache_4096(addr, phys);
	}

	wmb();
}

/* TODO: Selective icache invalidation through IC address array.. */
static void __uses_jump_to_uncached flush_icache_all(void)
{
	unsigned long flags, ccr;

	local_irq_save(flags);
	jump_to_uncached();

	/* Flush I-cache */
	ccr = ctrl_inl(CCR);
	ccr |= CCR_CACHE_ICI;
	ctrl_outl(ccr, CCR);

	/*
	 * back_to_cached() will take care of the barrier for us, don't add
	 * another one!
	 */

	back_to_cached();
	local_irq_restore(flags);
}

static void flush_dcache_all(void)
{
	unsigned long addr, end_addr, entry_offset;

	end_addr = CACHE_OC_ADDRESS_ARRAY +
		(current_cpu_data.dcache.sets <<
		 current_cpu_data.dcache.entry_shift) *
			current_cpu_data.dcache.ways;

	entry_offset = 1 << current_cpu_data.dcache.entry_shift;

	for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) {
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
	}
}

static void sh4_flush_cache_all(void *unused)
{
	flush_dcache_all();
	flush_icache_all();
}

/*
 * Note : (RPC) since the caches are physically tagged, the only point
 * of flush_cache_mm for SH-4 is to get rid of aliases from the
 * D-cache.  The assumption elsewhere, e.g. flush_cache_range, is that
 * lines can stay resident so long as the virtual address they were
 * accessed with (hence cache set) is in accord with the physical
 * address (i.e. tag).  It's no different here.
 *
 * Caller takes mm->mmap_sem.
 */
static void sh4_flush_cache_mm(void *arg)
{
	struct mm_struct *mm = arg;

	if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
		return;

	flush_dcache_all();
}

/*
 * Write back and invalidate I/D-caches for the page.
 *
 * ADDR: Virtual Address (U0 address)
 * PFN: Physical page number
 */
static void sh4_flush_cache_page(void *args)
{
	struct flusher_data *data = args;
	struct vm_area_struct *vma;
	struct page *page;
	unsigned long address, pfn, phys;
	int map_coherent = 0;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	void *vaddr;

	vma = data->vma;
	address = data->addr1;
	pfn = data->addr2;
	phys = pfn << PAGE_SHIFT;
	page = pfn_to_page(pfn);

	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
		return;

	address &= PAGE_MASK;
	pgd = pgd_offset(vma->vm_mm, address);
	pud = pud_offset(pgd, address);
	pmd = pmd_offset(pud, address);
	pte = pte_offset_kernel(pmd, address);

	/* If the page isn't present, there is nothing to do here. */
	if (!(pte_val(*pte) & _PAGE_PRESENT))
		return;

	if ((vma->vm_mm == current->active_mm))
		vaddr = NULL;
	else {
		/*
		 * Use kmap_coherent or kmap_atomic to do flushes for
		 * another ASID than the current one.
		 */
		map_coherent = (current_cpu_data.dcache.n_aliases &&
			!test_bit(PG_dcache_dirty, &page->flags) &&
			page_mapped(page));
		if (map_coherent)
			vaddr = kmap_coherent(page, address);
		else
			vaddr = kmap_atomic(page, KM_USER0);

		address = (unsigned long)vaddr;
	}

	if (pages_do_alias(address, phys))
		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY |
			(address & shm_align_mask), phys);

	if (vma->vm_flags & VM_EXEC)
		flush_icache_all();

	if (vaddr) {
		if (map_coherent)
			kunmap_coherent(vaddr);
		else
			kunmap_atomic(vaddr, KM_USER0);
	}
}

/*
 * Write back and invalidate D-caches.
 *
 * START, END: Virtual Address (U0 address)
 *
 * NOTE: We need to flush the _physical_ page entry.
 * Flushing the cache lines for U0 only isn't enough.
 * We need to flush for P1 too, which may contain aliases.
 */
static void sh4_flush_cache_range(void *args)
{
	struct flusher_data *data = args;
	struct vm_area_struct *vma;
	unsigned long start, end;

	vma = data->vma;
	start = data->addr1;
	end = data->addr2;

	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
		return;

	/*
	 * If cache is only 4k-per-way, there are never any 'aliases'.  Since
	 * the cache is physically tagged, the data can just be left in there.
	 */
	if (boot_cpu_data.dcache.n_aliases == 0)
		return;

	flush_dcache_all();

	if (vma->vm_flags & VM_EXEC)
		flush_icache_all();
}

/**
 * __flush_cache_4096
 *
 * @addr:  address in memory mapped cache array
 * @phys:  P1 address to flush (has to match tags if addr has 'A' bit
 *         set i.e. associative write)
 * @exec_offset: set to 0x20000000 if flush has to be executed from P2
 *               region else 0x0
 *
 * The offset into the cache array implied by 'addr' selects the
 * 'colour' of the virtual address range that will be flushed.  The
 * operation (purge/write-back) is selected by the lower 2 bits of
 * 'phys'.
 */
static void __flush_cache_4096(unsigned long addr, unsigned long phys,
			       unsigned long exec_offset)
{
	int way_count;
	unsigned long base_addr = addr;
	struct cache_info *dcache;
	unsigned long way_incr;
	unsigned long a, ea, p;
	unsigned long temp_pc;

	dcache = &boot_cpu_data.dcache;
	/* Write this way for better assembly. */
	way_count = dcache->ways;
	way_incr = dcache->way_incr;

	/*
	 * Apply exec_offset (i.e. branch to P2 if required.).
	 *
	 * FIXME:
	 *
	 *	If I write "=r" for the (temp_pc), it puts this in r6 hence
	 *	trashing exec_offset before it's been added on - why?  Hence
	 *	"=&r" as a 'workaround'
	 */
	asm volatile("mov.l 1f, %0\n\t"
		     "add   %1, %0\n\t"
		     "jmp   @%0\n\t"
		     "nop\n\t"
		     ".balign 4\n\t"
		     "1:  .long 2f\n\t"
		     "2:\n" : "=&r" (temp_pc) : "r" (exec_offset));

	/*
	 * We know there will be >=1 iteration, so write as do-while to avoid
	 * pointless nead-of-loop check for 0 iterations.
	 */
	do {
		ea = base_addr + 4096;
		a = base_addr;
		p = phys;

		do {
			*(volatile unsigned long *)a = p;
			/*
			 * Next line: intentionally not p+32, saves an add, p
			 * will do since only the cache tag bits need to
			 * match.
			 */
			*(volatile unsigned long *)(a+32) = p;
			a += 64;
			p += 64;
		} while (a < ea);

		base_addr += way_incr;
	} while (--way_count != 0);
}

extern void __weak sh4__flush_region_init(void);

/*
 * SH-4 has virtually indexed and physically tagged cache.
 */
void __init sh4_cache_init(void)
{
	printk("PVR=%08x CVR=%08x PRR=%08x\n",
		ctrl_inl(CCN_PVR),
		ctrl_inl(CCN_CVR),
		ctrl_inl(CCN_PRR));

	local_flush_icache_range	= sh4_flush_icache_range;
	local_flush_dcache_page		= sh4_flush_dcache_page;
	local_flush_cache_all		= sh4_flush_cache_all;
	local_flush_cache_mm		= sh4_flush_cache_mm;
	local_flush_cache_dup_mm	= sh4_flush_cache_mm;
	local_flush_cache_page		= sh4_flush_cache_page;
	local_flush_cache_range		= sh4_flush_cache_range;

	sh4__flush_region_init();
}